scrapio 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 0b01cff647c80bdc948a24eef3b9dfe4095466678b6eda74cf01bbfc6a706c57
4
+ data.tar.gz: '08880b616f26a60b6ce18949253cd96c2d24aca227e702843b6371dcab60c7d6'
5
+ SHA512:
6
+ metadata.gz: 0f321767a8ef0e4775ee52836ba7fa620df758ad341c3d19abf5d52ddaf2a90ba4e9c2e46b22d76d7cbc3ee056cbed63b34d8801d5a3737dbd471c51768122e4
7
+ data.tar.gz: da66af15d073cce2719fab455fd940d272215d76e3abb894c332197ed817d9b79a1392302e04c85f74e896d9726bd4957886d8f833cf34a949906c932fdd8c80
data/README.md ADDED
@@ -0,0 +1,140 @@
1
+ # scrapio
2
+
3
+ Official Ruby SDK for [Scrapio](https://scrapio.dev) — fetch, crawl, search, and extract structured data from any URL.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ gem install scrapio
9
+ ```
10
+
11
+ Or add to your Gemfile:
12
+
13
+ ```ruby
14
+ gem "scrapio"
15
+ ```
16
+
17
+ Requires Ruby 3.2 or later (the SDK uses endless method definitions and `URI.encode_uri_component`).
18
+
19
+ ## Quickstart
20
+
21
+ ```ruby
22
+ require "scrapio"
23
+
24
+ client = Scrapio::Client.new(ENV["SCRAPIO_API_KEY"])
25
+
26
+ result = client.fetch.fetch(url: "https://example.com", output: ["markdown"])
27
+ puts result["outputs"]["markdown"]
28
+ ```
29
+
30
+ ## Usage
31
+
32
+ ### Fetch a page
33
+
34
+ ```ruby
35
+ result = client.fetch.fetch(
36
+ url: "https://news.ycombinator.com",
37
+ render_js: true,
38
+ output: ["markdown"]
39
+ )
40
+ ```
41
+
42
+ ### Google Search
43
+
44
+ ```ruby
45
+ results = client.google.search(
46
+ search: "best web scraping API 2025",
47
+ country_code: "us"
48
+ )
49
+ puts results["results"]
50
+ ```
51
+
52
+ ### Amazon product
53
+
54
+ ```ruby
55
+ product = client.amazon.get_product("B08N5WRWNW")
56
+ puts "#{product["title"]} — $#{product["price"]}"
57
+ ```
58
+
59
+ ### Walmart search
60
+
61
+ ```ruby
62
+ items = client.walmart.search("headphones")
63
+ ```
64
+
65
+ ### YouTube video
66
+
67
+ ```ruby
68
+ video = client.youtube.get_video("dQw4w9WgXcQ")
69
+ ```
70
+
71
+ ### Browser automation
72
+
73
+ ```ruby
74
+ result = client.interact.interact(
75
+ url: "https://example.com",
76
+ actions: [
77
+ { type: "click", selector: "#login" },
78
+ { type: "type", selector: "#email", value: "user@example.com" },
79
+ ]
80
+ )
81
+ ```
82
+
83
+ ### Crawl a site
84
+
85
+ ```ruby
86
+ result = client.crawl.crawl(
87
+ seeds: ["https://docs.example.com"],
88
+ max_pages: 50,
89
+ output: ["markdown"]
90
+ )
91
+ puts result["result"]["summary"]["pages_succeeded"]
92
+ ```
93
+
94
+ ### Async jobs
95
+
96
+ ```ruby
97
+ job = client.jobs.create(
98
+ job_type: "fetch",
99
+ payload: { url: "https://example.com", output: ["markdown"] }
100
+ )
101
+
102
+ result = client.jobs.wait_for_completion(job["job_id"])
103
+ ```
104
+
105
+ ## Configuration
106
+
107
+ ```ruby
108
+ client = Scrapio::Client.new(
109
+ ENV["SCRAPIO_API_KEY"],
110
+ base_url: "https://api.scrapio.dev", # optional override
111
+ timeout: 30 # optional, default 30s
112
+ )
113
+ ```
114
+
115
+ ## Error handling
116
+
117
+ ```ruby
118
+ begin
119
+ result = client.fetch.fetch(url: "https://example.com")
120
+ rescue Scrapio::AuthError
121
+ puts "Invalid API key"
122
+ rescue Scrapio::CreditsExhaustedError
123
+ puts "No credits remaining"
124
+ rescue Scrapio::RateLimitError
125
+ puts "Rate limited — back off and retry"
126
+ rescue Scrapio::ScrapioError => e
127
+ puts "API error #{e.status_code}: #{e.message}"
128
+ end
129
+ ```
130
+
131
+ ## Links
132
+
133
+ - [Documentation](https://scrapio.dev/docs)
134
+ - [API Reference](https://scrapio.dev/docs/api-reference/fetch)
135
+ - [Dashboard](https://app.scrapio.dev)
136
+ - [Get an API key](https://scrapio.dev#pricing)
137
+
138
+ ## License
139
+
140
+ MIT
@@ -0,0 +1,28 @@
1
+ require_relative "http_client"
2
+ require_relative "resources/fetch"
3
+ require_relative "resources/google"
4
+ require_relative "resources/amazon"
5
+ require_relative "resources/walmart"
6
+ require_relative "resources/youtube"
7
+ require_relative "resources/jobs"
8
+ require_relative "resources/crawl"
9
+ require_relative "resources/interact"
10
+
11
+ module Scrapio
12
+ class Client
13
+ attr_reader :fetch, :google, :amazon, :walmart, :youtube, :jobs, :crawl, :interact
14
+
15
+ def initialize(api_key, base_url: HttpClient::DEFAULT_BASE_URL, timeout: HttpClient::DEFAULT_TIMEOUT)
16
+ http = HttpClient.new(api_key, base_url: base_url, timeout: timeout)
17
+
18
+ @fetch = Resources::Fetch.new(http)
19
+ @google = Resources::Google.new(http)
20
+ @amazon = Resources::Amazon.new(http)
21
+ @walmart = Resources::Walmart.new(http)
22
+ @youtube = Resources::YouTube.new(http)
23
+ @jobs = Resources::Jobs.new(http)
24
+ @crawl = Resources::Crawl.new(http)
25
+ @interact = Resources::Interact.new(http)
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,14 @@
1
+ module Scrapio
2
+ class ScrapioError < StandardError
3
+ attr_reader :status_code
4
+
5
+ def initialize(message, status_code: 0)
6
+ super(message)
7
+ @status_code = status_code
8
+ end
9
+ end
10
+
11
+ class AuthError < ScrapioError; end
12
+ class RateLimitError < ScrapioError; end
13
+ class CreditsExhaustedError < ScrapioError; end
14
+ end
@@ -0,0 +1,53 @@
1
+ require "net/http"
2
+ require "uri"
3
+ require "json"
4
+
5
+ module Scrapio
6
+ class HttpClient
7
+ DEFAULT_BASE_URL = "https://api.scrapio.dev"
8
+ DEFAULT_TIMEOUT = 30
9
+
10
+ def initialize(api_key, base_url: DEFAULT_BASE_URL, timeout: DEFAULT_TIMEOUT)
11
+ @api_key = api_key
12
+ @base_url = base_url
13
+ @timeout = timeout
14
+ end
15
+
16
+ def get(path, params = {})
17
+ uri = URI("#{@base_url}#{path}")
18
+ filtered = params.compact
19
+ uri.query = URI.encode_www_form(filtered) unless filtered.empty?
20
+ request(Net::HTTP::Get.new(uri))
21
+ end
22
+
23
+ def post(path, body = {})
24
+ uri = URI("#{@base_url}#{path}")
25
+ req = Net::HTTP::Post.new(uri)
26
+ req["Content-Type"] = "application/json"
27
+ req.body = JSON.generate(body.compact)
28
+ request(req)
29
+ end
30
+
31
+ private
32
+
33
+ def request(req)
34
+ req["Authorization"] = "Bearer #{@api_key}"
35
+ req["Accept"] = "application/json"
36
+
37
+ uri = req.uri
38
+ Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https",
39
+ read_timeout: @timeout, open_timeout: @timeout) do |http|
40
+ resp = http.request(req)
41
+ data = JSON.parse(resp.body)
42
+
43
+ case resp.code.to_i
44
+ when 200..299 then data
45
+ when 401, 403 then raise AuthError.new(data["message"] || "Unauthorized", status_code: resp.code.to_i)
46
+ when 402 then raise CreditsExhaustedError.new(data["message"] || "Credits exhausted", status_code: 402)
47
+ when 429 then raise RateLimitError.new(data["message"] || "Rate limited", status_code: 429)
48
+ else raise ScrapioError.new(data["message"] || "HTTP #{resp.code}", status_code: resp.code.to_i)
49
+ end
50
+ end
51
+ end
52
+ end
53
+ end
@@ -0,0 +1,15 @@
1
+ module Scrapio
2
+ module Resources
3
+ class Amazon
4
+ def initialize(http) = @http = http
5
+
6
+ def get_product(asin, country: nil)
7
+ @http.get("/v1/amazon/product", { asin: asin, country: country })
8
+ end
9
+
10
+ def search(query, country: nil, page: nil)
11
+ @http.get("/v1/amazon/search", { query: query, country: country, page: page })
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,16 @@
1
+ module Scrapio
2
+ module Resources
3
+ class Crawl
4
+ def initialize(http) = @http = http
5
+
6
+ def crawl(seeds:, max_pages: nil, max_depth: nil, same_domain_only: nil,
7
+ output: nil, extract: nil, timeout_ms: nil)
8
+ @http.post("/v1/crawl", {
9
+ seeds: seeds, max_pages: max_pages, max_depth: max_depth,
10
+ same_domain_only: same_domain_only, output: output,
11
+ extract: extract, timeout_ms: timeout_ms,
12
+ })
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,16 @@
1
+ module Scrapio
2
+ module Resources
3
+ class Fetch
4
+ def initialize(http) = @http = http
5
+
6
+ def fetch(url:, render_js: nil, device: nil, session: nil, output: nil,
7
+ extract: nil, actions: nil, timeout: nil, proxy: nil, country: nil)
8
+ @http.post("/v1/fetch", {
9
+ url: url, render_js: render_js, device: device, session: session,
10
+ output: output, extract: extract, actions: actions,
11
+ timeout: timeout, proxy: proxy, country: country,
12
+ })
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,15 @@
1
+ module Scrapio
2
+ module Resources
3
+ class Google
4
+ def initialize(http) = @http = http
5
+
6
+ def search(search:, search_type: nil, country_code: nil, language: nil,
7
+ device: nil, page: nil, date_range: nil, **opts)
8
+ @http.get("/v1/google/search", {
9
+ search: search, search_type: search_type, country_code: country_code,
10
+ language: language, device: device, page: page, date_range: date_range,
11
+ }.merge(opts))
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,15 @@
1
+ module Scrapio
2
+ module Resources
3
+ class Interact
4
+ def initialize(http) = @http = http
5
+
6
+ def interact(url:, actions:, device: nil, session: nil,
7
+ output: nil, extract: nil, timeout_ms: nil)
8
+ @http.post("/v1/interact", {
9
+ url: url, actions: actions, device: device, session: session,
10
+ output: output, extract: extract, timeout_ms: timeout_ms,
11
+ })
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,31 @@
1
+ module Scrapio
2
+ module Resources
3
+ class Jobs
4
+ TERMINAL = %w[completed partial failed cancelled].freeze
5
+
6
+ def initialize(http) = @http = http
7
+
8
+ def create(job_type:, payload:, webhook_url: nil)
9
+ @http.post("/v1/jobs", { job_type: job_type, payload: payload, webhook_url: webhook_url })
10
+ end
11
+
12
+ def get(job_id)
13
+ @http.get("/v1/jobs/#{job_id}")
14
+ end
15
+
16
+ def get_result(job_id)
17
+ @http.get("/v1/jobs/#{job_id}/result")
18
+ end
19
+
20
+ def wait_for_completion(job_id, poll_interval: 2.0, timeout: 300.0)
21
+ deadline = Time.now + timeout
22
+ loop do
23
+ job = get(job_id)
24
+ return get_result(job_id) if TERMINAL.include?(job["status"])
25
+ raise ScrapioError, "Job #{job_id} did not complete within #{timeout}s" if Time.now >= deadline
26
+ sleep(poll_interval)
27
+ end
28
+ end
29
+ end
30
+ end
31
+ end
@@ -0,0 +1,15 @@
1
+ module Scrapio
2
+ module Resources
3
+ class Walmart
4
+ def initialize(http) = @http = http
5
+
6
+ def get_product(product_id, country: nil)
7
+ @http.get("/v1/walmart/product", { product_id: product_id, country: country })
8
+ end
9
+
10
+ def search(query, country: nil, page: nil)
11
+ @http.get("/v1/walmart/search", { query: query, country: country, page: page })
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,19 @@
1
+ module Scrapio
2
+ module Resources
3
+ class YouTube
4
+ def initialize(http) = @http = http
5
+
6
+ def get_video(video_id)
7
+ @http.get("/v1/youtube/videos/#{URI.encode_uri_component(video_id)}")
8
+ end
9
+
10
+ def search(query, page: nil, country: nil, language: nil)
11
+ @http.get("/v1/youtube/search", { query: query, page: page, country: country, language: language })
12
+ end
13
+
14
+ def get_subtitles(video_id, language: nil)
15
+ @http.get("/v1/youtube/subtitles", { video_id: video_id, language: language })
16
+ end
17
+ end
18
+ end
19
+ end
data/lib/scrapio.rb ADDED
@@ -0,0 +1,8 @@
1
+ require_relative "scrapio/errors"
2
+ require_relative "scrapio/client"
3
+
4
+ module Scrapio
5
+ def self.new(api_key, **opts)
6
+ Client.new(api_key, **opts)
7
+ end
8
+ end
metadata ADDED
@@ -0,0 +1,60 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: scrapio
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Scrapio
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2026-06-29 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Fetch, crawl, search, and extract structured data from any URL. Includes
14
+ Google Search, YouTube transcripts, Amazon and Walmart product data, browser automation,
15
+ and async jobs.
16
+ email: support@scrapio.dev
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - README.md
22
+ - lib/scrapio.rb
23
+ - lib/scrapio/client.rb
24
+ - lib/scrapio/errors.rb
25
+ - lib/scrapio/http_client.rb
26
+ - lib/scrapio/resources/amazon.rb
27
+ - lib/scrapio/resources/crawl.rb
28
+ - lib/scrapio/resources/fetch.rb
29
+ - lib/scrapio/resources/google.rb
30
+ - lib/scrapio/resources/interact.rb
31
+ - lib/scrapio/resources/jobs.rb
32
+ - lib/scrapio/resources/walmart.rb
33
+ - lib/scrapio/resources/youtube.rb
34
+ homepage: https://scrapio.dev
35
+ licenses:
36
+ - MIT
37
+ metadata:
38
+ homepage_uri: https://scrapio.dev
39
+ source_code_uri: https://github.com/xsronhou/scrapping-tool
40
+ documentation_uri: https://scrapio.dev/docs
41
+ post_install_message:
42
+ rdoc_options: []
43
+ require_paths:
44
+ - lib
45
+ required_ruby_version: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+       version: '3.2'
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ requirements: []
56
+ rubygems_version: 3.5.22
57
+ signing_key:
58
+ specification_version: 4
59
+ summary: Official Ruby SDK for Scrapio
60
+ test_files: []