synoppy 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +21 -0
  3. data/README.md +190 -0
  4. data/lib/synoppy.rb +182 -0
  5. metadata +50 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 65693e5a2c5e7bef8b207227c4d606efb03adeff7780cee0963d145b5eeebb32
4
+ data.tar.gz: a4e650761ee1432f0c49e3dd9de39bcbf469c224ee28b792891f1e8d16a735bc
5
+ SHA512:
6
+ metadata.gz: 34f4c23fdd06970a58d5d57c7b78409bacc1e29050edfb2fe1de44c84fd9e6cb6e18b0b335d91a3a51e803182de6a7b8427f231b5eeff864828ed5b2ff2b304e
7
+ data.tar.gz: 2966d35ea34895a70f7079d2e1a1480de70cbbc603113846b786f05cd49208969f52eec971cc27c770169841c8a40226be5edc1e95ee1533b2bf04d0d7591f01
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Saanora
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,190 @@
1
+ # synoppy (Ruby)
2
+
3
+ [![Gem](https://img.shields.io/gem/v/synoppy.svg)](https://rubygems.org/gems/synoppy)
4
+
5
+ **Give your AI agents the whole web.** Synoppy is the web-data layer for AI agents — one key to **read, crawl, map, extract, classify & enrich** any site, plus screenshots and image scraping. Pure `net/http`, zero runtime dependencies.
6
+
7
+ [**Get a free key →**](https://synoppy.com/dashboard) · [Docs](https://synoppy.com/docs) · [synoppy.com](https://synoppy.com)
8
+
9
+ ```bash
10
+ gem install synoppy
11
+ ```
12
+
13
+ ## Quickstart
14
+
15
+ ```ruby
16
+ require "synoppy"
17
+
18
+ client = Synoppy::Client.new(api_key: ENV["SYNOPPY_API_KEY"])
19
+
20
+ # Read any URL -> clean markdown
21
+ page = client.read("https://stripe.com/blog", formats: ["markdown"])
22
+ puts page["markdown"]
23
+
24
+ # Crawl a site
25
+ site = client.crawl("https://example.com", limit: 25)
26
+ puts "#{site["count"]} of #{site["discovered"]} pages"
27
+
28
+ # AI structured extraction
29
+ result = client.extract("https://news.ycombinator.com", prompt: "Return { title, summary, topics }")
30
+ p result["data"]
31
+
32
+ # Brand intelligence
33
+ brand = client.enrich("linear.app")
34
+ p brand["colors"], brand["fonts"], brand["socials"]
35
+ ```
36
+
37
+ `Client.new` also accepts `base_url:` (defaults to `https://synoppy.com`) and `timeout:` (seconds).
38
+
39
+ ## Methods
40
+
41
+ ### `read(url, formats:, only_main_content:, timeout_ms:, render:, wait_ms:)` — alias `scrape`
42
+
43
+ `POST /api/scrape`. Read a URL into clean `markdown` / `html` / `text`.
44
+
45
+ ```ruby
46
+ page = client.read(
47
+ "https://stripe.com/blog",
48
+ formats: ["markdown", "html"], # "markdown" | "html" | "text"
49
+ only_main_content: true, # strip nav/boilerplate
50
+ timeout_ms: 15_000, # per-request fetch budget
51
+ render: "auto", # true | false | "auto" — headless browser
52
+ wait_ms: 500 # extra wait after load before capture
53
+ )
54
+ page["markdown"]
55
+ page["metadata"]["title"] # title, description, language, siteName, author,
56
+ # ogImage, sourceUrl, statusCode, wordCount,
57
+ # fetchedAt, rendered, bytesIn
58
+ page["renderMs"] # present when rendered
59
+ page["latencyMs"]
60
+ ```
61
+
62
+ ### `screenshot(url, full_page:, wait_ms:, timeout_ms:)`
63
+
64
+ `POST /api/screenshot`. Capture a PNG screenshot, returned as a data URL.
65
+
66
+ ```ruby
67
+ shot = client.screenshot("https://stripe.com", full_page: true, wait_ms: 500)
68
+ shot["screenshot"] # "data:image/png;base64,..."
69
+ shot["sourceUrl"]
70
+ shot["statusCode"]
71
+ shot["fullPage"]
72
+ ```
73
+
74
+ May raise `Synoppy::Error` with code `RENDER_UNAVAILABLE` (HTTP 503) when the render backend is down.
75
+
76
+ ### `crawl(url, limit:)`
77
+
78
+ `POST /api/crawl`. Crawl a site into one clean page per discovered URL (requires a key). `limit` is 1–25.
79
+
80
+ ```ruby
81
+ site = client.crawl("https://example.com", limit: 25)
82
+ site["domain"]
83
+ site["discovered"]
84
+ site["count"]
85
+ site["pages"] # [{ "url", "title", "markdown", "words" }]
86
+ site["credits"]
87
+ ```
88
+
89
+ ### `map(url)` — alias `sitemap`
90
+
91
+ `POST /api/map`. Discover every URL on a domain.
92
+
93
+ ```ruby
94
+ m = client.map("https://example.com")
95
+ m["urls"] # string[]
96
+ m["count"]
97
+ m["source"] # "sitemap" | "links"
98
+ m["domain"]
99
+ ```
100
+
101
+ ### `extract(url, prompt:)`
102
+
103
+ `POST /api/extract`. AI-structured JSON extraction (requires a key). `instruction:` is accepted as an alias for `prompt:`.
104
+
105
+ ```ruby
106
+ result = client.extract("https://news.ycombinator.com", prompt: "Return { title, summary, topics }")
107
+ result["data"]
108
+ result["model"]
109
+ result["metadata"]
110
+ result["truncated"]
111
+ result["usage"] # { "inputTokens", "outputTokens" }
112
+ ```
113
+
114
+ ### `classify(url, labels:)`
115
+
116
+ `POST /api/classify`. Classify a company by industry, or against your own labels (requires a key).
117
+
118
+ ```ruby
119
+ # Default: industry taxonomy
120
+ c = client.classify("https://stripe.com")
121
+ c["data"]["industry"]
122
+ c["data"]["naics_code"] # also naics_title, naics_sector, naics_sector_title, naics_valid
123
+ c["data"]["sic_code"] # also sic_title, sic_division, sic_division_title, sic_valid
124
+ c["data"]["categories"]
125
+ c["data"]["confidence"]
126
+
127
+ # Labels mode
128
+ c = client.classify("https://stripe.com", labels: ["fintech", "ecommerce", "social"])
129
+ c["data"]["label"]
130
+ c["data"]["matched"]
131
+ c["data"]["confidence"]
132
+ c["data"]["reasoning"]
133
+ ```
134
+
135
+ ### `enrich(url = nil, domain:, email:)` — alias `brand`
136
+
137
+ `POST /api/brand`. Resolve a brand into a full profile. Pass exactly one of a positional `url`, `domain:`, or `email:` (a work email is mapped to its domain).
138
+
139
+ ```ruby
140
+ brand = client.enrich("linear.app")
141
+ brand = client.enrich(domain: "linear.app")
142
+ brand = client.enrich(email: "jane@linear.app")
143
+
144
+ brand["name"]
145
+ brand["description"]
146
+ brand["logo"]
147
+ brand["colors"] # string[]
148
+ brand["fonts"] # string[]
149
+ brand["address"]
150
+ brand["socials"] # [{ "label", "url" }]
151
+ brand["domain"]
152
+ ```
153
+
154
+ ### `images(url)`
155
+
156
+ `POST /api/images`. Pull every image off a page.
157
+
158
+ ```ruby
159
+ imgs = client.images("https://stripe.com")
160
+ imgs["count"]
161
+ imgs["images"] # [{ "src", "alt", "width", "height" }]
162
+ ```
163
+
164
+ ### Coming soon
165
+
166
+ `act` is not live yet — the method exists but raises `NotImplementedError`. It will map to `/api/act` once that endpoint ships.
167
+
168
+ ## Credits
169
+
170
+ Every successful response includes `creditsUsed` (number) and `creditsRemaining` (number or `nil`) at the top level, on every method:
171
+
172
+ ```ruby
173
+ page = client.read("https://stripe.com")
174
+ puts "used #{page["creditsUsed"]}, #{page["creditsRemaining"]} remaining"
175
+
176
+ shot = client.screenshot("https://stripe.com")
177
+ puts "used #{shot["creditsUsed"]}, #{shot["creditsRemaining"]} remaining"
178
+ ```
179
+
180
+ ## Errors
181
+
182
+ ```ruby
183
+ begin
184
+ client.crawl("https://example.com")
185
+ rescue Synoppy::Error => e
186
+ warn "#{e.code} #{e.status}: #{e.message}"
187
+ end
188
+ ```
189
+
190
+ MIT licensed.
data/lib/synoppy.rb ADDED
@@ -0,0 +1,182 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "json"
5
+ require "uri"
6
+
7
+ # Official Ruby SDK for the Synoppy web-data API.
8
+ module Synoppy
9
+ VERSION = "1.0.0"
10
+ DEFAULT_BASE_URL = "https://synoppy.com"
11
+
12
+ # Raised when the API returns an error response.
13
+ class Error < StandardError
14
+ attr_reader :code, :status
15
+
16
+ def initialize(message, code, status)
17
+ super(message)
18
+ @code = code
19
+ @status = status
20
+ end
21
+ end
22
+
23
+ class Client
24
+ def initialize(api_key:, base_url: DEFAULT_BASE_URL, timeout: 60)
25
+ raise ArgumentError, "api_key is required" if api_key.nil? || api_key.empty?
26
+
27
+ @api_key = api_key
28
+ @base_url = base_url.sub(%r{/+\z}, "")
29
+ @timeout = timeout
30
+ end
31
+
32
+ # Read a URL -> clean markdown / HTML / text.
33
+ #
34
+ # formats : array of "markdown" | "html" | "text"
35
+ # only_main_content: strip nav/boilerplate (boolean)
36
+ # timeout_ms : per-request fetch budget in ms
37
+ # render : true | false | "auto" — run a headless browser before scraping
38
+ # wait_ms : extra wait after load (ms) before capture
39
+ #
40
+ # Returns { success, metadata { title, description, language, siteName, author,
41
+ # ogImage, sourceUrl, statusCode, wordCount, fetchedAt, rendered, bytesIn },
42
+ # markdown?, html?, text?, renderMs?, latencyMs, creditsUsed, creditsRemaining }.
43
+ def read(url, formats: nil, only_main_content: nil, timeout_ms: nil, render: nil, wait_ms: nil)
44
+ body = { url: url }
45
+ body[:formats] = formats unless formats.nil?
46
+ body[:onlyMainContent] = only_main_content unless only_main_content.nil?
47
+ body[:timeoutMs] = timeout_ms unless timeout_ms.nil?
48
+ body[:render] = render unless render.nil?
49
+ body[:waitMs] = wait_ms unless wait_ms.nil?
50
+ request("/api/scrape", body)
51
+ end
52
+ alias scrape read
53
+
54
+ # Capture a full PNG screenshot of a URL (returned as a data URL).
55
+ #
56
+ # full_page : capture the entire scrollable page (boolean)
57
+ # wait_ms : extra wait after load (ms) before capture
58
+ # timeout_ms: per-request budget in ms
59
+ #
60
+ # Returns { success, screenshot (PNG data URL), sourceUrl, statusCode, fullPage,
61
+ # latencyMs, creditsUsed, creditsRemaining }. May 503 RENDER_UNAVAILABLE.
62
+ def screenshot(url, full_page: nil, wait_ms: nil, timeout_ms: nil)
63
+ body = { url: url }
64
+ body[:fullPage] = full_page unless full_page.nil?
65
+ body[:waitMs] = wait_ms unless wait_ms.nil?
66
+ body[:timeoutMs] = timeout_ms unless timeout_ms.nil?
67
+ request("/api/screenshot", body)
68
+ end
69
+
70
+ # Crawl a site -> one clean page per URL discovered (requires a key).
71
+ #
72
+ # limit: number of pages to crawl (1-25).
73
+ #
74
+ # Returns { success, domain, discovered, count,
75
+ # pages:[{ url, title, markdown, words }], credits, latencyMs,
76
+ # creditsUsed, creditsRemaining }.
77
+ def crawl(url, limit: nil)
78
+ body = { url: url }
79
+ body[:limit] = limit unless limit.nil?
80
+ request("/api/crawl", body)
81
+ end
82
+
83
+ # Discover every URL on a domain.
84
+ #
85
+ # Returns { success, domain, urls: string[], count,
86
+ # source: "sitemap" | "links", latencyMs, creditsUsed, creditsRemaining }.
87
+ def map(url)
88
+ request("/api/map", { url: url })
89
+ end
90
+ alias sitemap map
91
+
92
+ # AI-structured JSON extraction (requires a key).
93
+ #
94
+ # prompt: natural-language instruction describing the JSON to return.
95
+ # `instruction:` is accepted as an alias for `prompt:`.
96
+ #
97
+ # Returns { success, url, model, data, metadata, truncated,
98
+ # usage:{ inputTokens, outputTokens }, latencyMs, creditsUsed, creditsRemaining }.
99
+ def extract(url, prompt: nil, instruction: nil)
100
+ body = { url: url }
101
+ prompt ||= instruction
102
+ body[:prompt] = prompt unless prompt.nil?
103
+ request("/api/extract", body)
104
+ end
105
+
106
+ # Classify a company by industry or your own labels (requires a key).
107
+ #
108
+ # labels: optional array of custom labels. When omitted, returns the default
109
+ # industry taxonomy { industry, naics_code, naics_title, naics_sector,
110
+ # naics_sector_title, naics_valid, sic_code, sic_title, sic_division,
111
+ # sic_division_title, sic_valid, categories, confidence }.
112
+ # When provided, returns { label, matched, confidence, reasoning }.
113
+ #
114
+ # Plus creditsUsed / creditsRemaining at the top level.
115
+ def classify(url, labels: nil)
116
+ body = { url: url }
117
+ body[:labels] = labels unless labels.nil?
118
+ request("/api/classify", body)
119
+ end
120
+
121
+ # Resolve a brand into a full profile. Accepts a url:, a domain:, or an
122
+ # email: (a work email is mapped to its domain). Provide exactly one.
123
+ #
124
+ # Returns { success, domain, name, description, logo, colors: string[],
125
+ # fonts: string[], address, socials:[{ label, url }], bytesIn, latencyMs,
126
+ # creditsUsed, creditsRemaining }.
127
+ def enrich(url = nil, domain: nil, email: nil)
128
+ body = {}
129
+ body[:url] = url unless url.nil?
130
+ body[:domain] = domain unless domain.nil?
131
+ body[:email] = email unless email.nil?
132
+ if body.empty?
133
+ raise ArgumentError, "one of url, domain:, or email: is required"
134
+ end
135
+
136
+ request("/api/brand", body)
137
+ end
138
+ alias brand enrich
139
+
140
+ # Pull every image off a page.
141
+ #
142
+ # Returns { success, url, count,
143
+ # images:[{ src, alt, width, height }], bytesIn, latencyMs,
144
+ # creditsUsed, creditsRemaining }.
145
+ def images(url)
146
+ request("/api/images", { url: url })
147
+ end
148
+
149
+ # Take actions on a page (click, type, navigate). Coming soon —
150
+ # /api/act is not live yet.
151
+ def act(*)
152
+ raise NotImplementedError, "act is coming soon — /api/act is not live yet"
153
+ end
154
+
155
+ private
156
+
157
+ def request(path, body)
158
+ uri = URI("#{@base_url}#{path}")
159
+ http = Net::HTTP.new(uri.host, uri.port)
160
+ http.use_ssl = uri.scheme == "https"
161
+ http.read_timeout = @timeout
162
+ req = Net::HTTP::Post.new(uri)
163
+ req["Authorization"] = "Bearer #{@api_key}"
164
+ req["Content-Type"] = "application/json"
165
+ req["User-Agent"] = "synoppy-ruby/#{VERSION}"
166
+ req.body = JSON.generate(body)
167
+
168
+ res = http.request(req)
169
+ data = begin
170
+ JSON.parse(res.body)
171
+ rescue StandardError
172
+ {}
173
+ end
174
+
175
+ unless res.is_a?(Net::HTTPSuccess) && data["success"] != false
176
+ raise Error.new(data["error"] || "HTTP #{res.code}", data["code"] || "ERROR", res.code.to_i)
177
+ end
178
+
179
+ data
180
+ end
181
+ end
182
+ end
metadata ADDED
@@ -0,0 +1,50 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: synoppy
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Saanora
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2026-06-21 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Scrape, screenshot, crawl, map, extract, classify, enrich, and images
14
+ — one key for the whole web.
15
+ email:
16
+ - support@synoppy.com
17
+ executables: []
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - LICENSE
22
+ - README.md
23
+ - lib/synoppy.rb
24
+ homepage: https://synoppy.com/docs/sdks
25
+ licenses:
26
+ - MIT
27
+ metadata:
28
+ homepage_uri: https://synoppy.com
29
+ documentation_uri: https://synoppy.com/docs/sdks
30
+ source_code_uri: https://github.com/Synoppy/synoppy-ruby
31
+ post_install_message:
32
+ rdoc_options: []
33
+ require_paths:
34
+ - lib
35
+ required_ruby_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '2.7'
40
+ required_rubygems_version: !ruby/object:Gem::Requirement
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ requirements: []
46
+ rubygems_version: 3.4.19
47
+ signing_key:
48
+ specification_version: 4
49
+ summary: Official Ruby SDK for the Synoppy web-data API.
50
+ test_files: []