opencode-skills-collection 3.0.37 → 3.0.39

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. package/bundled-skills/.antigravity-install-manifest.json +13 -1
  2. package/bundled-skills/2slides-ppt-generator/SKILL.md +786 -0
  3. package/bundled-skills/2slides-ppt-generator/references/api-reference.md +499 -0
  4. package/bundled-skills/2slides-ppt-generator/references/mcp-integration.md +282 -0
  5. package/bundled-skills/2slides-ppt-generator/references/pricing.md +195 -0
  6. package/bundled-skills/2slides-ppt-generator/scripts/api_constants.py +87 -0
  7. package/bundled-skills/2slides-ppt-generator/scripts/create_pdf_slides.py +159 -0
  8. package/bundled-skills/2slides-ppt-generator/scripts/download_slides_pages_voices.py +157 -0
  9. package/bundled-skills/2slides-ppt-generator/scripts/generate_narration.py +197 -0
  10. package/bundled-skills/2slides-ppt-generator/scripts/generate_slides.py +247 -0
  11. package/bundled-skills/2slides-ppt-generator/scripts/get_job_status.py +106 -0
  12. package/bundled-skills/2slides-ppt-generator/scripts/search_themes.py +137 -0
  13. package/bundled-skills/anti-sycophancy/README.md +86 -0
  14. package/bundled-skills/anti-sycophancy/SKILL.md +40 -0
  15. package/bundled-skills/antigravity-agent-manager/SKILL.md +112 -0
  16. package/bundled-skills/docs/integrations/jetski-cortex.md +3 -3
  17. package/bundled-skills/docs/integrations/jetski-gemini-loader/README.md +1 -1
  18. package/bundled-skills/docs/maintainers/repo-growth-seo.md +3 -3
  19. package/bundled-skills/docs/maintainers/skills-update-guide.md +1 -1
  20. package/bundled-skills/docs/sources/sources.md +1 -0
  21. package/bundled-skills/docs/users/bundles.md +1 -1
  22. package/bundled-skills/docs/users/claude-code-skills.md +1 -1
  23. package/bundled-skills/docs/users/gemini-cli-skills.md +1 -1
  24. package/bundled-skills/docs/users/getting-started.md +1 -1
  25. package/bundled-skills/docs/users/kiro-integration.md +1 -1
  26. package/bundled-skills/docs/users/usage.md +4 -4
  27. package/bundled-skills/docs/users/visual-guide.md +4 -4
  28. package/bundled-skills/event-staffing-compliance/SKILL.md +91 -0
  29. package/bundled-skills/event-staffing-ordering/SKILL.md +119 -0
  30. package/bundled-skills/examprep-ai/SKILL.md +446 -0
  31. package/bundled-skills/hasdata/SKILL.md +107 -0
  32. package/bundled-skills/hasdata/references/code-recipes.md +150 -0
  33. package/bundled-skills/hasdata/references/ecommerce.md +116 -0
  34. package/bundled-skills/hasdata/references/jobs.md +111 -0
  35. package/bundled-skills/hasdata/references/local-business.md +145 -0
  36. package/bundled-skills/hasdata/references/real-estate.md +84 -0
  37. package/bundled-skills/hasdata/references/scraper-jobs.md +252 -0
  38. package/bundled-skills/hasdata/references/search.md +154 -0
  39. package/bundled-skills/hasdata/references/travel.md +202 -0
  40. package/bundled-skills/hasdata/references/web-scraping.md +159 -0
  41. package/bundled-skills/hasdata/references/youtube.md +186 -0
  42. package/bundled-skills/hasdata-cli/SKILL.md +169 -0
  43. package/bundled-skills/hasdata-cli/references/all-commands.md +107 -0
  44. package/bundled-skills/hasdata-cli/references/ecommerce.md +106 -0
  45. package/bundled-skills/hasdata-cli/references/enrichment.md +227 -0
  46. package/bundled-skills/hasdata-cli/references/jobs.md +84 -0
  47. package/bundled-skills/hasdata-cli/references/local-business.md +123 -0
  48. package/bundled-skills/hasdata-cli/references/real-estate.md +126 -0
  49. package/bundled-skills/hasdata-cli/references/search.md +122 -0
  50. package/bundled-skills/hasdata-cli/references/travel.md +102 -0
  51. package/bundled-skills/hasdata-cli/references/web-scraping.md +181 -0
  52. package/bundled-skills/hasdata-cli/references/youtube.md +145 -0
  53. package/bundled-skills/linkedin-content-generator/SKILL.md +492 -0
  54. package/bundled-skills/linkedin-content-generator/scripts/generate_calendar.py +82 -0
  55. package/bundled-skills/linkedin-content-generator/scripts/generate_carousel.py +69 -0
  56. package/bundled-skills/linkedin-content-generator/scripts/generate_newsletter.py +64 -0
  57. package/bundled-skills/linkedin-content-generator/scripts/generate_post.py +77 -0
  58. package/bundled-skills/linkedin-content-generator/scripts/memory.md +49 -0
  59. package/bundled-skills/linkedin-content-generator/scripts/memory_manager.py +134 -0
  60. package/bundled-skills/linkedin-content-generator/scripts/utils.py +96 -0
  61. package/bundled-skills/permission-manager/README.md +22 -0
  62. package/bundled-skills/permission-manager/SKILL.md +54 -0
  63. package/bundled-skills/skill-suggester/README.md +14 -0
  64. package/bundled-skills/skill-suggester/SKILL.md +69 -0
  65. package/bundled-skills/smart-git-automation/README.md +31 -0
  66. package/bundled-skills/smart-git-automation/SKILL.md +96 -0
  67. package/bundled-skills/vercel-optimize/lib/cost-coverage.mjs +3 -1
  68. package/bundled-skills/vercel-optimize/lib/render-report.mjs +2 -2
  69. package/bundled-skills/vercel-optimize/lib/util.mjs +7 -0
  70. package/bundled-skills/vercel-optimize/lib/verify-claim.mjs +2 -7
  71. package/bundled-skills/vercel-optimize/lib/workspace-resolver.mjs +2 -1
  72. package/package.json +1 -1
  73. package/skills_index.json +268 -0
@@ -0,0 +1,252 @@
1
+ # Scraper Jobs — async, bulk
2
+
3
+ Use only when there's no Scraper-API equivalent (`crawler`, `contacts`, `sec-edgar`, `amazon-bestsellers`, `amazon-product-reviews`) or when you want webhook-driven fan-out without managing your own polling loop. Otherwise the matching Scraper API + paginated client loop is simpler.
4
+
5
+ | Slug | Notes |
6
+ |---|---|
7
+ | `crawler` | Recursive site crawl. Accepts every Web Scraping API parameter. |
8
+ | `contacts` | URL list → emails / phones / social profiles. |
9
+ | `sec-edgar` | Bulk SEC filings by CIK / ticker / company name. |
10
+ | `google-serp`, `google-maps`, `google-maps-reviews`, `google-trends` | Bulk Google. |
11
+ | `amazon-search`, `amazon-product`, `amazon-product-reviews`, `amazon-seller-products`, `amazon-bestsellers` | Bulk Amazon. |
12
+ | `shopify` | Multi-store crawl. |
13
+ | `zillow`, `redfin`, `airbnb` | Bulk real estate. |
14
+ | `yelp`, `yellow-pages` | Bulk local. |
15
+ | `indeed`, `glassdoor` | Bulk jobs. |
16
+
17
+ ## Lifecycle
18
+
19
+ 1. `POST /scrapers/<slug>/jobs` → returns the full job record. **The handle is `body.id` (numeric integer), not `jobId`** despite older doc snippets — store this. Status starts as `pending`.
20
+ 2. `GET /scrapers/jobs/<id>` — poll status.
21
+ 3. `GET /scrapers/jobs/<id>/results?page=…&limit=100` — once `status === "finished"`.
22
+ 4. `DELETE /scrapers/jobs/<id>` — stop early (rows produced before stop are kept).
23
+
24
+ Status values: `pending` → `in_progress` → `finished` (or `stopped` if cancelled).
25
+
26
+ **Shortcut for finished jobs:** the status response on a `finished` job carries a `data` object with direct download URLs:
27
+
28
+ ```json
29
+ "data": {
30
+ "csv": "https://f005.backblazeb2.com/file/.../{uuid}.csv",
31
+ "json": "https://f005.backblazeb2.com/file/.../{uuid}.json",
32
+ "xlsx": "https://f005.backblazeb2.com/file/.../{uuid}.xlsx"
33
+ }
34
+ ```
35
+
36
+ For one-shot ingestion, fetch `data.json` directly instead of paging `/results`. **These URLs are short-lived** — download immediately on `finished`.
37
+
38
+ ## End-to-end (Python)
39
+
40
+ ```python
41
+ import os, time, requests
42
+
43
+ API_KEY = os.environ["HASDATA_API_KEY"]
44
+ H = {"x-api-key": API_KEY, "Content-Type": "application/json"}
45
+ BASE = "https://api.hasdata.com"
46
+
47
+ def submit(slug, body):
48
+ r = requests.post(f"{BASE}/scrapers/{slug}/jobs", headers=H, json=body, timeout=60)
49
+ r.raise_for_status()
50
+ return r.json()["id"] # numeric job id — not "jobId"
51
+
52
+ def wait(job_id, poll=10, cap=60, timeout=3600):
53
+ deadline = time.time() + timeout
54
+ while time.time() < deadline:
55
+ s = requests.get(f"{BASE}/scrapers/jobs/{job_id}", headers=H, timeout=60).json()
56
+ if s["status"] in ("finished", "stopped"):
57
+ return s
58
+ time.sleep(poll)
59
+ poll = min(poll * 1.5, cap)
60
+ raise TimeoutError(job_id)
61
+
62
+ def results(job_id):
63
+ page = 1
64
+ while True:
65
+ body = requests.get(
66
+ f"{BASE}/scrapers/jobs/{job_id}/results",
67
+ headers=H, params={"page": page, "limit": 100}, timeout=120,
68
+ ).json()
69
+ for row in body["data"]:
70
+ yield row["data"] # double-wrapped — see below
71
+ if body["meta"]["currentPage"] >= body["meta"]["lastPage"]:
72
+ return
73
+ page += 1
74
+ ```
75
+
76
+ ### Response shapes
77
+
78
+ Submit (live):
79
+ ```json
80
+ {
81
+ "id": 416349, // ← the job handle, integer
82
+ "scraperId": 26,
83
+ "status": "pending",
84
+ "creditsSpent": 0,
85
+ "dataRowsCount": 0,
86
+ "input": { ... },
87
+ "createdAt": "...", "updatedAt": "...",
88
+ "scraper": { "slug": "contacts", ... },
89
+ "columns": [ ... ]
90
+ }
91
+ ```
92
+
93
+ Status (live; numeric fields arrive as **strings** when populated):
94
+ ```json
95
+ {
96
+ "id": 416349,
97
+ "status": "finished",
98
+ "creditsSpent": "5", // string!
99
+ "dataRowsCount": "1", // string!
100
+ "input": { ... },
101
+ "data": {
102
+ "csv": "https://f005.backblazeb2.com/.../{uuid}.csv",
103
+ "json": "https://f005.backblazeb2.com/.../{uuid}.json",
104
+ "xlsx": "https://f005.backblazeb2.com/.../{uuid}.xlsx"
105
+ }
106
+ }
107
+ ```
108
+
109
+ Results page:
110
+ ```json
111
+ {
112
+ "meta": {
113
+ "total": 1, "perPage": 100,
114
+ "currentPage": 1, "lastPage": 1,
115
+ "firstPage": 1, "firstPageUrl": "/?page=1",
116
+ "lastPageUrl": "/?page=1",
117
+ "nextPageUrl": null, "previousPageUrl": null
118
+ },
119
+ "data": [
120
+ {
121
+ "id": "...", "jobId": 416349, "dataId": "...",
122
+ "data": { /* the actual scraped row */ },
123
+ "createdAt": "...", "updatedAt": "..."
124
+ }
125
+ ]
126
+ }
127
+ ```
128
+
129
+ **Double `data`** — the scraped row is `body["data"][i]["data"]`; the outer object wraps it with `id`, `jobId`, `dataId`, `createdAt`, and `updatedAt`.
130
+
131
+ ## Common body fields
132
+
133
+ - `limit` (int) — max rows. `0` = no cap.
134
+ - `webhook.url` (string, https), `webhook.events` (any subset of `scraper.job.started`, `scraper.data.scraped`, `scraper.job.finished`), `webhook.headers` (sent on every callback — pin a shared secret here).
135
+
136
+ ## Webhooks
137
+
138
+ ```python
139
+ # Submit with webhook
140
+ submit("indeed", {
141
+ "keywords": ["software engineer", "data scientist"],
142
+ "locations": ["New York, NY", "Remote"],
143
+ "limit": 500,
144
+ "webhook": {
145
+ "url": "https://your.app/hasdata-hook",
146
+ "events": ["scraper.data.scraped", "scraper.job.finished"],
147
+ "headers": {"x-shared-secret": SHARED_SECRET},
148
+ },
149
+ })
150
+ ```
151
+
152
+ ```python
153
+ from flask import Flask, request, abort
154
+ app = Flask(__name__)
155
+
156
+ @app.post("/hasdata-hook")
157
+ def hook():
158
+ if request.headers.get("x-shared-secret") != SHARED_SECRET:
159
+ abort(401)
160
+ e = request.json
161
+ if e["event"] == "scraper.data.scraped":
162
+ save_row(e["jobId"], e["data"])
163
+ elif e["event"] == "scraper.job.finished":
164
+ finalize(e["jobId"])
165
+ return "", 200 # 2xx prevents retry
166
+ ```
167
+
168
+ - Callbacks are delivered asynchronously, with **3 retries** on a non-2xx response. **Order is not guaranteed** — treat the payload as the source of truth.
169
+ - **No documented HMAC.** Pin a shared secret via `webhook.headers`, or just fetch results via the API on `scraper.job.finished` and ignore per-row deliveries.
170
+ - **Always pair webhooks with polling.** A long quiet period probably means missed callbacks.
171
+
172
+ ## Per-scraper bodies
173
+
174
+ ### `crawler` — recursive site crawl
175
+
176
+ Accepts every Web Scraping API parameter applied to **every page**.
177
+
178
+ | Field | Notes |
179
+ |---|---|
180
+ | `urls` | **Required.** Seed URLs. |
181
+ | `maxDepth` | Hops from seed. |
182
+ | `includePaths` / `excludePaths` | Regex. **Case-sensitive.** |
183
+ | `limit` | Cap on pages. `0` = unlimited. |
184
+
185
+ ```python
186
+ job = submit("crawler", {
187
+ "urls": ["https://docs.example.com"],
188
+ "maxDepth": 5,
189
+ "includePaths": "/docs/.+",
190
+ "outputFormat": ["markdown"],
191
+ "excludeTags": ["script", "style", "nav", "footer"],
192
+ "limit": 2000,
193
+ })
194
+ ```
195
+
196
+ ### `contacts` — URLs → contact info
197
+
198
+ ```python
199
+ submit("contacts", {"urls": ["https://example.com/about", "https://example.com/team"]})
200
+ ```
201
+
202
+ Verified row schema (one row per input URL):
203
+
204
+ ```json
205
+ {
206
+ "url": "https://example.com/about",
207
+ "emails": ["..."],
208
+ "phoneNumbers": ["..."],
209
+ "linkedin": ["..."],
210
+ "xcom": ["..."], // X / Twitter — note key is "xcom"
211
+ "facebook": ["..."],
212
+ "instagram": ["..."],
213
+ "dribbble": ["..."],
214
+ "clutch": ["..."]
215
+ }
216
+ ```
217
+
218
+ Empty arrays for missing categories — never null. If you only have a domain, discover URLs first via SERP `site:example.com`.
219
+
220
+ ### `sec-edgar` — bulk SEC filings
221
+
222
+ ```python
223
+ submit("sec-edgar", {
224
+ "limit": 100,
225
+ "ciks": ["AAPL", "789019", "Alphabet Inc."],
226
+ "filingTypes": "10-K, 10-Q, 8-K",
227
+ "startDate": "2024-01-01",
228
+ "endDate": "2025-12-31",
229
+ })
230
+ ```
231
+
232
+ `ciks` accepts CIKs, tickers, or company names mixed.
233
+
234
+ ### Bulk-API equivalents
235
+
236
+ `google-serp`, `google-maps`, `amazon-search`, `indeed`, `glassdoor`, etc. Jobs accept arrays of inputs (`keywords[]`, `locations[]`, etc.). Use them when you want webhook fan-out; otherwise the synchronous Scraper API + paginated client loop is simpler.
237
+
238
+ ### Crawler vs Contacts vs Web Scraping batch
239
+
240
+ - **crawler** — unknown URL set, recursive discovery.
241
+ - **contacts** — known URL list, want extracted contact fields.
242
+ - **`/scrape/batch/web`** — known URL list, want full HTML/markdown/AI extraction at >1k scale.
243
+
244
+ ## Gotchas
245
+
246
+ - **Persist the job `id` immediately** (the integer from the submit response — *not* `jobId`). It is the only handle for the status, results, and stop endpoints.
247
+ - **Result file retention is short.** Download right after `finished`.
248
+ - **Webhooks are best-effort.** Always poll as a backup.
249
+ - **`includePaths` regex is case-sensitive.**
250
+ - **Status `stopped` is terminal.** Rows already produced remain available.
251
+ - **Don't poll faster than every 10 s** — faster polling wastes your concurrency cap.
252
+ - **Double-wrapped results** — `body["data"][i]["data"]`, not `body["data"][i]`.
@@ -0,0 +1,154 @@
1
+ # Search & SERP APIs
2
+
3
+ Pre-parsed JSON for Google, AI Mode, Bing, and the specialized Google panels. Synchronous `GET` under `https://api.hasdata.com`.
4
+
5
+ | Endpoint | Returns |
6
+ |---|---|
7
+ | `/scrape/google/serp` | Full SERP — organic + every rich-snippet block |
8
+ | `/scrape/google-light/serp` | Organic only |
9
+ | `/scrape/google/ai-mode` | Gemini answer + references |
10
+ | `/scrape/google/ai-overview` | AI Overview block |
11
+ | `/scrape/google/news` | News articles |
12
+ | `/scrape/google/shopping` | Shopping carousel |
13
+ | `/scrape/google/images` | Image search |
14
+ | `/scrape/google/events` | Local events |
15
+ | `/scrape/google/short-videos` | Short-video panel |
16
+ | `/scrape/google/immersive-product` | Expanded product pop-up |
17
+ | `/scrape/google-trends/search` | Trends + related queries |
18
+ | `/scrape/bing/serp` | Bing SERP |
19
+
20
+ For `/scrape/google/flights`, see `travel.md`.
21
+
22
+ ## Google SERP
23
+
24
+ ```python
25
+ import requests
26
+
27
+ resp = requests.get(
28
+ "https://api.hasdata.com/scrape/google/serp",
29
+ headers={"x-api-key": API_KEY},
30
+ params={"q": "coffee beans", "gl": "us", "hl": "en", "num": 100},
31
+ timeout=300,
32
+ )
33
+ for hit in resp.json().get("organicResults", []):
34
+ print(hit["position"], hit["title"], hit["link"])
35
+ ```
36
+
37
+ ### Query parameters
38
+
39
+ | Param | Default | Notes |
40
+ |---|---|---|
41
+ | `q` | — | **Required.** |
42
+ | `location` | — | Canonical, e.g. `"Austin,Texas,United States"`. Hyper-local. |
43
+ | `uule` | — | Pre-encoded location (mutually exclusive with `location`). |
44
+ | `domain` | `google.com` | `google.co.uk`, `google.de`, … |
45
+ | `gl` | — | 2-letter country (`us`, `de`, `jp`). |
46
+ | `hl` | — | 2-letter UI language. |
47
+ | `lr` | — | Content-language filter (`lang_en`). |
48
+ | `tbs` | — | Filters — `qdr:d` / `qdr:w` / `qdr:m` / `qdr:y` for time, `li:1` verbatim, sort, image type. |
49
+ | `safe` | — | `active` / `off`. |
50
+ | `start` | `0` | Pagination offset. |
51
+ | `num` | `10` | Results/page. **Max 100** |
52
+ | `tbm` | — | `isch` images, `vid`, `nws`, `shop`, `lcl`. |
53
+ | `deviceType` | — | `desktop`, `mobile`, `tablet`. |
54
+
55
+ ### Response keys
56
+
57
+ ```
58
+ requestMetadata, searchInformation, organicResults, knowledgeGraph, answerBox,
59
+ aiOverview, topStories, newsResults, localResults, inlineShoppingResults,
60
+ inlineVideos, inlineImages, recipesResults, perspectives, discussionsAndForums,
61
+ relatedQuestions, relatedSearches, adResults, pagination
62
+ ```
63
+
64
+ Rich-snippet keys appear **only when the SERP shows that block** — always read them with `data.get(key, default)`.
65
+
66
+ ### Tips
67
+
68
+ - `gl`/`hl` change ranking, not just localization. Run the same `q` with different `gl` to study geo-bias.
69
+ - `location="Austin,Texas,United States"` produces hyperlocal results that differ from `gl=us` alone.
70
+
71
+ ## Google Light SERP
72
+
73
+ Same params as full SERP, but the response is trimmed to a few keys — typically `requestMetadata`, `searchInformation`, `organicResults`, `relatedSearches`, and `pagination` when present. Use for crawler seeding and link discovery when you don't need the heavier rich-snippet blocks.
74
+
75
+ ## Google AI Mode
76
+
77
+ ```python
78
+ resp = requests.get(
79
+ "https://api.hasdata.com/scrape/google/ai-mode",
80
+ headers={"x-api-key": API_KEY},
81
+ params={"q": "is coffee good for health?", "location": "Austin,Texas,United States"},
82
+ timeout=300,
83
+ )
84
+ ```
85
+
86
+ Params: `q` (required), `location`, `uule`, `gl`. Response:
87
+
88
+ ```json
89
+ {
90
+ "requestMetadata": {...},
91
+ "textBlocks": [
92
+ {"type":"heading","snippet":"..."},
93
+ {"type":"paragraph","snippet":"...","snippetHighlightedWords":["..."]},
94
+ {"type":"list","list":[{"snippet":"..."}]},
95
+ {"type":"table","table":{...}},
96
+ {"type":"code","code":"..."}
97
+ ],
98
+ "references": [{"index":1,"link":"...","title":"...","snippet":"...","source":"..."}]
99
+ }
100
+ ```
101
+
102
+ Block types observed in practice: `heading`, `paragraph`, `list`, `table`, `code`. Always switch on `type` rather than assuming a fixed set.
103
+
104
+ Pattern: AI Mode for the answer → `/scrape/web` (markdown) on each `references[].link` → cited RAG context.
105
+
106
+ ## Google News / Shopping / Bing
107
+
108
+ Same shape: `q` + `gl`/`hl`/`location`. News supports `tbs=qdr:d|w|m|y` for time windows. Bing returns the same key set as Google SERP — useful for cross-engine consensus (disagreement = contested topic).
109
+
110
+ ## Patterns
111
+
112
+ ### Pagination
113
+
114
+ ```python
115
+ def all_organic(q, target=300):
116
+ out, start = [], 0
117
+ while len(out) < target:
118
+ page = requests.get(
119
+ "https://api.hasdata.com/scrape/google-light/serp",
120
+ headers={"x-api-key": API_KEY},
121
+ params={"q": q, "num": 100, "start": start},
122
+ timeout=300,
123
+ ).json().get("organicResults", [])
124
+ if not page:
125
+ break
126
+ out.extend(page)
127
+ start += 100
128
+ return out[:target]
129
+ ```
130
+
131
+ ### Reverse lookup (email / phone / domain → identity)
132
+
133
+ ```python
134
+ requests.get(
135
+ "https://api.hasdata.com/scrape/google/serp",
136
+ headers={"x-api-key": API_KEY},
137
+ params={"q": f'"{literal}"', "num": 20},
138
+ timeout=300,
139
+ ).json().get("organicResults", [])
140
+ ```
141
+
142
+ Quoted literals (emails, phones, error strings) usually surface the canonical mention.
143
+
144
+ ### Indexation check
145
+
146
+ ```python
147
+ def is_indexed(url):
148
+ r = requests.get(
149
+ "https://api.hasdata.com/scrape/google-light/serp",
150
+ headers={"x-api-key": API_KEY},
151
+ params={"q": f"site:{url}", "num": 1}, timeout=300,
152
+ )
153
+ return bool(r.json().get("organicResults"))
154
+ ```
@@ -0,0 +1,202 @@
1
+ # Travel APIs — Airbnb, Booking, Google Flights
2
+
3
+ | Endpoint | Returns |
4
+ |---|---|
5
+ | `/scrape/airbnb/listing` | Airbnb search results |
6
+ | `/scrape/airbnb/property` | Single Airbnb listing |
7
+ | `/scrape/booking/search` | Booking.com search results (hotels, apartments) |
8
+ | `/scrape/booking/place` | Single Booking.com property with room/rate list |
9
+ | `/scrape/google/flights` | Google Flights prices and itineraries |
10
+
11
+ All synchronous `GET`. Airbnb is 5 credits; Booking is 10; Google Flights is 15.
12
+
13
+ For activities at the destination see `/scrape/google/events` (in `search.md`); for ground transport, scrape the operator's site with `POST /scrape/web`.
14
+
15
+ ## Airbnb
16
+
17
+ ```python
18
+ import requests
19
+
20
+ def airbnb_search(location, check_in, check_out, **kwargs):
21
+ return requests.get(
22
+ "https://api.hasdata.com/scrape/airbnb/listing",
23
+ headers={"x-api-key": API_KEY},
24
+ params={"location": location, "checkIn": check_in, "checkOut": check_out, **kwargs},
25
+ timeout=300,
26
+ ).json()
27
+ ```
28
+
29
+ | Param | Notes |
30
+ |---|---|
31
+ | `location` | **Required.** Free-form. |
32
+ | `checkIn` | **Required.** `YYYY-MM-DD`. |
33
+ | `checkOut`, `adults`, `children`, `infants`, `pets` | Optional. |
34
+ | `nextPageToken` | Pagination cursor. |
35
+
36
+ ### Token pagination
37
+
38
+ ```python
39
+ def airbnb_all(location, check_in, check_out):
40
+ out, token = [], None
41
+ while True:
42
+ page = airbnb_search(location, check_in, check_out,
43
+ **({"nextPageToken": token} if token else {}))
44
+ out.extend(page.get("listings", []))
45
+ token = page.get("nextPageToken")
46
+ if not token:
47
+ return out
48
+ ```
49
+
50
+ ### Airbnb Property
51
+
52
+ ```python
53
+ requests.get(
54
+ "https://api.hasdata.com/scrape/airbnb/property",
55
+ headers={"x-api-key": API_KEY},
56
+ params={"url": "https://www.airbnb.com/rooms/12345678"},
57
+ timeout=300,
58
+ )
59
+ ```
60
+
61
+ ## Booking Search
62
+
63
+ ```python
64
+ import json, requests
65
+
66
+ def booking_search(keyword, check_in, check_out, *, adults=2, children=0,
67
+ children_ages=None, rooms=1, **filters):
68
+ params = {
69
+ "keyword": keyword,
70
+ "checkInDate": check_in,
71
+ "checkOutDate": check_out,
72
+ "adults": adults,
73
+ "children": children,
74
+ "rooms": rooms,
75
+ **filters,
76
+ }
77
+ if children and children_ages:
78
+ params["childrenAgesJson"] = json.dumps(children_ages)
79
+ return requests.get(
80
+ "https://api.hasdata.com/scrape/booking/search",
81
+ headers={"x-api-key": API_KEY},
82
+ params=params, timeout=300,
83
+ ).json()
84
+ ```
85
+
86
+ | Param | Notes |
87
+ |---|---|
88
+ | `keyword` | **Required.** City, neighborhood, or property name. |
89
+ | `checkInDate` / `checkOutDate` | **Required.** `YYYY-MM-DD`. |
90
+ | `adults`, `children`, `rooms` | **Required.** Pass `children=0` explicitly when none. |
91
+ | `childrenAgesJson` | Required iff `children > 0` — JSON array of ages (0–17), one per child. |
92
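+ | `price[min]` / `price[max]` | `>= 10` / `>= 20`. Bracketed keys — `axios` serializes a nested object as `price[min]=…`, but Python `requests` does not; with `requests`, pass the literal keys (`params={"price[min]": 10, "price[max]": 200}`). |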
93
+ | `rating[]`, `reviewScore[]`, `propertyType[]`, `facilities[]`, `meals[]`, `bedPreference[]`, `roomFacilities[]`, `propertyAccessibility[]`, `roomAccessibility[]`, `distanceFromCenter[]`, `travelGroup[]`, `onlinePayment[]`, `reservationPolicy[]` | Multi-value filters (OR). |
94
+ | `bedrooms`, `bathrooms` | Minimum count. |
95
+ | `sort` | `ourTopPicks`, `homesAndApartmentsFirst`, `priceLowestFirst`, `priceHighestFirst`, `bestReviewedAndLowestPrice`, `ratingHighToLow`, `ratingLowToHigh`, `ratingAndPrice`, `distanceFromDowntown`, `topReviewed`. |
96
+ | `page` | 1-indexed, 25 results per page. |
97
+ | `currency` | ISO code or `hotelCurrency` to keep native. |
98
+ | `language` | UI locale. |
99
+
100
+ Top-level response (verified live): `requestMetadata`, `searchInformation`, `pagination`, `results`. Per-result keys: `hotelId`, `roomId`, `title`, `url`, `location`, `rating`, `reviews`, `price` (object with `total` / `nightly` / `currency`), `room`, `beds`, `bedTypes`, `policies`, `photo`.
101
+
102
+ ## Booking Place
103
+
104
+ ```python
105
+ resp = requests.get(
106
+ "https://api.hasdata.com/scrape/booking/place",
107
+ headers={"x-api-key": API_KEY},
108
+ params={
109
+ "url": "https://www.booking.com/hotel/fr/le-bristol-paris.html",
110
+ "checkInDate": "2026-07-10",
111
+ "checkOutDate": "2026-07-13",
112
+ "adults": 2,
113
+ "children": 0,
114
+ "rooms": 1,
115
+ },
116
+ timeout=300,
117
+ ).json()
118
+ ```
119
+
120
+ `url` must be a property URL on `booking.com` / `www.booking.com`. The remaining stay/guest parameters share the same rules as `booking-search` (including `childrenAgesJson` when `children > 0`).
121
+
122
+ Response top-level keys: `requestMetadata`, `overview`, `bookingDetails`, `rooms`, `facilities`, `houseRules`, `ratings`, `reviews`, `restaurants`, `breadcrumbs`, `questionsAndAnswers`.
123
+
124
+ - `overview` → `id`, `title`, `address`, `description`, `propertyType`, `photos`, `highlights`, `mostPopularFacilities`.
125
+ - `rooms[i]` → `roomId`, `name`, `bedTypes`, `beds`, `facilities`, `otherFacilities`, `variants[]` (per-rate price/availability). Variants are the actual buyable units; `rooms[i]` is the floor-plan.
126
+
127
+ ## Google Flights
128
+
129
+ ```python
130
+ resp = requests.get(
131
+ "https://api.hasdata.com/scrape/google/flights",
132
+ headers={"x-api-key": API_KEY},
133
+ params={
134
+ "departureId": "JFK",
135
+ "arrivalId": "LAX",
136
+ "outboundDate": "2026-06-15",
137
+ "returnDate": "2026-06-22", # omit for one-way
138
+ "currency": "USD",
139
+ },
140
+ timeout=300,
141
+ ).json()
142
+ ```
143
+
144
+ | Param | Notes |
145
+ |---|---|
146
+ | `departureId` / `arrivalId` | **Required.** IATA airport codes (`JFK`, `LAX`). |
147
+ | `outboundDate` | **Required.** `YYYY-MM-DD`. |
148
+ | `returnDate` | Optional — omit for one-way. |
149
+ | `currency` | ISO code. |
150
+ | `gl`, `hl` | Country / language. |
151
+ | `travelClass` | `1` economy, `2` premium economy, `3` business, `4` first. |
152
+ | `stops` | `0` any, `1` non-stop, `2` ≤1 stop, `3` ≤2 stops. |
153
+ | `adults`, `children`, `infantsInSeat`, `infantsOnLap` | Passenger counts. |
154
+
155
+ ## Patterns
156
+
157
+ ### STR yield estimate
158
+
159
+ ```python
160
+ rentals = airbnb_search(area, ci, co).get("listings", []) # Airbnb → "listings"
161
+ # pair with /scrape/zillow/listing (see real-estate.md) for purchase price
162
+ night = sum(r.get("price", 0) for r in rentals) / max(len(rentals), 1)
163
+ ```
164
+
165
+ ### Hotel-vs-rental price diff
166
+
167
+ ```python
168
+ b = booking_search(city, ci, co, adults=2, children=0, rooms=1, sort="priceLowestFirst")
169
+ a = airbnb_search(city, ci, co, adults=2)
170
+ def median(xs): xs = sorted(xs); return xs[len(xs)//2] if xs else None
171
+ median_hotel = median([r["price"]["nightly"] for r in b.get("results", []) if r.get("price")])
172
+ median_str = median([r["price"] for r in a.get("listings", []) if r.get("price")])
173
+ ```
174
+
175
+ ### Full trip cost
176
+
177
+ ```python
178
+ flight = requests.get(
179
+ "https://api.hasdata.com/scrape/google/flights",
180
+ headers={"x-api-key": API_KEY},
181
+ params={"departureId": origin, "arrivalId": dest_iata,
182
+ "outboundDate": dep, "returnDate": ret, "currency": "USD"},
183
+ timeout=300,
184
+ ).json()
185
+ cheapest_flight = min((f["price"] for f in flight.get("best_flights", [])), default=None)
186
+
187
+ stay = booking_search(city, dep, ret, adults=2, children=0, rooms=1, sort="priceLowestFirst")
188
+ cheapest_stay = stay.get("results", [{}])[0].get("price", {}).get("total")
189
+
190
+ total = (cheapest_flight or 0) + (cheapest_stay or 0)
191
+ ```
192
+
193
+ ## Gotchas
194
+
195
+ - **Airbnb requires `checkIn`** and uses **token** pagination — store `nextPageToken`, not page numbers.
196
+ - **Airbnb property endpoints take URLs**, not IDs.
197
+ - **Booking requires `children` even when zero.** Pass `children=0`. When `children > 0`, also pass `childrenAgesJson` with exactly that many ages.
198
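+ - **Booking `price[min]` / `price[max]`** are bracketed — `axios` serializes a nested object this way, but Python `requests` does not, so with `requests` pass the literal `"price[min]"` / `"price[max]"` keys in `params`.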
199
+ - **Booking `rooms[i].variants[]` is where prices live** — the parent `rooms[i]` describes the floor-plan, variants are the buyable rates with `priceBreakdown` / `cancellationPolicy` / `mealPlan`.
200
+ - **`bookingDetails` carries the resolved stay context** the response was priced for — echo it back when persisting results so future comparisons use the same dates / occupancy.
201
+ - **Google Flights uses IATA codes**, not city names. `JFK` not `New York`.
202
+ - **Round-trip vs one-way** is determined by `returnDate` presence — pass it for round-trip, omit for one-way.