npm - opencode-skills-collection - Versions diffs - 3.0.37 → 3.0.38 - Mend

opencode-skills-collection 3.0.37 → 3.0.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (73) hide show

package/bundled-skills/hasdata/references/code-recipes.md ADDED Viewed

@@ -0,0 +1,150 @@
+# Code recipes — wiring HasData into your code
+## Ground rules
+- **Base URL:** `https://api.hasdata.com`. Header `x-api-key` on every request.
+- **Methods:** Scraper APIs are `GET`; Web Scraping is `POST`; Scraper Jobs use `POST` (submit) + `GET` (status/results) + `DELETE` (stop).
+- **Key handling:** read from env (`HASDATA_API_KEY`). Never hardcode, never log.
+- **Timeouts:** **client timeout ≥ 300 s.** HasData's deadline is 300 s; a client that times out sooner sees a phantom failure while the request is still billed.
+- **Retries:** `429` and `5xx` only, with exponential backoff + jitter. Never retry any other `4xx`.
+- **Concurrency:** cap at plan limit. Free tier = 1.
+- **Success signal:** sync APIs require `body.requestMetadata.status === "ok"`. HTTP 200 alone isn't enough.
+## Status codes
+| Code | Meaning | Action |
+|---|---|---|
+| 200 + `status:"ok"` | OK | Use body |
+| 401 | Bad/missing key | Fix — don't retry |
+| 403 | Quota exhausted | Don't retry |
+| 429 | Concurrency cap | Backoff + retry |
+| 500 | Server error | Retry |
+## Python — minimal client
+```python
+import os, requests
+class HasData:
+    BASE = "https://api.hasdata.com"
+    def __init__(self, api_key=None, timeout=300):
+        self.s = requests.Session()
+        self.s.headers["x-api-key"] = api_key or os.environ["HASDATA_API_KEY"]
+        self.timeout = timeout
+    def get(self, path, **params):
+        r = self.s.get(f"{self.BASE}{path}", params=params, timeout=self.timeout)
+        r.raise_for_status()
+        body = r.json()
+        if body.get("requestMetadata", {}).get("status") != "ok":
+            raise RuntimeError(f"hasdata not-ok: {body.get('requestMetadata')}")
+        return body
+    def post(self, path, body):
+        r = self.s.post(f"{self.BASE}{path}", json=body, timeout=self.timeout)
+        r.raise_for_status()
+        return r.json()
+hd = HasData()
+serp = hd.get("/scrape/google/serp", q="coffee", num=20)["organicResults"]
+md   = hd.post("/scrape/web", {"url": "https://example.com", "outputFormat": ["markdown"]})["markdown"]
+```
+## Python — retry + bounded concurrency
+```python
+import time, random
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from requests import HTTPError
+def with_retry(fn, attempts=5, base=1.0, cap=60.0):
+    for i in range(attempts):
+        try:
+            return fn()
+        except HTTPError as e:
+            code = e.response.status_code
+            if code == 429 or 500 <= code < 600:
+                time.sleep(min(cap, base * 2 ** i) + random.random())
+                continue
+            raise
+    raise RuntimeError("retry exhausted")
+def scrape_many(urls, workers=5):
+    out = {}
+    with ThreadPoolExecutor(max_workers=workers) as ex:
+        futs = {ex.submit(lambda u=u: hd.post("/scrape/web", {"url": u, "outputFormat": ["markdown"]})): u
+                for u in urls}
+        for f in as_completed(futs):
+            try:
+                out[futs[f]] = f.result().get("markdown")
+            except Exception as e:
+                out[futs[f]] = e
+    return out
+```
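+Cap `workers` at your plan's concurrency — anything higher just generates `429`s. Note that `scrape_many` as written calls `hd.post` directly, so a `429` ends up as an exception in the result dict; wrap the call (e.g. `with_retry(lambda: hd.post(...))`) if you want the backoff from `with_retry` applied per URL.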
+## TypeScript — minimal client
+```typescript
+const BASE = "https://api.hasdata.com";
+const KEY  = process.env.HASDATA_API_KEY!;
+async function get<T = any>(path: string, params: Record<string, string | number> = {}): Promise<T> {
+  const qs = new URLSearchParams(Object.entries(params).map(([k, v]) => [k, String(v)]));
+  const r = await fetch(`${BASE}${path}?${qs}`, {
+    headers: { "x-api-key": KEY },
+    signal:  AbortSignal.timeout(300_000),
+  });
+  if (!r.ok) throw new Error(`HasData ${r.status} ${await r.text()}`);
+  const body = await r.json() as any;
+  if (body?.requestMetadata?.status && body.requestMetadata.status !== "ok") {
+    throw new Error(`HasData not-ok: ${JSON.stringify(body.requestMetadata)}`);
+  }
+  return body as T;
+}
+async function post<T = any>(path: string, body: unknown): Promise<T> {
+  const r = await fetch(`${BASE}${path}`, {
+    method:  "POST",
+    headers: { "x-api-key": KEY, "Content-Type": "application/json" },
+    body:    JSON.stringify(body),
+    signal:  AbortSignal.timeout(300_000),
+  });
+  if (!r.ok) throw new Error(`HasData ${r.status} ${await r.text()}`);
+  return r.json() as Promise<T>;
+}
+// Bounded concurrency, no deps
+async function pool<T, R>(items: T[], n: number, fn: (x: T) => Promise<R>) {
+  const out: R[] = []; let i = 0;
+  await Promise.all(Array.from({ length: n }, async () => {
+    while (i < items.length) { const k = i++; out[k] = await fn(items[k]); }
+  }));
+  return out;
+}
+```
+## Pagination cheat sheet
+| Endpoint family | Pagination |
+|---|---|
+| Google SERP / Light SERP / Bing | `start` + `num` (max 100) |
+| Google Maps Search | `start` (steps of 20) |
+| Yelp Search | `start` (steps of 10) |
+| Google Maps Reviews / Glassdoor / Airbnb | `nextPageToken` |
+| Indeed / YellowPages / Amazon Search | `start` or `page` |
+| Shopify Products | `page` (with `limit` ≤ 250) |
+| Scraper-Job results | `page` + `limit` (max 100) until `meta.currentPage >= meta.lastPage` |
+## Pre-ship checklist
+- [ ] Key from env, never logged.
+- [ ] All HTTP timeouts ≥ 300 s.
+- [ ] `requestMetadata.status === "ok"` checked on every sync response.
+- [ ] Backoff on 429 + 5xx; never on any other 4xx.
+- [ ] Concurrency capped at plan limit.
+- [ ] Job `id` (from submit response) persisted to durable storage immediately.
+- [ ] Webhooks paired with polling fallback.
+- [ ] Result files downloaded immediately on `scraper.job.finished`.

package/bundled-skills/hasdata/references/ecommerce.md ADDED Viewed

@@ -0,0 +1,116 @@
+# E-commerce APIs — Amazon & Shopify
+| Endpoint | Returns |
+|---|---|
+| `/scrape/amazon/product` | Single product (price, ratings, variants, other sellers, A+) |
+| `/scrape/amazon/search` | Search results (sponsored + organic) |
+| `/scrape/amazon/seller` | Seller profile |
+| `/scrape/amazon/seller-products` | Seller catalog |
+| `/scrape/shopify/products` | Products from any Shopify store |
+| `/scrape/shopify/collections` | Collections from any Shopify store |
+All synchronous `GET`.
+## Amazon Product
+```python
+import requests
+resp = requests.get(
+    "https://api.hasdata.com/scrape/amazon/product",
+    headers={"x-api-key": API_KEY},
+    params={"asin": "B0DHJ7SBDR", "domain": "www.amazon.com", "otherSellers": "true"},
+    timeout=300,
+)
+```
+| Param | Notes |
+|---|---|
+| `asin` | **Required.** |
+| `domain` | `www.amazon.com` (default), `.co.uk`, `.de`, `.co.jp`, … |
+| `language` | Locale per domain. |
+| `deliveryZip` | Affects shipping/availability fields. |
+| `shippingLocation` | 2-letter country code. |
+| `otherSellers` | `true` (default) to include other-seller block. |
+Response: top-level `requestMetadata` + `product`. The `product` object's keys (verified live): `asin`, `url`, `title`, `brand`, `isAvailable`, `primaryFeatures`, `features`, `featureBullets`, `description`, `badges`, `breadcrumbs`, `whatIsInTheBox`, `variants`, `totalImages`, `primaryImage`, `images`, `descriptionImages`, `totalVideos`, `primaryVideo`, `videos`, `specification`, `reviewsInfo` (rating + count + sample reviews live here, not at the root). Pricing fields are surfaced via `variants` and `specification`.
+## Amazon Search
+```python
+params = {"q": "mechanical keyboard", "domain": "www.amazon.com", "page": 1}
+```
+Params: `q` (required), `domain`, `language`, `page`, `deliveryZip`, `shippingLocation`, `sortBy`.
+## Amazon Seller / Seller Products
+```python
+profile = requests.get(
+    "https://api.hasdata.com/scrape/amazon/seller",
+    headers={"x-api-key": API_KEY},
+    params={"sellerId": "A1MNOPQR", "domain": "www.amazon.com"},
+    timeout=300,
+).json()
+catalog = requests.get(
+    "https://api.hasdata.com/scrape/amazon/seller-products",
+    headers={"x-api-key": API_KEY},
+    params={"sellerId": "A1MNOPQR", "page": 1},
+    timeout=300,
+).json()
+```
+Use cases: counterfeit detection, MAP enforcement, competitor catalog mirroring.
+## Shopify Products
+Works on **any** Shopify storefront with no authentication.
+```python
+def shopify_all(store_url):
+    page, out = 1, []
+    while True:
+        batch = requests.get(
+            "https://api.hasdata.com/scrape/shopify/products",
+            headers={"x-api-key": API_KEY},
+            params={"url": store_url, "page": page, "limit": 250},
+            timeout=300,
+        ).json().get("products", [])
+        if not batch:
+            return out
+        out.extend(batch)
+        page += 1
+```
+| Param | Notes |
+|---|---|
+| `url` | **Required.** Storefront URL. |
+| `limit` | 1–250, default `1`. **Bump to 250** for catalog work. |
+| `page` | 1-indexed. |
+| `collection` | Collection handle filter. |
+`/scrape/shopify/collections` has the same shape and returns the collection list.
+## Patterns
+### Cross-merchant price comparison
+```python
+a = requests.get("https://api.hasdata.com/scrape/amazon/search",
+                 headers={"x-api-key": API_KEY},
+                 params={"q": query}, timeout=300).json()
+g = requests.get("https://api.hasdata.com/scrape/google/shopping",
+                 headers={"x-api-key": API_KEY},
+                 params={"q": query, "gl": "us"}, timeout=300).json()
+```
+### Reviews & bestsellers go through Scraper Jobs
+The Product API only includes a sample of reviews. For all reviews use the `amazon-product-reviews` Scraper Job. For bestseller ranks use `amazon-bestsellers` — there's no synchronous API. See `scraper-jobs.md`.
+## Gotchas
+- **Same ASIN ≠ same product across `domain`s.** `.com` vs `.co.uk` can differ.
+- **`deliveryZip` changes availability.** Pass it when stock matters; omit for spec-only scrapes.
+- **Shopify `limit` defaults to 1** — always set 250 for catalog crawls.

package/bundled-skills/hasdata/references/jobs.md ADDED Viewed

@@ -0,0 +1,111 @@
+# Jobs APIs — Indeed & Glassdoor
+| Endpoint | Returns |
+|---|---|
+| `/scrape/indeed/listing` | Indeed search results |
+| `/scrape/indeed/job` | Single Indeed job detail |
+| `/scrape/glassdoor/listing` | Glassdoor search results |
+| `/scrape/glassdoor/job` | Single Glassdoor job (incl. salary band, company snippet) |
+All synchronous `GET`.
+## Indeed Listing
+```python
+import requests
+resp = requests.get(
+    "https://api.hasdata.com/scrape/indeed/listing",
+    headers={"x-api-key": API_KEY},
+    params={
+        "keyword":  "software engineer",
+        "location": "New York, NY",
+        "sort":     "date",
+        "domain":   "www.indeed.com",
+        "start":    0,
+    },
+    timeout=300,
+)
+```
+| Param | Notes |
+|---|---|
+| `keyword` | **Required.** |
+| `location` | **Required.** |
+| `sort` | `date`, `relevance` (default). |
+| `domain` | Country site — `www.indeed.com`, `uk.indeed.com`, `de.indeed.com`. |
+| `start` | Offset, **steps of 10**. |
+Response: `jobs` array with `title`, `company`, `location`, `salary`, `description`, `postedAt`, `link`, `jobKey`. Salary is a free-form string — parse with regex.
+## Indeed Job
+Pass `jobKey` from listing → returns full description, requirements, benefits, company URL.
+## Glassdoor Listing & Job
+```python
+params = {"keyword": "software engineer", "location": "New York, NY", "sort": "recent"}
+# pagination: pass back nextPageToken
+```
+| Param | Notes |
+|---|---|
+| `keyword`, `location` | **Required.** |
+| `sort` | `recent` (default), `relevant`. |
+| `domain` | Country site. |
+| `nextPageToken` | Cursor pagination. |
+## Patterns
+### Salary band
+```python
+import re, statistics
+def salary_band(role, location):
+    page = requests.get(
+        "https://api.hasdata.com/scrape/indeed/listing",
+        headers={"x-api-key": API_KEY},
+        params={"keyword": role, "location": location}, timeout=300,
+    ).json()
+    nums = [int(m.replace(",", ""))
+            for j in page.get("jobs", [])
+            for m in re.findall(r"\$([\d,]+)", j.get("salary") or "")]
+    if not nums: return None
+    return {"n": len(nums), "median": statistics.median(nums)}
+```
+### Hiring velocity by company
+```python
+from collections import Counter
+page = indeed_listing(role, loc, sort="date")
+Counter(j.get("company") for j in page.get("jobs", []))
+```
+Run weekly; sustained increases often precede earnings/PR signals.
+### Pagination differs
+```python
+# Indeed: numeric start
+for p in range(10):
+    page = indeed_listing(kw, loc, start=p * 10)
+# Glassdoor: cursor token
+out, token = [], None
+while True:
+    page = glassdoor_listing(kw, loc, next_token=token)
+    out.extend(page.get("jobs", []))
+    token = page.get("nextPageToken")
+    if not token: break
+```
+## Gotchas
+- **Salary is a free-form string.** Always regex-parse.
+- **Indeed = numeric `start` (steps of 10), Glassdoor = `nextPageToken`.** Don't mix.
+- **`domain` matters for non-US.** `uk.indeed.com`, `ca.indeed.com`, etc.
+- **Prefer the API + pagination for bulk.** Reach for the matching Scraper Job only when you want webhook-driven fan-out across many keyword × location pairs without managing the polling loop yourself.

package/bundled-skills/hasdata/references/local-business.md ADDED Viewed

@@ -0,0 +1,145 @@
+# Local Business APIs — Google Maps, Yelp, YellowPages
+| Endpoint | Returns |
+|---|---|
+| `/scrape/google-maps/search` | Search results in a viewport |
+| `/scrape/google-maps/place` | Single place details |
+| `/scrape/google-maps/reviews` | Reviews for a place, paginated |
+| `/scrape/google-maps/photos` | Photo gallery |
+| `/scrape/google-maps/posts` | Owner-published posts (offers, events, announcements) |
+| `/scrape/google-maps/contributor-reviews` | All reviews by a Google reviewer |
+| `/scrape/yelp/search` | Yelp search |
+| `/scrape/yelp/place` | Yelp business detail |
+| `/scrape/yellowpages/search` | YellowPages search |
+| `/scrape/yellowpages/place` | YellowPages business detail |
+All synchronous `GET`.
+## Google Maps Search
+```python
+import requests
+resp = requests.get(
+    "https://api.hasdata.com/scrape/google-maps/search",
+    headers={"x-api-key": API_KEY},
+    params={"q": "Pizza", "ll": "@40.7455,-74.0083,14z"},
+    timeout=300,
+)
+```
+| Param | Notes |
+|---|---|
+| `q` | **Required.** Free-form query. |
+| `ll` | `@LAT,LNG,ZOOMz` viewport — **lat/lng + zoom, not a city name**. Required for tight pagination. |
+| `domain`, `gl`, `hl` | Standard. |
+| `start` | Pagination offset, **steps of 20**. |
+Response: `localResults` — each entry has `position`, `title`, `placeId`, `dataId`, `kgmid`, `thumbnail`, `phone`, `address`, `website`, `description`, `workingHours` (object with `timezone` + `days[]`), `openState`, `rating`, `reviews`, `type` + `types[]` (categories), `price`, `priceDescription`, `gpsCoordinates`, `serviceOptions[]`, `extensions` (offerings, accessibility, payments, …), `menu`. Feed `placeId`/`dataId` into `/place` and `/reviews`.
+## Google Maps Place
+```python
+params = {"placeId": "ChIJFU2bda4SM4cRKSCRyb6pOB8"}
+```
+Returns full place detail — coordinates, hours by day, phone, website, popular times, attributes (delivery, dine-in), photo summary.
+## Google Maps Reviews
+```python
+def reviews(place_id=None, data_id=None, sort_by="newestFirst", token=None):
+    params = {}
+    if place_id: params["placeId"] = place_id
+    if data_id:  params["dataId"]  = data_id
+    if sort_by:  params["sortBy"]  = sort_by
+    if token:    params["nextPageToken"] = token
+    return requests.get(
+        "https://api.hasdata.com/scrape/google-maps/reviews",
+        headers={"x-api-key": API_KEY},
+        params=params, timeout=300,
+    ).json()
+```
+| Param | Notes |
+|---|---|
+| `placeId` / `dataId` | Pass one. `dataId` is the hex pair from Maps results. |
+| `sortBy` | `newestFirst`, `highestRating`, `lowestRating`, `mostRelevant`. |
+| `topicId` | Filter by review topic. |
+| `nextPageToken` | Cursor pagination. |
+## Google Maps Posts
+```python
+resp = requests.get(
+    "https://api.hasdata.com/scrape/google-maps/posts",
+    headers={"x-api-key": API_KEY},
+    params={"placeId": "ChIJ..."},      # or dataId="0x...:0x..."
+    timeout=300,
+)
+for p in resp.json().get("posts", []):
+    print(p["postedAt"], p["description"][:120], p.get("cta", {}).get("url"))
+```
+Either `placeId` **or** `dataId` is required. Optional: `hl` (UI language), `nextPageToken` (cursor pagination). 10 credits/call.
+Per-post fields (verified live): `postId`, `locationId`, `title`, `description`, `image`, `cta` (`label` + `url`), `createdAt` (ISO), `postedAt` (human-readable), `shareUrl`, `postUrl`. Response top-level: `posts`, `pagination`, `source`, `requestMetadata`.
+Posts surface current offers, holiday hours, events, and product launches the business is actively promoting. Cheaper signal than the homepage scrape, and `cta.url` is the canonical landing page.
+## Yelp & YellowPages
+```python
+# Yelp
+params = {"keyword": "McDonald's", "location": "New York, NY", "start": 0}  # steps of 10
+# YellowPages
+params = {"keyword": "Plumbers", "location": "New York, NY", "page": 1}
+```
+YellowPages is US-only — EU/APAC searches return nothing useful.
+## Patterns
+### Lead-gen with emails (Maps + Web Scraping)
+Maps results have website + phone but **not email**. Combine with the Web Scraping API's `extractEmails` only for public business contact pages, legitimate outreach, and workflows that honor opt-out, privacy-law, rate, and terms-of-service constraints:
+```python
+leads = []
+for biz in maps_results.get("localResults", []):
+    site = biz.get("website")
+    if not site: continue
+    page = requests.post(
+        "https://api.hasdata.com/scrape/web",
+        headers={"x-api-key": API_KEY},
+        json={"url": site, "extractEmails": True},
+        timeout=300,
+    ).json()
+    leads.append({
+        "name":    biz["title"],
+        "phone":   biz.get("phone"),
+        "website": site,
+        "emails":  page.get("extractedEmails") or [],
+    })
+```
+For higher volume, switch to the `contacts` Scraper Job (see `scraper-jobs.md`) only when you have a legitimate purpose, a compliant outreach process, and rate/opt-out controls.
+### New-business discovery
+Filter Maps by review count `< 5` — usually means recently opened.
+```python
+new = [b for b in localResults if (b.get("reviews") or 0) < 5]
+```
+### Multi-location chain mapping
+Search the brand name; every `localResults` entry is a branch.
+## Gotchas
+- **`ll` is a viewport, not a city.** `@lat,lng,zoom`. Pasting "Brooklyn" fails.
+- **Pagination steps differ.** Maps `start` = +20, Yelp `start` = +10, Maps Reviews uses `nextPageToken`.
+- **`placeId` vs `dataId`** — Place prefers `placeId`; Reviews accepts either.
+- **YellowPages is US-only.**

package/bundled-skills/hasdata/references/real-estate.md ADDED Viewed

@@ -0,0 +1,84 @@
+# Real Estate APIs — Zillow, Redfin
+| Endpoint | Returns |
+|---|---|
+| `/scrape/zillow/listing` | Search results by area + filters |
+| `/scrape/zillow/property` | Single home (history, agent, schools, taxes) |
+| `/scrape/redfin/listing` | Redfin search results |
+| `/scrape/redfin/property` | Single Redfin home |
+All synchronous `GET`. 5 credits each.
+For short-term rentals (Airbnb), hotels (Booking), and flights, see `travel.md`.
+## Zillow Listing
+Filter params use **bracketed** keys (`price[min]`, `beds[max]`).
+```python
+import requests
+def zillow_search(keyword, listing_type="forSale", **filters):
+    r = requests.get(
+        "https://api.hasdata.com/scrape/zillow/listing",
+        headers={"x-api-key": API_KEY},
+        params={"keyword": keyword, "type": listing_type, **filters},
+        timeout=300,
+    )
+    return r.json()
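+zillow_search("Brooklyn, NY", **{"price[min]": 800000, "price[max]": 2000000})
+zillow_search("33321", "sold", daysOnZillow="6m")  # recent comps
+```
+`requests` does **not** expand nested dicts: a dict value is iterated and its keys are sent as repeated values (`price=min&price=max`), so pass the bracketed keys literally as shown above. `axios` (1.x) serializes nested objects as `price[min]=…&price[max]=…` automatically. With raw `URLSearchParams`, build the bracketed keys yourself.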
+| Param | Notes |
+|---|---|
+| `keyword` | **Required.** Area string ("New York, NY", zip, neighborhood). |
+| `type` | **Required.** `forSale`, `forRent`, `sold`. |
+| `price[min/max]`, `beds[min/max]`, `baths[min/max]`, `sqft[min/max]` | Range filters. |
+| `daysOnZillow` | `24h`, `7d`, `14d`, `30d`, `90d`, `6m`, `12m`. |
+| `page` | Pagination. |
+Response: `requestMetadata`, `searchInformation`, **`properties`** (the listings array — not `listings`), `pagination`.
+## Zillow Property
+```python
+requests.get(
+    "https://api.hasdata.com/scrape/zillow/property",
+    headers={"x-api-key": API_KEY},
+    params={"url": url, "extractAgentEmails": "true"},
+    timeout=300,
+)
+```
+Takes a full Zillow URL (not zpid). Returns address, lot/sqft/beds/baths, price + tax history, schools, agent block, photos. Agent emails are best-effort.
+## Redfin
+```python
+# Listing
+params = {"keyword": "33321", "type": "forSale", "page": 1}
+# Property
+params = {"url": "https://www.redfin.com/FL/Tamarac/9...html"}
+```
+Same bracketed `price[min]`, `beds[min]`, etc. as Zillow. Zip codes work best for `keyword`.
+## Patterns
+### Sold comps for ROI
+```python
+sold = zillow_search(zip_code, "sold", daysOnZillow="6m").get("properties", [])
+ppsf = [(l["price"] / l["livingArea"]) for l in sold if l.get("livingArea")]
+```
+## Gotchas
+- **Bracketed query keys** — `axios` builds them from nested objects; with `requests` or raw `URLSearchParams`, spell the keys out yourself (`price[min]`, `beds[max]`).
+- **`type=sold` + `daysOnZillow` = comps recipe.** Without `daysOnZillow`, history is unbounded.
+- **Property endpoints take URLs**, not IDs.
+- **Agent emails are best-effort.**