orangeslice 2.1.0 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +10 -0
- package/dist/cli.js +13 -0
- package/dist/crunchbase.d.ts +10 -0
- package/dist/crunchbase.js +13 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.js +7 -1
- package/docs/integrations/gmail/index.md +1 -1
- package/docs/integrations/gmail/sendEmail.md +13 -13
- package/docs/prospecting/index.md +24 -16
- package/docs/services/company/linkedin/search.md +32 -0
- package/docs/services/crunchbase/search.md +337 -0
- package/docs/services/index.md +1 -1
- package/docs/services/person/linkedin/search.md +32 -0
- package/package.json +1 -1
- package/docs/providers/predictleads/openapi.json +0 -13209
- package/docs/services/healthcare/npi.md +0 -190
package/README.md
CHANGED
|
@@ -64,11 +64,21 @@ const [companies, searchPage, ai] = await Promise.all([
|
|
|
64
64
|
}
|
|
65
65
|
})
|
|
66
66
|
]);
|
|
67
|
+
|
|
68
|
+
const startups = await services.crunchbase.search({
|
|
69
|
+
sql: `
|
|
70
|
+
SELECT name, website_url, linkedin_url
|
|
71
|
+
FROM public.crunchbase_scraper_lean
|
|
72
|
+
WHERE operating_status = 'active'
|
|
73
|
+
LIMIT 10
|
|
74
|
+
`
|
|
75
|
+
});
|
|
67
76
|
```
|
|
68
77
|
|
|
69
78
|
## Service map
|
|
70
79
|
|
|
71
80
|
- `services.company.linkedin.search/enrich`
|
|
81
|
+
- `services.crunchbase.search` (returns rows array directly)
|
|
72
82
|
- `services.company.getEmployeesFromLinkedin` (database-only B2B path)
|
|
73
83
|
- `services.person.linkedin.search/enrich`
|
|
74
84
|
- `services.web.search/batchSearch`
|
package/dist/cli.js
CHANGED
|
@@ -100,6 +100,19 @@ Use these docs as the source of truth. If there is any conflict between your pri
|
|
|
100
100
|
- Be concise, factual, and deterministic.
|
|
101
101
|
- Ask a clarifying question only when a missing detail blocks progress.
|
|
102
102
|
|
|
103
|
+
## Package Setup (Do Not Guess)
|
|
104
|
+
- Import from the package name, not a local file path:
|
|
105
|
+
- \`import { services } from "orangeslice"\`
|
|
106
|
+
- \`import { configure, services } from "orangeslice"\` when setting API key programmatically
|
|
107
|
+
- Do NOT use \`import { services } from "./orangeslice"\` unless the user explicitly has a local wrapper file at that path.
|
|
108
|
+
- \`npx orangeslice\` is a setup/bootstrap command (docs sync, package install, auth). It does NOT execute user app scripts.
|
|
109
|
+
|
|
110
|
+
## Runtime Requirements
|
|
111
|
+
- If writing standalone scripts that use top-level \`await\`, use ESM:
|
|
112
|
+
- Set \`"type": "module"\` in \`package.json\`, or
|
|
113
|
+
- Use \`.mjs\` files.
|
|
114
|
+
- If the project is CommonJS and cannot switch to ESM, avoid top-level \`await\` and wrap async code in an async function.
|
|
115
|
+
|
|
103
116
|
## Mandatory Read Order (Before writing code)
|
|
104
117
|
1. \`./services/index.md\` - service map and capabilities
|
|
105
118
|
2. Relevant docs under \`./services/**\` for every service you plan to call
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
export interface CrunchbaseSearchParams {
|
|
2
|
+
sql: string;
|
|
3
|
+
userId?: string;
|
|
4
|
+
}
|
|
5
|
+
/**
|
|
6
|
+
* Search the Crunchbase lean table using SQL.
|
|
7
|
+
*
|
|
8
|
+
* Returns rows directly (no envelope).
|
|
9
|
+
*/
|
|
10
|
+
export declare function crunchbaseSearch<T = Record<string, unknown>>(params: CrunchbaseSearchParams): Promise<T[]>;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.crunchbaseSearch = crunchbaseSearch;
|
|
4
|
+
const api_1 = require("./api");
|
|
5
|
+
/**
|
|
6
|
+
* Search the Crunchbase lean table using SQL.
|
|
7
|
+
*
|
|
8
|
+
* Returns rows directly (no envelope).
|
|
9
|
+
*/
|
|
10
|
+
async function crunchbaseSearch(params) {
|
|
11
|
+
const data = await (0, api_1.post)("/execute/crunchbase-sql", { sql: params.sql });
|
|
12
|
+
return data.rows ?? [];
|
|
13
|
+
}
|
package/dist/index.d.ts
CHANGED
|
@@ -2,6 +2,8 @@ export { configure } from "./api";
|
|
|
2
2
|
export type { OrangesliceConfig } from "./api";
|
|
3
3
|
export { linkedinSearch } from "./b2b";
|
|
4
4
|
export type { LinkedInSearchParams, LinkedInSearchResponse } from "./b2b";
|
|
5
|
+
export { crunchbaseSearch } from "./crunchbase";
|
|
6
|
+
export type { CrunchbaseSearchParams } from "./crunchbase";
|
|
5
7
|
export { webSearch, webBatchSearch } from "./serp";
|
|
6
8
|
export type { WebSearchQuery, WebSearchResult, WebSearchResponse, BatchWebSearchParams } from "./serp";
|
|
7
9
|
export { generateObject } from "./generateObject";
|
|
@@ -21,12 +23,16 @@ export type { PersonLinkedinFindUrlParams, CompanyLinkedinFindUrlParams, PersonC
|
|
|
21
23
|
import { runApifyActor } from "./apify";
|
|
22
24
|
import { linkedinSearch } from "./b2b";
|
|
23
25
|
import { browserExecute } from "./browser";
|
|
26
|
+
import { crunchbaseSearch } from "./crunchbase";
|
|
24
27
|
import { personLinkedinEnrich, personLinkedinFindUrl, personContactGet, companyLinkedinEnrich, companyLinkedinFindUrl, companyGetEmployeesFromLinkedin, geoParseAddress, builtWithLookupDomain, builtWithRelationships, builtWithSearchByTech } from "./expansion";
|
|
25
28
|
import { scrapeWebsite } from "./firecrawl";
|
|
26
29
|
import { generateObject } from "./generateObject";
|
|
27
30
|
import { googleMapsScrape } from "./googleMaps";
|
|
28
31
|
import { webBatchSearch, webSearch } from "./serp";
|
|
29
32
|
export declare const services: {
|
|
33
|
+
crunchbase: {
|
|
34
|
+
search: typeof crunchbaseSearch;
|
|
35
|
+
};
|
|
30
36
|
company: {
|
|
31
37
|
linkedin: {
|
|
32
38
|
findUrl: typeof companyLinkedinFindUrl;
|
package/dist/index.js
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.services = exports.builtWithSearchByTech = exports.builtWithRelationships = exports.builtWithLookupDomain = exports.geoParseAddress = exports.companyGetEmployeesFromLinkedin = exports.companyLinkedinFindUrl = exports.companyLinkedinEnrich = exports.personContactGet = exports.personLinkedinFindUrl = exports.personLinkedinEnrich = exports.PREDICT_LEADS_OPERATION_IDS = exports.predictLeads = exports.executePredictLeads = exports.googleMapsScrape = exports.runApifyActor = exports.browserExecute = exports.scrapeWebsite = exports.generateObject = exports.webBatchSearch = exports.webSearch = exports.linkedinSearch = exports.configure = void 0;
|
|
3
|
+
exports.services = exports.builtWithSearchByTech = exports.builtWithRelationships = exports.builtWithLookupDomain = exports.geoParseAddress = exports.companyGetEmployeesFromLinkedin = exports.companyLinkedinFindUrl = exports.companyLinkedinEnrich = exports.personContactGet = exports.personLinkedinFindUrl = exports.personLinkedinEnrich = exports.PREDICT_LEADS_OPERATION_IDS = exports.predictLeads = exports.executePredictLeads = exports.googleMapsScrape = exports.runApifyActor = exports.browserExecute = exports.scrapeWebsite = exports.generateObject = exports.webBatchSearch = exports.webSearch = exports.crunchbaseSearch = exports.linkedinSearch = exports.configure = void 0;
|
|
4
4
|
var api_1 = require("./api");
|
|
5
5
|
Object.defineProperty(exports, "configure", { enumerable: true, get: function () { return api_1.configure; } });
|
|
6
6
|
var b2b_1 = require("./b2b");
|
|
7
7
|
Object.defineProperty(exports, "linkedinSearch", { enumerable: true, get: function () { return b2b_1.linkedinSearch; } });
|
|
8
|
+
var crunchbase_1 = require("./crunchbase");
|
|
9
|
+
Object.defineProperty(exports, "crunchbaseSearch", { enumerable: true, get: function () { return crunchbase_1.crunchbaseSearch; } });
|
|
8
10
|
var serp_1 = require("./serp");
|
|
9
11
|
Object.defineProperty(exports, "webSearch", { enumerable: true, get: function () { return serp_1.webSearch; } });
|
|
10
12
|
Object.defineProperty(exports, "webBatchSearch", { enumerable: true, get: function () { return serp_1.webBatchSearch; } });
|
|
@@ -36,6 +38,7 @@ Object.defineProperty(exports, "builtWithSearchByTech", { enumerable: true, get:
|
|
|
36
38
|
const apify_2 = require("./apify");
|
|
37
39
|
const b2b_2 = require("./b2b");
|
|
38
40
|
const browser_2 = require("./browser");
|
|
41
|
+
const crunchbase_2 = require("./crunchbase");
|
|
39
42
|
const expansion_2 = require("./expansion");
|
|
40
43
|
const firecrawl_2 = require("./firecrawl");
|
|
41
44
|
const generateObject_2 = require("./generateObject");
|
|
@@ -43,6 +46,9 @@ const googleMaps_2 = require("./googleMaps");
|
|
|
43
46
|
const predictLeads_2 = require("./predictLeads");
|
|
44
47
|
const serp_2 = require("./serp");
|
|
45
48
|
exports.services = {
|
|
49
|
+
crunchbase: {
|
|
50
|
+
search: crunchbase_2.crunchbaseSearch
|
|
51
|
+
},
|
|
46
52
|
company: {
|
|
47
53
|
linkedin: {
|
|
48
54
|
findUrl: expansion_2.companyLinkedinFindUrl,
|
|
@@ -9,4 +9,4 @@ Typed functions for Gmail actions powered by Orange Slice Google integrations.
|
|
|
9
9
|
## Email
|
|
10
10
|
|
|
11
11
|
- `integrations.gmail.sendEmail(input)` - Send an email through the connected Gmail account
|
|
12
|
-
- Heavy rate limit: `sendEmail` is capped at **
|
|
12
|
+
- Heavy rate limit: `sendEmail` is capped at **40 calls/day** per connected Gmail account
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
Send an email from the connected Gmail account.
|
|
4
4
|
|
|
5
|
-
> Rate limit note for AI: `integrations.gmail.sendEmail(...)` is heavily rate-limited to **
|
|
5
|
+
> Rate limit note for AI: `integrations.gmail.sendEmail(...)` is heavily rate-limited to **40 calls/day** per connected Gmail account. Use sparingly and batch/aggregate where possible.
|
|
6
6
|
|
|
7
7
|
```typescript
|
|
8
8
|
// Basic email
|
|
@@ -25,18 +25,18 @@ await integrations.gmail.sendEmail({
|
|
|
25
25
|
|
|
26
26
|
## Input
|
|
27
27
|
|
|
28
|
-
| Parameter | Type
|
|
29
|
-
| ------------------ |
|
|
30
|
-
| `recipient_email` | `string`
|
|
31
|
-
| `extra_recipients` | `string[]`
|
|
32
|
-
| `cc` | `string[]`
|
|
33
|
-
| `bcc` | `string[]`
|
|
34
|
-
| `subject` | `string`
|
|
35
|
-
| `body` | `string`
|
|
36
|
-
| `is_html` | `boolean`
|
|
37
|
-
| `from_email` | `string`
|
|
38
|
-
| `attachment` | `object`
|
|
39
|
-
| `user_id` | `string`
|
|
28
|
+
| Parameter | Type | Required | Description |
|
|
29
|
+
| ------------------ | ---------- | -------- | --------------------------------- |
|
|
30
|
+
| `recipient_email` | `string` | No\* | Primary `To` recipient |
|
|
31
|
+
| `extra_recipients` | `string[]` | No | Additional `To` recipients |
|
|
32
|
+
| `cc` | `string[]` | No | CC recipients |
|
|
33
|
+
| `bcc` | `string[]` | No | BCC recipients |
|
|
34
|
+
| `subject` | `string` | No\* | Email subject |
|
|
35
|
+
| `body` | `string` | No\* | Email body (plain text or HTML) |
|
|
36
|
+
| `is_html` | `boolean` | No | Set to `true` when body is HTML |
|
|
37
|
+
| `from_email` | `string` | No | Optional verified send-as alias |
|
|
38
|
+
| `attachment` | `object` | No | Optional attachment payload |
|
|
39
|
+
| `user_id` | `string` | No | Gmail user id (`"me"` by default) |
|
|
40
40
|
|
|
41
41
|
\*Gmail requires at least one recipient (`recipient_email`, `cc`, or `bcc`) and at least one of `subject` or `body`.
|
|
42
42
|
|
|
@@ -16,6 +16,7 @@ description: Strategies for searching or finding people and companies. This is a
|
|
|
16
16
|
Run queries with built-in filters when the criteria is searchable:
|
|
17
17
|
|
|
18
18
|
- **Web search (`services.web.search`)** — **Default for LinkedIn**. Use for keywords, niche queries, fuzzy matching, anything descriptive.
|
|
19
|
+
- **Crunchbase (`services.crunchbase.search`)** — **Default for funding data**. Use for funding-stage, round type, amount, date windows, and investor-backed company discovery.
|
|
19
20
|
- **LinkedIn B2B DB** — **Indexed lookups ONLY:** company by domain/slug/ID, employees at a known company (by company_id), basic funding (2-table join). Everything else = web search. See [QUICK_REF](./linkedin_data/QUICK_REF.md).
|
|
20
21
|
- **Google Maps** — industry, location, ratings
|
|
21
22
|
- **LinkedIn job search** — job filters, titles
|
|
@@ -83,11 +84,11 @@ When using qualification columns, think Circle & Star:
|
|
|
83
84
|
| Source | Use When | Limitations |
|
|
84
85
|
| ------------------------ | ----------------------------------------------------------- | -------------------------------------------------------- |
|
|
85
86
|
| **Web Search (Default)** | **Everything else** — keywords, niche, fuzzy, specific | Requires verification columns for false positives. |
|
|
87
|
+
| **Crunchbase (Funding Default)** | Funding-focused prospecting: stage, round type, amount, recency, investors | Best for funding intelligence; use other sources for non-funding discovery criteria. |
|
|
86
88
|
| **PredictLeads** | Company intelligence, buying signals, and structured company events at scale | Coverage varies by company/market; use web search for very niche long-tail discovery. |
|
|
87
89
|
| **Niche Directory Scrape** | Well-defined categories with existing lists (see below) | Requires finding the right directory first. |
|
|
88
90
|
| **LinkedIn B2B DB** | **Indexed lookups ONLY:** company by domain/slug/ID, employees at known company, basic 2-table funding. | **3s hard max. No keyword search, no LATERAL, no 3+ table joins.** Everything else = web search. |
|
|
89
91
|
| **Google Maps** | Local/SMB, physical locations, restaurants, retail | Limited to businesses with physical presence. |
|
|
90
|
-
| **NPI Database** | Healthcare providers | Healthcare only. Free. |
|
|
91
92
|
| **Apify Actors** | Platform-specific scraping (Instagram, TikTok, job boards) | Per-platform setup. May break with platform changes. |
|
|
92
93
|
|
|
93
94
|
### PredictLeads: When It Is Better Than Everything Else
|
|
@@ -105,6 +106,12 @@ Prefer other sources when:
|
|
|
105
106
|
- You need local storefront/SMB discovery -> use Google Maps
|
|
106
107
|
- You need fast indexed LinkedIn lookups by known IDs/domain/company -> use LinkedIn B2B DB
|
|
107
108
|
|
|
109
|
+
### Funding Prospecting Standard: Use Crunchbase First
|
|
110
|
+
|
|
111
|
+
For any request centered on funding data (for example: "Series A fintech companies", "companies that raised in the last 12 months", "recently funded startups"), use `services.crunchbase.search` as the **standard/default source**.
|
|
112
|
+
|
|
113
|
+
Use LinkedIn B2B DB funding joins only when the user explicitly needs a LinkedIn-only workflow or a narrow lookup tied to existing LinkedIn records. Otherwise, Crunchbase should be the first choice for funding-oriented discovery.
|
|
114
|
+
|
|
108
115
|
### Niche Directory Scraping — For Well-Defined Categories
|
|
109
116
|
|
|
110
117
|
When users ask for companies in a **specific, well-defined niche** (e.g., "fast food chains", "Fortune 500 companies", "Y Combinator startups"), the best approach is often to **find and scrape a curated directory or list**.
|
|
@@ -260,27 +267,28 @@ Don't overthink it — just create 2-3 views that match the columns you built. S
|
|
|
260
267
|
|
|
261
268
|
## Examples
|
|
262
269
|
|
|
263
|
-
| User Request | Approach | Why
|
|
264
|
-
| -------------------------------------------- | ---------------- |
|
|
265
|
-
| "AI CRM companies" | Web search | Keyword query → `"AI CRM" site:linkedin.com/company`
|
|
266
|
-
| "Fintech startups" | Web search | Fuzzy/descriptive → `"fintech" "startup" site:linkedin.com/company`
|
|
267
|
-
| "SDRs at Series A companies" | Web search | Specific criteria → `"SDR" "Series A" site:linkedin.com/in`
|
|
268
|
-
| "
|
|
269
|
-
| "
|
|
270
|
-
| "
|
|
271
|
-
| "
|
|
272
|
-
| "
|
|
273
|
-
| "
|
|
274
|
-
| "
|
|
275
|
-
| "
|
|
276
|
-
| "
|
|
270
|
+
| User Request | Approach | Why |
|
|
271
|
+
| -------------------------------------------- | ---------------- | --------------------------------------------------------------------------- |
|
|
272
|
+
| "AI CRM companies" | Web search | Keyword query → `"AI CRM" site:linkedin.com/company` |
|
|
273
|
+
| "Fintech startups" | Web search | Fuzzy/descriptive → `"fintech" "startup" site:linkedin.com/company` |
|
|
274
|
+
| "SDRs at Series A companies" | Web search | Specific criteria → `"SDR" "Series A" site:linkedin.com/in` |
|
|
275
|
+
| "Series A/B companies raised last year" | Crunchbase | Funding-specific discovery is best handled via `services.crunchbase.search` |
|
|
276
|
+
| "Companies using Kubernetes" | Web search | Technology match → `"Kubernetes" site:linkedin.com/company` |
|
|
277
|
+
| "VPs who worked at Google" | Web search | Fuzzy history match → `"VP" "Google" site:linkedin.com/in` |
|
|
278
|
+
| "1000 software engineers in Bay Area" | B2B DB | Simple title + location + high volume |
|
|
279
|
+
| "All healthcare companies 100-500 employees" | B2B DB | Industry + size + high volume |
|
|
280
|
+
| "Fast food chains that..." | Directory scrape | Scrape Wikipedia list → `browser.execute` |
|
|
281
|
+
| "Restaurants in Austin" | Google Maps | Local/SMB with physical presence |
|
|
282
|
+
| "Companies hiring SDRs" | LinkedIn Jobs | Job search with title filter |
|
|
283
|
+
| "Warehouses implementing WMS" | Circle + columns | Pull logistics companies → add "WMS Score" column |
|
|
284
|
+
| "Companies that recently switched CRMs" | Circle + columns | Pull SaaS companies → add "CRM Change Signals" column |
|
|
277
285
|
|
|
278
286
|
---
|
|
279
287
|
|
|
280
288
|
## Tools
|
|
281
289
|
|
|
282
290
|
- **LinkedIn:** `services.company.linkedin.search({ sql: "SELECT ... FROM linkedin_company ..." })`, `services.person.linkedin.search({ sql: "SELECT ... FROM linkedin_profile ..." })` — **Lookup tool only, 3s max, 2-table joins max. Use web search for anything else.**
|
|
283
|
-
- **
|
|
291
|
+
- **Funding:** `services.crunchbase.search({ sql: "SELECT ... FROM ... WHERE ..." })` — **Default for funding search and screening.**
|
|
284
292
|
- **Local/SMB:** `googleMaps.scrape`
|
|
285
293
|
- **Web:** `web.search` + `browser.execute`
|
|
286
294
|
- **Platforms:** `services.apify.runActor`
|
|
@@ -390,6 +390,38 @@ WHERE lc.company_size = '51-200 employees'
|
|
|
390
390
|
- **Use `lc` alias** for company tables
|
|
391
391
|
- **Default to US**: `lc.country_code = 'US'`
|
|
392
392
|
|
|
393
|
+
## Return Type
|
|
394
|
+
|
|
395
|
+
`services.company.linkedin.search()` returns an object envelope:
|
|
396
|
+
|
|
397
|
+
```typescript
|
|
398
|
+
{
|
|
399
|
+
rows: Record<string, unknown>[];
|
|
400
|
+
count: number;
|
|
401
|
+
}
|
|
402
|
+
```
|
|
403
|
+
|
|
404
|
+
- `rows`: Result rows from your SQL query, with exactly the columns you selected.
|
|
405
|
+
- `count`: Number of rows returned in `rows`.
|
|
406
|
+
|
|
407
|
+
Example:
|
|
408
|
+
|
|
409
|
+
```typescript
|
|
410
|
+
const searchResult = await services.company.linkedin.search({
|
|
411
|
+
sql: `
|
|
412
|
+
SELECT
|
|
413
|
+
lc.company_name,
|
|
414
|
+
lc.domain,
|
|
415
|
+
'https://www.linkedin.com/company/' || lc.universal_name AS lc_linkedin_url
|
|
416
|
+
FROM linkedin_company lc
|
|
417
|
+
WHERE lc.domain = 'stripe.com'
|
|
418
|
+
LIMIT 1
|
|
419
|
+
`
|
|
420
|
+
});
|
|
421
|
+
|
|
422
|
+
return searchResult.rows; // Most spreadsheet snippets should return rows
|
|
423
|
+
```
|
|
424
|
+
|
|
393
425
|
---
|
|
394
426
|
|
|
395
427
|
## Table Aliases
|
|
@@ -0,0 +1,337 @@
|
|
|
1
|
+
---
|
|
2
|
+
description: Search Crunchbase with SQL
|
|
3
|
+
---
|
|
4
|
+
|
|
5
|
+
# Crunchbase Search
|
|
6
|
+
|
|
7
|
+
Run SQL against `public.crunchbase_scraper_lean` for startup/company prospecting.
|
|
8
|
+
|
|
9
|
+
```typescript
|
|
10
|
+
const rows = await services.crunchbase.search({
|
|
11
|
+
sql: `
|
|
12
|
+
SELECT name, website_url, linkedin_url
|
|
13
|
+
FROM public.crunchbase_scraper_lean
|
|
14
|
+
WHERE operating_status = 'active'
|
|
15
|
+
LIMIT 25
|
|
16
|
+
`
|
|
17
|
+
});
|
|
18
|
+
|
|
19
|
+
// rows: Record<string, unknown>[]
|
|
20
|
+
return rows;
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
## Contract (Hard Rules)
|
|
24
|
+
|
|
25
|
+
- Query **only** `public.crunchbase_scraper_lean`.
|
|
26
|
+
- **Only one statement** is allowed.
|
|
27
|
+
- **Only SELECT** queries are allowed (`WITH ... SELECT` is fine).
|
|
28
|
+
- Always include `LIMIT` (recommended `<= 100`).
|
|
29
|
+
- This is an external service path, not `ctx.sql()`.
|
|
30
|
+
- Cost is 1 credit per returned row (the reserve estimate is derived from `LIMIT`).
|
|
31
|
+
|
|
32
|
+
## Return Type
|
|
33
|
+
|
|
34
|
+
`services.crunchbase.search()` returns rows directly:
|
|
35
|
+
|
|
36
|
+
```typescript
|
|
37
|
+
Record<string, unknown>[];
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
No `{ rows, count }` envelope.
|
|
41
|
+
|
|
42
|
+
```typescript
|
|
43
|
+
const rows = await services.crunchbase.search({ sql: "SELECT name FROM public.crunchbase_scraper_lean LIMIT 10" });
|
|
44
|
+
const count = rows.length;
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Live Schema (Verified)
|
|
48
|
+
|
|
49
|
+
Source of truth: live DB introspection of `public.crunchbase_scraper_lean`.
|
|
50
|
+
|
|
51
|
+
| Column | Type | Nullable |
|
|
52
|
+
| ---------------------------- | ------------- | -------- |
|
|
53
|
+
| `id` | `bigint` | no |
|
|
54
|
+
| `uuid` | `text` | yes |
|
|
55
|
+
| `name` | `text` | yes |
|
|
56
|
+
| `link` | `text` | yes |
|
|
57
|
+
| `type` | `text` | yes |
|
|
58
|
+
| `operating_status` | `text` | yes |
|
|
59
|
+
| `company_type` | `text` | yes |
|
|
60
|
+
| `short_description` | `text` | yes |
|
|
61
|
+
| `description` | `text` | yes |
|
|
62
|
+
| `website_url` | `text` | yes |
|
|
63
|
+
| `linkedin_url` | `text` | yes |
|
|
64
|
+
| `twitter_url` | `text` | yes |
|
|
65
|
+
| `facebook_url` | `text` | yes |
|
|
66
|
+
| `contact_email` | `text` | yes |
|
|
67
|
+
| `phone_number` | `text` | yes |
|
|
68
|
+
| `hq_postal_code` | `text` | yes |
|
|
69
|
+
| `primary_category` | `text` | yes |
|
|
70
|
+
| `categories` | `jsonb` | no |
|
|
71
|
+
| `category_groups` | `jsonb` | no |
|
|
72
|
+
| `location_identifiers` | `jsonb` | no |
|
|
73
|
+
| `location_group_identifiers` | `jsonb` | no |
|
|
74
|
+
| `num_employees_enum` | `integer` | yes |
|
|
75
|
+
| `revenue_range` | `text` | yes |
|
|
76
|
+
| `funding_stage` | `text` | yes |
|
|
77
|
+
| `funding_total_usd` | `numeric` | yes |
|
|
78
|
+
| `last_funding_total_usd` | `numeric` | yes |
|
|
79
|
+
| `last_funding_type` | `text` | yes |
|
|
80
|
+
| `last_funding_date` | `date` | yes |
|
|
81
|
+
| `num_funding_rounds` | `integer` | yes |
|
|
82
|
+
| `num_investors` | `integer` | yes |
|
|
83
|
+
| `num_lead_investors` | `integer` | yes |
|
|
84
|
+
| `rank_org_company` | `integer` | yes |
|
|
85
|
+
| `rank_org` | `integer` | yes |
|
|
86
|
+
| `rank_delta_d7` | `integer` | yes |
|
|
87
|
+
| `rank_delta_d30` | `integer` | yes |
|
|
88
|
+
| `rank_delta_d90` | `integer` | yes |
|
|
89
|
+
| `growth_score_tier` | `text` | yes |
|
|
90
|
+
| `heat_score_tier` | `text` | yes |
|
|
91
|
+
| `ipo_status` | `text` | yes |
|
|
92
|
+
| `went_public_on` | `date` | yes |
|
|
93
|
+
| `imported_at` | `timestamptz` | no |
|
|
94
|
+
|
|
95
|
+
## Enum Catalog (Verified Distinct Values)
|
|
96
|
+
|
|
97
|
+
These values were observed in live production data.
|
|
98
|
+
|
|
99
|
+
### `operating_status`
|
|
100
|
+
|
|
101
|
+
- `active`
|
|
102
|
+
- `closed`
|
|
103
|
+
|
|
104
|
+
### `company_type`
|
|
105
|
+
|
|
106
|
+
- `for_profit`
|
|
107
|
+
- `non_profit`
|
|
108
|
+
|
|
109
|
+
### `type`
|
|
110
|
+
|
|
111
|
+
- `organization`
|
|
112
|
+
|
|
113
|
+
### `funding_stage`
|
|
114
|
+
|
|
115
|
+
- `seed`
|
|
116
|
+
- `early_stage_venture`
|
|
117
|
+
- `m_and_a`
|
|
118
|
+
- `late_stage_venture`
|
|
119
|
+
- `ipo`
|
|
120
|
+
|
|
121
|
+
### `last_funding_type`
|
|
122
|
+
|
|
123
|
+
- `seed`
|
|
124
|
+
- `series_a`
|
|
125
|
+
- `series_b`
|
|
126
|
+
- `series_c`
|
|
127
|
+
|
|
128
|
+
### `revenue_range`
|
|
129
|
+
|
|
130
|
+
- `r_00000000`
|
|
131
|
+
- `r_00001000`
|
|
132
|
+
- `r_00010000`
|
|
133
|
+
- `r_00050000`
|
|
134
|
+
- `r_00100000`
|
|
135
|
+
- `r_00500000`
|
|
136
|
+
- `r_01000000`
|
|
137
|
+
- `r_10000000`
|
|
138
|
+
|
|
139
|
+
### `growth_score_tier`
|
|
140
|
+
|
|
141
|
+
- `c100_high`
|
|
142
|
+
- `c200_medium`
|
|
143
|
+
- `c300_low`
|
|
144
|
+
|
|
145
|
+
### `heat_score_tier`
|
|
146
|
+
|
|
147
|
+
- `c100_high`
|
|
148
|
+
- `c200_medium`
|
|
149
|
+
- `c300_low`
|
|
150
|
+
|
|
151
|
+
### `ipo_status`
|
|
152
|
+
|
|
153
|
+
- `private`
|
|
154
|
+
- `public`
|
|
155
|
+
- `delisted`
|
|
156
|
+
|
|
157
|
+
### `num_employees_enum`
|
|
158
|
+
|
|
159
|
+
Column exists, but currently sparse/null in this dataset.
|
|
160
|
+
|
|
161
|
+
## JSONB Array Fields
|
|
162
|
+
|
|
163
|
+
`categories`, `category_groups`, `location_identifiers`, and `location_group_identifiers` are `jsonb` arrays.
|
|
164
|
+
|
|
165
|
+
Do **not** treat them as `text[]` with `&& ARRAY[...]::text[]`.
|
|
166
|
+
Use `jsonb_array_elements_text(...)` with `EXISTS`, for example:
|
|
167
|
+
|
|
168
|
+
```sql
|
|
169
|
+
AND EXISTS (
|
|
170
|
+
SELECT 1
|
|
171
|
+
FROM jsonb_array_elements_text(categories) AS c(category)
|
|
172
|
+
WHERE category IN ('Health Care', 'Biotechnology')
|
|
173
|
+
)
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
## Recommended Query Patterns
|
|
177
|
+
|
|
178
|
+
| Pattern | Why |
|
|
179
|
+
| ------------------------------------------------------- | ---------------------------------- |
|
|
180
|
+
| Equality / `IN` filters on enum columns | Fast and stable |
|
|
181
|
+
| Date windows on `last_funding_date` | Strong recency control |
|
|
182
|
+
| Numeric ranges on `funding_total_usd` | Good segmentation |
|
|
183
|
+
| `EXISTS + jsonb_array_elements_text` for tags/locations | Works with current schema |
|
|
184
|
+
| Explicit narrow column lists | Lower payload and faster execution |
|
|
185
|
+
|
|
186
|
+
## Banned / Avoided Patterns
|
|
187
|
+
|
|
188
|
+
| Pattern | Why | Better Alternative |
|
|
189
|
+
| ---------------------------------------------------------------------------- | ----------------------------------- | --------------------------------------------------- |
|
|
190
|
+
| Missing `LIMIT` | Unbounded scans + excessive credits | Always add `LIMIT` |
|
|
191
|
+
| `SELECT *` for production pulls | Larger payload and cost | Select only needed columns |
|
|
192
|
+
| Leading-wildcard scans on long text (`ILIKE '%term%'`) across broad dataset | Expensive text scans | Use enum/date/range filters first, then narrow text |
|
|
193
|
+
| Heavy aggregations (`COUNT(*)`, `DISTINCT`, wide `GROUP BY`) on large slices | Slow and expensive | Pull scoped rows, aggregate in code |
|
|
194
|
+
| Unscoped global sorts on large sets | Expensive sort operations | Filter first, sort smaller result sets |
|
|
195
|
+
| Multi-table joins for routine prospecting | More planner risk and latency | Stay on lean table only |
|
|
196
|
+
|
|
197
|
+
## Canonical Prospecting Queries
|
|
198
|
+
|
|
199
|
+
### 1) US early-stage SaaS/AI, currently active
|
|
200
|
+
|
|
201
|
+
```sql
|
|
202
|
+
SELECT
|
|
203
|
+
name,
|
|
204
|
+
website_url,
|
|
205
|
+
linkedin_url,
|
|
206
|
+
funding_stage,
|
|
207
|
+
num_employees_enum,
|
|
208
|
+
last_funding_date
|
|
209
|
+
FROM public.crunchbase_scraper_lean
|
|
210
|
+
WHERE operating_status = 'active'
|
|
211
|
+
AND funding_stage IN ('seed', 'early_stage_venture')
|
|
212
|
+
AND EXISTS (
|
|
213
|
+
SELECT 1
|
|
214
|
+
FROM jsonb_array_elements_text(categories) AS c(category)
|
|
215
|
+
WHERE category IN ('SaaS', 'Artificial Intelligence (AI)')
|
|
216
|
+
)
|
|
217
|
+
AND EXISTS (
|
|
218
|
+
SELECT 1
|
|
219
|
+
FROM jsonb_array_elements_text(location_identifiers) AS l(location)
|
|
220
|
+
WHERE location = 'United States'
|
|
221
|
+
)
|
|
222
|
+
LIMIT 100;
|
|
223
|
+
```
|
|
224
|
+
|
|
225
|
+
### 2) Recently funded (last 12 months)
|
|
226
|
+
|
|
227
|
+
```sql
|
|
228
|
+
SELECT
|
|
229
|
+
name,
|
|
230
|
+
website_url,
|
|
231
|
+
last_funding_type,
|
|
232
|
+
last_funding_date,
|
|
233
|
+
last_funding_total_usd,
|
|
234
|
+
funding_total_usd
|
|
235
|
+
FROM public.crunchbase_scraper_lean
|
|
236
|
+
WHERE operating_status = 'active'
|
|
237
|
+
AND last_funding_date >= CURRENT_DATE - INTERVAL '12 months'
|
|
238
|
+
AND last_funding_type IN ('seed', 'series_a', 'series_b')
|
|
239
|
+
ORDER BY last_funding_date DESC NULLS LAST
|
|
240
|
+
LIMIT 100;
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
### 3) Bay Area fintech companies with meaningful funding
|
|
244
|
+
|
|
245
|
+
```sql
|
|
246
|
+
SELECT
|
|
247
|
+
name,
|
|
248
|
+
website_url,
|
|
249
|
+
funding_stage,
|
|
250
|
+
funding_total_usd,
|
|
251
|
+
num_employees_enum
|
|
252
|
+
FROM public.crunchbase_scraper_lean
|
|
253
|
+
WHERE operating_status = 'active'
|
|
254
|
+
AND EXISTS (
|
|
255
|
+
SELECT 1
|
|
256
|
+
FROM jsonb_array_elements_text(categories) AS c(category)
|
|
257
|
+
WHERE category IN ('FinTech', 'Financial Services')
|
|
258
|
+
)
|
|
259
|
+
AND EXISTS (
|
|
260
|
+
SELECT 1
|
|
261
|
+
FROM jsonb_array_elements_text(location_group_identifiers) AS g(location_group)
|
|
262
|
+
WHERE location_group = 'San Francisco Bay Area'
|
|
263
|
+
)
|
|
264
|
+
AND funding_total_usd >= 5000000
|
|
265
|
+
LIMIT 75;
|
|
266
|
+
```
|
|
267
|
+
|
|
268
|
+
### 4) Non-profits with health focus
|
|
269
|
+
|
|
270
|
+
```sql
|
|
271
|
+
SELECT
|
|
272
|
+
name,
|
|
273
|
+
website_url,
|
|
274
|
+
company_type,
|
|
275
|
+
categories,
|
|
276
|
+
location_identifiers
|
|
277
|
+
FROM public.crunchbase_scraper_lean
|
|
278
|
+
WHERE company_type = 'non_profit'
|
|
279
|
+
AND EXISTS (
|
|
280
|
+
SELECT 1
|
|
281
|
+
FROM jsonb_array_elements_text(categories) AS c(category)
|
|
282
|
+
WHERE category ILIKE ANY (ARRAY['%health%', '%medical%', '%biotech%', '%pharma%', '%telemedicine%'])
|
|
283
|
+
)
|
|
284
|
+
LIMIT 100;
|
|
285
|
+
```
|
|
286
|
+
|
|
287
|
+
### 5) Healthtech seed to series B (safe column set)
|
|
288
|
+
|
|
289
|
+
```sql
|
|
290
|
+
SELECT
|
|
291
|
+
name,
|
|
292
|
+
website_url,
|
|
293
|
+
linkedin_url,
|
|
294
|
+
short_description,
|
|
295
|
+
funding_stage,
|
|
296
|
+
last_funding_type,
|
|
297
|
+
last_funding_date,
|
|
298
|
+
funding_total_usd,
|
|
299
|
+
num_employees_enum,
|
|
300
|
+
categories,
|
|
301
|
+
location_identifiers,
|
|
302
|
+
num_investors,
|
|
303
|
+
num_funding_rounds
|
|
304
|
+
FROM public.crunchbase_scraper_lean
|
|
305
|
+
WHERE operating_status = 'active'
|
|
306
|
+
AND last_funding_type IN ('seed', 'series_a', 'series_b')
|
|
307
|
+
AND EXISTS (
|
|
308
|
+
SELECT 1
|
|
309
|
+
FROM jsonb_array_elements_text(categories) AS c(category)
|
|
310
|
+
WHERE category ILIKE ANY (ARRAY['%health%', '%medical%', '%biotech%', '%pharma%', '%telemedicine%'])
|
|
311
|
+
)
|
|
312
|
+
ORDER BY last_funding_date DESC NULLS LAST
|
|
313
|
+
LIMIT 100;
|
|
314
|
+
```
|
|
315
|
+
|
|
316
|
+
## Usage Pattern (Spreadsheet Code)
|
|
317
|
+
|
|
318
|
+
```typescript
|
|
319
|
+
const rows = await services.crunchbase.search({
|
|
320
|
+
sql: `
|
|
321
|
+
SELECT name, website_url, linkedin_url
|
|
322
|
+
FROM public.crunchbase_scraper_lean
|
|
323
|
+
WHERE operating_status = 'active'
|
|
324
|
+
LIMIT 20
|
|
325
|
+
`
|
|
326
|
+
});
|
|
327
|
+
|
|
328
|
+
// rows is already an array of objects
|
|
329
|
+
return rows;
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
## Troubleshooting
|
|
333
|
+
|
|
334
|
+
- `column "..." does not exist` -> you are using an old/nonexistent column name; check the column list under "Live Schema (Verified)".
|
|
335
|
+
- `only public.crunchbase_scraper_lean is allowed` -> query references a disallowed table.
|
|
336
|
+
- `only SELECT queries are allowed` -> remove `INSERT/UPDATE/DELETE`, keep read-only SQL.
|
|
337
|
+
- Empty results with no error -> usually value casing mismatch (use lowercase enum values like `active`, `series_a`).
|
package/docs/services/index.md
CHANGED
|
@@ -2,12 +2,12 @@
|
|
|
2
2
|
- **apify**: Run any of 10,000+ Apify actors for web scraping, social media, e-commerce, and more.
|
|
3
3
|
- **browser**: Kernel browser automation - spin up cloud browsers, execute Playwright code, take screenshots. **Use this for scraping structured lists of repeated data** (e.g., product listings, search results, table rows) where you know the DOM structure. Also ideal for **intercepting network requests** to discover underlying APIs, then paginate those APIs directly in your code (faster & cheaper than clicking through pages). Perfect for JS-heavy sites that don't work with simple HTTP scraping.
|
|
4
4
|
- **company**: company data (getting employees at the company, getting company data, getting open jobs).
|
|
5
|
+
- **crunchbase**: SQL search over the lean Crunchbase company table (`public.crunchbase_scraper_lean`) for startup prospecting.
|
|
5
6
|
- **person**: finding a person's LinkedIn URL, enriching it from LinkedIn, contact info, and searching for specific people / groups on LinkedIn
|
|
6
7
|
- **geo**: parsing address
|
|
7
8
|
- **googleMaps**: search businesses via Google Maps.
|
|
8
9
|
- **email**: send transactional notification emails through Orange Slice's managed sender.
|
|
9
10
|
- **scrape**: website scraper, sitemap scraper
|
|
10
11
|
- **web**: SERP
|
|
11
|
-
- **healthcare**: Query the NPI (National Provider Identifier) database for healthcare organizations by specialty, location, or name. Contains 1.8M+ providers.
|
|
12
12
|
- **predictLeads**: company intelligence datasets (financing events, technologies, products, job openings, news, and related company data).
|
|
13
13
|
- **guides**: agent notes & operational docs (see [Error Handling Cheatsheet](../error-handling-cheatsheet.md))
|