orangeslice 2.1.0 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -64,11 +64,21 @@ const [companies, searchPage, ai] = await Promise.all([
64
64
  }
65
65
  })
66
66
  ]);
67
+
68
+ const startups = await services.crunchbase.search({
69
+ sql: `
70
+ SELECT name, website_url, linkedin_url
71
+ FROM public.crunchbase_scraper_lean
72
+ WHERE operating_status = 'active'
73
+ LIMIT 10
74
+ `
75
+ });
67
76
  ```
68
77
 
69
78
  ## Service map
70
79
 
71
80
  - `services.company.linkedin.search/enrich`
81
+ - `services.crunchbase.search` (returns rows array directly)
72
82
  - `services.company.getEmployeesFromLinkedin` (database-only B2B path)
73
83
  - `services.person.linkedin.search/enrich`
74
84
  - `services.web.search/batchSearch`
package/dist/cli.js CHANGED
@@ -100,6 +100,19 @@ Use these docs as the source of truth. If there is any conflict between your pri
100
100
  - Be concise, factual, and deterministic.
101
101
  - Ask a clarifying question only when a missing detail blocks progress.
102
102
 
103
+ ## Package Setup (Do Not Guess)
104
+ - Import from the package name, not a local file path:
105
+ - \`import { services } from "orangeslice"\`
106
+ - \`import { configure, services } from "orangeslice"\` when setting API key programmatically
107
+ - Do NOT use \`import { services } from "./orangeslice"\` unless the user explicitly has a local wrapper file at that path.
108
+ - \`npx orangeslice\` is a setup/bootstrap command (docs sync, package install, auth). It does NOT execute user app scripts.
109
+
110
+ ## Runtime Requirements
111
+ - If writing standalone scripts that use top-level \`await\`, use ESM:
112
+ - Set \`"type": "module"\` in \`package.json\`, or
113
+ - Use \`.mjs\` files.
114
+ - If the project is CommonJS and cannot switch to ESM, avoid top-level \`await\` and wrap async code in an async function.
115
+
103
116
  ## Mandatory Read Order (Before writing code)
104
117
  1. \`./services/index.md\` - service map and capabilities
105
118
  2. Relevant docs under \`./services/**\` for every service you plan to call
@@ -0,0 +1,10 @@
1
+ export interface CrunchbaseSearchParams {
2
+ sql: string;
3
+ userId?: string;
4
+ }
5
+ /**
6
+ * Search the Crunchbase lean table using SQL.
7
+ *
8
+ * Returns rows directly (no envelope).
9
+ */
10
+ export declare function crunchbaseSearch<T = Record<string, unknown>>(params: CrunchbaseSearchParams): Promise<T[]>;
@@ -0,0 +1,13 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.crunchbaseSearch = crunchbaseSearch;
4
+ const api_1 = require("./api");
5
+ /**
6
+ * Search the Crunchbase lean table using SQL.
7
+ *
8
+ * Returns rows directly (no envelope).
9
+ */
10
+ async function crunchbaseSearch(params) {
11
+ const data = await (0, api_1.post)("/execute/crunchbase-sql", { sql: params.sql });
12
+ return data.rows ?? [];
13
+ }
package/dist/index.d.ts CHANGED
@@ -2,6 +2,8 @@ export { configure } from "./api";
2
2
  export type { OrangesliceConfig } from "./api";
3
3
  export { linkedinSearch } from "./b2b";
4
4
  export type { LinkedInSearchParams, LinkedInSearchResponse } from "./b2b";
5
+ export { crunchbaseSearch } from "./crunchbase";
6
+ export type { CrunchbaseSearchParams } from "./crunchbase";
5
7
  export { webSearch, webBatchSearch } from "./serp";
6
8
  export type { WebSearchQuery, WebSearchResult, WebSearchResponse, BatchWebSearchParams } from "./serp";
7
9
  export { generateObject } from "./generateObject";
@@ -21,12 +23,16 @@ export type { PersonLinkedinFindUrlParams, CompanyLinkedinFindUrlParams, PersonC
21
23
  import { runApifyActor } from "./apify";
22
24
  import { linkedinSearch } from "./b2b";
23
25
  import { browserExecute } from "./browser";
26
+ import { crunchbaseSearch } from "./crunchbase";
24
27
  import { personLinkedinEnrich, personLinkedinFindUrl, personContactGet, companyLinkedinEnrich, companyLinkedinFindUrl, companyGetEmployeesFromLinkedin, geoParseAddress, builtWithLookupDomain, builtWithRelationships, builtWithSearchByTech } from "./expansion";
25
28
  import { scrapeWebsite } from "./firecrawl";
26
29
  import { generateObject } from "./generateObject";
27
30
  import { googleMapsScrape } from "./googleMaps";
28
31
  import { webBatchSearch, webSearch } from "./serp";
29
32
  export declare const services: {
33
+ crunchbase: {
34
+ search: typeof crunchbaseSearch;
35
+ };
30
36
  company: {
31
37
  linkedin: {
32
38
  findUrl: typeof companyLinkedinFindUrl;
package/dist/index.js CHANGED
@@ -1,10 +1,12 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.services = exports.builtWithSearchByTech = exports.builtWithRelationships = exports.builtWithLookupDomain = exports.geoParseAddress = exports.companyGetEmployeesFromLinkedin = exports.companyLinkedinFindUrl = exports.companyLinkedinEnrich = exports.personContactGet = exports.personLinkedinFindUrl = exports.personLinkedinEnrich = exports.PREDICT_LEADS_OPERATION_IDS = exports.predictLeads = exports.executePredictLeads = exports.googleMapsScrape = exports.runApifyActor = exports.browserExecute = exports.scrapeWebsite = exports.generateObject = exports.webBatchSearch = exports.webSearch = exports.linkedinSearch = exports.configure = void 0;
3
+ exports.services = exports.builtWithSearchByTech = exports.builtWithRelationships = exports.builtWithLookupDomain = exports.geoParseAddress = exports.companyGetEmployeesFromLinkedin = exports.companyLinkedinFindUrl = exports.companyLinkedinEnrich = exports.personContactGet = exports.personLinkedinFindUrl = exports.personLinkedinEnrich = exports.PREDICT_LEADS_OPERATION_IDS = exports.predictLeads = exports.executePredictLeads = exports.googleMapsScrape = exports.runApifyActor = exports.browserExecute = exports.scrapeWebsite = exports.generateObject = exports.webBatchSearch = exports.webSearch = exports.crunchbaseSearch = exports.linkedinSearch = exports.configure = void 0;
4
4
  var api_1 = require("./api");
5
5
  Object.defineProperty(exports, "configure", { enumerable: true, get: function () { return api_1.configure; } });
6
6
  var b2b_1 = require("./b2b");
7
7
  Object.defineProperty(exports, "linkedinSearch", { enumerable: true, get: function () { return b2b_1.linkedinSearch; } });
8
+ var crunchbase_1 = require("./crunchbase");
9
+ Object.defineProperty(exports, "crunchbaseSearch", { enumerable: true, get: function () { return crunchbase_1.crunchbaseSearch; } });
8
10
  var serp_1 = require("./serp");
9
11
  Object.defineProperty(exports, "webSearch", { enumerable: true, get: function () { return serp_1.webSearch; } });
10
12
  Object.defineProperty(exports, "webBatchSearch", { enumerable: true, get: function () { return serp_1.webBatchSearch; } });
@@ -36,6 +38,7 @@ Object.defineProperty(exports, "builtWithSearchByTech", { enumerable: true, get:
36
38
  const apify_2 = require("./apify");
37
39
  const b2b_2 = require("./b2b");
38
40
  const browser_2 = require("./browser");
41
+ const crunchbase_2 = require("./crunchbase");
39
42
  const expansion_2 = require("./expansion");
40
43
  const firecrawl_2 = require("./firecrawl");
41
44
  const generateObject_2 = require("./generateObject");
@@ -43,6 +46,9 @@ const googleMaps_2 = require("./googleMaps");
43
46
  const predictLeads_2 = require("./predictLeads");
44
47
  const serp_2 = require("./serp");
45
48
  exports.services = {
49
+ crunchbase: {
50
+ search: crunchbase_2.crunchbaseSearch
51
+ },
46
52
  company: {
47
53
  linkedin: {
48
54
  findUrl: expansion_2.companyLinkedinFindUrl,
@@ -9,4 +9,4 @@ Typed functions for Gmail actions powered by Orange Slice Google integrations.
9
9
  ## Email
10
10
 
11
11
  - `integrations.gmail.sendEmail(input)` - Send an email through the connected Gmail account
12
- - Heavy rate limit: `sendEmail` is capped at **20 calls/day** per connected Gmail account
12
+ - Heavy rate limit: `sendEmail` is capped at **40 calls/day** per connected Gmail account
@@ -2,7 +2,7 @@
2
2
 
3
3
  Send an email from the connected Gmail account.
4
4
 
5
- > Rate limit note for AI: `integrations.gmail.sendEmail(...)` is heavily rate-limited to **20 calls/day** per connected Gmail account. Use sparingly and batch/aggregate where possible.
5
+ > Rate limit note for AI: `integrations.gmail.sendEmail(...)` is heavily rate-limited to **40 calls/day** per connected Gmail account. Use sparingly and batch/aggregate where possible.
6
6
 
7
7
  ```typescript
8
8
  // Basic email
@@ -25,18 +25,18 @@ await integrations.gmail.sendEmail({
25
25
 
26
26
  ## Input
27
27
 
28
- | Parameter | Type | Required | Description |
29
- | ------------------ | ----------- | -------- | ----------- |
30
- | `recipient_email` | `string` | No\* | Primary `To` recipient |
31
- | `extra_recipients` | `string[]` | No | Additional `To` recipients |
32
- | `cc` | `string[]` | No | CC recipients |
33
- | `bcc` | `string[]` | No | BCC recipients |
34
- | `subject` | `string` | No\* | Email subject |
35
- | `body` | `string` | No\* | Email body (plain text or HTML) |
36
- | `is_html` | `boolean` | No | Set to `true` when body is HTML |
37
- | `from_email` | `string` | No | Optional verified send-as alias |
38
- | `attachment` | `object` | No | Optional attachment payload |
39
- | `user_id` | `string` | No | Gmail user id (`"me"` by default) |
28
+ | Parameter | Type | Required | Description |
29
+ | ------------------ | ---------- | -------- | --------------------------------- |
30
+ | `recipient_email` | `string` | No\* | Primary `To` recipient |
31
+ | `extra_recipients` | `string[]` | No | Additional `To` recipients |
32
+ | `cc` | `string[]` | No | CC recipients |
33
+ | `bcc` | `string[]` | No | BCC recipients |
34
+ | `subject` | `string` | No\* | Email subject |
35
+ | `body` | `string` | No\* | Email body (plain text or HTML) |
36
+ | `is_html` | `boolean` | No | Set to `true` when body is HTML |
37
+ | `from_email` | `string` | No | Optional verified send-as alias |
38
+ | `attachment` | `object` | No | Optional attachment payload |
39
+ | `user_id` | `string` | No | Gmail user id (`"me"` by default) |
40
40
 
41
41
  \*Gmail requires at least one recipient (`recipient_email`, `cc`, or `bcc`) and at least one of `subject` or `body`.
42
42
 
@@ -16,6 +16,7 @@ description: Strategies for searching or finding people and companies. This is a
16
16
  Run queries with built-in filters when the criteria are searchable:
17
17
 
18
18
  - **Web search (`services.web.search`)** — **Default for LinkedIn**. Use for keywords, niche queries, fuzzy matching, anything descriptive.
19
+ - **Crunchbase (`services.crunchbase.search`)** — **Default for funding data**. Use for funding-stage, round type, amount, date windows, and investor-backed company discovery.
19
20
  - **LinkedIn B2B DB** — **Indexed lookups ONLY:** company by domain/slug/ID, employees at a known company (by company_id), basic funding (2-table join). Everything else = web search. See [QUICK_REF](./linkedin_data/QUICK_REF.md).
20
21
  - **Google Maps** — industry, location, ratings
21
22
  - **LinkedIn job search** — job filters, titles
@@ -83,11 +84,11 @@ When using qualification columns, think Circle & Star:
83
84
  | Source | Use When | Limitations |
84
85
  | ------------------------ | ----------------------------------------------------------- | -------------------------------------------------------- |
85
86
  | **Web Search (Default)** | **Everything else** — keywords, niche, fuzzy, specific | Requires verification columns for false positives. |
87
+ | **Crunchbase (Funding Default)** | Funding-focused prospecting: stage, round type, amount, recency, investors | Best for funding intelligence; use other sources for non-funding discovery criteria. |
86
88
  | **PredictLeads** | Company intelligence, buying signals, and structured company events at scale | Coverage varies by company/market; use web search for very niche long-tail discovery. |
87
89
  | **Niche Directory Scrape** | Well-defined categories with existing lists (see below) | Requires finding the right directory first. |
88
90
  | **LinkedIn B2B DB** | **Indexed lookups ONLY:** company by domain/slug/ID, employees at known company, basic 2-table funding. | **3s hard max. No keyword search, no LATERAL, no 3+ table joins.** Everything else = web search. |
89
91
  | **Google Maps** | Local/SMB, physical locations, restaurants, retail | Limited to businesses with physical presence. |
90
- | **NPI Database** | Healthcare providers | Healthcare only. Free. |
91
92
  | **Apify Actors** | Platform-specific scraping (Instagram, TikTok, job boards) | Per-platform setup. May break with platform changes. |
92
93
 
93
94
  ### PredictLeads: When It Is Better Than Everything Else
@@ -105,6 +106,12 @@ Prefer other sources when:
105
106
  - You need local storefront/SMB discovery -> use Google Maps
106
107
  - You need fast indexed LinkedIn lookups by known IDs/domain/company -> use LinkedIn B2B DB
107
108
 
109
+ ### Funding Prospecting Standard: Use Crunchbase First
110
+
111
+ For any request centered on funding data (for example: "Series A fintech companies", "companies that raised in the last 12 months", "recently funded startups"), use `services.crunchbase.search` as the **standard/default source**.
112
+
113
+ Use LinkedIn B2B DB funding joins only when the user explicitly needs a LinkedIn-only workflow or a narrow lookup tied to existing LinkedIn records. Otherwise, Crunchbase should be the first choice for funding-oriented discovery.
114
+
108
115
  ### Niche Directory Scraping — For Well-Defined Categories
109
116
 
110
117
  When users ask for companies in a **specific, well-defined niche** (e.g., "fast food chains", "Fortune 500 companies", "Y Combinator startups"), the best approach is often to **find and scrape a curated directory or list**.
@@ -260,27 +267,28 @@ Don't overthink it — just create 2-3 views that match the columns you built. S
260
267
 
261
268
  ## Examples
262
269
 
263
- | User Request | Approach | Why |
264
- | -------------------------------------------- | ---------------- | ------------------------------------------------------------------- |
265
- | "AI CRM companies" | Web search | Keyword query → `"AI CRM" site:linkedin.com/company` |
266
- | "Fintech startups" | Web search | Fuzzy/descriptive → `"fintech" "startup" site:linkedin.com/company` |
267
- | "SDRs at Series A companies" | Web search | Specific criteria → `"SDR" "Series A" site:linkedin.com/in` |
268
- | "Companies using Kubernetes" | Web search | Technology match `"Kubernetes" site:linkedin.com/company` |
269
- | "VPs who worked at Google" | Web search | Fuzzy history match → `"VP" "Google" site:linkedin.com/in` |
270
- | "1000 software engineers in Bay Area" | B2B DB | Simple title + location + high volume |
271
- | "All healthcare companies 100-500 employees" | B2B DB | Industry + size + high volume |
272
- | "Fast food chains that..." | Directory scrape | Scrape Wikipedia list `browser.execute` |
273
- | "Restaurants in Austin" | Google Maps | Local/SMB with physical presence |
274
- | "Companies hiring SDRs" | LinkedIn Jobs | Job search with title filter |
275
- | "Warehouses implementing WMS" | Circle + columns | Pull logistics companies add "WMS Score" column |
276
- | "Companies that recently switched CRMs" | Circle + columns | Pull SaaS companies → add "CRM Change Signals" column |
270
+ | User Request | Approach | Why |
271
+ | -------------------------------------------- | ---------------- | --------------------------------------------------------------------------- |
272
+ | "AI CRM companies" | Web search | Keyword query → `"AI CRM" site:linkedin.com/company` |
273
+ | "Fintech startups" | Web search | Fuzzy/descriptive → `"fintech" "startup" site:linkedin.com/company` |
274
+ | "SDRs at Series A companies" | Web search | Specific criteria → `"SDR" "Series A" site:linkedin.com/in` |
275
+ | "Series A/B companies raised last year" | Crunchbase | Funding-specific discovery is best handled via `services.crunchbase.search` |
276
+ | "Companies using Kubernetes" | Web search | Technology match → `"Kubernetes" site:linkedin.com/company` |
277
+ | "VPs who worked at Google" | Web search | Fuzzy history match → `"VP" "Google" site:linkedin.com/in` |
278
+ | "1000 software engineers in Bay Area" | B2B DB | Simple title + location + high volume |
279
+ | "All healthcare companies 100-500 employees" | B2B DB | Industry + size + high volume |
280
+ | "Fast food chains that..." | Directory scrape | Scrape Wikipedia list → `browser.execute` |
281
+ | "Restaurants in Austin" | Google Maps | Local/SMB with physical presence |
282
+ | "Companies hiring SDRs" | LinkedIn Jobs | Job search with title filter |
283
+ | "Warehouses implementing WMS" | Circle + columns | Pull logistics companies → add "WMS Score" column |
284
+ | "Companies that recently switched CRMs" | Circle + columns | Pull SaaS companies → add "CRM Change Signals" column |
277
285
 
278
286
  ---
279
287
 
280
288
  ## Tools
281
289
 
282
290
  - **LinkedIn:** `services.company.linkedin.search({ sql: "SELECT ... FROM linkedin_company ..." })`, `services.person.linkedin.search({ sql: "SELECT ... FROM linkedin_profile ..." })` — **Lookup tool only, 3s max, 2-table joins max. Use web search for anything else.**
283
- - **Healthcare:** `healthcare.npi`
291
+ - **Funding:** `services.crunchbase.search({ sql: "SELECT ... FROM ... WHERE ..." })` — **Default for funding search and screening.**
284
292
  - **Local/SMB:** `googleMaps.scrape`
285
293
  - **Web:** `web.search` + `browser.execute`
286
294
  - **Platforms:** `services.apify.runActor`
@@ -390,6 +390,38 @@ WHERE lc.company_size = '51-200 employees'
390
390
  - **Use `lc` alias** for company tables
391
391
  - **Default to US**: `lc.country_code = 'US'`
392
392
 
393
+ ## Return Type
394
+
395
+ `services.company.linkedin.search()` returns an object envelope:
396
+
397
+ ```typescript
398
+ {
399
+ rows: Record<string, unknown>[];
400
+ count: number;
401
+ }
402
+ ```
403
+
404
+ - `rows`: Result rows from your SQL query, with exactly the columns you selected.
405
+ - `count`: Number of rows returned in `rows`.
406
+
407
+ Example:
408
+
409
+ ```typescript
410
+ const searchResult = await services.company.linkedin.search({
411
+ sql: `
412
+ SELECT
413
+ lc.company_name,
414
+ lc.domain,
415
+ 'https://www.linkedin.com/company/' || lc.universal_name AS lc_linkedin_url
416
+ FROM linkedin_company lc
417
+ WHERE lc.domain = 'stripe.com'
418
+ LIMIT 1
419
+ `
420
+ });
421
+
422
+ return searchResult.rows; // Most spreadsheet snippets should return rows
423
+ ```
424
+
393
425
  ---
394
426
 
395
427
  ## Table Aliases
@@ -0,0 +1,337 @@
1
+ ---
2
+ description: Search Crunchbase with SQL
3
+ ---
4
+
5
+ # Crunchbase Search
6
+
7
+ Run SQL against `public.crunchbase_scraper_lean` for startup/company prospecting.
8
+
9
+ ```typescript
10
+ const rows = await services.crunchbase.search({
11
+ sql: `
12
+ SELECT name, website_url, linkedin_url
13
+ FROM public.crunchbase_scraper_lean
14
+ WHERE operating_status = 'active'
15
+ LIMIT 25
16
+ `
17
+ });
18
+
19
+ // rows: Record<string, unknown>[]
20
+ return rows;
21
+ ```
22
+
23
+ ## Contract (Hard Rules)
24
+
25
+ - Query **only** `public.crunchbase_scraper_lean`.
26
+ - **Only one statement** is allowed.
27
+ - **Only SELECT** queries are allowed (`WITH ... SELECT` is fine).
28
+ - Always include `LIMIT` (recommended `<= 100`).
29
+ - This is an external service path, not `ctx.sql()`.
30
+ - Credits are 1 credit per returned row (reserve estimate is derived from `LIMIT`).
31
+
32
+ ## Return Type
33
+
34
+ `services.crunchbase.search()` returns rows directly:
35
+
36
+ ```typescript
37
+ Record<string, unknown>[];
38
+ ```
39
+
40
+ No `{ rows, count }` envelope.
41
+
42
+ ```typescript
43
+ const rows = await services.crunchbase.search({ sql: "SELECT name FROM public.crunchbase_scraper_lean LIMIT 10" });
44
+ const count = rows.length;
45
+ ```
46
+
47
+ ## Live Schema (Verified)
48
+
49
+ Source of truth: live DB introspection of `public.crunchbase_scraper_lean`.
50
+
51
+ | Column | Type | Nullable |
52
+ | ---------------------------- | ------------- | -------- |
53
+ | `id` | `bigint` | no |
54
+ | `uuid` | `text` | yes |
55
+ | `name` | `text` | yes |
56
+ | `link` | `text` | yes |
57
+ | `type` | `text` | yes |
58
+ | `operating_status` | `text` | yes |
59
+ | `company_type` | `text` | yes |
60
+ | `short_description` | `text` | yes |
61
+ | `description` | `text` | yes |
62
+ | `website_url` | `text` | yes |
63
+ | `linkedin_url` | `text` | yes |
64
+ | `twitter_url` | `text` | yes |
65
+ | `facebook_url` | `text` | yes |
66
+ | `contact_email` | `text` | yes |
67
+ | `phone_number` | `text` | yes |
68
+ | `hq_postal_code` | `text` | yes |
69
+ | `primary_category` | `text` | yes |
70
+ | `categories` | `jsonb` | no |
71
+ | `category_groups` | `jsonb` | no |
72
+ | `location_identifiers` | `jsonb` | no |
73
+ | `location_group_identifiers` | `jsonb` | no |
74
+ | `num_employees_enum` | `integer` | yes |
75
+ | `revenue_range` | `text` | yes |
76
+ | `funding_stage` | `text` | yes |
77
+ | `funding_total_usd` | `numeric` | yes |
78
+ | `last_funding_total_usd` | `numeric` | yes |
79
+ | `last_funding_type` | `text` | yes |
80
+ | `last_funding_date` | `date` | yes |
81
+ | `num_funding_rounds` | `integer` | yes |
82
+ | `num_investors` | `integer` | yes |
83
+ | `num_lead_investors` | `integer` | yes |
84
+ | `rank_org_company` | `integer` | yes |
85
+ | `rank_org` | `integer` | yes |
86
+ | `rank_delta_d7` | `integer` | yes |
87
+ | `rank_delta_d30` | `integer` | yes |
88
+ | `rank_delta_d90` | `integer` | yes |
89
+ | `growth_score_tier` | `text` | yes |
90
+ | `heat_score_tier` | `text` | yes |
91
+ | `ipo_status` | `text` | yes |
92
+ | `went_public_on` | `date` | yes |
93
+ | `imported_at` | `timestamptz` | no |
94
+
95
+ ## Enum Catalog (Verified Distinct Values)
96
+
97
+ These are the distinct values observed in live production data.
98
+
99
+ ### `operating_status`
100
+
101
+ - `active`
102
+ - `closed`
103
+
104
+ ### `company_type`
105
+
106
+ - `for_profit`
107
+ - `non_profit`
108
+
109
+ ### `type`
110
+
111
+ - `organization`
112
+
113
+ ### `funding_stage`
114
+
115
+ - `seed`
116
+ - `early_stage_venture`
117
+ - `m_and_a`
118
+ - `late_stage_venture`
119
+ - `ipo`
120
+
121
+ ### `last_funding_type`
122
+
123
+ - `seed`
124
+ - `series_a`
125
+ - `series_b`
126
+ - `series_c`
127
+
128
+ ### `revenue_range`
129
+
130
+ - `r_00000000`
131
+ - `r_00001000`
132
+ - `r_00010000`
133
+ - `r_00050000`
134
+ - `r_00100000`
135
+ - `r_00500000`
136
+ - `r_01000000`
137
+ - `r_10000000`
138
+
139
+ ### `growth_score_tier`
140
+
141
+ - `c100_high`
142
+ - `c200_medium`
143
+ - `c300_low`
144
+
145
+ ### `heat_score_tier`
146
+
147
+ - `c100_high`
148
+ - `c200_medium`
149
+ - `c300_low`
150
+
151
+ ### `ipo_status`
152
+
153
+ - `private`
154
+ - `public`
155
+ - `delisted`
156
+
157
+ ### `num_employees_enum`
158
+
159
+ Column exists, but currently sparse/null in this dataset.
160
+
161
+ ## JSONB Array Fields
162
+
163
+ `categories`, `category_groups`, `location_identifiers`, and `location_group_identifiers` are `jsonb` arrays.
164
+
165
+ Do **not** treat them as `text[]` with `&& ARRAY[...]::text[]`.
166
+ Use `jsonb_array_elements_text(...)` with `EXISTS`, for example:
167
+
168
+ ```sql
169
+ AND EXISTS (
170
+ SELECT 1
171
+ FROM jsonb_array_elements_text(categories) AS c(category)
172
+ WHERE category IN ('Health Care', 'Biotechnology')
173
+ )
174
+ ```
175
+
176
+ ## Recommended Query Patterns
177
+
178
+ | Pattern | Why |
179
+ | ------------------------------------------------------- | ---------------------------------- |
180
+ | Equality / `IN` filters on enum columns | Fast and stable |
181
+ | Date windows on `last_funding_date` | Strong recency control |
182
+ | Numeric ranges on `funding_total_usd` | Good segmentation |
183
+ | `EXISTS + jsonb_array_elements_text` for tags/locations | Works with current schema |
184
+ | Explicit narrow column lists | Lower payload and faster execution |
185
+
186
+ ## Banned / Avoided Patterns
187
+
188
+ | Pattern | Why | Better Alternative |
189
+ | ---------------------------------------------------------------------------- | ----------------------------------- | --------------------------------------------------- |
190
+ | Missing `LIMIT` | Unbounded scans + excessive credits | Always add `LIMIT` |
191
+ | `SELECT *` for production pulls | Larger payload and cost | Select only needed columns |
192
+ | Leading-wildcard scans on long text (`ILIKE '%term%'`) across broad dataset | Expensive text scans | Use enum/date/range filters first, then narrow text |
193
+ | Heavy aggregations (`COUNT(*)`, `DISTINCT`, wide `GROUP BY`) on large slices | Slow and expensive | Pull scoped rows, aggregate in code |
194
+ | Unscoped global sorts on large sets | Expensive sort operations | Filter first, sort smaller result sets |
195
+ | Multi-table joins for routine prospecting | More planner risk and latency | Stay on lean table only |
196
+
197
+ ## Canonical Prospecting Queries
198
+
199
+ ### 1) US early-stage SaaS/AI, currently active
200
+
201
+ ```sql
202
+ SELECT
203
+ name,
204
+ website_url,
205
+ linkedin_url,
206
+ funding_stage,
207
+ num_employees_enum,
208
+ last_funding_date
209
+ FROM public.crunchbase_scraper_lean
210
+ WHERE operating_status = 'active'
211
+ AND funding_stage IN ('seed', 'early_stage_venture')
212
+ AND EXISTS (
213
+ SELECT 1
214
+ FROM jsonb_array_elements_text(categories) AS c(category)
215
+ WHERE category IN ('SaaS', 'Artificial Intelligence (AI)')
216
+ )
217
+ AND EXISTS (
218
+ SELECT 1
219
+ FROM jsonb_array_elements_text(location_identifiers) AS l(location)
220
+ WHERE location = 'United States'
221
+ )
222
+ LIMIT 100;
223
+ ```
224
+
225
+ ### 2) Recently funded (last 12 months)
226
+
227
+ ```sql
228
+ SELECT
229
+ name,
230
+ website_url,
231
+ last_funding_type,
232
+ last_funding_date,
233
+ last_funding_total_usd,
234
+ funding_total_usd
235
+ FROM public.crunchbase_scraper_lean
236
+ WHERE operating_status = 'active'
237
+ AND last_funding_date >= CURRENT_DATE - INTERVAL '12 months'
238
+ AND last_funding_type IN ('seed', 'series_a', 'series_b')
239
+ ORDER BY last_funding_date DESC NULLS LAST
240
+ LIMIT 100;
241
+ ```
242
+
243
+ ### 3) Bay Area fintech companies with meaningful funding
244
+
245
+ ```sql
246
+ SELECT
247
+ name,
248
+ website_url,
249
+ funding_stage,
250
+ funding_total_usd,
251
+ num_employees_enum
252
+ FROM public.crunchbase_scraper_lean
253
+ WHERE operating_status = 'active'
254
+ AND EXISTS (
255
+ SELECT 1
256
+ FROM jsonb_array_elements_text(categories) AS c(category)
257
+ WHERE category IN ('FinTech', 'Financial Services')
258
+ )
259
+ AND EXISTS (
260
+ SELECT 1
261
+ FROM jsonb_array_elements_text(location_group_identifiers) AS g(location_group)
262
+ WHERE location_group = 'San Francisco Bay Area'
263
+ )
264
+ AND funding_total_usd >= 5000000
265
+ LIMIT 75;
266
+ ```
267
+
268
+ ### 4) Non-profits with health focus
269
+
270
+ ```sql
271
+ SELECT
272
+ name,
273
+ website_url,
274
+ company_type,
275
+ categories,
276
+ location_identifiers
277
+ FROM public.crunchbase_scraper_lean
278
+ WHERE company_type = 'non_profit'
279
+ AND EXISTS (
280
+ SELECT 1
281
+ FROM jsonb_array_elements_text(categories) AS c(category)
282
+ WHERE category ILIKE ANY (ARRAY['%health%', '%medical%', '%biotech%', '%pharma%', '%telemedicine%'])
283
+ )
284
+ LIMIT 100;
285
+ ```
286
+
287
+ ### 5) Healthtech seed to series B (safe column set)
288
+
289
+ ```sql
290
+ SELECT
291
+ name,
292
+ website_url,
293
+ linkedin_url,
294
+ short_description,
295
+ funding_stage,
296
+ last_funding_type,
297
+ last_funding_date,
298
+ funding_total_usd,
299
+ num_employees_enum,
300
+ categories,
301
+ location_identifiers,
302
+ num_investors,
303
+ num_funding_rounds
304
+ FROM public.crunchbase_scraper_lean
305
+ WHERE operating_status = 'active'
306
+ AND last_funding_type IN ('seed', 'series_a', 'series_b')
307
+ AND EXISTS (
308
+ SELECT 1
309
+ FROM jsonb_array_elements_text(categories) AS c(category)
310
+ WHERE category ILIKE ANY (ARRAY['%health%', '%medical%', '%biotech%', '%pharma%', '%telemedicine%'])
311
+ )
312
+ ORDER BY last_funding_date DESC NULLS LAST
313
+ LIMIT 100;
314
+ ```
315
+
316
+ ## Usage Pattern (Spreadsheet Code)
317
+
318
+ ```typescript
319
+ const rows = await services.crunchbase.search({
320
+ sql: `
321
+ SELECT name, website_url, linkedin_url
322
+ FROM public.crunchbase_scraper_lean
323
+ WHERE operating_status = 'active'
324
+ LIMIT 20
325
+ `
326
+ });
327
+
328
+ // rows is already an array of objects
329
+ return rows;
330
+ ```
331
+
332
+ ## Troubleshooting
333
+
334
+ - `column "..." does not exist` -> you are using an old/nonexistent column name; check the "Live Schema (Verified)" table for valid column names.
335
+ - `only public.crunchbase_scraper_lean is allowed` -> query references a disallowed table.
336
+ - `only SELECT queries are allowed` -> remove `INSERT/UPDATE/DELETE`, keep read-only SQL.
337
+ - Empty results with no error -> usually value casing mismatch (use lowercase enum values like `active`, `series_a`).
@@ -2,12 +2,12 @@
2
2
  - **apify**: Run any of 10,000+ Apify actors for web scraping, social media, e-commerce, and more.
3
3
  - **browser**: Kernel browser automation - spin up cloud browsers, execute Playwright code, take screenshots. **Use this for scraping structured lists of repeated data** (e.g., product listings, search results, table rows) where you know the DOM structure. Also ideal for **intercepting network requests** to discover underlying APIs, then paginate those APIs directly in your code (faster & cheaper than clicking through pages). Perfect for JS-heavy sites that don't work with simple HTTP scraping.
4
4
  - **company**: company data (getting employees at the company, getting company data, getting open jobs).
5
+ - **crunchbase**: SQL search over the lean Crunchbase company table (`public.crunchbase_scraper_lean`) for startup prospecting.
5
6
  - **person**: finding a person's LinkedIn URL, enriching it from LinkedIn, contact info, and searching for specific people / groups on LinkedIn
6
7
  - **geo**: parsing address
7
8
  - **googleMaps**: search businesses via Google Maps.
8
9
  - **email**: send transactional notification emails through Orange Slice's managed sender.
9
10
  - **scrape**: website scraper, sitemap scraper
10
11
  - **web**: SERP
11
- - **healthcare**: Query the NPI (National Provider Identifier) database for healthcare organizations by specialty, location, or name. Contains 1.8M+ providers.
12
12
  - **predictLeads**: company intelligence datasets (financing events, technologies, products, job openings, news, and related company data).
13
13
  - **guides**: agent notes & operational docs (see [Error Handling Cheatsheet](../error-handling-cheatsheet.md))