orangeslice 1.5.0 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -56,6 +56,35 @@ All calls are rate-limited automatically.
56
56
  npm install orangeslice
57
57
  ```
58
58
 
59
+ ### TypeScript Setup
60
+
61
+ If running `.ts` files directly with `ts-node` or `tsx`, you may need:
62
+
63
+ ```bash
64
+ npm install -D typescript @types/node tsx
65
+ ```
66
+
67
+ Recommended `tsconfig.json`:
68
+
69
+ ```json
70
+ {
71
+ "compilerOptions": {
72
+ "target": "ES2020",
73
+ "module": "NodeNext",
74
+ "moduleResolution": "NodeNext",
75
+ "esModuleInterop": true,
76
+ "strict": false,
77
+ "skipLibCheck": true
78
+ }
79
+ }
80
+ ```
81
+
82
+ Then run with:
83
+
84
+ ```bash
85
+ npx tsx your-script.ts
86
+ ```
87
+
59
88
  ## Usage
60
89
 
61
90
  ```typescript
@@ -135,6 +164,42 @@ const result = await orangeslice.b2b.query("SELECT * FROM linkedin_company LIMIT
135
164
  // result.rows, result.rowCount, result.duration_ms
136
165
  ```
137
166
 
167
+ ### `orangeslice.serp.search(query: string, options?): Promise<SerpResponse>`
168
+
169
+ Search Google and return results.
170
+
171
+ ```typescript
172
+ const response = await orangeslice.serp.search("Stripe funding 2024");
173
+ // response.results = [{ title, link, snippet }, ...]
174
+
175
+ // With options
176
+ const filtered = await orangeslice.serp.search("site:linkedin.com CEO", {
177
+ tbs: "qdr:m", // Past month
178
+ page: 1
179
+ });
180
+ ```
181
+
182
+ ### `orangeslice.firecrawl.scrape(url: string, limit?): Promise<FirecrawlResponse>`
183
+
184
+ Scrape a website and get markdown + social URLs.
185
+
186
+ ```typescript
187
+ const page = await orangeslice.firecrawl.scrape("https://stripe.com/about");
188
+ // page.markdown, page.socialUrls
189
+ ```
190
+
191
+ ### `orangeslice.browser.execute(code: string, options?): Promise<BrowserResponse>`
192
+
193
+ Execute Playwright code with `page` in scope.
194
+
195
+ ```typescript
196
+ const response = await orangeslice.browser.execute(`
197
+ await page.goto("https://example.com", { waitUntil: 'domcontentloaded' });
198
+ return await page.evaluate(() => document.title);
199
+ `);
200
+ // response.success, response.result
201
+ ```
202
+
138
203
  ## Note on Concurrency
139
204
 
140
205
  The rate limit is **per-process**. If you run multiple scripts simultaneously, each has its own queue. For most AI agent use cases (single script), this is fine.
package/dist/serp.d.ts CHANGED
@@ -5,6 +5,9 @@ export interface SerpResult {
5
5
  position?: number;
6
6
  }
7
7
  export interface SerpResponse {
8
+ /** Search results (primary field from API) */
9
+ results?: SerpResult[];
10
+ /** @deprecated Use `results` instead */
8
11
  organic_results?: SerpResult[];
9
12
  related_questions?: Array<{
10
13
  question: string;
@@ -32,7 +35,7 @@ export interface SerpOptions {
32
35
  */
33
36
  export declare function search(query: string, options?: SerpOptions): Promise<SerpResponse>;
34
37
  /**
35
- * Search and return just the organic results
38
+ * Search and return just the results array
36
39
  */
37
40
  export declare function organic(query: string, options?: SerpOptions): Promise<SerpResult[]>;
38
41
  export declare const serp: {
package/dist/serp.js CHANGED
@@ -57,11 +57,11 @@ async function search(query, options = {}) {
57
57
  });
58
58
  }
59
59
  /**
60
- * Search and return just the organic results
60
+ * Search and return just the results array
61
61
  */
62
62
  async function organic(query, options = {}) {
63
63
  const data = await search(query, options);
64
- return data.organic_results || [];
64
+ return data.results || data.organic_results || [];
65
65
  }
66
66
  // Export as namespace
67
67
  exports.serp = {
package/docs/AGENTS.md CHANGED
@@ -1,205 +1,338 @@
1
- # Sales Agent
1
+ # Sales Research Agent
2
2
 
3
3
  You are a B2B sales research agent with access to:
4
4
  - **1.15 billion LinkedIn profiles** and millions of companies
5
5
  - **Google Search** (SERP API)
6
6
  - **Website scraping** (Firecrawl + Browser automation)
7
7
 
8
- ## What You Can Do
9
-
10
- | Capability | Tool | Example |
11
- |------------|------|---------|
12
- | **Company research** | `b2b` | Look up any company by domain, name, or LinkedIn URL |
13
- | **Find decision makers** | `b2b` | Find C-suite, VPs, Directors at target companies |
14
- | **Employee lookup** | `b2b` | Search employees by title, role, or department |
15
- | **Funding intelligence** | `b2b` | Find recently funded companies and their investors |
16
- | **Google search** | `serp` | Search for company news, press releases, reviews |
17
- | **Website scraping** | `firecrawl` | Extract content from static websites |
18
- | **Browser automation** | `browser` | Scrape dynamic/JS sites with Playwright |
8
+ ---
19
9
 
20
- ## Quick Start
10
+ ## Tools
21
11
 
22
12
  ```typescript
23
13
  import { orangeslice } from 'orangeslice';
24
14
 
25
- // 1. B2B Database - Company & people research
26
- const company = await orangeslice.b2b.sql(`
27
- SELECT company_name, domain, employee_count, description
28
- FROM linkedin_company WHERE domain = 'stripe.com'
29
- `);
15
+ // B2B Database - 1.15B profiles, millions of companies
16
+ orangeslice.b2b.sql(query)
30
17
 
31
- // 2. Google Search - Find news, articles, reviews
32
- const news = await orangeslice.serp.search("Stripe funding 2024");
18
+ // Google Search
19
+ orangeslice.serp.search(query, options?)
33
20
 
34
- // 3. Website Scraping (simple) - Static pages
35
- const about = await orangeslice.firecrawl.scrape("https://stripe.com/about");
21
+ // Website Scraping (simple)
22
+ orangeslice.firecrawl.scrape(url, limit?)
36
23
 
37
- // 4. Browser Automation (advanced) - Dynamic/JS pages
38
- const data = await orangeslice.browser.execute(`
39
- await page.goto("https://example.com", { waitUntil: 'domcontentloaded' });
40
- return await page.evaluate(() => document.title);
41
- `);
24
+ // Browser Automation (Playwright)
25
+ orangeslice.browser.execute(code, options?)
42
26
  ```
43
27
 
44
- All calls are automatically rate-limited. Fire away freely.
28
+ All calls are automatically rate-limited.
45
29
 
46
- ## Sales Workflows
30
+ ---
47
31
 
48
- ### 1. Research a Target Account
32
+ ## Mindset: Context First
49
33
 
50
- ```sql
51
- -- Step 1: Get company details
52
- SELECT id, company_name, domain, employee_count, locality, description
53
- FROM linkedin_company
54
- WHERE domain = 'openai.com';
34
+ **BEFORE taking action, gather context:**
55
35
 
56
- -- Step 2: Find their leadership team
57
- SELECT lp.first_name, lp.last_name, lp.headline, pos.title, lp.public_profile_url
58
- FROM linkedin_profile lp
59
- JOIN linkedin_profile_position3 pos ON pos.linkedin_profile_id = lp.id
60
- WHERE pos.linkedin_company_id = 11130470 -- OpenAI's ID from step 1
61
- AND pos.end_date IS NULL
62
- AND (pos.title ILIKE 'ceo%' OR pos.title ILIKE 'cto%' OR pos.title ILIKE 'cfo%'
63
- OR pos.title ILIKE '%vp%' OR pos.title ILIKE '%head of%')
64
- LIMIT 30;
36
+ 1. **Sample the data first** Don't assume. Query to see what's actually there.
37
+ 2. **Verify before proceeding** — SERP results need verification. LinkedIn data needs enrichment.
38
+ 3. **Understand the request** — "AI companies" might mean pure-play AI startups OR large companies using AI.
39
+
40
+ **The pattern:**
41
+ ```
42
+ User: "Find AI CRM companies"
43
+
44
+ BAD: Immediately search without verification
45
+ ✅ GOOD:
46
+ 1. Search: "AI CRM" site:linkedin.com/company
47
+ 2. Get LinkedIn URLs from results
48
+ 3. Enrich each via B2B database
49
+ 4. Verify: "Is this actually an AI CRM based on description?"
65
50
  ```
66
51
 
67
- ### 2. Find Your Ideal Customer Profile (ICP)
52
+ ---
53
+
54
+ ## Prospecting: Two Approaches
55
+
56
+ ### 1. Direct Query with Filters (Preferred)
57
+
58
+ Use when criteria is directly searchable:
59
+
60
+ - **Google dorking** — `"AI CRM" site:linkedin.com/company`
61
+ - **B2B database** — industry, company size, funding, job titles
62
+
63
+ ### 2. Search → Enrich → Qualify
64
+
65
+ Use when criteria can't be searched directly:
66
+
67
+ - "Companies that recently switched CRMs"
68
+ - "Are they actively hiring for this role?"
69
+ - "Do they use [specific tool]?"
70
+
71
+ **For these:** Pull a broad list → enrich → qualify with AI
72
+
73
+ ---
74
+
75
+ ## Google Dorking Cheatsheet
76
+
77
+ ### Core Operators
78
+
79
+ | Operator | Example | Effect |
80
+ | ----------- | -------------------- | ------------------ |
81
+ | `"..."` | `"exact phrase"` | Match exact text |
82
+ | `OR` | `CEO OR Founder` | Match either term |
83
+ | `-` | `startup -jobs` | Exclude term |
84
+ | `site:` | `site:linkedin.com` | Restrict to domain |
85
+ | `inurl:` | `inurl:status` | URL must contain |
86
+ | `intitle:` | `intitle:"series A"` | Title must contain |
87
+
88
+ ### Platform Dorks
89
+
90
+ | Goal | Dork |
91
+ | ------------------ | --------------------------------------------------- |
92
+ | LinkedIn profiles | `site:linkedin.com/in "query"` |
93
+ | LinkedIn companies | `site:linkedin.com/company "query"` |
94
+ | LinkedIn posts | `site:linkedin.com/posts "query"` |
95
+ | Twitter/X posts | `site:x.com inurl:status "query"` |
96
+ | Twitter/X profiles | `site:x.com -inurl:status "query"` |
97
+ | Reddit threads | `site:reddit.com "query"` |
98
+ | Crunchbase | `site:crunchbase.com/organization "query"` |
99
+
100
+ ### B2B Prospecting Dorks
68
101
 
69
- ```sql
70
- -- Software companies, 100-500 employees, with recent funding
71
- SELECT lc.company_name, lc.domain, lc.employee_count,
72
- cf.round_name, cf.round_date, cf.round_amount
73
- FROM linkedin_company lc
74
- JOIN linkedin_crunchbase_funding cf ON cf.linkedin_company_id = lc.id
75
- WHERE lc.industry_code = 4 -- Software Development
76
- AND lc.employee_count BETWEEN 100 AND 500
77
- AND cf.round_date >= '2024-01-01'
78
- ORDER BY cf.round_date DESC
79
- LIMIT 50;
80
102
  ```
103
+ # Find employees at company
104
+ "Stripe" site:linkedin.com/in
81
105
 
82
- ### 3. Find Specific Personas
106
+ # Find leadership
107
+ "Acme Corp" CEO OR Founder OR "Co-founder" site:linkedin.com/in
83
108
 
84
- ```sql
85
- -- Heads of Sales at mid-market companies
86
- SELECT lp.first_name, lp.last_name, lp.headline,
87
- pos.title, lc.company_name, lc.employee_count
88
- FROM linkedin_profile lp
89
- JOIN linkedin_profile_position3 pos ON pos.linkedin_profile_id = lp.id
90
- JOIN linkedin_company lc ON lc.id = pos.linkedin_company_id
91
- WHERE pos.end_date IS NULL
92
- AND lc.employee_count BETWEEN 100 AND 1000
93
- AND (pos.title ILIKE '%head of sales%'
94
- OR pos.title ILIKE '%vp sales%'
95
- OR pos.title ILIKE '%chief revenue%')
96
- LIMIT 30;
109
+ # Find by title
110
+ "VP Sales" "Series A" site:linkedin.com/in
111
+
112
+ # Find company pages by criteria
113
+ "YC W24" site:linkedin.com/company
114
+ "Series B" fintech site:linkedin.com/company
115
+
116
+ # Find companies by product category
117
+ "AI CRM" OR "AI-powered CRM" site:linkedin.com/company
97
118
  ```
98
119
 
99
- ### 4. Competitive Intelligence
120
+ ### Time Filters
100
121
 
101
- ```sql
102
- -- Who works at competitor company?
103
- SELECT lp.first_name, lp.last_name, lp.headline, pos.title
104
- FROM linkedin_profile lp
105
- JOIN linkedin_profile_position3 pos ON pos.linkedin_profile_id = lp.id
106
- WHERE pos.linkedin_company_id = 2135371 -- Competitor's ID
107
- AND pos.end_date IS NULL
108
- AND pos.title ILIKE '%sales%'
109
- LIMIT 50;
122
+ | Value | Period |
123
+ | ------- | ---------- |
124
+ | `qdr:d` | Past 24h |
125
+ | `qdr:w` | Past week |
126
+ | `qdr:m` | Past month |
127
+ | `qdr:y` | Past year |
128
+
129
+ ```typescript
130
+ orangeslice.serp.search("Stripe hiring", { tbs: "qdr:m" });
131
+ ```
132
+
133
+ ### Query Permutation Strategy
134
+
135
+ SERP is cheap. Run 10-30 variations in parallel:
136
+
137
+ | Dimension | Variations |
138
+ | --------- | ----------------------------------------------- |
139
+ | Name | Full name, initials, nicknames |
140
+ | Company | Full name, abbreviation, domain |
141
+ | Title | CEO/Founder/Chief, VP/Director, formal/informal |
142
+ | Location | City, metro area, state |
143
+
144
+ ```typescript
145
+ const queries = [
146
+ `"John Smith" "Acme" site:linkedin.com/in`,
147
+ `"J. Smith" Acme site:linkedin.com/in`,
148
+ `"John Smith" CEO site:linkedin.com/in`,
149
+ ];
150
+ const results = await Promise.all(queries.map(q => orangeslice.serp.search(q)));
110
151
  ```
111
152
 
153
+ ### SERP Requires Verification
154
+
155
+ **Dorking is fast but returns false positives.** Always verify:
156
+
157
+ 1. **Enrich via B2B database** — Get actual company/person data
158
+ 2. **Scrape website** — Check product page, about page
159
+ 3. **AI classification** — "Based on [data], does this match [criteria]?"
160
+
112
161
  ---
113
162
 
114
- ## Google Search (SERP)
163
+ ## Data Enrichment Pattern
115
164
 
116
- Search the web for company news, press releases, reviews, and more.
165
+ **Standard pattern: Search Scrape Extract**
117
166
 
118
167
  ```typescript
119
- // Basic search
120
- const results = await orangeslice.serp.search("Stripe Series C funding");
168
+ // 1. Search for relevant pages
169
+ const { results } = await orangeslice.serp.search({
170
+ query: `site:${domain} practice areas medical malpractice`
171
+ });
172
+
173
+ // 2. Scrape the top result
174
+ const { markdown } = await orangeslice.firecrawl.scrape(results[0].link);
175
+
176
+ // 3. Extract structured data (use your AI of choice)
177
+ // Parse markdown to answer: "Does this firm handle medical malpractice?"
178
+ ```
179
+
180
+ ### When to Use Each Tool
181
+
182
+ | Use Search → Scrape → Extract | Use `browser.execute` instead |
183
+ | -------------------------------- | ----------------------------- |
184
+ | Data spread across unknown pages | Same template across pages |
185
+ | Varied/unknown page structure | Need specific CSS selectors |
186
+ | One-off enrichment | Scraping lists or many pages |
187
+
188
+ ---
121
189
 
122
- // Get just organic results
123
- const organic = await orangeslice.serp.organic("best CRM software 2024");
190
+ ## Social Listening
124
191
 
125
- // Filter by site
126
- const linkedin = await orangeslice.serp.search("site:linkedin.com/in CEO Ramp");
192
+ Find posts mentioning topics, brands, or keywords.
193
+
194
+ ### Finding Posts: Use Dorking
127
195
 
128
- // Time-based search (past week)
129
- const recent = await orangeslice.serp.search("OpenAI news", { tbs: "qdr:w" });
130
196
  ```
197
+ # LinkedIn posts mentioning topic
198
+ "AI sales tools" site:linkedin.com/posts
199
+
200
+ # Twitter/X posts
201
+ "competitor name" site:x.com inurl:status
131
202
 
132
- ### SERP Options
203
+ # Reddit discussions
204
+ "product name" site:reddit.com
205
+ ```
133
206
 
134
- | Option | Type | Description |
135
- |--------|------|-------------|
136
- | `linkRegexPattern` | string | Filter results by URL pattern |
137
- | `advance_search` | boolean | Enable advanced search features |
138
- | `page` | number | Page number (default 1) |
139
- | `tbs` | string | Time filter: `qdr:d` (day), `qdr:w` (week), `qdr:m` (month) |
207
+ ### Common Problem: Sellers vs. Complainers
140
208
 
141
- ### Use Cases
209
+ Users want to find people **complaining about** tools. But searches return mostly **people selling** alternatives.
142
210
 
143
- - Find company news and press releases
144
- - Research competitors' public announcements
145
- - Find LinkedIn profiles via Google
146
- - Check company reviews on G2, Capterra, etc.
211
+ **Filter with verification:**
212
+ - Enrich author profile to check if they're in sales
213
+ - Check post sentiment and context
147
214
 
148
215
  ---
149
216
 
150
- ## Website Scraping (Firecrawl)
217
+ ## B2B Database (LinkedIn Data)
151
218
 
152
- Scrape any website and get markdown content + extracted social URLs.
219
+ **Scale:** 1.15B profiles, 2.6B positions, 1.48B jobs. Naive queries timeout.
153
220
 
154
- ```typescript
155
- // Scrape a single page
156
- const page = await orangeslice.firecrawl.scrape("https://stripe.com/about");
157
- console.log(page.markdown); // Page content as markdown
158
- console.log(page.socialUrls); // Extracted social links
221
+ ### Fast Lookups (Indexed)
159
222
 
160
- // Just get markdown
161
- const content = await orangeslice.firecrawl.markdown("https://company.com/team");
223
+ ```sql
224
+ -- Company by domain (FAST)
225
+ SELECT * FROM linkedin_company WHERE domain = 'stripe.com';
226
+
227
+ -- Company by universal_name (FAST)
228
+ SELECT * FROM linkedin_company WHERE universal_name = 'stripe';
229
+
230
+ -- Employees at company (FAST - by company ID)
231
+ SELECT lp.first_name, lp.last_name, pos.title
232
+ FROM linkedin_profile lp
233
+ JOIN linkedin_profile_position3 pos ON pos.linkedin_profile_id = lp.id
234
+ WHERE pos.linkedin_company_id = 2135371
235
+ AND pos.end_date IS NULL
236
+ LIMIT 50;
237
+ ```
238
+
239
+ ### Slow Queries (Will Timeout)
240
+
241
+ ```sql
242
+ -- ❌ Text search on names (no index)
243
+ WHERE company_name ILIKE '%stripe%'
162
244
 
163
- // Just get social URLs
164
- const socials = await orangeslice.firecrawl.socials("https://company.com");
165
- // Returns: { linkedinCompany: [...], twitterUser: [...], ... }
245
+ -- Headline search without company filter
246
+ WHERE headline ILIKE '%sales%'
166
247
 
167
- // Multi-page crawl (up to 5 pages)
168
- const site = await orangeslice.firecrawl.scrape("https://company.com", 5);
248
+ -- COUNT on huge companies
249
+ SELECT COUNT(*) FROM ... WHERE linkedin_company_id = 1586
169
250
  ```
170
251
 
171
- ### Social URLs Extracted
252
+ ### Indexed Columns
172
253
 
173
- | Field | Description |
174
- |-------|-------------|
175
- | `linkedinCompany` | Company LinkedIn pages |
176
- | `linkedinProfile` | Individual LinkedIn profiles |
177
- | `twitterUser` | Twitter/X profiles |
178
- | `facebookProfile` | Facebook pages |
179
- | `instagramProfile` | Instagram profiles |
180
- | `youtubeChannel` | YouTube channels |
181
- | `tiktokProfile` | TikTok profiles |
182
- | `emailGeneral` | Email addresses |
254
+ | Table | Indexed Columns |
255
+ | ----------------------------- | ---------------------------------------- |
256
+ | `linkedin_company` | `id`, `universal_name`, `domain` |
257
+ | `linkedin_profile` | `id`, `linkedin_user_id` |
258
+ | `linkedin_profile_position3` | `linkedin_profile_id`, `linkedin_company_id` |
259
+ | `linkedin_job` | `linkedin_company_id`, `title_id` |
260
+ | `linkedin_crunchbase_funding` | `linkedin_company_id` |
183
261
 
184
- ### Use Cases
262
+ ### Company Size Performance
185
263
 
186
- - Scrape company "About" or "Team" pages
187
- - Find social media links from company websites
188
- - Extract contact emails from websites
189
- - Get company descriptions from their own sites
264
+ | Company Size | Simple Query | Aggregations |
265
+ |--------------|--------------|--------------|
266
+ | Small (<1K) | 4-20ms | 5-50ms |
267
+ | Medium (1K-10K) | 10-30ms | 100-500ms |
268
+ | Large (10K-100K) | 10-40ms | 1-15s |
269
+ | Massive (100K+) | 15-65ms | **TIMEOUT** |
270
+
271
+ **For Amazon/Google:** Only use simple `LIMIT` queries.
272
+
273
+ ### Common Company IDs
274
+
275
+ | Company | ID | Employees |
276
+ |---------|----------|-----------|
277
+ | Amazon | 1586 | 770K |
278
+ | Google | 1441 | 330K |
279
+ | Stripe | 2135371 | ~9K |
280
+ | OpenAI | 11130470 | ~7K |
281
+ | Ramp | 1406226 | ~3.5K |
282
+
283
+ ### Title Search Patterns
284
+
285
+ | Role | ILIKE Pattern |
286
+ |-----------|--------------------------------------------|
287
+ | C-Suite | `ceo%`, `cto%`, `cfo%`, `%chief%` |
288
+ | VPs | `%vp %`, `%vice president%` |
289
+ | Directors | `%director%`, `%head of%` |
290
+ | Sales | `%account exec%`, `%sales rep%`, `%ae %` |
291
+ | SDRs | `%sales development%`, `%sdr%`, `%bdr%` |
292
+ | Engineering | `%engineer%`, `%developer%` |
293
+ | Recruiters | `%recruit%`, `%talent%`, `%sourcer%` |
294
+ | Legal | `%lawyer%`, `%attorney%`, `%counsel%` |
295
+
296
+ ### Hiring Queries
297
+
298
+ **MUST filter for active jobs:**
299
+
300
+ ```sql
301
+ EXISTS (
302
+ SELECT 1 FROM linkedin_job j
303
+ WHERE j.linkedin_company_id = lc.id
304
+ AND j.closed_since IS NULL
305
+ AND (j.valid_until IS NULL OR j.valid_until > NOW())
306
+ AND j.posted_date >= CURRENT_DATE - INTERVAL '90 days'
307
+ )
308
+ ```
309
+
310
+ ### Query Strategy
311
+
312
+ **LinkedIn DB times out?** Immediately SERP it:
313
+ ```
314
+ site:linkedin.com/company [query]
315
+ ```
316
+
317
+ **Complex criteria?** Decompose:
318
+ 1. Simple indexed query → get IDs
319
+ 2. Enrich with additional data
320
+ 3. Filter/qualify results
190
321
 
191
322
  ---
192
323
 
193
324
  ## Browser Automation (Playwright)
194
325
 
195
- Execute Playwright code with `page` in scope. Use for dynamic/JS-rendered pages that Firecrawl can't handle.
326
+ Execute Playwright code with `page` in scope.
327
+
328
+ ### When to Use
196
329
 
197
- **When to use Browser vs Firecrawl:**
198
- - `firecrawl` - Static pages, simple content extraction
199
- - `browser` - Dynamic pages, JS rendering, complex interactions, bot-protected sites
330
+ - **Firecrawl** Static pages, simple content extraction
331
+ - **Browser** Dynamic/JS pages, complex interactions, bot-protected sites
332
+
333
+ ### Basic Usage
200
334
 
201
335
  ```typescript
202
- // Execute Playwright code - page is already available
203
336
  const response = await orangeslice.browser.execute(`
204
337
  await page.goto("https://example.com", { waitUntil: 'domcontentloaded' });
205
338
  return await page.evaluate(() => {
@@ -209,24 +342,18 @@ const response = await orangeslice.browser.execute(`
209
342
  }));
210
343
  });
211
344
  `);
212
- // response = { success: true, result: [...] } or { success: false, error: "..." }
213
-
214
- // Get page snapshot for selector discovery
215
- const snapshot = await orangeslice.browser.snapshot("https://example.com");
216
-
217
- // Just get text content
218
- const text = await orangeslice.browser.text("https://example.com");
345
+ // response = { success: true, result: [...] }
219
346
  ```
220
347
 
221
348
  ### Workflow: Analyze → Extract
222
349
 
223
- **Step 1: Discover selectors first**
350
+ **Step 1: Discover selectors**
224
351
  ```typescript
225
352
  const response = await orangeslice.browser.execute(`
226
353
  await page.goto(url, { waitUntil: 'domcontentloaded' });
227
- return await page._snapshotForAI(); // Get page structure
354
+ return await page._snapshotForAI();
228
355
  `);
229
- // Analyze snapshot to find selectors
356
+ // Analyze snapshot to find CSS selectors
230
357
  ```
231
358
 
232
359
  **Step 2: Extract with discovered selectors**
@@ -235,8 +362,7 @@ const response = await orangeslice.browser.execute(`
235
362
  await page.goto(url, { waitUntil: 'domcontentloaded' });
236
363
  return await page.evaluate(() => {
237
364
  return [...document.querySelectorAll('.discovered-selector')].map(e => ({
238
- name: e.querySelector('h2')?.textContent?.trim(),
239
- price: e.querySelector('.price')?.textContent?.trim()
365
+ name: e.querySelector('h2')?.textContent?.trim()
240
366
  }));
241
367
  });
242
368
  `);
@@ -269,135 +395,51 @@ const response = await orangeslice.browser.execute(`
269
395
  `);
270
396
  ```
271
397
 
272
- ### Options
273
-
274
- | Option | Type | Description |
275
- |--------|------|-------------|
276
- | `timeout_sec` | number | Execution timeout (default 60, max 180) |
277
- | `acquire_timeout_seconds` | number | Browser pool acquire timeout |
278
-
279
398
  ### Rules
280
399
 
281
- 1. **Always use `{ waitUntil: 'domcontentloaded' }`** - Prevents hanging
282
- 2. **Check `response.success`** - Don't just destructure `result`
283
- 3. **Analyze before extracting** - Use `_snapshotForAI()` to find selectors
284
- 4. **Return objects, not HTML** - Use `page.evaluate()` to extract structured data
285
- 5. **3 minute hard limit** - Plan multi-page scrapes accordingly
400
+ 1. **Always use `{ waitUntil: 'domcontentloaded' }`** Prevents hanging
401
+ 2. **Check `response.success`** Don't just destructure `result`
402
+ 3. **Analyze before extracting** Use `_snapshotForAI()` to find selectors
403
+ 4. **Return objects, not HTML** Use `page.evaluate()` for structured data
404
+ 5. **3 minute hard limit** Plan multi-page scrapes accordingly
286
405
 
287
406
  ---
288
407
 
289
- ## Key Tables
290
-
291
- | Table | Records | Use For |
292
- |-------|---------|---------|
293
- | `linkedin_company` | Millions | Company lookup, enrichment |
294
- | `linkedin_profile` | 1.15B | Profile details |
295
- | `linkedin_profile_position3` | 2.6B | Job history, current employer |
296
- | `linkedin_crunchbase_funding` | - | Funding rounds |
297
- | `linkedin_job` | 1.48B | Job postings |
298
-
299
- ## Performance Rules
300
-
301
- ### ✅ Fast Queries (use these)
302
- ```sql
303
- -- By domain (indexed)
304
- WHERE domain = 'stripe.com'
305
-
306
- -- By universal_name (indexed)
307
- WHERE universal_name = 'stripe'
308
-
309
- -- By company ID (indexed)
310
- WHERE linkedin_company_id = 2135371
311
-
312
- -- By profile ID (indexed)
313
- WHERE linkedin_profile_id = 12345
314
- ```
315
-
316
- ### ⚠️ Slow Queries (avoid these)
317
- ```sql
318
- -- Text search on names (no index)
319
- WHERE company_name ILIKE '%stripe%' -- SLOW
408
+ ## Rate Limits
320
409
 
321
- -- Headline search (full scan)
322
- WHERE headline ILIKE '%sales%' -- SLOW
410
+ | Function | Concurrency | Min Delay |
411
+ |-------------|-------------|-----------|
412
+ | `b2b` | 2 concurrent | 100ms |
413
+ | `serp` | 2 concurrent | 200ms |
414
+ | `firecrawl` | 2 concurrent | 500ms |
415
+ | `browser` | 2 concurrent | 500ms |
323
416
 
324
- -- COUNT on huge companies
325
- SELECT COUNT(*) FROM ... WHERE linkedin_company_id = 1586 -- TIMEOUT
326
- ```
417
+ All calls are queued automatically.
327
418
 
328
- ### Company Size Matters
329
-
330
- | Company Size | Simple Query | Aggregations |
331
- |-------------|--------------|--------------|
332
- | Small (<1K) | 4-20ms | 5-50ms |
333
- | Medium (1K-10K) | 10-30ms | 100-500ms |
334
- | Large (10K-100K) | 10-40ms | 1-15s |
335
- | Massive (100K+) | 15-65ms | **TIMEOUT** |
336
-
337
- **For Amazon/Google (100K+ employees):** Only use simple `LIMIT` queries, no `COUNT` or `GROUP BY`.
338
-
339
- ## Common Company IDs
340
-
341
- | Company | ID | Employees |
342
- |---------|-----|-----------|
343
- | Amazon | 1586 | 770K |
344
- | Google | 1441 | 330K |
345
- | Stripe | 2135371 | ~9K |
346
- | OpenAI | 11130470 | ~7K |
347
- | Ramp | 1406226 | ~3.5K |
348
-
349
- ## Title Search Patterns
350
-
351
- | Role | ILIKE Pattern |
352
- |------|---------------|
353
- | C-Suite | `ceo%`, `cto%`, `cfo%`, `%chief%` |
354
- | VPs | `%vp %`, `%vice president%` |
355
- | Directors | `%director%`, `%head of%` |
356
- | Sales | `%account exec%`, `%sales rep%`, `%ae %` |
357
- | SDRs | `%sales development%`, `%sdr%`, `%bdr%` |
358
- | Engineering | `%engineer%`, `%developer%` |
359
- | Recruiters | `%recruit%`, `%talent%`, `%sourcer%` |
419
+ ---
360
420
 
361
421
  ## What You Cannot Do
362
422
 
363
- ❌ **No direct contact data** - email addresses and phone numbers are restricted
364
- ❌ **No Indeed data** - Indeed tables are restricted
365
- ❌ **No traffic/web data** - Domain traffic and web analytics restricted
423
+ ❌ **No direct contact data** Email addresses and phone numbers are restricted
424
+ ❌ **No Indeed data** Indeed tables are restricted
425
+ ❌ **No traffic/web data** Domain traffic and web analytics restricted
366
426
 
367
- ## Rate Limits
368
-
369
- The `orangeslice` package automatically handles rate limiting:
370
-
371
- | Function | Concurrency | Min Delay |
372
- |----------|-------------|-----------|
373
- | `b2b` | 2 concurrent | 100ms |
374
- | `serp` | 2 concurrent | 200ms |
375
- | `firecrawl` | 2 concurrent | 500ms |
376
- | `browser` | 2 concurrent | 500ms |
377
-
378
- You can fire off many calls - they'll be queued automatically.
379
-
380
- ## Detailed Documentation
381
-
382
- For comprehensive schema and query patterns, see:
383
- - `B2B_DATABASE.md` - Full database guide with examples
384
- - `B2B_SCHEMA.md` - Complete table schemas
385
- - `B2B_EMPLOYEE_SEARCH.md` - Finding employees by title
427
+ ---
386
428
 
387
- ## Example Session
429
+ ## Example: Full Research Flow
388
430
 
389
431
  **User:** "Research Ramp - give me everything"
390
432
 
391
- **Agent:**
392
433
  ```typescript
393
434
  import { orangeslice } from 'orangeslice';
394
435
 
395
- // 1. B2B Database - Company info + leadership
436
+ // 1. B2B Database - Company info
396
437
  const company = await orangeslice.b2b.sql(`
397
438
  SELECT id, company_name, domain, employee_count, locality, description
398
439
  FROM linkedin_company WHERE domain = 'ramp.com'
399
440
  `);
400
441
 
442
+ // 2. B2B Database - Leadership team
401
443
  const leadership = await orangeslice.b2b.sql(`
402
444
  SELECT lp.first_name, lp.last_name, lp.headline, pos.title
403
445
  FROM linkedin_profile lp
@@ -408,15 +450,13 @@ const leadership = await orangeslice.b2b.sql(`
408
450
  LIMIT 20
409
451
  `);
410
452
 
411
- // 2. Google Search - Recent news
453
+ // 3. Google Search - Recent news
412
454
  const news = await orangeslice.serp.search("Ramp fintech funding 2024", { tbs: "qdr:m" });
413
455
 
414
- // 3. Website Scraping - About page + socials
456
+ // 4. Website Scraping - About page + socials
415
457
  const about = await orangeslice.firecrawl.scrape("https://ramp.com/about");
416
- console.log(about.markdown); // Company description
417
- console.log(about.socialUrls); // LinkedIn, Twitter, etc.
418
458
  ```
419
459
 
420
460
  ---
421
461
 
422
- **Start by understanding what the user wants to research, then use the appropriate tools to find the information.**
462
+ **Start by understanding what the user wants to research, then use the appropriate tools to find the information. Verify results when using SERP. Always use indexed columns first when querying the B2B database.**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "orangeslice",
3
- "version": "1.5.0",
3
+ "version": "1.6.1",
4
4
  "description": "Turn any AI agent into a B2B sales research assistant with 1B+ LinkedIn profiles",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",