orangeslice 1.4.2 → 1.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -1
- package/dist/browser.d.ts +68 -0
- package/dist/browser.js +114 -0
- package/dist/index.d.ts +14 -2
- package/dist/index.js +11 -2
- package/docs/AGENTS.md +353 -210
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -14,7 +14,8 @@ This copies documentation to `./orangeslice-docs/` and installs the package. Poi
|
|
|
14
14
|
|----------|------------|
|
|
15
15
|
| `b2b` | Query 1B+ LinkedIn profiles, companies, funding, jobs |
|
|
16
16
|
| `serp` | Google search for news, articles, reviews |
|
|
17
|
-
| `firecrawl` | Scrape websites, extract social URLs |
|
|
17
|
+
| `firecrawl` | Scrape static websites, extract social URLs |
|
|
18
|
+
| `browser` | Playwright automation for dynamic/JS sites |
|
|
18
19
|
|
|
19
20
|
## Quick Example
|
|
20
21
|
|
|
package/dist/browser.d.ts
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
export interface BrowserResponse {
|
|
2
|
+
success: boolean;
|
|
3
|
+
result?: any;
|
|
4
|
+
error?: string;
|
|
5
|
+
browser_live_view_url?: string;
|
|
6
|
+
}
|
|
7
|
+
export interface BrowserOptions {
|
|
8
|
+
/** Browser pool ID (default: pre-warmed pool) */
|
|
9
|
+
pool?: string;
|
|
10
|
+
/** Execution timeout in seconds */
|
|
11
|
+
timeout_sec?: number;
|
|
12
|
+
/** Timeout for acquiring browser from pool */
|
|
13
|
+
acquire_timeout_seconds?: number;
|
|
14
|
+
}
|
|
15
|
+
/**
|
|
16
|
+
* Execute Playwright code with `page` in scope.
|
|
17
|
+
* Browser is automatically acquired from a pre-warmed pool and released when done.
|
|
18
|
+
*
|
|
19
|
+
* @param code - Playwright code to execute (has `page` in scope)
|
|
20
|
+
* @param options - Optional settings for timeout and pool
|
|
21
|
+
*
|
|
22
|
+
* @example
|
|
23
|
+
* // Get page snapshot for analysis
|
|
24
|
+
* const response = await browser.execute(`
|
|
25
|
+
* await page.goto(url, { waitUntil: 'domcontentloaded' });
|
|
26
|
+
* return await page._snapshotForAI();
|
|
27
|
+
* `);
|
|
28
|
+
*
|
|
29
|
+
* @example
|
|
30
|
+
* // Extract data from page
|
|
31
|
+
* const response = await browser.execute(`
|
|
32
|
+
* await page.goto("https://example.com", { waitUntil: 'domcontentloaded' });
|
|
33
|
+
* return await page.evaluate(() => {
|
|
34
|
+
* return [...document.querySelectorAll('.item')].map(el => ({
|
|
35
|
+
* title: el.querySelector('h2')?.textContent?.trim(),
|
|
36
|
+
* url: el.querySelector('a')?.href
|
|
37
|
+
* }));
|
|
38
|
+
* });
|
|
39
|
+
* `);
|
|
40
|
+
* // response = { success: true, result: [...] }
|
|
41
|
+
*/
|
|
42
|
+
export declare function execute(code: string, options?: BrowserOptions): Promise<BrowserResponse>;
|
|
43
|
+
/**
|
|
44
|
+
* Get a page snapshot for AI analysis.
|
|
45
|
+
* Useful for discovering selectors before extraction.
|
|
46
|
+
*
|
|
47
|
+
* @param url - URL to navigate to
|
|
48
|
+
*
|
|
49
|
+
* @example
|
|
50
|
+
* const snapshot = await browser.snapshot("https://example.com/products");
|
|
51
|
+
* // Returns page HTML structure for selector discovery
|
|
52
|
+
*/
|
|
53
|
+
export declare function snapshot(url: string): Promise<BrowserResponse>;
|
|
54
|
+
/**
|
|
55
|
+
* Extract text content from a URL.
|
|
56
|
+
*
|
|
57
|
+
* @param url - URL to navigate to
|
|
58
|
+
*
|
|
59
|
+
* @example
|
|
60
|
+
* const response = await browser.text("https://example.com");
|
|
61
|
+
* // response.result = page text content
|
|
62
|
+
*/
|
|
63
|
+
export declare function text(url: string): Promise<BrowserResponse>;
|
|
64
|
+
export declare const browser: {
|
|
65
|
+
execute: typeof execute;
|
|
66
|
+
snapshot: typeof snapshot;
|
|
67
|
+
text: typeof text;
|
|
68
|
+
};
|
package/dist/browser.js
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.browser = void 0;
|
|
4
|
+
exports.execute = execute;
|
|
5
|
+
exports.snapshot = snapshot;
|
|
6
|
+
exports.text = text;
|
|
7
|
+
const queue_1 = require("./queue");
|
|
8
|
+
const API_URL = process.env.ORANGESLICE_API_URL || "https://orangeslice.ai/api/function?functionId=browser";
|
|
9
|
+
// Shared queue for browser requests (limit concurrent browser sessions)
|
|
10
|
+
const queue = (0, queue_1.createQueue)(2);
|
|
11
|
+
const rateLimiter = (0, queue_1.createRateLimiter)(500); // 500ms between requests
|
|
12
|
+
/**
|
|
13
|
+
* Helper to make POST request, handling redirects manually
|
|
14
|
+
* (Node.js fetch has issues with POST body on redirects)
|
|
15
|
+
*/
|
|
16
|
+
async function fetchWithRedirect(url, body) {
|
|
17
|
+
let response = await fetch(url, {
|
|
18
|
+
method: "POST",
|
|
19
|
+
headers: { "Content-Type": "application/json" },
|
|
20
|
+
body,
|
|
21
|
+
redirect: "manual",
|
|
22
|
+
});
|
|
23
|
+
// Handle redirect manually - re-POST to the new location
|
|
24
|
+
if (response.status >= 300 && response.status < 400) {
|
|
25
|
+
const location = response.headers.get("location");
|
|
26
|
+
if (location) {
|
|
27
|
+
response = await fetch(location, {
|
|
28
|
+
method: "POST",
|
|
29
|
+
headers: { "Content-Type": "application/json" },
|
|
30
|
+
body,
|
|
31
|
+
});
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
return response;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* Execute Playwright code with `page` in scope.
|
|
38
|
+
* Browser is automatically acquired from a pre-warmed pool and released when done.
|
|
39
|
+
*
|
|
40
|
+
* @param code - Playwright code to execute (has `page` in scope)
|
|
41
|
+
* @param options - Optional settings for timeout and pool
|
|
42
|
+
*
|
|
43
|
+
* @example
|
|
44
|
+
* // Get page snapshot for analysis
|
|
45
|
+
* const response = await browser.execute(`
|
|
46
|
+
* await page.goto(url, { waitUntil: 'domcontentloaded' });
|
|
47
|
+
* return await page._snapshotForAI();
|
|
48
|
+
* `);
|
|
49
|
+
*
|
|
50
|
+
* @example
|
|
51
|
+
* // Extract data from page
|
|
52
|
+
* const response = await browser.execute(`
|
|
53
|
+
* await page.goto("https://example.com", { waitUntil: 'domcontentloaded' });
|
|
54
|
+
* return await page.evaluate(() => {
|
|
55
|
+
* return [...document.querySelectorAll('.item')].map(el => ({
|
|
56
|
+
* title: el.querySelector('h2')?.textContent?.trim(),
|
|
57
|
+
* url: el.querySelector('a')?.href
|
|
58
|
+
* }));
|
|
59
|
+
* });
|
|
60
|
+
* `);
|
|
61
|
+
* // response = { success: true, result: [...] }
|
|
62
|
+
*/
|
|
63
|
+
async function execute(code, options = {}) {
|
|
64
|
+
return queue(async () => {
|
|
65
|
+
return rateLimiter(async () => {
|
|
66
|
+
const body = JSON.stringify({ code, ...options });
|
|
67
|
+
const response = await fetchWithRedirect(API_URL, body);
|
|
68
|
+
if (!response.ok) {
|
|
69
|
+
throw new Error(`Browser request failed: ${response.status} ${response.statusText}`);
|
|
70
|
+
}
|
|
71
|
+
const data = (await response.json());
|
|
72
|
+
return data;
|
|
73
|
+
});
|
|
74
|
+
});
|
|
75
|
+
}
|
|
76
|
+
/**
|
|
77
|
+
* Get a page snapshot for AI analysis.
|
|
78
|
+
* Useful for discovering selectors before extraction.
|
|
79
|
+
*
|
|
80
|
+
* @param url - URL to navigate to
|
|
81
|
+
*
|
|
82
|
+
* @example
|
|
83
|
+
* const snapshot = await browser.snapshot("https://example.com/products");
|
|
84
|
+
* // Returns page HTML structure for selector discovery
|
|
85
|
+
*/
|
|
86
|
+
async function snapshot(url) {
|
|
87
|
+
const code = `
|
|
88
|
+
await page.goto(${JSON.stringify(url)}, { waitUntil: 'domcontentloaded' });
|
|
89
|
+
return await page._snapshotForAI();
|
|
90
|
+
`;
|
|
91
|
+
return execute(code);
|
|
92
|
+
}
|
|
93
|
+
/**
|
|
94
|
+
* Extract text content from a URL.
|
|
95
|
+
*
|
|
96
|
+
* @param url - URL to navigate to
|
|
97
|
+
*
|
|
98
|
+
* @example
|
|
99
|
+
* const response = await browser.text("https://example.com");
|
|
100
|
+
* // response.result = page text content
|
|
101
|
+
*/
|
|
102
|
+
async function text(url) {
|
|
103
|
+
const code = `
|
|
104
|
+
await page.goto(${JSON.stringify(url)}, { waitUntil: 'domcontentloaded' });
|
|
105
|
+
return await page.evaluate(() => document.body.innerText);
|
|
106
|
+
`;
|
|
107
|
+
return execute(code);
|
|
108
|
+
}
|
|
109
|
+
// Export as namespace
|
|
110
|
+
exports.browser = {
|
|
111
|
+
execute,
|
|
112
|
+
snapshot,
|
|
113
|
+
text,
|
|
114
|
+
};
|
package/dist/index.d.ts
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import { b2b } from "./b2b";
|
|
2
2
|
import { serp } from "./serp";
|
|
3
3
|
import { firecrawl } from "./firecrawl";
|
|
4
|
-
|
|
4
|
+
import { browser } from "./browser";
|
|
5
|
+
export { b2b, serp, firecrawl, browser };
|
|
5
6
|
/**
|
|
6
7
|
* Main orangeslice namespace - AI sales agent toolkit
|
|
7
8
|
*
|
|
@@ -14,9 +15,15 @@ export { b2b, serp, firecrawl };
|
|
|
14
15
|
* // Google Search
|
|
15
16
|
* const results = await orangeslice.serp.search("best CRM software 2024");
|
|
16
17
|
*
|
|
17
|
-
* // Website Scraping
|
|
18
|
+
* // Website Scraping (simple)
|
|
18
19
|
* const page = await orangeslice.firecrawl.scrape("https://stripe.com/about");
|
|
19
20
|
*
|
|
21
|
+
* // Browser Automation (Playwright)
|
|
22
|
+
* const data = await orangeslice.browser.execute(`
|
|
23
|
+
* await page.goto("https://example.com", { waitUntil: 'domcontentloaded' });
|
|
24
|
+
* return await page.evaluate(() => document.title);
|
|
25
|
+
* `);
|
|
26
|
+
*
|
|
20
27
|
* // All calls are automatically rate-limited and queued
|
|
21
28
|
*/
|
|
22
29
|
export declare const orangeslice: {
|
|
@@ -34,5 +41,10 @@ export declare const orangeslice: {
|
|
|
34
41
|
markdown: typeof import("./firecrawl").markdown;
|
|
35
42
|
socials: typeof import("./firecrawl").socials;
|
|
36
43
|
};
|
|
44
|
+
browser: {
|
|
45
|
+
execute: typeof import("./browser").execute;
|
|
46
|
+
snapshot: typeof import("./browser").snapshot;
|
|
47
|
+
text: typeof import("./browser").text;
|
|
48
|
+
};
|
|
37
49
|
};
|
|
38
50
|
export default orangeslice;
|
package/dist/index.js
CHANGED
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.orangeslice = exports.firecrawl = exports.serp = exports.b2b = void 0;
|
|
3
|
+
exports.orangeslice = exports.browser = exports.firecrawl = exports.serp = exports.b2b = void 0;
|
|
4
4
|
const b2b_1 = require("./b2b");
|
|
5
5
|
Object.defineProperty(exports, "b2b", { enumerable: true, get: function () { return b2b_1.b2b; } });
|
|
6
6
|
const serp_1 = require("./serp");
|
|
7
7
|
Object.defineProperty(exports, "serp", { enumerable: true, get: function () { return serp_1.serp; } });
|
|
8
8
|
const firecrawl_1 = require("./firecrawl");
|
|
9
9
|
Object.defineProperty(exports, "firecrawl", { enumerable: true, get: function () { return firecrawl_1.firecrawl; } });
|
|
10
|
+
const browser_1 = require("./browser");
|
|
11
|
+
Object.defineProperty(exports, "browser", { enumerable: true, get: function () { return browser_1.browser; } });
|
|
10
12
|
/**
|
|
11
13
|
* Main orangeslice namespace - AI sales agent toolkit
|
|
12
14
|
*
|
|
@@ -19,14 +21,21 @@ Object.defineProperty(exports, "firecrawl", { enumerable: true, get: function ()
|
|
|
19
21
|
* // Google Search
|
|
20
22
|
* const results = await orangeslice.serp.search("best CRM software 2024");
|
|
21
23
|
*
|
|
22
|
-
* // Website Scraping
|
|
24
|
+
* // Website Scraping (simple)
|
|
23
25
|
* const page = await orangeslice.firecrawl.scrape("https://stripe.com/about");
|
|
24
26
|
*
|
|
27
|
+
* // Browser Automation (Playwright)
|
|
28
|
+
* const data = await orangeslice.browser.execute(`
|
|
29
|
+
* await page.goto("https://example.com", { waitUntil: 'domcontentloaded' });
|
|
30
|
+
* return await page.evaluate(() => document.title);
|
|
31
|
+
* `);
|
|
32
|
+
*
|
|
25
33
|
* // All calls are automatically rate-limited and queued
|
|
26
34
|
*/
|
|
27
35
|
exports.orangeslice = {
|
|
28
36
|
b2b: b2b_1.b2b,
|
|
29
37
|
serp: serp_1.serp,
|
|
30
38
|
firecrawl: firecrawl_1.firecrawl,
|
|
39
|
+
browser: browser_1.browser,
|
|
31
40
|
};
|
|
32
41
|
exports.default = exports.orangeslice;
|
package/docs/AGENTS.md
CHANGED
|
@@ -1,300 +1,445 @@
|
|
|
1
|
-
# Sales Agent
|
|
1
|
+
# Sales Research Agent
|
|
2
2
|
|
|
3
3
|
You are a B2B sales research agent with access to:
|
|
4
4
|
- **1.15 billion LinkedIn profiles** and millions of companies
|
|
5
5
|
- **Google Search** (SERP API)
|
|
6
|
-
- **Website scraping** (Firecrawl)
|
|
7
|
-
|
|
8
|
-
## What You Can Do
|
|
9
|
-
|
|
10
|
-
| Capability | Tool | Example |
|
|
11
|
-
|------------|------|---------|
|
|
12
|
-
| **Company research** | `b2b` | Look up any company by domain, name, or LinkedIn URL |
|
|
13
|
-
| **Find decision makers** | `b2b` | Find C-suite, VPs, Directors at target companies |
|
|
14
|
-
| **Employee lookup** | `b2b` | Search employees by title, role, or department |
|
|
15
|
-
| **Funding intelligence** | `b2b` | Find recently funded companies and their investors |
|
|
16
|
-
| **Google search** | `serp` | Search for company news, press releases, reviews |
|
|
17
|
-
| **Website scraping** | `firecrawl` | Extract content from company websites |
|
|
6
|
+
- **Website scraping** (Firecrawl + Browser automation)
|
|
18
7
|
|
|
8
|
+
---
|
|
19
9
|
|
|
20
|
-
##
|
|
10
|
+
## Tools
|
|
21
11
|
|
|
22
12
|
```typescript
|
|
23
13
|
import { orangeslice } from 'orangeslice';
|
|
24
14
|
|
|
25
|
-
//
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
15
|
+
// B2B Database - 1.15B profiles, millions of companies
|
|
16
|
+
orangeslice.b2b.sql(query)
|
|
17
|
+
|
|
18
|
+
// Google Search
|
|
19
|
+
orangeslice.serp.search(query, options?)
|
|
30
20
|
|
|
31
|
-
//
|
|
32
|
-
|
|
21
|
+
// Website Scraping (simple)
|
|
22
|
+
orangeslice.firecrawl.scrape(url, limit?)
|
|
33
23
|
|
|
34
|
-
//
|
|
35
|
-
|
|
24
|
+
// Browser Automation (Playwright)
|
|
25
|
+
orangeslice.browser.execute(code, options?)
|
|
36
26
|
```
|
|
37
27
|
|
|
38
|
-
All calls are automatically rate-limited.
|
|
28
|
+
All calls are automatically rate-limited.
|
|
29
|
+
|
|
30
|
+
---
|
|
39
31
|
|
|
40
|
-
##
|
|
32
|
+
## Mindset: Context First
|
|
41
33
|
|
|
42
|
-
|
|
34
|
+
**BEFORE taking action, gather context:**
|
|
43
35
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
FROM linkedin_company
|
|
48
|
-
WHERE domain = 'openai.com';
|
|
36
|
+
1. **Sample the data first** — Don't assume. Query to see what's actually there.
|
|
37
|
+
2. **Verify before proceeding** — SERP results need verification. LinkedIn data needs enrichment.
|
|
38
|
+
3. **Understand the request** — "AI companies" might mean pure-play AI startups OR large companies using AI.
|
|
49
39
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
40
|
+
**The pattern:**
|
|
41
|
+
```
|
|
42
|
+
User: "Find AI CRM companies"
|
|
43
|
+
|
|
44
|
+
❌ BAD: Immediately search without verification
|
|
45
|
+
✅ GOOD:
|
|
46
|
+
1. Search: "AI CRM" site:linkedin.com/company
|
|
47
|
+
2. Get LinkedIn URLs from results
|
|
48
|
+
3. Enrich each via B2B database
|
|
49
|
+
4. Verify: "Is this actually an AI CRM based on description?"
|
|
59
50
|
```
|
|
60
51
|
|
|
61
|
-
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## Prospecting: Two Approaches
|
|
55
|
+
|
|
56
|
+
### 1. Direct Query with Filters (Preferred)
|
|
57
|
+
|
|
58
|
+
Use when the criteria are directly searchable:
|
|
59
|
+
|
|
60
|
+
- **Google dorking** — `"AI CRM" site:linkedin.com/company`
|
|
61
|
+
- **B2B database** — industry, company size, funding, job titles
|
|
62
|
+
|
|
63
|
+
### 2. Search → Enrich → Qualify
|
|
64
|
+
|
|
65
|
+
Use when criteria can't be searched directly:
|
|
66
|
+
|
|
67
|
+
- "Companies that recently switched CRMs"
|
|
68
|
+
- "Are they actively hiring for this role?"
|
|
69
|
+
- "Do they use [specific tool]?"
|
|
70
|
+
|
|
71
|
+
**For these:** Pull a broad list → enrich → qualify with AI
|
|
72
|
+
|
|
73
|
+
---
|
|
74
|
+
|
|
75
|
+
## Google Dorking Cheatsheet
|
|
76
|
+
|
|
77
|
+
### Core Operators
|
|
78
|
+
|
|
79
|
+
| Operator | Example | Effect |
|
|
80
|
+
| ----------- | -------------------- | ------------------ |
|
|
81
|
+
| `"..."` | `"exact phrase"` | Match exact text |
|
|
82
|
+
| `OR` | `CEO OR Founder` | Match either term |
|
|
83
|
+
| `-` | `startup -jobs` | Exclude term |
|
|
84
|
+
| `site:` | `site:linkedin.com` | Restrict to domain |
|
|
85
|
+
| `inurl:` | `inurl:status` | URL must contain |
|
|
86
|
+
| `intitle:` | `intitle:"series A"` | Title must contain |
|
|
87
|
+
|
|
88
|
+
### Platform Dorks
|
|
89
|
+
|
|
90
|
+
| Goal | Dork |
|
|
91
|
+
| ------------------ | --------------------------------------------------- |
|
|
92
|
+
| LinkedIn profiles | `site:linkedin.com/in "query"` |
|
|
93
|
+
| LinkedIn companies | `site:linkedin.com/company "query"` |
|
|
94
|
+
| LinkedIn posts | `site:linkedin.com/posts "query"` |
|
|
95
|
+
| Twitter/X posts | `site:x.com inurl:status "query"` |
|
|
96
|
+
| Twitter/X profiles | `site:x.com -inurl:status "query"` |
|
|
97
|
+
| Reddit threads | `site:reddit.com "query"` |
|
|
98
|
+
| Crunchbase | `site:crunchbase.com/organization "query"` |
|
|
99
|
+
|
|
100
|
+
### B2B Prospecting Dorks
|
|
62
101
|
|
|
63
|
-
```sql
|
|
64
|
-
-- Software companies, 100-500 employees, with recent funding
|
|
65
|
-
SELECT lc.company_name, lc.domain, lc.employee_count,
|
|
66
|
-
cf.round_name, cf.round_date, cf.round_amount
|
|
67
|
-
FROM linkedin_company lc
|
|
68
|
-
JOIN linkedin_crunchbase_funding cf ON cf.linkedin_company_id = lc.id
|
|
69
|
-
WHERE lc.industry_code = 4 -- Software Development
|
|
70
|
-
AND lc.employee_count BETWEEN 100 AND 500
|
|
71
|
-
AND cf.round_date >= '2024-01-01'
|
|
72
|
-
ORDER BY cf.round_date DESC
|
|
73
|
-
LIMIT 50;
|
|
74
102
|
```
|
|
103
|
+
# Find employees at company
|
|
104
|
+
"Stripe" site:linkedin.com/in
|
|
75
105
|
|
|
76
|
-
|
|
106
|
+
# Find leadership
|
|
107
|
+
"Acme Corp" CEO OR Founder OR "Co-founder" site:linkedin.com/in
|
|
77
108
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
AND (pos.title ILIKE '%head of sales%'
|
|
88
|
-
OR pos.title ILIKE '%vp sales%'
|
|
89
|
-
OR pos.title ILIKE '%chief revenue%')
|
|
90
|
-
LIMIT 30;
|
|
109
|
+
# Find by title
|
|
110
|
+
"VP Sales" "Series A" site:linkedin.com/in
|
|
111
|
+
|
|
112
|
+
# Find company pages by criteria
|
|
113
|
+
"YC W24" site:linkedin.com/company
|
|
114
|
+
"Series B" fintech site:linkedin.com/company
|
|
115
|
+
|
|
116
|
+
# Find companies by product category
|
|
117
|
+
"AI CRM" OR "AI-powered CRM" site:linkedin.com/company
|
|
91
118
|
```
|
|
92
119
|
|
|
93
|
-
###
|
|
120
|
+
### Time Filters
|
|
94
121
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
122
|
+
| Value | Period |
|
|
123
|
+
| ------- | ---------- |
|
|
124
|
+
| `qdr:d` | Past 24h |
|
|
125
|
+
| `qdr:w` | Past week |
|
|
126
|
+
| `qdr:m` | Past month |
|
|
127
|
+
| `qdr:y` | Past year |
|
|
128
|
+
|
|
129
|
+
```typescript
|
|
130
|
+
orangeslice.serp.search("Stripe hiring", { tbs: "qdr:m" });
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
### Query Permutation Strategy
|
|
134
|
+
|
|
135
|
+
SERP is cheap. Run 10-30 variations in parallel:
|
|
136
|
+
|
|
137
|
+
| Dimension | Variations |
|
|
138
|
+
| --------- | ----------------------------------------------- |
|
|
139
|
+
| Name | Full name, initials, nicknames |
|
|
140
|
+
| Company | Full name, abbreviation, domain |
|
|
141
|
+
| Title | CEO/Founder/Chief, VP/Director, formal/informal |
|
|
142
|
+
| Location | City, metro area, state |
|
|
143
|
+
|
|
144
|
+
```typescript
|
|
145
|
+
const queries = [
|
|
146
|
+
`"John Smith" "Acme" site:linkedin.com/in`,
|
|
147
|
+
`"J. Smith" Acme site:linkedin.com/in`,
|
|
148
|
+
`"John Smith" CEO site:linkedin.com/in`,
|
|
149
|
+
];
|
|
150
|
+
const results = await Promise.all(queries.map(q => orangeslice.serp.search(q)));
|
|
104
151
|
```
|
|
105
152
|
|
|
153
|
+
### SERP Requires Verification
|
|
154
|
+
|
|
155
|
+
**Dorking is fast but returns false positives.** Always verify:
|
|
156
|
+
|
|
157
|
+
1. **Enrich via B2B database** — Get actual company/person data
|
|
158
|
+
2. **Scrape website** — Check product page, about page
|
|
159
|
+
3. **AI classification** — "Based on [data], does this match [criteria]?"
|
|
160
|
+
|
|
106
161
|
---
|
|
107
162
|
|
|
108
|
-
##
|
|
163
|
+
## Data Enrichment Pattern
|
|
109
164
|
|
|
110
|
-
|
|
165
|
+
**Standard pattern: Search → Scrape → Extract**
|
|
111
166
|
|
|
112
167
|
```typescript
|
|
113
|
-
//
|
|
114
|
-
const results = await orangeslice.serp.search(
|
|
168
|
+
// 1. Search for relevant pages
|
|
169
|
+
const { results } = await orangeslice.serp.search(
|
|
170
|
+
  `site:${domain} practice areas medical malpractice`
|
|
171
|
+
);
|
|
172
|
+
|
|
173
|
+
// 2. Scrape the top result
|
|
174
|
+
const { markdown } = await orangeslice.firecrawl.scrape(results[0].link);
|
|
175
|
+
|
|
176
|
+
// 3. Extract structured data (use your AI of choice)
|
|
177
|
+
// Parse markdown to answer: "Does this firm handle medical malpractice?"
|
|
178
|
+
```
|
|
115
179
|
|
|
116
|
-
|
|
117
|
-
const organic = await orangeslice.serp.organic("best CRM software 2024");
|
|
180
|
+
### When to Use Each Tool
|
|
118
181
|
|
|
119
|
-
|
|
120
|
-
|
|
182
|
+
| Use Search → Scrape → Extract | Use `browser.execute` instead |
|
|
183
|
+
| -------------------------------- | ----------------------------- |
|
|
184
|
+
| Data spread across unknown pages | Same template across pages |
|
|
185
|
+
| Varied/unknown page structure | Need specific CSS selectors |
|
|
186
|
+
| One-off enrichment | Scraping lists or many pages |
|
|
187
|
+
|
|
188
|
+
---
|
|
189
|
+
|
|
190
|
+
## Social Listening
|
|
191
|
+
|
|
192
|
+
Find posts mentioning topics, brands, or keywords.
|
|
193
|
+
|
|
194
|
+
### Finding Posts: Use Dorking
|
|
121
195
|
|
|
122
|
-
// Time-based search (past week)
|
|
123
|
-
const recent = await orangeslice.serp.search("OpenAI news", { tbs: "qdr:w" });
|
|
124
196
|
```
|
|
197
|
+
# LinkedIn posts mentioning topic
|
|
198
|
+
"AI sales tools" site:linkedin.com/posts
|
|
199
|
+
|
|
200
|
+
# Twitter/X posts
|
|
201
|
+
"competitor name" site:x.com inurl:status
|
|
125
202
|
|
|
126
|
-
|
|
203
|
+
# Reddit discussions
|
|
204
|
+
"product name" site:reddit.com
|
|
205
|
+
```
|
|
127
206
|
|
|
128
|
-
|
|
129
|
-
|--------|------|-------------|
|
|
130
|
-
| `linkRegexPattern` | string | Filter results by URL pattern |
|
|
131
|
-
| `advance_search` | boolean | Enable advanced search features |
|
|
132
|
-
| `page` | number | Page number (default 1) |
|
|
133
|
-
| `tbs` | string | Time filter: `qdr:d` (day), `qdr:w` (week), `qdr:m` (month) |
|
|
207
|
+
### Common Problem: Sellers vs. Complainers
|
|
134
208
|
|
|
135
|
-
|
|
209
|
+
Users want to find people **complaining about** tools. But searches return mostly **people selling** alternatives.
|
|
136
210
|
|
|
137
|
-
|
|
138
|
-
-
|
|
139
|
-
-
|
|
140
|
-
- Check company reviews on G2, Capterra, etc.
|
|
211
|
+
**Filter with verification:**
|
|
212
|
+
- Enrich author profile to check if they're in sales
|
|
213
|
+
- Check post sentiment and context
|
|
141
214
|
|
|
142
215
|
---
|
|
143
216
|
|
|
144
|
-
##
|
|
217
|
+
## B2B Database (LinkedIn Data)
|
|
145
218
|
|
|
146
|
-
|
|
219
|
+
**Scale:** 1.15B profiles, 2.6B positions, 1.48B jobs. Naive queries time out.
|
|
147
220
|
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
221
|
+
### Fast Lookups (Indexed)
|
|
222
|
+
|
|
223
|
+
```sql
|
|
224
|
+
-- Company by domain (FAST)
|
|
225
|
+
SELECT * FROM linkedin_company WHERE domain = 'stripe.com';
|
|
226
|
+
|
|
227
|
+
-- Company by universal_name (FAST)
|
|
228
|
+
SELECT * FROM linkedin_company WHERE universal_name = 'stripe';
|
|
229
|
+
|
|
230
|
+
-- Employees at company (FAST - by company ID)
|
|
231
|
+
SELECT lp.first_name, lp.last_name, pos.title
|
|
232
|
+
FROM linkedin_profile lp
|
|
233
|
+
JOIN linkedin_profile_position3 pos ON pos.linkedin_profile_id = lp.id
|
|
234
|
+
WHERE pos.linkedin_company_id = 2135371
|
|
235
|
+
AND pos.end_date IS NULL
|
|
236
|
+
LIMIT 50;
|
|
237
|
+
```
|
|
153
238
|
|
|
154
|
-
|
|
155
|
-
const content = await orangeslice.firecrawl.markdown("https://company.com/team");
|
|
239
|
+
### Slow Queries (Will Time Out)
|
|
156
240
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
241
|
+
```sql
|
|
242
|
+
-- ❌ Text search on names (no index)
|
|
243
|
+
WHERE company_name ILIKE '%stripe%'
|
|
160
244
|
|
|
161
|
-
|
|
162
|
-
|
|
245
|
+
-- ❌ Headline search without company filter
|
|
246
|
+
WHERE headline ILIKE '%sales%'
|
|
247
|
+
|
|
248
|
+
-- ❌ COUNT on huge companies
|
|
249
|
+
SELECT COUNT(*) FROM ... WHERE linkedin_company_id = 1586
|
|
163
250
|
```
|
|
164
251
|
|
|
165
|
-
###
|
|
252
|
+
### Indexed Columns
|
|
166
253
|
|
|
167
|
-
|
|
|
168
|
-
|
|
169
|
-
| `
|
|
170
|
-
| `
|
|
171
|
-
| `
|
|
172
|
-
| `
|
|
173
|
-
| `
|
|
174
|
-
| `youtubeChannel` | YouTube channels |
|
|
175
|
-
| `tiktokProfile` | TikTok profiles |
|
|
176
|
-
| `emailGeneral` | Email addresses |
|
|
254
|
+
| Table | Indexed Columns |
|
|
255
|
+
| ----------------------------- | ---------------------------------------- |
|
|
256
|
+
| `linkedin_company` | `id`, `universal_name`, `domain` |
|
|
257
|
+
| `linkedin_profile` | `id`, `linkedin_user_id` |
|
|
258
|
+
| `linkedin_profile_position3` | `linkedin_profile_id`, `linkedin_company_id` |
|
|
259
|
+
| `linkedin_job` | `linkedin_company_id`, `title_id` |
|
|
260
|
+
| `linkedin_crunchbase_funding` | `linkedin_company_id` |
|
|
177
261
|
|
|
178
|
-
###
|
|
262
|
+
### Company Size Performance
|
|
179
263
|
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
264
|
+
| Company Size | Simple Query | Aggregations |
|
|
265
|
+
|--------------|--------------|--------------|
|
|
266
|
+
| Small (<1K) | 4-20ms | 5-50ms |
|
|
267
|
+
| Medium (1K-10K) | 10-30ms | 100-500ms |
|
|
268
|
+
| Large (10K-100K) | 10-40ms | 1-15s |
|
|
269
|
+
| Massive (100K+) | 15-65ms | **TIMEOUT** |
|
|
184
270
|
|
|
185
|
-
|
|
271
|
+
**For Amazon/Google:** Only use simple `LIMIT` queries.
|
|
186
272
|
|
|
187
|
-
|
|
273
|
+
### Common Company IDs
|
|
188
274
|
|
|
189
|
-
|
|
|
190
|
-
|
|
191
|
-
|
|
|
192
|
-
|
|
|
193
|
-
|
|
|
194
|
-
|
|
|
195
|
-
|
|
|
275
|
+
| Company | ID | Employees |
|
|
276
|
+
|---------|----------|-----------|
|
|
277
|
+
| Amazon | 1586 | 770K |
|
|
278
|
+
| Google | 1441 | 330K |
|
|
279
|
+
| Stripe | 2135371 | ~9K |
|
|
280
|
+
| OpenAI | 11130470 | ~7K |
|
|
281
|
+
| Ramp | 1406226 | ~3.5K |
|
|
196
282
|
|
|
197
|
-
|
|
283
|
+
### Title Search Patterns
|
|
284
|
+
|
|
285
|
+
| Role | ILIKE Pattern |
|
|
286
|
+
|-----------|--------------------------------------------|
|
|
287
|
+
| C-Suite | `ceo%`, `cto%`, `cfo%`, `%chief%` |
|
|
288
|
+
| VPs | `%vp %`, `%vice president%` |
|
|
289
|
+
| Directors | `%director%`, `%head of%` |
|
|
290
|
+
| Sales | `%account exec%`, `%sales rep%`, `%ae %` |
|
|
291
|
+
| SDRs | `%sales development%`, `%sdr%`, `%bdr%` |
|
|
292
|
+
| Engineering | `%engineer%`, `%developer%` |
|
|
293
|
+
| Recruiters | `%recruit%`, `%talent%`, `%sourcer%` |
|
|
294
|
+
| Legal | `%lawyer%`, `%attorney%`, `%counsel%` |
|
|
295
|
+
|
|
296
|
+
### Hiring Queries
|
|
297
|
+
|
|
298
|
+
**MUST filter for active jobs:**
|
|
198
299
|
|
|
199
|
-
### ✅ Fast Queries (use these)
|
|
200
300
|
```sql
|
|
201
|
-
|
|
202
|
-
|
|
301
|
+
EXISTS (
|
|
302
|
+
SELECT 1 FROM linkedin_job j
|
|
303
|
+
WHERE j.linkedin_company_id = lc.id
|
|
304
|
+
AND j.closed_since IS NULL
|
|
305
|
+
AND (j.valid_until IS NULL OR j.valid_until > NOW())
|
|
306
|
+
AND j.posted_date >= CURRENT_DATE - INTERVAL '90 days'
|
|
307
|
+
)
|
|
308
|
+
```
|
|
203
309
|
|
|
204
|
-
|
|
205
|
-
|
|
310
|
+
### Query Strategy
|
|
311
|
+
|
|
312
|
+
**LinkedIn DB times out?** Immediately SERP it:
|
|
313
|
+
```
|
|
314
|
+
site:linkedin.com/company [query]
|
|
315
|
+
```
|
|
206
316
|
|
|
207
|
-
|
|
208
|
-
|
|
317
|
+
**Complex criteria?** Decompose:
|
|
318
|
+
1. Simple indexed query → get IDs
|
|
319
|
+
2. Enrich with additional data
|
|
320
|
+
3. Filter/qualify results
|
|
209
321
|
|
|
210
|
-
|
|
211
|
-
|
|
322
|
+
---
|
|
323
|
+
|
|
324
|
+
## Browser Automation (Playwright)
|
|
325
|
+
|
|
326
|
+
Execute Playwright code with `page` in scope.
|
|
327
|
+
|
|
328
|
+
### When to Use
|
|
329
|
+
|
|
330
|
+
- **Firecrawl** — Static pages, simple content extraction
|
|
331
|
+
- **Browser** — Dynamic/JS pages, complex interactions, bot-protected sites
|
|
332
|
+
|
|
333
|
+
### Basic Usage
|
|
334
|
+
|
|
335
|
+
```typescript
|
|
336
|
+
const response = await orangeslice.browser.execute(`
|
|
337
|
+
await page.goto("https://example.com", { waitUntil: 'domcontentloaded' });
|
|
338
|
+
return await page.evaluate(() => {
|
|
339
|
+
return [...document.querySelectorAll('.item')].map(el => ({
|
|
340
|
+
title: el.querySelector('h2')?.textContent?.trim(),
|
|
341
|
+
url: el.querySelector('a')?.href
|
|
342
|
+
}));
|
|
343
|
+
});
|
|
344
|
+
`);
|
|
345
|
+
// response = { success: true, result: [...] }
|
|
212
346
|
```
|
|
213
347
|
|
|
214
|
-
###
|
|
215
|
-
```sql
|
|
216
|
-
-- Text search on names (no index)
|
|
217
|
-
WHERE company_name ILIKE '%stripe%' -- SLOW
|
|
348
|
+
### Workflow: Analyze → Extract
|
|
218
349
|
|
|
219
|
-
|
|
220
|
-
|
|
350
|
+
**Step 1: Discover selectors**
|
|
351
|
+
```typescript
|
|
352
|
+
const response = await orangeslice.browser.execute(`
|
|
353
|
+
await page.goto(url, { waitUntil: 'domcontentloaded' });
|
|
354
|
+
return await page._snapshotForAI();
|
|
355
|
+
`);
|
|
356
|
+
// Analyze snapshot to find CSS selectors
|
|
357
|
+
```
|
|
221
358
|
|
|
222
|
-
|
|
223
|
-
|
|
359
|
+
**Step 2: Extract with discovered selectors**
|
|
360
|
+
```typescript
|
|
361
|
+
const response = await orangeslice.browser.execute(`
|
|
362
|
+
await page.goto(url, { waitUntil: 'domcontentloaded' });
|
|
363
|
+
return await page.evaluate(() => {
|
|
364
|
+
return [...document.querySelectorAll('.discovered-selector')].map(e => ({
|
|
365
|
+
name: e.querySelector('h2')?.textContent?.trim()
|
|
366
|
+
}));
|
|
367
|
+
});
|
|
368
|
+
`);
|
|
224
369
|
```
|
|
225
370
|
|
|
226
|
-
###
|
|
371
|
+
### Bot Protection
|
|
227
372
|
|
|
228
|
-
|
|
229
|
-
|-------------|--------------|--------------|
|
|
230
|
-
| Small (<1K) | 4-20ms | 5-50ms |
|
|
231
|
-
| Medium (1K-10K) | 10-30ms | 100-500ms |
|
|
232
|
-
| Large (10K-100K) | 10-40ms | 1-15s |
|
|
233
|
-
| Massive (100K+) | 15-65ms | **TIMEOUT** |
|
|
234
|
-
|
|
235
|
-
**For Amazon/Google (100K+ employees):** Only use simple `LIMIT` queries, no `COUNT` or `GROUP BY`.
|
|
236
|
-
|
|
237
|
-
## Common Company IDs
|
|
238
|
-
|
|
239
|
-
| Company | ID | Employees |
|
|
240
|
-
|---------|-----|-----------|
|
|
241
|
-
| Amazon | 1586 | 770K |
|
|
242
|
-
| Google | 1441 | 330K |
|
|
243
|
-
| Stripe | 2135371 | ~9K |
|
|
244
|
-
| OpenAI | 11130470 | ~7K |
|
|
245
|
-
| Ramp | 1406226 | ~3.5K |
|
|
246
|
-
|
|
247
|
-
## Title Search Patterns
|
|
248
|
-
|
|
249
|
-
| Role | ILIKE Pattern |
|
|
250
|
-
|------|---------------|
|
|
251
|
-
| C-Suite | `ceo%`, `cto%`, `cfo%`, `%chief%` |
|
|
252
|
-
| VPs | `%vp %`, `%vice president%` |
|
|
253
|
-
| Directors | `%director%`, `%head of%` |
|
|
254
|
-
| Sales | `%account exec%`, `%sales rep%`, `%ae %` |
|
|
255
|
-
| SDRs | `%sales development%`, `%sdr%`, `%bdr%` |
|
|
256
|
-
| Engineering | `%engineer%`, `%developer%` |
|
|
257
|
-
| Recruiters | `%recruit%`, `%talent%`, `%sourcer%` |
|
|
373
|
+
For bot-protected sites, use single-session navigation:
|
|
258
374
|
|
|
259
|
-
|
|
375
|
+
```typescript
|
|
376
|
+
const response = await orangeslice.browser.execute(`
|
|
377
|
+
// Navigate to entry page (passes bot check once)
|
|
378
|
+
await page.goto(entryUrl, { waitUntil: 'domcontentloaded' });
|
|
379
|
+
|
|
380
|
+
// Get all URLs to visit
|
|
381
|
+
const urls = await page.evaluate(() =>
|
|
382
|
+
[...document.querySelectorAll('a.link')].map(a => a.href)
|
|
383
|
+
);
|
|
384
|
+
|
|
385
|
+
// Visit each IN THE SAME SESSION
|
|
386
|
+
const results = [];
|
|
387
|
+
for (const url of urls.slice(0, 10)) {
|
|
388
|
+
await page.goto(url, { waitUntil: 'domcontentloaded' });
|
|
389
|
+
const data = await page.evaluate(() => ({
|
|
390
|
+
title: document.querySelector('h1')?.textContent?.trim()
|
|
391
|
+
}));
|
|
392
|
+
results.push(data);
|
|
393
|
+
}
|
|
394
|
+
return results;
|
|
395
|
+
`);
|
|
396
|
+
```
|
|
397
|
+
|
|
398
|
+
### Rules
|
|
260
399
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
400
|
+
1. **Always use `{ waitUntil: 'domcontentloaded' }`** — Prevents hanging on pages whose `load` event is slow or never fires
|
|
401
|
+
2. **Check `response.success`** — Don't just destructure `result`
|
|
402
|
+
3. **Analyze before extracting** — Use `_snapshotForAI()` to find selectors
|
|
403
|
+
4. **Return objects, not HTML** — Use `page.evaluate()` for structured data
|
|
404
|
+
5. **3-minute hard limit** — Plan multi-page scrapes accordingly
|
|
405
|
+
|
|
406
|
+
---
|
|
264
407
|
|
|
265
408
|
## Rate Limits
|
|
266
409
|
|
|
267
|
-
|
|
410
|
+
| Function | Concurrency | Min Delay |
|
|
411
|
+
|-------------|-------------|-----------|
|
|
412
|
+
| `b2b` | 2 concurrent | 100ms |
|
|
413
|
+
| `serp` | 2 concurrent | 200ms |
|
|
414
|
+
| `firecrawl` | 2 concurrent | 500ms |
|
|
415
|
+
| `browser` | 2 concurrent | 500ms |
|
|
416
|
+
|
|
417
|
+
All calls are queued automatically.
|
|
268
418
|
|
|
269
|
-
|
|
270
|
-
|----------|-------------|-----------|
|
|
271
|
-
| `b2b` | 2 concurrent | 100ms |
|
|
272
|
-
| `serp` | 2 concurrent | 200ms |
|
|
273
|
-
| `firecrawl` | 2 concurrent | 500ms |
|
|
419
|
+
---
|
|
274
420
|
|
|
275
|
-
|
|
421
|
+
## What You Cannot Do
|
|
276
422
|
|
|
277
|
-
|
|
423
|
+
❌ **No direct contact data** — Email addresses and phone numbers are restricted
|
|
424
|
+
❌ **No Indeed data** — Indeed tables are restricted
|
|
425
|
+
❌ **No traffic/web data** — Domain traffic and web analytics are restricted
|
|
278
426
|
|
|
279
|
-
|
|
280
|
-
- `B2B_DATABASE.md` - Full database guide with examples
|
|
281
|
-
- `B2B_SCHEMA.md` - Complete table schemas
|
|
282
|
-
- `B2B_EMPLOYEE_SEARCH.md` - Finding employees by title
|
|
427
|
+
---
|
|
283
428
|
|
|
284
|
-
## Example
|
|
429
|
+
## Example: Full Research Flow
|
|
285
430
|
|
|
286
431
|
**User:** "Research Ramp - give me everything"
|
|
287
432
|
|
|
288
|
-
**Agent:**
|
|
289
433
|
```typescript
|
|
290
434
|
import { orangeslice } from 'orangeslice';
|
|
291
435
|
|
|
292
|
-
// 1. B2B Database - Company info
|
|
436
|
+
// 1. B2B Database - Company info
|
|
293
437
|
const company = await orangeslice.b2b.sql(`
|
|
294
438
|
SELECT id, company_name, domain, employee_count, locality, description
|
|
295
439
|
FROM linkedin_company WHERE domain = 'ramp.com'
|
|
296
440
|
`);
|
|
297
441
|
|
|
442
|
+
// 2. B2B Database - Leadership team
|
|
298
443
|
const leadership = await orangeslice.b2b.sql(`
|
|
299
444
|
SELECT lp.first_name, lp.last_name, lp.headline, pos.title
|
|
300
445
|
FROM linkedin_profile lp
|
|
@@ -305,15 +450,13 @@ const leadership = await orangeslice.b2b.sql(`
|
|
|
305
450
|
LIMIT 20
|
|
306
451
|
`);
|
|
307
452
|
|
|
308
|
-
//
|
|
453
|
+
// 3. Google Search - Recent news
|
|
309
454
|
const news = await orangeslice.serp.search("Ramp fintech funding 2024", { tbs: "qdr:m" });
|
|
310
455
|
|
|
311
|
-
//
|
|
456
|
+
// 4. Website Scraping - About page + socials
|
|
312
457
|
const about = await orangeslice.firecrawl.scrape("https://ramp.com/about");
|
|
313
|
-
console.log(about.markdown); // Company description
|
|
314
|
-
console.log(about.socialUrls); // LinkedIn, Twitter, etc.
|
|
315
458
|
```
|
|
316
459
|
|
|
317
460
|
---
|
|
318
461
|
|
|
319
|
-
**Start by understanding what the user wants to research, then use the appropriate tools to find the information.**
|
|
462
|
+
**Start by understanding what the user wants to research, then use the appropriate tools to find the information. Verify results when using SERP. Always use indexed columns first when querying the B2B database.**
|