orangeslice 1.6.1 → 1.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/apify.d.ts +57 -0
- package/dist/apify.js +127 -0
- package/dist/cli.js +18 -7
- package/dist/generateObject.d.ts +34 -0
- package/dist/generateObject.js +86 -0
- package/dist/geo.d.ts +50 -0
- package/dist/geo.js +92 -0
- package/dist/index.d.ts +32 -3
- package/dist/index.js +24 -3
- package/docs/AGENTS.md +94 -384
- package/docs/apify.md +133 -0
- package/docs/b2b.md +178 -0
- package/docs/browser.md +173 -0
- package/docs/serp.md +167 -0
- package/docs/strategies.md +250 -0
- package/package.json +2 -2
- /package/docs/{B2B_CROSS_TABLE_TEST_FINDINGS.md → b2b-docs/B2B_CROSS_TABLE_TEST_FINDINGS.md} +0 -0
- /package/docs/{B2B_DATABASE.md → b2b-docs/B2B_DATABASE.md} +0 -0
- /package/docs/{B2B_DATABASE_TEST_FINDINGS.md → b2b-docs/B2B_DATABASE_TEST_FINDINGS.md} +0 -0
- /package/docs/{B2B_EMPLOYEE_SEARCH.md → b2b-docs/B2B_EMPLOYEE_SEARCH.md} +0 -0
- /package/docs/{B2B_GENERALIZATION_RULES.md → b2b-docs/B2B_GENERALIZATION_RULES.md} +0 -0
- /package/docs/{B2B_NLP_QUERY_MAPPINGS.md → b2b-docs/B2B_NLP_QUERY_MAPPINGS.md} +0 -0
- /package/docs/{B2B_NORMALIZED_VS_DENORMALIZED.md → b2b-docs/B2B_NORMALIZED_VS_DENORMALIZED.md} +0 -0
- /package/docs/{B2B_SCHEMA.md → b2b-docs/B2B_SCHEMA.md} +0 -0
- /package/docs/{B2B_SQL_COMPREHENSIVE_TEST_FINDINGS.md → b2b-docs/B2B_SQL_COMPREHENSIVE_TEST_FINDINGS.md} +0 -0
- /package/docs/{B2B_TABLE_INDICES.ts → b2b-docs/B2B_TABLE_INDICES.ts} +0 -0
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
# Research Strategies
|
|
2
|
+
|
|
3
|
+
Patterns for prospecting, enrichment, and social listening.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Prospecting: Two Approaches
|
|
8
|
+
|
|
9
|
+
### 1. Direct Query with Filters (Preferred)
|
|
10
|
+
|
|
11
|
+
Use when the criteria are directly searchable:
|
|
12
|
+
|
|
13
|
+
- **Google dorking** — `"AI CRM" site:linkedin.com/company`
|
|
14
|
+
- **B2B database** — industry, company size, funding, job titles
|
|
15
|
+
|
|
16
|
+
```typescript
|
|
17
|
+
// Direct: Companies in fintech with 50-500 employees
|
|
18
|
+
const companies = await orangeslice.b2b.sql(`
|
|
19
|
+
SELECT * FROM linkedin_company
|
|
20
|
+
WHERE industry ILIKE '%fintech%'
|
|
21
|
+
AND employee_count BETWEEN 50 AND 500
|
|
22
|
+
LIMIT 100
|
|
23
|
+
`);
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### 2. Search → Enrich → Qualify
|
|
27
|
+
|
|
28
|
+
Use when criteria can't be searched directly:
|
|
29
|
+
|
|
30
|
+
- "Companies that recently switched CRMs"
|
|
31
|
+
- "Are they actively hiring for this role?"
|
|
32
|
+
- "Do they use [specific tool]?"
|
|
33
|
+
|
|
34
|
+
```typescript
|
|
35
|
+
// 1. Broad search
|
|
36
|
+
const { results } = await orangeslice.serp.search(
|
|
37
|
+
`"switched to Salesforce" site:linkedin.com/posts`
|
|
38
|
+
);
|
|
39
|
+
|
|
40
|
+
// 2. Enrich each result
|
|
41
|
+
for (const result of results) {
|
|
42
|
+
const companyName = extractCompanyFromPost(result);
|
|
43
|
+
const company = await orangeslice.b2b.sql(`
|
|
44
|
+
SELECT * FROM linkedin_company WHERE company_name ILIKE '%${companyName}%'
|
|
45
|
+
`);
|
|
46
|
+
|
|
47
|
+
// 3. Qualify
|
|
48
|
+
if (company[0]?.employee_count > 100) {
|
|
49
|
+
// Add to qualified list
|
|
50
|
+
}
|
|
51
|
+
}
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Data Enrichment Pattern
|
|
57
|
+
|
|
58
|
+
**Standard: Search → Scrape → Extract**
|
|
59
|
+
|
|
60
|
+
```typescript
|
|
61
|
+
// 1. Find relevant pages
|
|
62
|
+
const { results } = await orangeslice.serp.search(
|
|
63
|
+
`site:${domain} "practice areas" "medical malpractice"`
|
|
64
|
+
);
|
|
65
|
+
|
|
66
|
+
// 2. Scrape the top result
|
|
67
|
+
const { markdown } = await orangeslice.firecrawl.scrape(results[0].link);
|
|
68
|
+
|
|
69
|
+
// 3. Extract structured data
|
|
70
|
+
const data = await orangeslice.generateObject.generate({
|
|
71
|
+
prompt: `Does this law firm handle medical malpractice cases?\n\n${markdown}`,
|
|
72
|
+
schema: {
|
|
73
|
+
type: "object",
|
|
74
|
+
properties: {
|
|
75
|
+
handles_med_mal: { type: "boolean" },
|
|
76
|
+
practice_areas: { type: "array", items: { type: "string" } }
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
});
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### When to Use Each Tool
|
|
83
|
+
|
|
84
|
+
| Use Search → Scrape → Extract | Use `browser.execute` |
|
|
85
|
+
|------------------------------|----------------------|
|
|
86
|
+
| Data spread across unknown pages | Same template across pages |
|
|
87
|
+
| Varied/unknown page structure | Need specific CSS selectors |
|
|
88
|
+
| One-off enrichment | Scraping lists/tables |
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
## Social Listening
|
|
93
|
+
|
|
94
|
+
Find posts mentioning topics, brands, or competitors.
|
|
95
|
+
|
|
96
|
+
### Finding Posts: Use Dorking
|
|
97
|
+
|
|
98
|
+
```
|
|
99
|
+
# LinkedIn posts mentioning topic
|
|
100
|
+
"AI sales tools" site:linkedin.com/posts
|
|
101
|
+
|
|
102
|
+
# Twitter/X posts
|
|
103
|
+
"competitor name" site:x.com inurl:status
|
|
104
|
+
|
|
105
|
+
# Reddit discussions
|
|
106
|
+
"product name" site:reddit.com
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Common Problem: Sellers vs. Complainers
|
|
110
|
+
|
|
111
|
+
Users usually want to find people **complaining about** a tool, but searches mostly return **people selling** alternatives to it.
|
|
112
|
+
|
|
113
|
+
**Solution: Filter with verification**
|
|
114
|
+
|
|
115
|
+
```typescript
|
|
116
|
+
const { results } = await orangeslice.serp.search(
|
|
117
|
+
`"hate Salesforce" OR "frustrated with Salesforce" site:linkedin.com/posts`
|
|
118
|
+
);
|
|
119
|
+
|
|
120
|
+
for (const post of results) {
|
|
121
|
+
// Get author info
|
|
122
|
+
const authorUrl = extractAuthorUrl(post.link);
|
|
123
|
+
const profile = await orangeslice.b2b.sql(`...`);
|
|
124
|
+
|
|
125
|
+
// Filter out salespeople
|
|
126
|
+
if (!profile.headline?.toLowerCase().includes('sales')) {
|
|
127
|
+
// This is likely a real complainer
|
|
128
|
+
}
|
|
129
|
+
}
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
---
|
|
133
|
+
|
|
134
|
+
## Company Research Checklist
|
|
135
|
+
|
|
136
|
+
```typescript
|
|
137
|
+
async function researchCompany(domain: string) {
|
|
138
|
+
// 1. Basic company info
|
|
139
|
+
const company = await orangeslice.b2b.sql(`
|
|
140
|
+
SELECT * FROM linkedin_company WHERE domain = '${domain}'
|
|
141
|
+
`);
|
|
142
|
+
|
|
143
|
+
// 2. Leadership team
|
|
144
|
+
const leadership = await orangeslice.b2b.sql(`
|
|
145
|
+
SELECT lp.first_name, lp.last_name, pos.title
|
|
146
|
+
FROM linkedin_profile lp
|
|
147
|
+
JOIN linkedin_profile_position3 pos ON pos.linkedin_profile_id = lp.id
|
|
148
|
+
WHERE pos.linkedin_company_id = ${company[0].id}
|
|
149
|
+
AND pos.end_date IS NULL
|
|
150
|
+
AND (pos.title ILIKE 'ceo%' OR pos.title ILIKE 'cto%' OR pos.title ILIKE '%founder%')
|
|
151
|
+
LIMIT 10
|
|
152
|
+
`);
|
|
153
|
+
|
|
154
|
+
// 3. Funding history
|
|
155
|
+
const funding = await orangeslice.b2b.sql(`
|
|
156
|
+
SELECT * FROM linkedin_crunchbase_funding
|
|
157
|
+
WHERE linkedin_company_id = ${company[0].id}
|
|
158
|
+
ORDER BY announced_date DESC
|
|
159
|
+
`);
|
|
160
|
+
|
|
161
|
+
// 4. Recent news
|
|
162
|
+
const news = await orangeslice.serp.search(
|
|
163
|
+
`"${company[0].company_name}" funding OR acquisition`,
|
|
164
|
+
{ tbs: "qdr:m" }
|
|
165
|
+
);
|
|
166
|
+
|
|
167
|
+
// 5. Website content
|
|
168
|
+
const about = await orangeslice.firecrawl.scrape(`https://${domain}/about`);
|
|
169
|
+
|
|
170
|
+
// 6. Current job openings
|
|
171
|
+
const jobs = await orangeslice.b2b.sql(`
|
|
172
|
+
SELECT title, locality FROM linkedin_job
|
|
173
|
+
WHERE linkedin_company_id = ${company[0].id}
|
|
174
|
+
AND closed_since IS NULL
|
|
175
|
+
LIMIT 20
|
|
176
|
+
`);
|
|
177
|
+
|
|
178
|
+
return { company, leadership, funding, news, about, jobs };
|
|
179
|
+
}
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
## Person Research Checklist
|
|
185
|
+
|
|
186
|
+
```typescript
|
|
187
|
+
async function researchPerson(linkedinUrl: string) {
|
|
188
|
+
// Extract profile ID from URL
|
|
189
|
+
const parts = linkedinUrl.split('/in/');
|
|
190
|
+
const linkedinUserId = parts[1]?.split('/')[0];
|
|
191
|
+
|
|
192
|
+
// 1. Profile info
|
|
193
|
+
const profile = await orangeslice.b2b.sql(`
|
|
194
|
+
SELECT * FROM linkedin_profile
|
|
195
|
+
WHERE linkedin_user_id = '${linkedinUserId}'
|
|
196
|
+
`);
|
|
197
|
+
|
|
198
|
+
// 2. Work history
|
|
199
|
+
const positions = await orangeslice.b2b.sql(`
|
|
200
|
+
SELECT pos.*, lc.company_name, lc.domain
|
|
201
|
+
FROM linkedin_profile_position3 pos
|
|
202
|
+
JOIN linkedin_company lc ON lc.id = pos.linkedin_company_id
|
|
203
|
+
WHERE pos.linkedin_profile_id = ${profile[0].id}
|
|
204
|
+
ORDER BY pos.start_date DESC
|
|
205
|
+
`);
|
|
206
|
+
|
|
207
|
+
// 3. Recent activity (posts, mentions)
|
|
208
|
+
const activity = await orangeslice.serp.search(
|
|
209
|
+
`"${profile[0].first_name} ${profile[0].last_name}" site:linkedin.com`,
|
|
210
|
+
{ tbs: "qdr:m" }
|
|
211
|
+
);
|
|
212
|
+
|
|
213
|
+
return { profile, positions, activity };
|
|
214
|
+
}
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
---
|
|
218
|
+
|
|
219
|
+
## Batch Processing Pattern
|
|
220
|
+
|
|
221
|
+
For large lists, process in controlled batches:
|
|
222
|
+
|
|
223
|
+
```typescript
|
|
224
|
+
async function processBatch<T, R>(
|
|
225
|
+
items: T[],
|
|
226
|
+
processor: (item: T) => Promise<R>,
|
|
227
|
+
batchSize = 10
|
|
228
|
+
): Promise<R[]> {
|
|
229
|
+
const results: R[] = [];
|
|
230
|
+
|
|
231
|
+
for (let i = 0; i < items.length; i += batchSize) {
|
|
232
|
+
const batch = items.slice(i, i + batchSize);
|
|
233
|
+
const batchResults = await Promise.all(batch.map(processor));
|
|
234
|
+
results.push(...batchResults);
|
|
235
|
+
|
|
236
|
+
// Rate limiting is handled by orangeslice, but add delay between batches
|
|
237
|
+
if (i + batchSize < items.length) {
|
|
238
|
+
await new Promise(r => setTimeout(r, 1000));
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
|
|
242
|
+
return results;
|
|
243
|
+
}
|
|
244
|
+
|
|
245
|
+
// Usage
|
|
246
|
+
const domains = ["stripe.com", "ramp.com", "brex.com", ...];
|
|
247
|
+
const companies = await processBatch(domains, async (domain) => {
|
|
248
|
+
return orangeslice.b2b.sql(`SELECT * FROM linkedin_company WHERE domain = '${domain}'`);
|
|
249
|
+
});
|
|
250
|
+
```
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "orangeslice",
|
|
3
|
-
"version": "1.6.1",
|
|
4
|
-
"description": "
|
|
3
|
+
"version": "1.7.1",
|
|
4
|
+
"description": "AI agent toolkit: B2B database, SERP, web scraping, browser automation, structured AI output, Apify actors, geocoding",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
7
7
|
"bin": {
|
/package/docs/{B2B_CROSS_TABLE_TEST_FINDINGS.md → b2b-docs/B2B_CROSS_TABLE_TEST_FINDINGS.md}
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
/package/docs/{B2B_NORMALIZED_VS_DENORMALIZED.md → b2b-docs/B2B_NORMALIZED_VS_DENORMALIZED.md}
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|