orangeslice 2.1.5 → 2.3.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of orangeslice might be problematic. Click here for more details.

Files changed (34)
  1. package/README.md +3 -1
  2. package/dist/careers.d.ts +47 -0
  3. package/dist/careers.js +11 -0
  4. package/dist/index.d.ts +41 -0
  5. package/dist/index.js +29 -3
  6. package/dist/integrations.d.ts +60 -0
  7. package/dist/integrations.js +106 -0
  8. package/dist/ocean.d.ts +160 -0
  9. package/dist/ocean.js +23 -0
  10. package/dist/skills.d.ts +57 -0
  11. package/dist/skills.js +33 -0
  12. package/docs/data-enrichement/index.md +10 -2
  13. package/docs/integrations/gmail/createDraft.md +54 -0
  14. package/docs/integrations/gmail/fetchEmails.md +50 -0
  15. package/docs/integrations/gmail/fetchMessageByMessageId.md +36 -0
  16. package/docs/integrations/gmail/fetchMessageByThreadId.md +37 -0
  17. package/docs/integrations/gmail/getProfile.md +37 -0
  18. package/docs/integrations/gmail/index.md +19 -2
  19. package/docs/integrations/gmail/listLabels.md +34 -0
  20. package/docs/integrations/gmail/replyToThread.md +51 -0
  21. package/docs/integrations/index.md +14 -1
  22. package/docs/lookalike-search/index.md +24 -12
  23. package/docs/prospecting/index.md +2 -2
  24. package/docs/services/builtWith/index.md +2 -2
  25. package/docs/services/company/findCareersPage.md +137 -0
  26. package/docs/services/company/findCareersPage.ts +37 -0
  27. package/docs/services/company/linkedin/enrich.md +47 -1
  28. package/docs/services/company/scrapeCareersPage.md +150 -0
  29. package/docs/services/index.md +2 -2
  30. package/docs/services/ocean/search/companies.ts +122 -119
  31. package/docs/services/person/linkedin/findUrl.md +2 -2
  32. package/docs/services/predictLeads/companyJobOpenings.ts +168 -94
  33. package/docs/services/web/search.md +29 -14
  34. package/package.json +1 -1
@@ -0,0 +1,37 @@
1
+ interface FindCareersPageResult {
2
+ /** The original website or URL input */
3
+ inputUrl: string;
4
+ /** Canonical homepage/base URL used during discovery */
5
+ normalizedWebsiteUrl: string;
6
+ /** Best careers page URL found, or null when none was found */
7
+ careerPageUrl: string | null;
8
+ /** Whether the result points to an ATS board, an official careers page, or nothing */
9
+ pageType: "ats" | "official" | "not_found";
10
+ /** ATS provider when pageType is "ats" */
11
+ atsProvider: string | null;
12
+ /** How the page was discovered */
13
+ detectionMethod:
14
+ | "input-ats"
15
+ | "homepage-ats-link"
16
+ | "homepage-careers-link"
17
+ | "deterministic-candidate"
18
+ | "candidate-ats-link"
19
+ | "embedded-ats"
20
+ | "candidate-redirect"
21
+ | "ats-unverified"
22
+ | "not-found";
23
+ /** URLs checked while searching */
24
+ checkedUrls: string[];
25
+ }
26
+
27
+ /**
28
+ * Find the best careers page for a company website.
29
+ * Accepts a homepage URL/domain and returns either a canonical ATS board URL
30
+ * or an official careers page on the company site.
31
+ */
32
+ type findCareersPage = (params: {
33
+ /** Company website or homepage URL */
34
+ website?: string;
35
+ /** Alias for website. Provide website or url. */
36
+ url?: string;
37
+ }) => Promise<FindCareersPageResult>;
@@ -56,7 +56,15 @@ interface B2BCompany {
56
56
  }
57
57
  ```
58
58
 
59
- > Note: company industry coverage in the LinkedIn B2B DB can be sparse. `industry` and `industries` may be `null`, generic, or missing even when the company record exists, so do not rely on them as the only company classification signal.
59
+ > Important: LinkedIn company `industry` / `industries` coverage in the B2B DB is very sparse and often too weak for enrichment. These fields may be `null`, generic, stale, or missing even when the company record exists. Treat them as lookup metadata only, not as a high-confidence classification source for enrichment workflows.
60
+ >
61
+ > Preferred pattern for enrichment/classification:
62
+ >
63
+ > 1. Start from the company `domain` when available
64
+ > 2. `services.scrape.website(...)` the company site or a relevant subpage
65
+ > 3. `services.ai.generateObject(...)` to classify the company from the scraped content
66
+ >
67
+ > Use LinkedIn enrich primarily for fast lookup fields like company identity, URL, headcount, location, and description. Do **not** build industry enrichment pipelines that depend mainly on LinkedIn `industry`.
60
68
 
61
69
  ### Extended (`extended: true`) - `B2BCompanyExtended`
62
70
 
@@ -282,6 +290,44 @@ return {
282
290
  };
283
291
  ```
284
292
 
293
+ ### Classify Industry from Domain, Not LinkedIn
294
+
295
+ If your goal is enrichment or categorization, prefer the company website over LinkedIn `industry`:
296
+
297
+ ```typescript
298
+ const company = await services.company.linkedin.enrich({
299
+ domain: row.domain
300
+ });
301
+
302
+ const { markdown } = await services.scrape.website({
303
+ url: `https://${row.domain}`
304
+ });
305
+
306
+ const { object } = await services.ai.generateObject({
307
+ prompt: `
308
+ Classify this company based on its website content.
309
+
310
+ Do not rely on LinkedIn industry because it is sparse and often too generic.
311
+ Use LinkedIn only as lightweight context for identity verification.
312
+
313
+ Domain: ${row.domain}
314
+ LinkedIn name: ${company?.name ?? "unknown"}
315
+ LinkedIn description: ${company?.description ?? "unknown"}
316
+
317
+ Website content:
318
+ ${markdown}
319
+ `,
320
+ schema: z.object({
321
+ industry: z.string().nullable(),
322
+ subindustry: z.string().nullable(),
323
+ businessModel: z.string().nullable(),
324
+ confidence: z.enum(["low", "medium", "high"])
325
+ })
326
+ });
327
+
328
+ return object;
329
+ ```
330
+
285
331
  ### Handle Missing Companies
286
332
 
287
333
  ```typescript
@@ -0,0 +1,150 @@
1
+ # Scrape ATS Careers Page
2
+
3
+ Extract a standardized list of jobs from a supported **official ATS-hosted** careers page without using a browser when possible.
4
+
5
+ This is best when you already have an ATS careers page URL, or when you first resolved one with `services.company.findCareersPage` and now want the actual jobs.
6
+
7
+ ## Input Parameters
8
+
9
+ Provide **one** of:
10
+
11
+ | Parameter | Type | Required | Description |
12
+ | ---------------- | -------- | -------- | ----------------------------------------------------------------------------------------------- |
13
+ | `careersPageUrl` | `string` | No | Official ATS board URL or ATS job/detail URL, e.g. `https://job-boards.greenhouse.io/anthropic` |
14
+ | `url` | `string` | No | Alias for `careersPageUrl` |
15
+
16
+ **Optional:**
17
+
18
+ | Parameter | Type | Required | Description |
19
+ | --------- | -------- | -------- | ------------------------------------ |
20
+ | `timeout` | `string` | No | Batch timeout override, e.g. `"30m"` |
21
+
22
+ ## Output
23
+
24
+ ```typescript
25
+ {
26
+ status: "success" | "unsupported_url" | "unsupported_provider";
27
+ inputUrl: string;
28
+ normalizedBoardUrl: string | null;
29
+ atsProvider: string | null;
30
+ companyName: string | null;
31
+ source: "api" | "html" | null;
32
+ totalJobs: number;
33
+ jobs: Array<{
34
+ id: string;
35
+ title: string;
36
+ url: string;
37
+ applyUrl: string | null;
38
+ location: string | null;
39
+ locations: string[];
40
+ department: string | null;
41
+ team: string | null;
42
+ employmentType: string | null;
43
+ workplaceType: string | null;
44
+ postedAt: string | null;
45
+ postedText: string | null;
46
+ requisitionId: string | null;
47
+ }>;
48
+ checkedUrls: string[];
49
+ supportedProviders: string[];
50
+ message: string | null;
51
+ }
52
+ ```
53
+
54
+ ## Examples
55
+
56
+ ### Scrape Jobs From a Known ATS Board
57
+
58
+ ```typescript
59
+ const result = await services.company.scrapeCareersPage({
60
+ careersPageUrl: "https://job-boards.greenhouse.io/anthropic"
61
+ });
62
+
63
+ return result.jobs;
64
+ ```
65
+
66
+ ### Resolve Then Scrape
67
+
68
+ ```typescript
69
+ const careers = await services.company.findCareersPage({
70
+ website: row.website
71
+ });
72
+
73
+ if (!careers.careerPageUrl || careers.pageType !== "ats") {
74
+ return [];
75
+ }
76
+
77
+ const jobs = await services.company.scrapeCareersPage({
78
+ careersPageUrl: careers.careerPageUrl
79
+ });
80
+
81
+ return jobs.jobs;
82
+ ```
83
+
84
+ ### Return Lightweight Job Summaries
85
+
86
+ ```typescript
87
+ const result = await services.company.scrapeCareersPage({
88
+ careersPageUrl: row.careers_page
89
+ });
90
+
91
+ return result.jobs.map((job) => ({
92
+ title: job.title,
93
+ location: job.location,
94
+ department: job.department,
95
+ url: job.url
96
+ }));
97
+ ```
98
+
99
+ ### Handle Unsupported Providers Gracefully
100
+
101
+ ```typescript
102
+ const result = await services.company.scrapeCareersPage({
103
+ careersPageUrl: row.careers_page
104
+ });
105
+
106
+ if (result.status !== "success") {
107
+ return {
108
+ status: result.status,
109
+ provider: result.atsProvider,
110
+ message: result.message
111
+ };
112
+ }
113
+
114
+ return result.totalJobs;
115
+ ```
116
+
117
+ ### Pass a Job Detail URL
118
+
119
+ ```typescript
120
+ const result = await services.company.scrapeCareersPage({
121
+ careersPageUrl: "https://jobs.lever.co/mistral/2a357282-9d44-4b41-a249-c75ffe878ce2"
122
+ });
123
+
124
+ return {
125
+ board: result.normalizedBoardUrl,
126
+ jobs: result.totalJobs
127
+ };
128
+ ```
129
+
130
+ ## Supported Providers
131
+
132
+ Current browser-free implementations:
133
+
134
+ - `ashby`
135
+ - `breezy`
136
+ - `greenhouse`
137
+ - `lever`
138
+ - `recruitee`
139
+ - `rippling`
140
+ - `smartrecruiters`
141
+ - `workable`
142
+ - `workday`
143
+
144
+ ## Key Rules
145
+
146
+ 1. **Use this for official ATS pages** - this endpoint is not meant for generic `company.com/careers` pages unless they are clearly hosted by a supported ATS.
147
+ 2. **Prefer resolving first when starting from a company website** - use `services.company.findCareersPage` to find the canonical ATS URL, then pass that into this scraper.
148
+ 3. **Job/detail URLs are okay** - supported ATS detail URLs are normalized back to the board before scraping.
149
+ 4. **Treat `unsupported_provider` as expected** - it means the input was a recognized ATS, but this scraper does not implement that provider yet.
150
+ 5. **Use `checkedUrls` for debugging** - when counts or mappings look off, inspect the URLs that were actually queried.
@@ -1,7 +1,7 @@
1
1
  - **ai**: AI helpers (summaries, classifications, scoring).
2
2
  - **apify**: Run any of 10,000+ Apify actors for web scraping, social media, e-commerce, and more.
3
3
  - **browser**: Kernel browser automation - spin up cloud browsers, execute Playwright code, take screenshots. **Use this for scraping structured lists of repeated data** (e.g., product listings, search results, table rows) where you know the DOM structure. Also ideal for **intercepting network requests** to discover underlying APIs, then paginate those APIs directly in your code (faster & cheaper than clicking through pages). Perfect for JS-heavy sites that don't work with simple HTTP scraping.
4
- - **company**: company data (getting employees at the company, getting company data, getting open jobs).
4
+ - **company**: company data (getting employees at the company, finding careers pages, getting company data, getting open jobs).
5
5
  - **crunchbase**: SQL search over the lean Crunchbase company table (`public.crunchbase_scraper_lean`) for startup prospecting.
6
6
  - **person**: finding a person's LinkedIn URL, enriching it from LinkedIn, contact info, and searching for specific people / groups on LinkedIn
7
7
  - **geo**: parsing address
@@ -9,5 +9,5 @@
9
9
  - **email**: send transactional notification emails through Orange Slice's managed sender.
10
10
  - **scrape**: website scraper, sitemap scraper
11
11
  - **web**: SERP
12
- - **predictLeads**: company intelligence datasets (financing events, technologies, products, job openings, news, and related company data).
12
+ - **predictLeads**: company intelligence datasets (financing events, technologies, products, job openings, news, and related company data). Use these as prospecting/enrichment signals, not source-of-truth validation for whether a known company is hiring right now.
13
13
  - **guides**: agent notes & operational docs (see [Error Handling Cheatsheet](../error-handling-cheatsheet.md))
@@ -1,130 +1,133 @@
1
1
  interface OceanCompaniesFilters {
2
- /** Array of domains to find lookalike companies for (e.g., ["stripe.com", "shopify.com"]) */
3
- lookalikeDomains?: string[];
4
- /** Minimum similarity score (0-1) for lookalike matching */
5
- minScore?: number;
6
- /** Company size ranges to filter by */
7
- companySizes?: Array<"0-1" | "2-10" | "11-50" | "51-200" | "201-500" | "501-1000" | "1001-5000" | "5001-10000" | "10001+">;
8
- /** Two-letter country codes to filter by (e.g., ["us", "gb"]) */
9
- countries?: string[];
10
- /** Industry categories to filter by */
11
- industries?: string[];
12
- /** Technology names to filter by (e.g., ["React", "Salesforce"]) */
13
- technologies?: string[];
14
- /** Technology category names to filter by */
15
- technologyCategories?: string[];
16
- /** Keywords to search for */
17
- keywords?: string[];
18
- /** Revenue ranges to filter by (e.g., ["0-1M", "1M-10M"]) */
19
- revenueRanges?: string[];
20
- /** Filter for e-commerce companies */
21
- ecommerce?: boolean;
2
+ /** Array of domains to find lookalike companies for (e.g., ["stripe.com", "shopify.com"]) */
3
+ lookalikeDomains?: string[];
4
+ /** Company size ranges to filter by */
5
+ companySizes?: Array<
6
+ "0-1" | "2-10" | "11-50" | "51-200" | "201-500" | "501-1000" | "1001-5000" | "5001-10000" | "10001+"
7
+ >;
8
+ /** Two-letter country codes to filter by (e.g., ["us", "gb"]) */
9
+ countries?: string[];
10
+ /** Industry names to filter by */
11
+ industries?: string[];
12
+ /** Filter for e-commerce companies */
13
+ ecommerce?: boolean;
14
+ }
15
+
16
+ interface OceanPeopleFilters {
17
+ /** Seniority levels to filter by (e.g., ["C-Level", "VP", "Director", "Manager"]) */
18
+ seniorities?: string[];
19
+ /** Departments to filter by (e.g., ["Engineering", "Sales", "Marketing"]) */
20
+ departments?: string[];
21
+ /** Job title keywords to search for */
22
+ jobTitleKeywords?: string[];
23
+ /** Two-letter country codes to filter by */
24
+ countries?: string[];
25
+ /** Array of Ocean.io people IDs to find lookalikes for */
26
+ lookalikePeopleIds?: string[];
22
27
  }
23
28
 
24
29
  interface OceanCompanyResult {
25
- /** Company domain */
26
- domain: string;
27
- /** Company name */
28
- name?: string;
29
- /** Legal company name */
30
- legalName?: string;
31
- /** Company description */
32
- description?: string;
33
- /** Two-letter country codes where the company operates */
34
- countries?: string[];
35
- /** Primary country code */
36
- primaryCountry?: string;
37
- /** Company size range (e.g., "51-200") */
38
- companySize?: string;
39
- /** Industry categories */
40
- industryCategories?: string[];
41
- /** Industries */
42
- industries?: string[];
43
- /** LinkedIn industry classification */
44
- linkedinIndustry?: string;
45
- /** Whether the company is an e-commerce business */
46
- ecommerce?: boolean;
47
- /** Company keywords */
48
- keywords?: string[];
49
- /** Ocean.io employee count estimate */
50
- employeeCountOcean?: number;
51
- /** LinkedIn employee count */
52
- employeeCountLinkedin?: number;
53
- /** Revenue range (e.g., "1M-10M") */
54
- revenue?: string;
55
- /** Year founded */
56
- yearFounded?: number;
57
- /** Company email addresses */
58
- emails?: string[];
59
- /** Phone numbers with country and primary flag */
60
- phones?: Array<{ country?: string; number: string; primary?: boolean }>;
61
- /** Company logo URL */
62
- logo?: string;
63
- /** Technologies used */
64
- technologies?: string[];
65
- /** Technology categories */
66
- technologyCategories?: string[];
67
- /** Company website root URL */
68
- rootUrl?: string;
69
- /** Social media profiles */
70
- medias?: Record<string, { url?: string; handle?: string; name?: string }>;
71
- /** Office locations */
72
- locations?: Array<{
73
- primary?: boolean;
74
- latitude?: number;
75
- longitude?: number;
76
- country?: string;
77
- locality?: string;
78
- region?: string;
79
- postalCode?: string;
80
- streetAddress?: string;
81
- state?: string;
82
- }>;
83
- /** Department sizes */
84
- departmentSizes?: Array<{ department: string; size: number }>;
85
- /** Headcount growth metrics */
86
- headcountGrowth?: {
87
- threeMonths?: number;
88
- threeMonthsPercentage?: number;
89
- sixMonths?: number;
90
- sixMonthsPercentage?: number;
91
- twelveMonths?: number;
92
- twelveMonthsPercentage?: number;
93
- };
94
- /** Last update timestamp */
95
- updatedAt?: string;
30
+ /** Company domain */
31
+ domain: string;
32
+ /** Company name */
33
+ name?: string;
34
+ /** Legal company name */
35
+ legalName?: string;
36
+ /** Company description */
37
+ description?: string;
38
+ /** Two-letter country codes where the company operates */
39
+ countries?: string[];
40
+ /** Primary country code */
41
+ primaryCountry?: string;
42
+ /** Company size range (e.g., "51-200") */
43
+ companySize?: string;
44
+ /** Industry categories */
45
+ industryCategories?: string[];
46
+ /** Industries */
47
+ industries?: string[];
48
+ /** LinkedIn industry classification */
49
+ linkedinIndustry?: string;
50
+ /** Whether the company is an e-commerce business */
51
+ ecommerce?: boolean;
52
+ /** Company keywords */
53
+ keywords?: string[];
54
+ /** Ocean.io employee count estimate */
55
+ employeeCountOcean?: number;
56
+ /** LinkedIn employee count */
57
+ employeeCountLinkedin?: number;
58
+ /** Revenue range (e.g., "1M-10M") */
59
+ revenue?: string;
60
+ /** Year founded */
61
+ yearFounded?: number;
62
+ /** Company email addresses */
63
+ emails?: string[];
64
+ /** Phone numbers with country and primary flag */
65
+ phones?: Array<{ country?: string; number: string; primary?: boolean }>;
66
+ /** Company logo URL */
67
+ logo?: string;
68
+ /** Technologies used */
69
+ technologies?: string[];
70
+ /** Technology categories */
71
+ technologyCategories?: string[];
72
+ /** Company website root URL */
73
+ rootUrl?: string;
74
+ /** Social media profiles */
75
+ medias?: Record<string, { url?: string; handle?: string; name?: string }>;
76
+ /** Office locations */
77
+ locations?: Array<{
78
+ primary?: boolean;
79
+ latitude?: number;
80
+ longitude?: number;
81
+ country?: string;
82
+ locality?: string;
83
+ region?: string;
84
+ postalCode?: string;
85
+ streetAddress?: string;
86
+ state?: string;
87
+ }>;
88
+ /** Department sizes */
89
+ departmentSizes?: Array<{ department: string; size: number }>;
90
+ /** Headcount growth metrics */
91
+ headcountGrowth?: {
92
+ threeMonths?: number;
93
+ threeMonthsPercentage?: number;
94
+ sixMonths?: number;
95
+ sixMonthsPercentage?: number;
96
+ twelveMonths?: number;
97
+ twelveMonthsPercentage?: number;
98
+ };
99
+ /** Last update timestamp */
100
+ updatedAt?: string;
96
101
  }
97
102
 
98
103
  /**
99
104
  * Search for lookalike companies using Ocean.io.
100
105
  * Provide seed domains via companiesFilters.lookalikeDomains to find similar companies.
101
- * Filter by size, country, industry, technology, revenue, and more.
102
- * Returns up to `size` companies per call (default 10). Use `searchAfter` for pagination.
106
+ * Verified v3 filters: companySizes, countries, industries, ecommerce, and top-level peopleFilters.
107
+ * Use `searchAfter` for pagination. Do not send `from`, `includeDomains`, `excludeDomains`, or `minScore` because Ocean v3 rejects them with 422 errors.
103
108
  */
104
109
  type companies = (params: {
105
- /** Filters for company search (lookalike domains, size, country, industry, etc.) */
106
- companiesFilters?: OceanCompaniesFilters;
107
- /** Number of results to return (default 10, max 100) */
108
- size?: number;
109
- /** Pagination offset (use searchAfter for cursor-based pagination instead) */
110
- from?: number;
111
- /** Cursor token from a previous response for efficient pagination */
112
- searchAfter?: string;
113
- /** Domains to always include in results */
114
- includeDomains?: string[];
115
- /** Domains to exclude from results */
116
- excludeDomains?: string[];
117
- }) => Promise<{
118
- /** Total matching companies */
119
- total: number;
120
- /** Cursor for next page (pass as searchAfter) */
121
- searchAfter?: string;
122
- /** Matched companies with relevance scores */
123
- companies: Array<{
124
- company: OceanCompanyResult;
125
- /** Relevance grade (A = best match) */
126
- relevance?: string;
127
- }>;
128
- /** Domains that were redirected to canonical domains */
129
- redirectMap?: Record<string, string>;
130
- }>;
110
+ /** Filters for company search (lookalike domains, size, country, industry, ecommerce) */
111
+ companiesFilters?: OceanCompaniesFilters;
112
+ /** Optional people filters. Returns companies that have at least one matching person. */
113
+ peopleFilters?: OceanPeopleFilters;
114
+ /** Number of results to return (default 10, max 100) */
115
+ size?: number;
116
+ /** Cursor token from a previous response for efficient pagination */
117
+ searchAfter?: string;
118
+ }) => Promise<{
119
+ /** Ocean.io status detail (typically "OK") */
120
+ detail?: string;
121
+ /** Total matching companies */
122
+ total: number;
123
+ /** Cursor for next page (pass as searchAfter) */
124
+ searchAfter?: string;
125
+ /** Matched companies with relevance scores */
126
+ companies: Array<{
127
+ company: OceanCompanyResult;
128
+ /** Relevance grade (A = best match) */
129
+ relevance?: string;
130
+ }>;
131
+ /** Domains that were redirected to canonical domains */
132
+ redirectMap?: Record<string, string>;
133
+ }>;
@@ -1,4 +1,4 @@
1
- /** Credits: 2 for the name + company search path, or 50 when reverse-email lookup is used. Charged only if a valid URL is returned. */
1
+ /** Credits: 2 for the search path, or 50 when reverse-email lookup is used. Charged only if a valid URL is returned. */
2
2
 
3
3
  /**
4
4
 
@@ -15,6 +15,6 @@
15
15
  keyword?: string;
16
16
  /** Location string (e.g., city, state, country) to narrow search results */
17
17
  location?: string;
18
- /** Email address. If provided, the service tries name + company search first when possible, then falls back to reverse-email lookup. */
18
+ /** Email address. For work emails, the service may infer the name from the email, try a search with that name plus the email domain, validate the result against B2B current-company domain data, then fall back to reverse-email lookup. */
19
19
  email?: string;
20
20
  }) => Promise<string | undefined>;