orangeslice 2.1.5 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -90,8 +90,10 @@ const startups = await services.crunchbase.search({
90
90
  ## Service map
91
91
 
92
92
  - `services.company.linkedin.search/enrich`
93
+ - `services.company.findCareersPage/scrapeCareersPage`
93
94
  - `services.crunchbase.search` (returns rows array directly)
94
95
  - `services.company.getEmployeesFromLinkedin` (database-only B2B path)
96
+ - `services.ocean.search.companies/people`
95
97
  - `services.person.linkedin.search/enrich`
96
98
  - `services.web.search/batchSearch`
97
99
  - `services.ai.generateObject`
@@ -106,7 +108,7 @@ const startups = await services.crunchbase.search({
106
108
 
107
109
  All service calls go through `post()` in `src/api.ts`.
108
110
 
109
- - Submit path: `https://enrichly-production.up.railway.app/function`
111
+ - Execute paths: `https://enrichly-production.up.railway.app/execute/*` and `https://enrichly-production.up.railway.app/ctx/*`
110
112
  - Pending responses (`pending: true` / `202`) poll batch-service result endpoints.
111
113
  - Polling timeout supports long-running workflows (up to 10 minutes).
112
114
  - This package now exposes only batch-backed services.
@@ -0,0 +1,47 @@
1
+ export interface FindCareersPageParams {
2
+ website?: string;
3
+ url?: string;
4
+ }
5
+ export interface FindCareersPageResult {
6
+ inputUrl: string;
7
+ normalizedWebsiteUrl: string;
8
+ careerPageUrl: string | null;
9
+ pageType: "ats" | "official" | "not_found";
10
+ atsProvider: string | null;
11
+ detectionMethod: "input-ats" | "homepage-ats-link" | "homepage-careers-link" | "deterministic-candidate" | "candidate-ats-link" | "embedded-ats" | "candidate-redirect" | "ats-unverified" | "not-found";
12
+ checkedUrls: string[];
13
+ }
14
+ export interface ScrapeCareersPageParams {
15
+ careersPageUrl?: string;
16
+ url?: string;
17
+ }
18
+ export interface ScrapeCareersPageJob {
19
+ id: string;
20
+ title: string;
21
+ url: string;
22
+ applyUrl: string | null;
23
+ location: string | null;
24
+ locations: string[];
25
+ department: string | null;
26
+ team: string | null;
27
+ employmentType: string | null;
28
+ workplaceType: string | null;
29
+ postedAt: string | null;
30
+ postedText: string | null;
31
+ requisitionId: string | null;
32
+ }
33
+ export interface ScrapeCareersPageResult {
34
+ status: "success" | "unsupported_url" | "unsupported_provider";
35
+ inputUrl: string;
36
+ normalizedBoardUrl: string | null;
37
+ atsProvider: string | null;
38
+ companyName: string | null;
39
+ source: "api" | "html" | null;
40
+ totalJobs: number;
41
+ jobs: ScrapeCareersPageJob[];
42
+ checkedUrls: string[];
43
+ supportedProviders: string[];
44
+ message: string | null;
45
+ }
46
+ export declare function findCareersPage(params: FindCareersPageParams): Promise<FindCareersPageResult>;
47
+ export declare function scrapeCareersPage(params: ScrapeCareersPageParams): Promise<ScrapeCareersPageResult>;
@@ -0,0 +1,11 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.findCareersPage = findCareersPage;
4
+ exports.scrapeCareersPage = scrapeCareersPage;
5
+ const api_1 = require("./api");
6
+ async function findCareersPage(params) {
7
+ return (0, api_1.post)("/execute/find-careers-page", { ...params });
8
+ }
9
+ async function scrapeCareersPage(params) {
10
+ return (0, api_1.post)("/execute/scrape-careers-page", { ...params });
11
+ }
package/dist/cli.js CHANGED
File without changes
package/dist/index.d.ts CHANGED
@@ -1,11 +1,15 @@
1
1
  export { configure } from "./api";
2
2
  export type { OrangesliceConfig } from "./api";
3
+ export { findCareersPage, scrapeCareersPage } from "./careers";
4
+ export type { FindCareersPageParams, FindCareersPageResult, ScrapeCareersPageParams, ScrapeCareersPageResult, ScrapeCareersPageJob } from "./careers";
3
5
  export { ctx } from "./ctx";
4
6
  export type { Spreadsheet, SpreadsheetListItem, SqlResult, SqlQueryResult, SqlActionResult, RowsAddResult } from "./ctx";
5
7
  export { linkedinSearch } from "./b2b";
6
8
  export type { LinkedInSearchParams, LinkedInSearchResponse } from "./b2b";
7
9
  export { crunchbaseSearch } from "./crunchbase";
8
10
  export type { CrunchbaseSearchParams } from "./crunchbase";
11
+ export { executeOcean, oceanSearchCompanies, oceanSearchPeople, OCEAN_OPERATION_IDS } from "./ocean";
12
+ export type { OceanOperationId, OceanCompaniesFilters, OceanPeopleFilters, OceanCompaniesSearchParams, OceanCompaniesSearchResponse, OceanCompanyResult, OceanCompanyMatch, OceanPeopleSearchParams, OceanPeopleSearchResponse, OceanPersonResult } from "./ocean";
9
13
  export { webSearch, webBatchSearch } from "./serp";
10
14
  export type { WebSearchQuery, WebSearchResult, WebSearchResponse, BatchWebSearchParams } from "./serp";
11
15
  export { generateObject } from "./generateObject";
@@ -25,11 +29,13 @@ export type { PersonLinkedinFindUrlParams, CompanyLinkedinFindUrlParams, PersonC
25
29
  import { runApifyActor } from "./apify";
26
30
  import { linkedinSearch } from "./b2b";
27
31
  import { browserExecute } from "./browser";
32
+ import { findCareersPage, scrapeCareersPage } from "./careers";
28
33
  import { crunchbaseSearch } from "./crunchbase";
29
34
  import { personLinkedinEnrich, personLinkedinFindUrl, personContactGet, companyLinkedinEnrich, companyLinkedinFindUrl, companyGetEmployeesFromLinkedin, geoParseAddress, builtWithLookupDomain, builtWithRelationships, builtWithSearchByTech } from "./expansion";
30
35
  import { scrapeWebsite } from "./firecrawl";
31
36
  import { generateObject } from "./generateObject";
32
37
  import { googleMapsScrape } from "./googleMaps";
38
+ import { oceanSearchCompanies, oceanSearchPeople } from "./ocean";
33
39
  import { webBatchSearch, webSearch } from "./serp";
34
40
  export declare const services: {
35
41
  crunchbase: {
@@ -42,6 +48,8 @@ export declare const services: {
42
48
  search: typeof linkedinSearch;
43
49
  };
44
50
  getEmployeesFromLinkedin: typeof companyGetEmployeesFromLinkedin;
51
+ findCareersPage: typeof findCareersPage;
52
+ scrapeCareersPage: typeof scrapeCareersPage;
45
53
  };
46
54
  person: {
47
55
  linkedin: {
@@ -75,6 +83,12 @@ export declare const services: {
75
83
  googleMaps: {
76
84
  scrape: typeof googleMapsScrape;
77
85
  };
86
+ ocean: {
87
+ search: {
88
+ companies: typeof oceanSearchCompanies;
89
+ people: typeof oceanSearchPeople;
90
+ };
91
+ };
78
92
  builtWith: {
79
93
  lookupDomain: typeof builtWithLookupDomain;
80
94
  relationships: typeof builtWithRelationships;
package/dist/index.js CHANGED
@@ -1,14 +1,22 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.services = exports.builtWithSearchByTech = exports.builtWithRelationships = exports.builtWithLookupDomain = exports.geoParseAddress = exports.companyGetEmployeesFromLinkedin = exports.companyLinkedinFindUrl = exports.companyLinkedinEnrich = exports.personContactGet = exports.personLinkedinFindUrl = exports.personLinkedinEnrich = exports.PREDICT_LEADS_OPERATION_IDS = exports.predictLeads = exports.executePredictLeads = exports.googleMapsScrape = exports.runApifyActor = exports.browserExecute = exports.scrapeWebsite = exports.generateObject = exports.webBatchSearch = exports.webSearch = exports.crunchbaseSearch = exports.linkedinSearch = exports.ctx = exports.configure = void 0;
3
+ exports.services = exports.builtWithSearchByTech = exports.builtWithRelationships = exports.builtWithLookupDomain = exports.geoParseAddress = exports.companyGetEmployeesFromLinkedin = exports.companyLinkedinFindUrl = exports.companyLinkedinEnrich = exports.personContactGet = exports.personLinkedinFindUrl = exports.personLinkedinEnrich = exports.PREDICT_LEADS_OPERATION_IDS = exports.predictLeads = exports.executePredictLeads = exports.googleMapsScrape = exports.runApifyActor = exports.browserExecute = exports.scrapeWebsite = exports.generateObject = exports.webBatchSearch = exports.webSearch = exports.OCEAN_OPERATION_IDS = exports.oceanSearchPeople = exports.oceanSearchCompanies = exports.executeOcean = exports.crunchbaseSearch = exports.linkedinSearch = exports.ctx = exports.scrapeCareersPage = exports.findCareersPage = exports.configure = void 0;
4
4
  var api_1 = require("./api");
5
5
  Object.defineProperty(exports, "configure", { enumerable: true, get: function () { return api_1.configure; } });
6
+ var careers_1 = require("./careers");
7
+ Object.defineProperty(exports, "findCareersPage", { enumerable: true, get: function () { return careers_1.findCareersPage; } });
8
+ Object.defineProperty(exports, "scrapeCareersPage", { enumerable: true, get: function () { return careers_1.scrapeCareersPage; } });
6
9
  var ctx_1 = require("./ctx");
7
10
  Object.defineProperty(exports, "ctx", { enumerable: true, get: function () { return ctx_1.ctx; } });
8
11
  var b2b_1 = require("./b2b");
9
12
  Object.defineProperty(exports, "linkedinSearch", { enumerable: true, get: function () { return b2b_1.linkedinSearch; } });
10
13
  var crunchbase_1 = require("./crunchbase");
11
14
  Object.defineProperty(exports, "crunchbaseSearch", { enumerable: true, get: function () { return crunchbase_1.crunchbaseSearch; } });
15
+ var ocean_1 = require("./ocean");
16
+ Object.defineProperty(exports, "executeOcean", { enumerable: true, get: function () { return ocean_1.executeOcean; } });
17
+ Object.defineProperty(exports, "oceanSearchCompanies", { enumerable: true, get: function () { return ocean_1.oceanSearchCompanies; } });
18
+ Object.defineProperty(exports, "oceanSearchPeople", { enumerable: true, get: function () { return ocean_1.oceanSearchPeople; } });
19
+ Object.defineProperty(exports, "OCEAN_OPERATION_IDS", { enumerable: true, get: function () { return ocean_1.OCEAN_OPERATION_IDS; } });
12
20
  var serp_1 = require("./serp");
13
21
  Object.defineProperty(exports, "webSearch", { enumerable: true, get: function () { return serp_1.webSearch; } });
14
22
  Object.defineProperty(exports, "webBatchSearch", { enumerable: true, get: function () { return serp_1.webBatchSearch; } });
@@ -40,11 +48,13 @@ Object.defineProperty(exports, "builtWithSearchByTech", { enumerable: true, get:
40
48
  const apify_2 = require("./apify");
41
49
  const b2b_2 = require("./b2b");
42
50
  const browser_2 = require("./browser");
51
+ const careers_2 = require("./careers");
43
52
  const crunchbase_2 = require("./crunchbase");
44
53
  const expansion_2 = require("./expansion");
45
54
  const firecrawl_2 = require("./firecrawl");
46
55
  const generateObject_2 = require("./generateObject");
47
56
  const googleMaps_2 = require("./googleMaps");
57
+ const ocean_2 = require("./ocean");
48
58
  const predictLeads_2 = require("./predictLeads");
49
59
  const serp_2 = require("./serp");
50
60
  exports.services = {
@@ -57,7 +67,9 @@ exports.services = {
57
67
  enrich: expansion_2.companyLinkedinEnrich,
58
68
  search: b2b_2.linkedinSearch
59
69
  },
60
- getEmployeesFromLinkedin: expansion_2.companyGetEmployeesFromLinkedin
70
+ getEmployeesFromLinkedin: expansion_2.companyGetEmployeesFromLinkedin,
71
+ findCareersPage: careers_2.findCareersPage,
72
+ scrapeCareersPage: careers_2.scrapeCareersPage
61
73
  },
62
74
  person: {
63
75
  linkedin: {
@@ -91,6 +103,12 @@ exports.services = {
91
103
  googleMaps: {
92
104
  scrape: googleMaps_2.googleMapsScrape
93
105
  },
106
+ ocean: {
107
+ search: {
108
+ companies: ocean_2.oceanSearchCompanies,
109
+ people: ocean_2.oceanSearchPeople
110
+ }
111
+ },
94
112
  builtWith: {
95
113
  lookupDomain: expansion_2.builtWithLookupDomain,
96
114
  relationships: expansion_2.builtWithRelationships,
@@ -0,0 +1,166 @@
1
+ export declare const OCEAN_OPERATION_IDS: {
2
+ readonly searchCompanies: "search-companies";
3
+ readonly searchPeople: "search-people";
4
+ };
5
+ export type OceanOperationId = (typeof OCEAN_OPERATION_IDS)[keyof typeof OCEAN_OPERATION_IDS];
6
+ export interface OceanCompaniesFilters {
7
+ lookalikeDomains?: string[];
8
+ minScore?: number;
9
+ companySizes?: Array<"0-1" | "2-10" | "11-50" | "51-200" | "201-500" | "501-1000" | "1001-5000" | "5001-10000" | "10001+">;
10
+ countries?: string[];
11
+ industries?: string[];
12
+ technologies?: string[];
13
+ technologyCategories?: string[];
14
+ keywords?: string[];
15
+ revenueRanges?: string[];
16
+ ecommerce?: boolean;
17
+ }
18
+ export interface OceanPeopleFilters {
19
+ seniorities?: string[];
20
+ departments?: string[];
21
+ jobTitleKeywords?: string[];
22
+ countries?: string[];
23
+ lookalikePeopleIds?: string[];
24
+ }
25
+ export interface OceanCompaniesSearchParams {
26
+ companiesFilters?: OceanCompaniesFilters;
27
+ size?: number;
28
+ from?: number;
29
+ searchAfter?: string;
30
+ includeDomains?: string[];
31
+ excludeDomains?: string[];
32
+ }
33
+ export interface OceanPhone {
34
+ country?: string;
35
+ number: string;
36
+ primary?: boolean;
37
+ }
38
+ export interface OceanMediaProfile {
39
+ url?: string;
40
+ handle?: string;
41
+ name?: string;
42
+ }
43
+ export interface OceanLocation {
44
+ primary?: boolean;
45
+ latitude?: number;
46
+ longitude?: number;
47
+ country?: string;
48
+ locality?: string;
49
+ region?: string;
50
+ postalCode?: string;
51
+ streetAddress?: string;
52
+ state?: string;
53
+ }
54
+ export interface OceanDepartmentSize {
55
+ department: string;
56
+ size: number;
57
+ }
58
+ export interface OceanHeadcountGrowth {
59
+ threeMonths?: number;
60
+ threeMonthsPercentage?: number;
61
+ sixMonths?: number;
62
+ sixMonthsPercentage?: number;
63
+ twelveMonths?: number;
64
+ twelveMonthsPercentage?: number;
65
+ }
66
+ export interface OceanCompanyResult {
67
+ domain: string;
68
+ name?: string;
69
+ legalName?: string;
70
+ description?: string;
71
+ countries?: string[];
72
+ primaryCountry?: string;
73
+ companySize?: string;
74
+ industryCategories?: string[];
75
+ industries?: string[];
76
+ linkedinIndustry?: string;
77
+ ecommerce?: boolean;
78
+ keywords?: string[];
79
+ employeeCountOcean?: number;
80
+ employeeCountLinkedin?: number;
81
+ revenue?: string;
82
+ yearFounded?: number;
83
+ emails?: string[];
84
+ phones?: OceanPhone[];
85
+ logo?: string;
86
+ technologies?: string[];
87
+ technologyCategories?: string[];
88
+ rootUrl?: string;
89
+ medias?: Record<string, OceanMediaProfile>;
90
+ locations?: OceanLocation[];
91
+ departmentSizes?: OceanDepartmentSize[];
92
+ headcountGrowth?: OceanHeadcountGrowth;
93
+ updatedAt?: string;
94
+ }
95
+ export interface OceanCompanyMatch {
96
+ company: OceanCompanyResult;
97
+ relevance?: string;
98
+ }
99
+ export interface OceanCompaniesSearchResponse {
100
+ total: number;
101
+ searchAfter?: string;
102
+ companies: OceanCompanyMatch[];
103
+ redirectMap?: Record<string, string>;
104
+ }
105
+ export interface OceanPersonExperience {
106
+ dateFrom?: string;
107
+ dateTo?: string;
108
+ description?: string;
109
+ domain?: string;
110
+ jobTitle?: string;
111
+ }
112
+ export interface OceanContactField {
113
+ address?: string;
114
+ status?: string;
115
+ }
116
+ export interface OceanPhoneField {
117
+ numbers?: string[];
118
+ status?: string;
119
+ }
120
+ export interface OceanPersonCompanySnapshot {
121
+ companySize?: string;
122
+ logo?: string;
123
+ name?: string;
124
+ }
125
+ export interface OceanPersonResult {
126
+ id: string;
127
+ domain?: string;
128
+ name?: string;
129
+ firstName?: string;
130
+ lastName?: string;
131
+ country?: string;
132
+ state?: string;
133
+ location?: string;
134
+ linkedinUrl?: string;
135
+ seniorities?: string[];
136
+ departments?: string[];
137
+ photo?: string;
138
+ jobTitle?: string;
139
+ jobTitleEnglish?: string;
140
+ currentJobDescription?: string;
141
+ experiences?: OceanPersonExperience[];
142
+ summary?: string;
143
+ skills?: string[];
144
+ phone?: OceanPhoneField;
145
+ email?: OceanContactField;
146
+ updatedAt?: string;
147
+ company?: OceanPersonCompanySnapshot;
148
+ }
149
+ export interface OceanPeopleSearchParams {
150
+ companiesFilters?: OceanCompaniesFilters;
151
+ peopleFilters?: OceanPeopleFilters;
152
+ size?: number;
153
+ from?: number;
154
+ searchAfter?: string;
155
+ enableEmailSearch?: boolean;
156
+ enablePhoneSearch?: boolean;
157
+ }
158
+ export interface OceanPeopleSearchResponse {
159
+ total: number;
160
+ searchAfter?: string;
161
+ people: OceanPersonResult[];
162
+ redirectMap?: Record<string, string>;
163
+ }
164
+ export declare function executeOcean<T = unknown>(operationId: OceanOperationId, params?: Record<string, unknown>): Promise<T>;
165
+ export declare function oceanSearchCompanies(params: OceanCompaniesSearchParams): Promise<OceanCompaniesSearchResponse>;
166
+ export declare function oceanSearchPeople(params: OceanPeopleSearchParams): Promise<OceanPeopleSearchResponse>;
package/dist/ocean.js ADDED
@@ -0,0 +1,23 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.OCEAN_OPERATION_IDS = void 0;
4
+ exports.executeOcean = executeOcean;
5
+ exports.oceanSearchCompanies = oceanSearchCompanies;
6
+ exports.oceanSearchPeople = oceanSearchPeople;
7
+ const api_1 = require("./api");
8
+ exports.OCEAN_OPERATION_IDS = {
9
+ searchCompanies: "search-companies",
10
+ searchPeople: "search-people"
11
+ };
12
+ async function executeOcean(operationId, params) {
13
+ return (0, api_1.post)("/execute/oceanio", {
14
+ operationId,
15
+ params: params ?? {}
16
+ });
17
+ }
18
+ async function oceanSearchCompanies(params) {
19
+ return executeOcean(exports.OCEAN_OPERATION_IDS.searchCompanies, params);
20
+ }
21
+ async function oceanSearchPeople(params) {
22
+ return executeOcean(exports.OCEAN_OPERATION_IDS.searchPeople, params);
23
+ }
@@ -5,12 +5,18 @@ description: Patterns for enriching company data, tech stack detection, hiring d
5
5
 
6
6
  # Data Enrichment
7
7
 
8
- Standard pattern: **Search → Scrape → Extract**
8
+ Standard pattern: **Search/Domain → Scrape → Extract**
9
9
 
10
- 1. `web.search` with `site:` to find subpages likely to contain the data
10
+ 1. Start with a company `domain` when you already have it, otherwise use `web.search` with `site:` to find relevant pages
11
11
  2. `scrape.website` to get page content as markdown
12
12
  3. `ai.generateObject` to extract structured fields
13
13
 
14
+ For company enrichment/classification, prefer the website over LinkedIn `industry`.
15
+
16
+ - LinkedIn `industry` is acceptable as lightweight lookup context
17
+ - LinkedIn `industry` is too sparse/generic to be your main enrichment signal
18
+ - Preferred workflow: `domain` -> `scrape.website` -> `ai.generateObject`
19
+
14
20
  ---
15
21
 
16
22
  ## Example: Does this law firm handle medical malpractice?
@@ -42,6 +48,8 @@ async function checkMedMalPractice(domain: string) {
42
48
 
43
49
  ---
44
50
 
51
+ ---
52
+
45
53
  ## When to Use
46
54
 
47
55
  | Use Search → Scrape → Extract | Use `browser.execute` instead |
@@ -0,0 +1,54 @@
1
+ # createDraft
2
+
3
+ Create a Gmail draft without sending it yet.
4
+
5
+ ```typescript
6
+ // Create a new draft
7
+ const draft = await integrations.gmail.createDraft({
8
+ recipient_email: "jane@example.com",
9
+ subject: "Draft follow-up",
10
+ body: "Sharing a quick follow-up before I send this."
11
+ });
12
+
13
+ // Draft a reply in an existing thread
14
+ await integrations.gmail.createDraft({
15
+ thread_id: "19bf77729bcb3a44",
16
+ body: "Thanks for the update. I will review this today."
17
+ });
18
+ ```
19
+
20
+ ## Input
21
+
22
+ | Parameter | Type | Required | Description |
23
+ | ------------------ | ---------- | -------- | -------------------------------------------- |
24
+ | `recipient_email` | `string` | No | Primary `To` recipient |
25
+ | `extra_recipients` | `string[]` | No | Additional `To` recipients |
26
+ | `cc` | `string[]` | No | CC recipients |
27
+ | `bcc` | `string[]` | No | BCC recipients |
28
+ | `subject` | `string` | No | Draft subject |
29
+ | `body` | `string` | No | Draft body content |
30
+ | `message_body` | `string` | No | Alternate body field accepted by some tools |
31
+ | `is_html` | `boolean` | No | Set to `true` when `body` contains HTML |
32
+ | `attachment` | `object` | No | Optional attachment payload |
33
+ | `thread_id` | `string` | No | Existing thread to draft a reply into |
34
+ | `from_email` | `string` | No | Optional verified send-as alias |
35
+ | `user_id` | `string` | No | Gmail user id (`"me"` by default) |
36
+
37
+ ## Output
38
+
39
+ ```typescript
40
+ {
41
+ successful: boolean;
42
+ data?: {
43
+ id?: string;
44
+ draft_id?: string;
45
+ message?: GmailMessage;
46
+ };
47
+ error?: string;
48
+ }
49
+ ```
50
+
51
+ ## Notes
52
+
53
+ - Creating a draft is a mutating action and should be used intentionally
54
+ - If you pass `thread_id`, leave `subject` empty to stay in the existing thread
@@ -0,0 +1,50 @@
1
+ # fetchEmails
2
+
3
+ Fetch inbox messages or Gmail search results from the connected account.
4
+
5
+ ```typescript
6
+ // Read the current inbox
7
+ const inbox = await integrations.gmail.fetchEmails({
8
+ query: "in:inbox",
9
+ max_results: 10
10
+ });
11
+
12
+ // Search for unread emails from a sender
13
+ const unread = await integrations.gmail.fetchEmails({
14
+ query: "in:inbox is:unread from:alice@example.com",
15
+ max_results: 25
16
+ });
17
+ ```
18
+
19
+ ## Input
20
+
21
+ | Parameter | Type | Required | Description |
22
+ | --------------------- | ---------- | -------- | ----------------------------------------------------- |
23
+ | `query` | `string` | No | Gmail search query such as `in:inbox is:unread` |
24
+ | `verbose` | `boolean` | No | Fetch richer message details |
25
+ | `ids_only` | `boolean` | No | Only return message/thread identifiers |
26
+ | `label_ids` | `string[]` | No | Filter by Gmail label IDs |
27
+ | `page_token` | `string` | No | Pagination token from a previous call |
28
+ | `max_results` | `number` | No | Maximum messages to fetch in this page |
29
+ | `include_payload` | `boolean` | No | Include payload/body data when available |
30
+ | `include_spam_trash` | `boolean` | No | Include spam and trash |
31
+ | `user_id` | `string` | No | Gmail user id (`"me"` by default) |
32
+
33
+ ## Output
34
+
35
+ ```typescript
36
+ {
37
+ successful: boolean;
38
+ data?: {
39
+ messages?: GmailMessage[];
40
+ nextPageToken?: string;
41
+ resultSizeEstimate?: number;
42
+ };
43
+ error?: string;
44
+ }
45
+ ```
46
+
47
+ ## Notes
48
+
49
+ - Results are not guaranteed to be sorted by recency, so sort client-side if order matters
50
+ - For large mailboxes, fetch IDs first and then hydrate specific messages with `fetchMessageByMessageId(...)`
@@ -0,0 +1,36 @@
1
+ # fetchMessageByMessageId
2
+
3
+ Fetch a single Gmail message by its Gmail API message ID.
4
+
5
+ ```typescript
6
+ const message = await integrations.gmail.fetchMessageByMessageId({
7
+ message_id: "19b11732c1b578fd",
8
+ format: "full"
9
+ });
10
+
11
+ console.log(message.data?.subject);
12
+ console.log(message.data?.threadId);
13
+ ```
14
+
15
+ ## Input
16
+
17
+ | Parameter | Type | Required | Description |
18
+ | ------------- | ----------------------------------------- | -------- | -------------------------------------- |
19
+ | `message_id` | `string` | Yes | Gmail API message ID |
20
+ | `format` | `"minimal" \| "full" \| "raw" \| "metadata"` | No | Response format |
21
+ | `user_id` | `string` | No | Gmail user id (`"me"` by default) |
22
+
23
+ ## Output
24
+
25
+ ```typescript
26
+ {
27
+ successful: boolean;
28
+ data?: GmailMessage;
29
+ error?: string;
30
+ }
31
+ ```
32
+
33
+ ## Notes
34
+
35
+ - Use real Gmail `message_id` values returned by Gmail list/search actions
36
+ - `format: "full"` is best when you need headers, payload parts, or body data
@@ -0,0 +1,37 @@
1
+ # fetchMessageByThreadId
2
+
3
+ Fetch all messages belonging to a Gmail thread.
4
+
5
+ ```typescript
6
+ const thread = await integrations.gmail.fetchMessageByThreadId({
7
+ thread_id: "19bf77729bcb3a44"
8
+ });
9
+
10
+ for (const message of thread.data?.messages || []) {
11
+ console.log(message.subject);
12
+ }
13
+ ```
14
+
15
+ ## Input
16
+
17
+ | Parameter | Type | Required | Description |
18
+ | ------------ | -------- | -------- | ----------------------------------- |
19
+ | `thread_id` | `string` | Yes | Gmail API thread ID |
20
+ | `user_id` | `string` | No | Gmail user id (`"me"` by default) |
21
+
22
+ ## Output
23
+
24
+ ```typescript
25
+ {
26
+ successful: boolean;
27
+ data?: {
28
+ messages?: GmailMessage[];
29
+ };
30
+ error?: string;
31
+ }
32
+ ```
33
+
34
+ ## Notes
35
+
36
+ - Message order is not guaranteed, so sort by `internalDate` if you need oldest/newest order
37
+ - Read threads first before calling `replyToThread(...)` so you have the correct `thread_id`
@@ -0,0 +1,37 @@
1
+ # getProfile
2
+
3
+ Read Gmail profile metadata for the connected account.
4
+
5
+ ```typescript
6
+ const profile = await integrations.gmail.getProfile();
7
+
8
+ console.log(profile.data?.emailAddress);
9
+ console.log(profile.data?.messagesTotal);
10
+ console.log(profile.data?.threadsTotal);
11
+ ```
12
+
13
+ ## Input
14
+
15
+ | Parameter | Type | Required | Description |
16
+ | --------- | -------- | -------- | ----------------------------------- |
17
+ | `user_id` | `string` | No | Gmail user id (`"me"` by default) |
18
+
19
+ ## Output
20
+
21
+ ```typescript
22
+ {
23
+ successful: boolean;
24
+ data?: {
25
+ emailAddress?: string;
26
+ messagesTotal?: number;
27
+ threadsTotal?: number;
28
+ historyId?: string;
29
+ };
30
+ error?: string;
31
+ }
32
+ ```
33
+
34
+ ## Notes
35
+
36
+ - This is a lightweight way to confirm which mailbox is connected
37
+ - Mailbox totals are useful for diagnostics, health checks, and quick account introspection
@@ -1,12 +1,29 @@
1
1
  ---
2
- description: Gmail email sending via Google integration
2
+ description: Gmail inbox, drafts, threads, labels, profile, and email sending via Google integration
3
3
  ---
4
4
 
5
5
  # Gmail Integration
6
6
 
7
7
  Typed functions for Gmail actions powered by Orange Slice Google integrations.
8
8
 
9
- ## Email
9
+ ## Write Actions
10
10
 
11
11
  - `integrations.gmail.sendEmail(input)` - Send an email through the connected Gmail account
12
+ - `integrations.gmail.createDraft(input)` - Create a Gmail draft without sending it
13
+ - `integrations.gmail.replyToThread(input)` - Reply inside an existing Gmail thread
12
14
  - Heavy rate limit: `sendEmail` is capped at **40 calls/day** per connected Gmail account
15
+ - Mutating Gmail actions should be used intentionally because they require approval
16
+
17
+ ## Read Actions
18
+
19
+ - `integrations.gmail.fetchEmails(input)` - Read inbox messages or Gmail search results
20
+ - `integrations.gmail.fetchMessageByMessageId(input)` - Fetch one message by Gmail message ID
21
+ - `integrations.gmail.fetchMessageByThreadId(input)` - Fetch all messages in a Gmail thread
22
+ - `integrations.gmail.listLabels(input)` - List Gmail system and custom labels
23
+ - `integrations.gmail.getProfile(input)` - Read Gmail profile metadata such as mailbox counts
24
+
25
+ ## Notes
26
+
27
+ - Prefer `fetchEmails({ query: "in:inbox", max_results: 10 })` to read the current inbox
28
+ - For large inbox scans, start with smaller `max_results` values or `ids_only: true`
29
+ - Use real `messageId` and `threadId` values returned by Gmail read methods (passed as `message_id` / `thread_id`) when drilling into a message or thread
@@ -0,0 +1,34 @@
1
+ # listLabels
2
+
3
+ List Gmail system labels and custom labels.
4
+
5
+ ```typescript
6
+ const labels = await integrations.gmail.listLabels();
7
+
8
+ for (const label of labels.data?.labels || []) {
9
+ console.log(label.id, label.name);
10
+ }
11
+ ```
12
+
13
+ ## Input
14
+
15
+ | Parameter | Type | Required | Description |
16
+ | --------- | -------- | -------- | ----------------------------------- |
17
+ | `user_id` | `string` | No | Gmail user id (`"me"` by default) |
18
+
19
+ ## Output
20
+
21
+ ```typescript
22
+ {
23
+ successful: boolean;
24
+ data?: {
25
+ labels?: GmailLabel[];
26
+ };
27
+ error?: string;
28
+ }
29
+ ```
30
+
31
+ ## Notes
32
+
33
+ - Use this before any label-based workflow so you work with Gmail label IDs rather than display names
34
+ - System labels and custom labels can both appear in the response
@@ -0,0 +1,51 @@
1
+ # replyToThread
2
+
3
+ Reply inside an existing Gmail thread.
4
+
5
+ ```typescript
6
+ await integrations.gmail.replyToThread({
7
+ thread_id: "19bf77729bcb3a44",
8
+ body: "Thanks for the note. I will get back to you tomorrow."
9
+ });
10
+
11
+ await integrations.gmail.replyToThread({
12
+ thread_id: "19bf77729bcb3a44",
13
+ body: "<p>Reviewed and approved.</p>",
14
+ is_html: true
15
+ });
16
+ ```
17
+
18
+ ## Input
19
+
20
+ | Parameter | Type | Required | Description |
21
+ | ------------------ | ---------- | -------- | -------------------------------------------- |
22
+ | `thread_id` | `string` | Yes | Gmail thread to reply within |
23
+ | `body` | `string` | No | Reply body |
24
+ | `message_body` | `string` | No | Alternate body field accepted by some tools |
25
+ | `subject` | `string` | No | Optional subject override |
26
+ | `cc` | `string[]` | No | CC recipients |
27
+ | `bcc` | `string[]` | No | BCC recipients |
28
+ | `attachment` | `object` | No | Optional attachment payload |
29
+ | `is_html` | `boolean` | No | Set to `true` when `body` contains HTML |
30
+ | `from_email` | `string` | No | Optional verified send-as alias |
31
+ | `user_id` | `string` | No | Gmail user id (`"me"` by default) |
32
+
33
+ ## Output
34
+
35
+ ```typescript
36
+ {
37
+ successful: boolean;
38
+ data?: {
39
+ id?: string;
40
+ messageId?: string;
41
+ threadId?: string;
42
+ labelIds?: string[];
43
+ };
44
+ error?: string;
45
+ }
46
+ ```
47
+
48
+ ## Notes
49
+
50
+ - This is a mutating action and should be used intentionally
51
+ - Use a real `thread_id` from `fetchEmails(...)` or `fetchMessageByThreadId(...)`
@@ -242,7 +242,7 @@ See [attio/](./attio/) for all available functions.
242
242
 
243
243
  ### Gmail
244
244
 
245
- Send emails from connected Google Gmail accounts.
245
+ Read and write emails from connected Google Gmail accounts.
246
246
 
247
247
  ```typescript
248
248
  // Send a plain text email
@@ -260,6 +260,19 @@ await integrations.gmail.sendEmail({
260
260
  body: "<h2>Weekly Summary</h2><p>All systems operational.</p>",
261
261
  is_html: true
262
262
  });
263
+
264
+ // Read the current inbox
265
+ const inbox = await integrations.gmail.fetchEmails({
266
+ query: "in:inbox",
267
+ max_results: 10
268
+ });
269
+
270
+ // Create a draft without sending it
271
+ await integrations.gmail.createDraft({
272
+ recipient_email: "john@example.com",
273
+ subject: "Draft follow-up",
274
+ body: "This is saved as a draft."
275
+ });
263
276
  ```
264
277
 
265
278
  See [gmail/](./gmail/) for available functions.
@@ -9,8 +9,8 @@ description: Technographic data enrichment — discover what technologies, frame
9
9
 
10
10
  | Method | Description | Credits |
11
11
  | --------------- | -------------------------------------- | ------- |
12
- | `lookupDomain` | Get full technology stack for a domain | 75 |
13
- | `relationships` | Find related/connected domains | 75 |
12
+ | `lookupDomain` | Get full technology stack for a domain | 20 |
13
+ | `relationships` | Find related/connected domains | 10 |
14
14
  | `searchByTech` | Find companies using a specific tech | 100 |
15
15
 
16
16
  ## Use Cases
@@ -0,0 +1,137 @@
1
+ # Find Company Careers Page
2
+
3
+ Resolve a company's official careers page and, when possible, return the underlying ATS jobs page instead.
4
+
5
+ This is best when you have a company website or a specific company page and want the canonical place to browse jobs.
6
+
7
+ ## Input Parameters
8
+
9
+ Provide **one** of:
10
+
11
+ | Parameter | Type | Required | Description |
12
+ | --------- | -------- | -------- | ------------------------------------------------------------------ |
13
+ | `website` | `string` | No | Company website or page URL, e.g. `stripe.com` or `https://ro.co/` |
14
+ | `url` | `string` | No | Alias for `website` |
15
+
16
+ **Optional:**
17
+
18
+ | Parameter | Type | Required | Description |
19
+ | --------- | -------- | -------- | ------------------------------------ |
20
+ | `timeout` | `string` | No | Batch timeout override, e.g. `"30m"` |
21
+
22
+ ## Output
23
+
24
+ ```typescript
25
+ {
26
+ inputUrl: string;
27
+ normalizedWebsiteUrl: string;
28
+ careerPageUrl: string | null;
29
+ pageType: "ats" | "official" | "not_found";
30
+ atsProvider: string | null;
31
+ detectionMethod:
32
+ | "input-ats"
33
+ | "homepage-ats-link"
34
+ | "homepage-careers-link"
35
+ | "deterministic-candidate"
36
+ | "candidate-ats-link"
37
+ | "embedded-ats"
38
+ | "candidate-redirect"
39
+ | "ats-unverified"
40
+ | "not-found";
41
+ checkedUrls: string[];
42
+ }
43
+ ```
44
+
45
+ ## Examples
46
+
47
+ ### Basic Careers Lookup
48
+
49
+ ```typescript
50
+ const result = await services.company.findCareersPage({
51
+ website: row.website
52
+ });
53
+
54
+ return result.careerPageUrl;
55
+ ```
56
+
57
+ ### Prefer ATS When Available
58
+
59
+ ```typescript
60
+ const result = await services.company.findCareersPage({
61
+ website: "https://plaid.com"
62
+ });
63
+
64
+ return {
65
+ url: result.careerPageUrl,
66
+ type: result.pageType,
67
+ ats: result.atsProvider
68
+ };
69
+ ```
70
+
71
+ ### Handle Not Found
72
+
73
+ ```typescript
74
+ const result = await services.company.findCareersPage({
75
+ website: row.website
76
+ });
77
+
78
+ if (result.pageType === "not_found") {
79
+ return null;
80
+ }
81
+
82
+ return result.careerPageUrl;
83
+ ```
84
+
85
+ ### Debug Why a Result Was Chosen
86
+
87
+ ```typescript
88
+ const result = await services.company.findCareersPage({
89
+ website: row.website
90
+ });
91
+
92
+ return {
93
+ careerPageUrl: result.careerPageUrl,
94
+ pageType: result.pageType,
95
+ atsProvider: result.atsProvider,
96
+ detectionMethod: result.detectionMethod,
97
+ checkedUrls: result.checkedUrls
98
+ };
99
+ ```
100
+
101
+ ## What It Detects
102
+
103
+ - Official careers pages like `https://company.com/careers`
104
+ - Careers subdomains like `https://careers.company.com/`
105
+ - ATS boards when discoverable from the company site
106
+ - Embedded/wrapped ATS pages when the company site hosts the jobs UI directly
107
+
108
+ Common ATS providers currently recognized include:
109
+
110
+ - `ashby`
111
+ - `greenhouse`
112
+ - `lever`
113
+ - `workday`
114
+ - `icims`
115
+ - `gem`
116
+ - `kula`
117
+ - `breezy`
118
+ - `bamboohr`
119
+ - `rippling`
120
+ - `personio`
121
+ - `phenom`
122
+ - `smartrecruiters`
123
+ - `successfactors`
124
+ - `jobvite`
125
+ - `recruitee`
126
+ - `teamtailor`
127
+ - `indeed`
128
+ - `bestjobs`
129
+ - `ejobs`
130
+
131
+ ## Key Rules
132
+
133
+ 1. **Pass the company website when possible** - homepage/company URLs usually produce the best canonical result.
134
+ 2. **ATS is preferred over generic careers pages** - if the company site clearly points to an ATS board, that board is returned.
135
+ 3. **Deep location/provider pages can still work** - the resolver attempts to collapse some subdomains and detail pages back to the parent organization's careers site.
136
+ 4. **`pageType: "official"` is still a success** - many enterprises host jobs on branded careers portals instead of a third-party ATS URL.
137
+ 5. **Use `checkedUrls` for debugging** - when a result looks wrong or missing, inspect the visited candidates.
@@ -0,0 +1,37 @@
1
+ interface FindCareersPageResult {
2
+ /** The original website or URL input */
3
+ inputUrl: string;
4
+ /** Canonical homepage/base URL used during discovery */
5
+ normalizedWebsiteUrl: string;
6
+ /** Best careers page URL found, or null when none was found */
7
+ careerPageUrl: string | null;
8
+ /** Whether the result points to an ATS board, an official careers page, or nothing */
9
+ pageType: "ats" | "official" | "not_found";
10
+ /** ATS provider when pageType is "ats" */
11
+ atsProvider: string | null;
12
+ /** How the page was discovered */
13
+ detectionMethod:
14
+ | "input-ats"
15
+ | "homepage-ats-link"
16
+ | "homepage-careers-link"
17
+ | "deterministic-candidate"
18
+ | "candidate-ats-link"
19
+ | "embedded-ats"
20
+ | "candidate-redirect"
21
+ | "ats-unverified"
22
+ | "not-found";
23
+ /** URLs checked while searching */
24
+ checkedUrls: string[];
25
+ }
26
+
27
+ /**
28
+ * Find the best careers page for a company website.
29
+ * Accepts a homepage URL/domain and returns either a canonical ATS board URL
30
+ * or an official careers page on the company site; when none is found, `careerPageUrl` is null and `pageType` is "not_found".
31
+ */
32
+ type findCareersPage = (params: {
33
+ /** Company website or homepage URL */
34
+ website?: string;
35
+ /** Alias for website. Provide website or url. */
36
+ url?: string;
37
+ }) => Promise<FindCareersPageResult>;
@@ -56,7 +56,15 @@ interface B2BCompany {
56
56
  }
57
57
  ```
58
58
 
59
- > Note: company industry coverage in the LinkedIn B2B DB can be sparse. `industry` and `industries` may be `null`, generic, or missing even when the company record exists, so do not rely on them as the only company classification signal.
59
+ > Important: LinkedIn company `industry` / `industries` coverage in the B2B DB is very sparse and often too weak for enrichment. These fields may be `null`, generic, stale, or missing even when the company record exists. Treat them as lookup metadata only, not as a high-confidence classification source for enrichment workflows.
60
+ >
61
+ > Preferred pattern for enrichment/classification:
62
+ >
63
+ > 1. Start from the company `domain` when available
64
+ > 2. `services.scrape.website(...)` the company site or a relevant subpage
65
+ > 3. `services.ai.generateObject(...)` to classify the company from the scraped content
66
+ >
67
+ > Use LinkedIn enrich primarily for fast lookup fields like company identity, URL, headcount, location, and description. Do **not** build industry enrichment pipelines that depend mainly on LinkedIn `industry`.
60
68
 
61
69
  ### Extended (`extended: true`) - `B2BCompanyExtended`
62
70
 
@@ -282,6 +290,44 @@ return {
282
290
  };
283
291
  ```
284
292
 
293
+ ### Classify Industry from Domain, Not LinkedIn
294
+
295
+ If your goal is enrichment or categorization, prefer the company website over LinkedIn `industry`:
296
+
297
+ ```typescript
298
+ const company = await services.company.linkedin.enrich({
299
+ domain: row.domain
300
+ });
301
+
302
+ const { markdown } = await services.scrape.website({
303
+ url: `https://${row.domain}`
304
+ });
305
+
306
+ const { object } = await services.ai.generateObject({
307
+ prompt: `
308
+ Classify this company based on its website content.
309
+
310
+ Do not rely on LinkedIn industry because it is sparse and often too generic.
311
+ Use LinkedIn only as lightweight context for identity verification.
312
+
313
+ Domain: ${row.domain}
314
+ LinkedIn name: ${company?.name ?? "unknown"}
315
+ LinkedIn description: ${company?.description ?? "unknown"}
316
+
317
+ Website content:
318
+ ${markdown}
319
+ `,
320
+ schema: z.object({
321
+ industry: z.string().nullable(),
322
+ subindustry: z.string().nullable(),
323
+ businessModel: z.string().nullable(),
324
+ confidence: z.enum(["low", "medium", "high"])
325
+ })
326
+ });
327
+
328
+ return object;
329
+ ```
330
+
285
331
  ### Handle Missing Companies
286
332
 
287
333
  ```typescript
@@ -0,0 +1,150 @@
1
+ # Scrape ATS Careers Page
2
+
3
+ Extract a standardized list of jobs from a supported **official ATS-hosted** careers page, avoiding browser automation whenever possible.
4
+
5
+ This is best when you already have an ATS careers page URL, or when you first resolved one with `services.company.findCareersPage` and now want the actual jobs.
6
+
7
+ ## Input Parameters
8
+
9
+ Provide **one** of:
10
+
11
+ | Parameter | Type | Required | Description |
12
+ | ---------------- | -------- | -------- | ----------------------------------------------------------------------------------------------- |
13
+ | `careersPageUrl` | `string` | No | Official ATS board URL or ATS job/detail URL, e.g. `https://job-boards.greenhouse.io/anthropic` |
14
+ | `url` | `string` | No | Alias for `careersPageUrl` |
15
+
16
+ **Optional:**
17
+
18
+ | Parameter | Type | Required | Description |
19
+ | --------- | -------- | -------- | ------------------------------------ |
20
+ | `timeout` | `string` | No | Batch timeout override, e.g. `"30m"` |
21
+
22
+ ## Output
23
+
24
+ ```typescript
25
+ {
26
+ status: "success" | "unsupported_url" | "unsupported_provider";
27
+ inputUrl: string;
28
+ normalizedBoardUrl: string | null;
29
+ atsProvider: string | null;
30
+ companyName: string | null;
31
+ source: "api" | "html" | null;
32
+ totalJobs: number;
33
+ jobs: Array<{
34
+ id: string;
35
+ title: string;
36
+ url: string;
37
+ applyUrl: string | null;
38
+ location: string | null;
39
+ locations: string[];
40
+ department: string | null;
41
+ team: string | null;
42
+ employmentType: string | null;
43
+ workplaceType: string | null;
44
+ postedAt: string | null;
45
+ postedText: string | null;
46
+ requisitionId: string | null;
47
+ }>;
48
+ checkedUrls: string[];
49
+ supportedProviders: string[];
50
+ message: string | null;
51
+ }
52
+ ```
53
+
54
+ ## Examples
55
+
56
+ ### Scrape Jobs From a Known ATS Board
57
+
58
+ ```typescript
59
+ const result = await services.company.scrapeCareersPage({
60
+ careersPageUrl: "https://job-boards.greenhouse.io/anthropic"
61
+ });
62
+
63
+ return result.jobs;
64
+ ```
65
+
66
+ ### Resolve Then Scrape
67
+
68
+ ```typescript
69
+ const careers = await services.company.findCareersPage({
70
+ website: row.website
71
+ });
72
+
73
+ if (!careers.careerPageUrl || careers.pageType !== "ats") {
74
+ return [];
75
+ }
76
+
77
+ const jobs = await services.company.scrapeCareersPage({
78
+ careersPageUrl: careers.careerPageUrl
79
+ });
80
+
81
+ return jobs.jobs;
82
+ ```
83
+
84
+ ### Return Lightweight Job Summaries
85
+
86
+ ```typescript
87
+ const result = await services.company.scrapeCareersPage({
88
+ careersPageUrl: row.careers_page
89
+ });
90
+
91
+ return result.jobs.map((job) => ({
92
+ title: job.title,
93
+ location: job.location,
94
+ department: job.department,
95
+ url: job.url
96
+ }));
97
+ ```
98
+
99
+ ### Handle Unsupported Providers Gracefully
100
+
101
+ ```typescript
102
+ const result = await services.company.scrapeCareersPage({
103
+ careersPageUrl: row.careers_page
104
+ });
105
+
106
+ if (result.status !== "success") {
107
+ return {
108
+ status: result.status,
109
+ provider: result.atsProvider,
110
+ message: result.message
111
+ };
112
+ }
113
+
114
+ return result.totalJobs;
115
+ ```
116
+
117
+ ### Pass a Job Detail URL
118
+
119
+ ```typescript
120
+ const result = await services.company.scrapeCareersPage({
121
+ careersPageUrl: "https://jobs.lever.co/mistral/2a357282-9d44-4b41-a249-c75ffe878ce2"
122
+ });
123
+
124
+ return {
125
+ board: result.normalizedBoardUrl,
126
+ jobs: result.totalJobs
127
+ };
128
+ ```
129
+
130
+ ## Supported Providers
131
+
132
+ Current browser-free implementations:
133
+
134
+ - `ashby`
135
+ - `breezy`
136
+ - `greenhouse`
137
+ - `lever`
138
+ - `recruitee`
139
+ - `rippling`
140
+ - `smartrecruiters`
141
+ - `workable`
142
+ - `workday`
143
+
144
+ ## Key Rules
145
+
146
+ 1. **Use this for official ATS pages** - this endpoint is not meant for generic `company.com/careers` pages unless they are clearly hosted by a supported ATS.
147
+ 2. **Prefer resolving first when starting from a company website** - use `services.company.findCareersPage` to find the canonical ATS URL, then pass that into this scraper.
148
+ 3. **Job/detail URLs are okay** - supported ATS detail URLs are normalized back to the board before scraping.
149
+ 4. **Treat `unsupported_provider` as expected** - it means the input was a recognized ATS, but this scraper does not implement that provider yet.
150
+ 5. **Use `checkedUrls` for debugging** - when counts or mappings look off, inspect the URLs that were actually queried.
@@ -1,7 +1,7 @@
1
1
  - **ai**: AI helpers (summaries, classifications, scoring).
2
2
  - **apify**: Run any of 10,000+ Apify actors for web scraping, social media, e-commerce, and more.
3
3
  - **browser**: Kernel browser automation - spin up cloud browsers, execute Playwright code, take screenshots. **Use this for scraping structured lists of repeated data** (e.g., product listings, search results, table rows) where you know the DOM structure. Also ideal for **intercepting network requests** to discover underlying APIs, then paginate those APIs directly in your code (faster & cheaper than clicking through pages). Perfect for JS-heavy sites that don't work with simple HTTP scraping.
4
- - **company**: company data (getting employees at the company, getting company data, getting open jobs).
4
+ - **company**: company data (getting employees at the company, finding careers pages, getting company data, getting open jobs).
5
5
  - **crunchbase**: SQL search over the lean Crunchbase company table (`public.crunchbase_scraper_lean`) for startup prospecting.
6
6
  - **person**: finding a person's LinkedIn URL, enriching it from LinkedIn, contact info, and searching for specific people / groups on LinkedIn
7
7
  - **geo**: parsing address
@@ -1,4 +1,4 @@
1
- /\*_ Credits: 2 for the name + company search path, or 50 when reverse-email lookup is used. Charged only if a valid URL is returned. _/
1
+ /\*\* Credits: 2 for the search path, or 50 when reverse-email lookup is used. Charged only if a valid URL is returned. \*/
2
2
 
3
3
  /\*\*
4
4
 
@@ -15,6 +15,6 @@
15
15
  keyword?: string;
16
16
  /\*\* Location string (e.g., city, state, country) to narrow search results \*/
17
17
  location?: string;
18
- /\*_ Email address. If provided, the service tries name + company search first when possible, then falls back to reverse-email lookup. _/
18
+ /\*\* Email address. For work emails, the service may infer the person's name from the email, search using that name plus the email domain, validate the result against B2B current-company domain data, and then fall back to reverse-email lookup. \*/
19
19
  email?: string;
20
20
  }) => Promise<string | undefined>;
@@ -67,6 +67,22 @@ Web search returns URLs based on keywords, **not confirmed matches**. Scrape the
67
67
 
68
68
  ---
69
69
 
70
+ ## Company Subpage Discovery Rule
71
+
72
+ To find pages on a company's own website, **never search Google by company name**. Always start from the verified domain and dork with `site:` plus `inurl:` hints.
73
+
74
+ ```ts
75
+ await services.web.search({ query: "site:stripe.com inurl:team OR inurl:about OR inurl:careers" });
76
+ ```
77
+
78
+ Never do this for subpage discovery:
79
+
80
+ ```ts
81
+ await services.web.search({ query: '"Stripe" careers' });
82
+ ```
83
+
84
+ ---
85
+
70
86
  ## Parallel Query Permutations
71
87
 
72
88
  Always run multiple query variations for better coverage.
@@ -87,17 +103,17 @@ const allResults = await services.web.batchSearch({
87
103
  const uniqueLinks = [...new Set(allResults.flatMap((r) => r.results.map((x) => x.link)))];
88
104
  ```
89
105
 
90
- | Use Case | Permutation Ideas |
91
- | -------------- | -------------------------------------------------------- |
92
- | Person search | Full name, initials, nicknames, with/without middle name |
93
- | Company search | Full name, abbreviations, domain, Inc/LLC variations |
94
- | Title search | CEO/Founder/Chief, VP/Director, formal/informal titles |
106
+ | Use Case | Permutation Ideas |
107
+ | -------------- | --------------------------------------------------------------------------- |
108
+ | Person search | Full name, initials, nicknames, with/without middle name |
109
+ | Company search | Prefer verified domain first; use name variants only for off-site discovery |
110
+ | Title search | CEO/Founder/Chief, VP/Director, formal/informal titles |
95
111
 
96
112
  ---
97
113
 
98
114
  ## Google Dorking
99
115
 
100
- Use `site:` and `inurl:` to target specific platforms.
116
+ Use `site:` and `inurl:` to target specific platforms and verified company domains.
101
117
 
102
118
  | Platform | Dork | Example |
103
119
  | ------------------ | --------------------------- | -------------------------------------------- |
@@ -107,20 +123,19 @@ Use `site:` and `inurl:` to target specific platforms.
107
123
  | Reddit | `site:reddit.com` | `site:reddit.com/r/sales "cold email"` |
108
124
 
109
125
  ```ts
110
- // Find company employees with multiple dork variations
111
- const company = "Stripe";
126
+ // Find company subpages from a verified domain
127
+ const domain = "stripe.com";
112
128
  const queries = [
113
- `"${company}" site:linkedin.com/in`,
114
- `"${company}" CEO OR Founder site:linkedin.com/in`,
115
- `"${company}" VP OR Director site:linkedin.com/in`,
116
- `"${company}" Engineer site:linkedin.com/in`,
117
- `stripe.com site:linkedin.com/in`
129
+ `site:${domain} inurl:team OR inurl:about OR inurl:leadership`,
130
+ `site:${domain} inurl:careers OR inurl:jobs`,
131
+ `site:${domain} inurl:blog OR inurl:news OR inurl:press`,
132
+ `site:${domain} inurl:contact OR inurl:locations`
118
133
  ];
119
134
 
120
135
  const results = await services.web.batchSearch({
121
136
  queries: queries.map((query) => ({ query }))
122
137
  });
123
- const profiles = [...new Set(results.flatMap((r) => r.results.map((x) => x.link)))];
138
+ const subpages = [...new Set(results.flatMap((r) => r.results.map((x) => x.link)))];
124
139
  ```
125
140
 
126
141
  ---
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "orangeslice",
3
- "version": "2.1.5",
3
+ "version": "2.2.0",
4
4
  "description": "B2B LinkedIn database prospector - 1.15B profiles, 85M companies",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",