owletto 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +58 -0
- package/src/api-paginated.ts +185 -0
- package/src/base.ts +173 -0
- package/src/browser/launcher.ts +213 -0
- package/src/browser/stealth.ts +297 -0
- package/src/browser-paginated.ts +425 -0
- package/src/cli.ts +438 -0
- package/src/connector-runtime.ts +63 -0
- package/src/connector-types.ts +299 -0
- package/src/http.ts +82 -0
- package/src/index.ts +106 -0
- package/src/logger.ts +10 -0
- package/src/paginated.ts +301 -0
- package/src/retry.ts +168 -0
- package/src/scoring.ts +57 -0
- package/src/types.ts +289 -0
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Connector Types
|
|
3
|
+
*
|
|
4
|
+
* Type definitions for the V1 integration platform.
|
|
5
|
+
* Defines the contract between connectors, the runtime, and the platform.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
// =============================================================================
|
|
9
|
+
// Connector Definition
|
|
10
|
+
// =============================================================================
|
|
11
|
+
|
|
12
|
+
/**
 * Static description of a connector: identity, version, supported auth
 * methods, and the feeds and actions it exposes.
 */
export interface ConnectorDefinition {
  /** Unique connector key, e.g. 'google.gmail' */
  key: string;
  /** Human-readable name */
  name: string;
  /** Description of what this connector does */
  description?: string;
  /** Semantic version */
  version: string;
  /** Auth configuration */
  authSchema?: ConnectorAuthSchema;
  /** Available feed definitions (keyed by feed_key) */
  feeds?: Record<string, FeedDefinition>;
  /** Available action definitions (keyed by action_key) */
  actions?: Record<string, ActionDefinition>;
  /** Global connector options schema (JSON Schema) */
  optionsSchema?: Record<string, unknown>;
}
|
|
30
|
+
|
|
31
|
+
// =============================================================================
|
|
32
|
+
// Auth
|
|
33
|
+
// =============================================================================
|
|
34
|
+
|
|
35
|
+
/** Auth configuration of a connector: the list of auth methods it declares. */
export interface ConnectorAuthSchema {
  /** Declared auth methods; each entry is discriminated by its `type` field. */
  methods: ConnectorAuthMethod[];
}
|
|
38
|
+
|
|
39
|
+
/** Union of the auth method shapes, discriminated by the literal `type` field. */
export type ConnectorAuthMethod =
  | ConnectorAuthNone
  | ConnectorAuthEnvKeys
  | ConnectorAuthOAuth;
|
|
40
|
+
|
|
41
|
+
/** Auth method variant tagged `'none'`; it carries no further fields. */
export interface ConnectorAuthNone {
  type: 'none';
}
|
|
44
|
+
|
|
45
|
+
/**
 * Auth method variant tagged `'env_keys'`: a list of keyed fields, each with
 * optional presentation hints.
 */
export interface ConnectorAuthEnvKeys {
  type: 'env_keys';
  /** Whether this method is required. */
  required?: boolean;
  /** The key fields this method declares. */
  fields: Array<{
    /** Field key. */
    key: string;
    /** Optional display label. */
    label?: string;
    /** Optional description of the field. */
    description?: string;
    /** Optional example value. */
    example?: string;
    /** Marks the field as secret. NOTE(review): presumably controls masking in UIs; confirm with consumers. */
    secret?: boolean;
  }>;
  /** Optional description of the method as a whole. */
  description?: string;
}
|
|
57
|
+
|
|
58
|
+
/** Auth method variant tagged `'oauth'`: a provider name plus the scopes it requires. */
export interface ConnectorAuthOAuth {
  type: 'oauth';
  /** OAuth provider identifier. */
  provider: string;
  /** Scopes that must be granted. */
  requiredScopes: string[];
  /** Whether this method is required. */
  required?: boolean;
  /** Optional description of the method. */
  description?: string;
}
|
|
65
|
+
|
|
66
|
+
// =============================================================================
|
|
67
|
+
// Feed Definition
|
|
68
|
+
// =============================================================================
|
|
69
|
+
|
|
70
|
+
/** Describes one feed a connector offers. */
export interface FeedDefinition {
  /** Feed key, e.g. 'threads' */
  key: string;
  /** Human-readable name */
  name: string;
  /** Description */
  description?: string;
  /** JSON Schema for feed-specific config */
  configSchema?: Record<string, unknown>;
}
|
|
80
|
+
|
|
81
|
+
/** Feed modes. String-valued, so each member's runtime value equals its name. */
export enum FeedMode {
  /** Connector code runs on worker, syncs data */
  sync = 'sync',
  /** Virtual feed backed by saved queries (future) */
  virtual = 'virtual',
}
|
|
87
|
+
|
|
88
|
+
// =============================================================================
|
|
89
|
+
// Action Definition
|
|
90
|
+
// =============================================================================
|
|
91
|
+
|
|
92
|
+
/** Describes one action a connector offers. */
export interface ActionDefinition {
  /** Action key, e.g. 'draft_email' */
  key: string;
  /** Human-readable name */
  name: string;
  /** Description */
  description?: string;
  /** Whether this action requires human approval before execution */
  requiresApproval: boolean;
  /** MCP tool annotations for client-side confirmation UX */
  annotations?: {
    destructiveHint?: boolean;
    openWorldHint?: boolean;
    idempotentHint?: boolean;
  };
  /** JSON Schema for action input */
  inputSchema?: Record<string, unknown>;
  /** JSON Schema for action output */
  outputSchema?: Record<string, unknown>;
}
|
|
112
|
+
|
|
113
|
+
// =============================================================================
|
|
114
|
+
// Connection
|
|
115
|
+
// =============================================================================
|
|
116
|
+
|
|
117
|
+
/**
 * Connection record. `connectorKey` names the connector it belongs to and
 * `status` is one of four fixed states.
 */
export interface Connection {
  id: number;
  organizationId: string;
  /** Key of the connector this connection uses. */
  connectorKey: string;
  displayName?: string;
  status: 'active' | 'paused' | 'error' | 'revoked';
  accountId?: string;
  /** Free-form credential payload; its shape is not defined here. */
  credentials?: Record<string, unknown>;
  entityIds?: number[];
  /** Free-form configuration; its shape is not defined here. */
  config?: Record<string, unknown>;
  errorMessage?: string;
  createdBy?: string;
  createdAt: Date;
  updatedAt: Date;
}
|
|
132
|
+
|
|
133
|
+
// =============================================================================
|
|
134
|
+
// Feed
|
|
135
|
+
// =============================================================================
|
|
136
|
+
|
|
137
|
+
/**
 * Feed record: references a connection by `connectionId` and carries sync
 * scheduling and bookkeeping fields.
 */
export interface Feed {
  id: number;
  organizationId: string;
  /** Id of the connection this feed belongs to. */
  connectionId: number;
  feedKey: string;
  status: 'active' | 'paused' | 'error';
  entityIds?: number[];
  /** Free-form feed configuration. */
  config?: Record<string, unknown>;
  /** Free-form checkpoint state. */
  checkpoint?: Record<string, unknown>;
  /** Sync interval in milliseconds. */
  syncIntervalMs?: number;
  nextSyncAt?: Date;
  lastSyncAt?: Date;
  lastSyncStatus?: string;
  lastError?: string;
  consecutiveFailures: number;
  itemsCollected: number;
  createdAt: Date;
  updatedAt: Date;
}
|
|
156
|
+
|
|
157
|
+
// =============================================================================
|
|
158
|
+
// Run
|
|
159
|
+
// =============================================================================
|
|
160
|
+
|
|
161
|
+
/** Kinds of run. */
export type RunType = 'sync' | 'action' | 'code' | 'insight';

/** States a run can be in. */
export type RunStatus =
  | 'pending'
  | 'claimed'
  | 'running'
  | 'completed'
  | 'failed'
  | 'cancelled'
  | 'timeout';

/** Approval states of a run. */
export type ApprovalStatus = 'pending' | 'approved' | 'rejected' | 'auto';
|
|
171
|
+
|
|
172
|
+
/**
 * Run record: what was run (`runType` plus feed/connection/action references),
 * its approval and execution status, claim/heartbeat timestamps, and results.
 */
export interface Run {
  id: number;
  organizationId: string;
  runType: RunType;
  feedId?: number;
  connectionId?: number;
  actionKey?: string;
  actionInput?: Record<string, unknown>;
  actionOutput?: Record<string, unknown>;
  approvalStatus: ApprovalStatus;
  status: RunStatus;
  /** Identifier of whoever claimed the run. */
  claimedBy?: string;
  claimedAt?: Date;
  lastHeartbeatAt?: Date;
  completedAt?: Date;
  connectorKey?: string;
  connectorVersion?: string;
  /** Free-form checkpoint state. */
  checkpoint?: Record<string, unknown>;
  itemsCollected: number;
  errorMessage?: string;
  createdAt: Date;
}
|
|
194
|
+
|
|
195
|
+
// =============================================================================
|
|
196
|
+
// Event Envelope
|
|
197
|
+
// =============================================================================
|
|
198
|
+
|
|
199
|
+
/**
 * EventEnvelope is the standard output format for connector sync operations.
 * Each envelope becomes a row in the events table.
 *
 * Field names are snake_case, unlike the camelCase used by the other
 * interfaces in this file.
 */
export interface EventEnvelope {
  /** Platform's unique ID for this item */
  external_id: string;
  /** Main text content */
  content: string;
  /** Title / subject line */
  title?: string;
  /** Author name or email */
  author?: string;
  /** Link to original content */
  url?: string;
  /** When the content was originally created/published */
  published_at: Date;
  /** Content kind (e.g. 'thread', 'message', 'email', 'issue') */
  kind?: string;
  /** Engagement/relevance score (0-100) */
  score?: number;
  /** Parent reference for hierarchical content */
  parent_external_id?: string;
  /** Arbitrary metadata */
  metadata?: Record<string, unknown>;
  /** Pre-computed embedding vector */
  embedding?: number[];
}
|
|
227
|
+
|
|
228
|
+
// =============================================================================
|
|
229
|
+
// Sync Context & Result
|
|
230
|
+
// =============================================================================
|
|
231
|
+
|
|
232
|
+
/**
 * Context passed to ConnectorRuntime.sync()
 */
export interface SyncContext {
  /** Key of the feed being synced */
  feedKey: string;
  /** Feed configuration */
  config: Record<string, unknown>;
  /** Previous checkpoint (null on first sync) */
  checkpoint: Record<string, unknown> | null;
  /** OAuth credentials (if applicable) */
  credentials: SyncCredentials | null;
  /** Entity IDs this feed is linked to */
  entityIds: number[];
}
|
|
246
|
+
|
|
247
|
+
/**
 * Credentials for a sync or action invocation. Only `provider` and
 * `accessToken` are required; the rest may be omitted or null.
 */
export interface SyncCredentials {
  provider: string;
  accessToken: string;
  refreshToken?: string | null;
  /** Expiry as a string. NOTE(review): format is not specified here; confirm with the producer. */
  expiresAt?: string | null;
  scope?: string | null;
}
|
|
254
|
+
|
|
255
|
+
/**
 * Result from ConnectorRuntime.sync()
 */
export interface SyncResult {
  /** Events to write to the events table */
  events: EventEnvelope[];
  /** Updated checkpoint to persist */
  checkpoint: Record<string, unknown> | null;
  /** Optional metadata about the sync; arbitrary extra keys are allowed */
  metadata?: {
    items_found?: number;
    items_skipped?: number;
    [key: string]: unknown;
  };
}
|
|
270
|
+
|
|
271
|
+
// =============================================================================
|
|
272
|
+
// Action Context & Result
|
|
273
|
+
// =============================================================================
|
|
274
|
+
|
|
275
|
+
/**
 * Context passed to ConnectorRuntime.execute()
 */
export interface ActionContext {
  /** Action key to execute */
  actionKey: string;
  /** Action input parameters */
  input: Record<string, unknown>;
  /** OAuth credentials (if applicable) */
  credentials: SyncCredentials | null;
  /** Connection config */
  config: Record<string, unknown>;
}
|
|
288
|
+
|
|
289
|
+
/**
 * Result from ConnectorRuntime.execute()
 */
export interface ActionResult {
  /** Whether the action succeeded */
  success: boolean;
  /** Output data */
  output?: Record<string, unknown>;
  /** Error message if failed */
  error?: string;
}
|
package/src/http.ts
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import ky, { type KyInstance, type Options } from 'ky';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Shared HTTP client configuration for all crawlers
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
/**
 * Default retry and timeout configuration
 * - Max 2 retries (3 total attempts)
 * - Retries GET and POST requests only
 * - Retries on 408, 429, 500, 502, 503 and 504
 * - Backoff delay capped at 5 seconds
 * - 30s timeout
 *
 * NOTE(review): POST is in `methods`, so a POST that fails with one of the
 * listed statuses is re-sent; confirm every POST endpoint tolerates that.
 */
const defaultRetryConfig = {
  retry: {
    limit: 2, // Max 2 retries (3 total attempts)
    methods: ['get', 'post'],
    statusCodes: [
      408, // Request Timeout
      429, // Too Many Requests (rate limit)
      500, // Internal Server Error
      502, // Bad Gateway
      503, // Service Unavailable
      504, // Gateway Timeout
    ],
    backoffLimit: 5000, // Max 5 seconds delay between retries
  },
  timeout: 30000, // 30 second timeout per request
};
|
|
30
|
+
|
|
31
|
+
/**
 * Create a configured ky instance with custom options.
 *
 * Caller options are spread over `defaultRetryConfig`, so any top-level option
 * (e.g. `timeout`, `headers`) replaces the default. `retry` is merged rather
 * than replaced: a number is treated as `{ limit: n }`, an object overrides
 * only the fields it sets, and anything else keeps the defaults.
 *
 * @param options - Optional ky options layered over the defaults.
 * @returns A ky instance created with the merged options.
 */
export function createHttpClient(options?: Options): KyInstance {
  const retryOverride = options?.retry;

  return ky.create({
    ...defaultRetryConfig,
    ...options,
    // Nullish check, not truthiness: `retry: 0` is an explicit request to
    // disable retries and must not fall back to the default limit.
    retry:
      retryOverride == null
        ? defaultRetryConfig.retry
        : {
            ...defaultRetryConfig.retry,
            ...(typeof retryOverride === 'number' ? { limit: retryOverride } : retryOverride),
          },
  });
}
|
|
47
|
+
|
|
48
|
+
/**
 * Default HTTP client for crawlers with standard User-Agent.
 * Built with `createHttpClient`, setting only the `User-Agent` header.
 */
export const httpClient = createHttpClient({
  headers: {
    'User-Agent': 'UserResearchBot/1.0',
  },
});
|
|
56
|
+
|
|
57
|
+
/**
 * Create an HTTP client with authentication headers.
 *
 * @param authHeader - Value sent verbatim as the `Authorization` header.
 * @param additionalHeaders - Extra headers. They are spread last, so an entry
 *   with the same name replaces `User-Agent` or `Authorization`.
 * @returns A ky instance built by `createHttpClient` with those headers.
 */
export function createAuthenticatedClient(
  authHeader: string,
  additionalHeaders?: Record<string, string>
): KyInstance {
  return createHttpClient({
    headers: {
      'User-Agent': 'UserResearchBot/1.0',
      Authorization: authHeader,
      ...additionalHeaders,
    },
  });
}
|
|
72
|
+
|
|
73
|
+
/**
 * HTTP client for JSON APIs.
 * Sends `Accept` and `Content-Type` of `application/json` alongside the
 * standard User-Agent.
 */
export const jsonHttpClient = createHttpClient({
  headers: {
    'User-Agent': 'UserResearchBot/1.0',
    Accept: 'application/json',
    'Content-Type': 'application/json',
  },
});
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
// =============================================================================
// V1 Integration Platform — Connector SDK
// =============================================================================

// TypeBox (schema authoring convenience)
export type { Static } from '@sinclair/typebox';
export { Type } from '@sinclair/typebox';
// ky (shared HTTP dependency)
export type { KyInstance, Options } from 'ky';
export { default as ky, HTTPError } from 'ky';
// Connector runtime & types (primary API)
export { ConnectorRuntime } from './connector-runtime.js';
// FeedMode is a runtime enum, so it is exported as a value. A type-only export
// would leave `FeedMode.sync` unusable by consumers. A value export of an enum
// also exports its type.
export { FeedMode } from './connector-types.js';
export type {
  ActionContext,
  ActionDefinition,
  ActionResult,
  ApprovalStatus,
  Connection,
  ConnectorAuthEnvKeys,
  ConnectorAuthMethod,
  ConnectorAuthNone,
  ConnectorAuthOAuth,
  ConnectorAuthSchema,
  ConnectorDefinition,
  EventEnvelope,
  Feed,
  FeedDefinition,
  Run,
  RunStatus,
  RunType,
  SyncContext,
  SyncCredentials,
  SyncResult,
} from './connector-types.js';

// HTTP clients
export {
  createAuthenticatedClient,
  createHttpClient,
  httpClient,
  jsonHttpClient,
} from './http.js';

// Logger
export { sdkLogger, sdkLogger as logger } from './logger.js';

// Retry
export { withHttpRetry } from './retry.js';

// Scoring
export { calculateEngagementScore } from './scoring.js';
|
|
53
|
+
|
|
54
|
+
// =============================================================================
// Legacy Crawler SDK (kept for backward compatibility during migration)
// =============================================================================

export type { ApiSessionState } from './api-paginated.js';
export { ApiPaginatedCrawler } from './api-paginated.js';
export { BaseCrawler, RateLimitError } from './base.js';
export type { BrowserLaunchOptions, EnhancedBrowser } from './browser/launcher.js';
export {
  captureErrorArtifacts,
  launchBrowser,
  withErrorCapture,
} from './browser/launcher.js';
export type { StealthBrowser, StealthBrowserOptions } from './browser/stealth.js';
export {
  getRandomDelay,
  humanWait,
  launchStealthBrowser,
  randomScroll,
  testBotDetection,
} from './browser/stealth.js';
export type {
  BrowserCrawlerConfig,
  BrowserPaginationConfig,
  BrowserSessionState,
  CaptchaConfig,
  CookieConsentConfig,
} from './browser-paginated.js';
export { BrowserPaginatedCrawler } from './browser-paginated.js';
export type {
  PageFetchResult,
  PaginatedCheckpoint,
  PaginateResult,
  PaginationConfig,
} from './paginated.js';
export { PaginatedCrawler } from './paginated.js';
export type {
  Checkpoint,
  Content,
  CrawlerAuthEnvField,
  CrawlerAuthEnvKeysMethod,
  CrawlerAuthMethod,
  CrawlerAuthOAuthMethod,
  CrawlerAuthSchema,
  CrawlerOptions,
  CrawlResult,
  Env,
  ICrawler,
  ParentSourceDefinition,
  ScoringConfig,
  SearchResult,
  SessionState,
} from './types.js';
|
package/src/logger.ts
ADDED