owletto 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,299 @@
1
+ /**
2
+ * Connector Types
3
+ *
4
+ * Type definitions for the V1 integration platform.
5
+ * Defines the contract between connectors, the runtime, and the platform.
6
+ */
7
+
8
+ // =============================================================================
9
+ // Connector Definition
10
+ // =============================================================================
11
+
/**
 * Static description of a connector: its identity and version plus the auth
 * methods, feeds, actions, and options it declares.
 */
export interface ConnectorDefinition {
  /** Unique key identifying the connector, e.g. 'google.gmail'. */
  key: string;
  /** Display name intended for humans. */
  name: string;
  /** Semantic version string of the connector. */
  version: string;
  /** Optional explanation of what the connector does. */
  description?: string;
  /** Optional auth configuration listing the supported auth methods. */
  authSchema?: ConnectorAuthSchema;
  /** Optional JSON Schema describing connector-wide (global) options. */
  optionsSchema?: Record<string, unknown>;
  /** Feed definitions offered by the connector, keyed by feed_key. */
  feeds?: Record<string, FeedDefinition>;
  /** Action definitions offered by the connector, keyed by action_key. */
  actions?: Record<string, ActionDefinition>;
}
30
+
31
+ // =============================================================================
32
+ // Auth
33
+ // =============================================================================
34
+
/** Auth configuration of a connector: the list of auth methods it declares. */
export interface ConnectorAuthSchema {
  methods: Array<ConnectorAuthMethod>;
}
38
+
/** Any auth method a connector can declare; each variant carries a literal `type` tag. */
export type ConnectorAuthMethod =
  | ConnectorAuthNone
  | ConnectorAuthEnvKeys
  | ConnectorAuthOAuth;
40
+
/** Auth method tagged 'none'; it carries no further fields. */
export interface ConnectorAuthNone {
  type: 'none';
}
44
+
/**
 * Auth method tagged 'env_keys': a list of keyed fields the connector asks for.
 * NOTE(review): presumably each field maps to an environment-supplied value —
 * confirm against the runtime that consumes this schema.
 */
export interface ConnectorAuthEnvKeys {
  type: 'env_keys';
  /** Fields requested by this method; only `key` is mandatory on each entry. */
  fields: {
    key: string;
    label?: string;
    description?: string;
    example?: string;
    /** Optional flag marking the field as secret. */
    secret?: boolean;
  }[];
  /** Optional flag stating whether this auth method is required. */
  required?: boolean;
  /** Optional free-text description of the method. */
  description?: string;
}
57
+
/** Auth method tagged 'oauth': names a provider and the scopes it requires. */
export interface ConnectorAuthOAuth {
  type: 'oauth';
  /** Provider identifier. */
  provider: string;
  /** Scopes that must be granted. */
  requiredScopes: Array<string>;
  /** Optional flag stating whether this auth method is required. */
  required?: boolean;
  /** Optional free-text description of the method. */
  description?: string;
}
65
+
66
+ // =============================================================================
67
+ // Feed Definition
68
+ // =============================================================================
69
+
/** Declaration of a single feed that a connector makes available. */
export interface FeedDefinition {
  /** Key of the feed, e.g. 'threads'. */
  key: string;
  /** Display name intended for humans. */
  name: string;
  /** Optional description of the feed. */
  description?: string;
  /** Optional JSON Schema for configuration specific to this feed. */
  configSchema?: Record<string, unknown>;
}
80
+
/** Modes a feed can operate in; each member's value equals its name. */
export enum FeedMode {
  /** The connector's code executes on a worker and syncs data. */
  sync = 'sync',
  /** A virtual feed backed by saved queries (planned for the future). */
  virtual = 'virtual',
}
87
+
88
+ // =============================================================================
89
+ // Action Definition
90
+ // =============================================================================
91
+
/** Declaration of a single action that a connector makes available. */
export interface ActionDefinition {
  /** Key of the action, e.g. 'draft_email'. */
  key: string;
  /** Display name intended for humans. */
  name: string;
  /** True when a human must approve the action before it is executed. */
  requiresApproval: boolean;
  /** Optional description of the action. */
  description?: string;
  /** Optional JSON Schema describing the action's input. */
  inputSchema?: Record<string, unknown>;
  /** Optional JSON Schema describing the action's output. */
  outputSchema?: Record<string, unknown>;
  /** Optional MCP tool annotations used for client-side confirmation UX. */
  annotations?: {
    destructiveHint?: boolean;
    openWorldHint?: boolean;
    idempotentHint?: boolean;
  };
}
112
+
113
+ // =============================================================================
114
+ // Connection
115
+ // =============================================================================
116
+
/**
 * Connection record tying an organization to a connector (via `connectorKey`).
 * NOTE(review): per-field semantics are not spelled out in this file; the notes
 * below only restate what the types show.
 */
export interface Connection {
  id: number;
  organizationId: string;
  /** Key of the connector this connection belongs to. */
  connectorKey: string;
  /** Lifecycle state; one of the four listed literals. */
  status: 'active' | 'paused' | 'error' | 'revoked';
  createdAt: Date;
  updatedAt: Date;
  displayName?: string;
  accountId?: string;
  /** Opaque credential bag; its shape is not constrained here. */
  credentials?: Record<string, unknown>;
  entityIds?: number[];
  /** Opaque configuration bag; its shape is not constrained here. */
  config?: Record<string, unknown>;
  errorMessage?: string;
  createdBy?: string;
}
132
+
133
+ // =============================================================================
134
+ // Feed
135
+ // =============================================================================
136
+
/**
 * Feed record: an instance of a connector feed (`feedKey`) attached to a
 * connection, together with its sync bookkeeping fields.
 * NOTE(review): per-field semantics are not spelled out in this file; the notes
 * below only restate what the types show.
 */
export interface Feed {
  id: number;
  organizationId: string;
  /** Id of the connection this feed is attached to. */
  connectionId: number;
  feedKey: string;
  /** Lifecycle state; one of the three listed literals. */
  status: 'active' | 'paused' | 'error';
  consecutiveFailures: number;
  itemsCollected: number;
  createdAt: Date;
  updatedAt: Date;
  entityIds?: number[];
  /** Opaque configuration bag; its shape is not constrained here. */
  config?: Record<string, unknown>;
  /** Opaque checkpoint bag; its shape is not constrained here. */
  checkpoint?: Record<string, unknown>;
  /** Sync interval, in milliseconds as the name states. */
  syncIntervalMs?: number;
  nextSyncAt?: Date;
  lastSyncAt?: Date;
  lastSyncStatus?: string;
  lastError?: string;
}
156
+
157
+ // =============================================================================
158
+ // Run
159
+ // =============================================================================
160
+
/** Kind of work a run performs. */
export type RunType = 'sync' | 'action' | 'code' | 'insight';

/** State of a run; one of the seven listed literals. */
export type RunStatus =
  | 'pending' | 'claimed' | 'running'
  | 'completed' | 'failed' | 'cancelled' | 'timeout';

/** Approval state recorded on a run. */
export type ApprovalStatus =
  | 'pending'
  | 'approved'
  | 'rejected'
  | 'auto';
171
+
/**
 * Run record: one unit of work of a given `runType`, with its approval state,
 * status, claim/heartbeat timestamps, and outcome fields.
 * NOTE(review): per-field semantics are not spelled out in this file; the notes
 * below only restate what the types show.
 */
export interface Run {
  id: number;
  organizationId: string;
  runType: RunType;
  approvalStatus: ApprovalStatus;
  status: RunStatus;
  itemsCollected: number;
  createdAt: Date;
  feedId?: number;
  connectionId?: number;
  connectorKey?: string;
  connectorVersion?: string;
  actionKey?: string;
  /** Opaque action input bag; its shape is not constrained here. */
  actionInput?: Record<string, unknown>;
  /** Opaque action output bag; its shape is not constrained here. */
  actionOutput?: Record<string, unknown>;
  /** Opaque checkpoint bag; its shape is not constrained here. */
  checkpoint?: Record<string, unknown>;
  claimedBy?: string;
  claimedAt?: Date;
  lastHeartbeatAt?: Date;
  completedAt?: Date;
  errorMessage?: string;
}
194
+
195
+ // =============================================================================
196
+ // Event Envelope
197
+ // =============================================================================
198
+
/**
 * Standard output format of connector sync operations.
 * Every envelope ends up as one row in the events table.
 */
export interface EventEnvelope {
  /** Unique ID the source platform assigns to this item. */
  external_id: string;
  /** Main text content. */
  content: string;
  /** When the content was originally created or published. */
  published_at: Date;
  /** Title or subject line. */
  title?: string;
  /** Name or email of the author. */
  author?: string;
  /** Link back to the original content. */
  url?: string;
  /** Kind of content, e.g. 'thread', 'message', 'email', 'issue'. */
  kind?: string;
  /** Engagement/relevance score in the range 0-100. */
  score?: number;
  /** Reference to the parent item, for hierarchical content. */
  parent_external_id?: string;
  /** Arbitrary metadata. */
  metadata?: Record<string, unknown>;
  /** Embedding vector computed ahead of time. */
  embedding?: number[];
}
227
+
228
+ // =============================================================================
229
+ // Sync Context & Result
230
+ // =============================================================================
231
+
/**
 * Input handed to ConnectorRuntime.sync().
 */
export interface SyncContext {
  /** Key of the feed being synced. */
  feedKey: string;
  /** Feed configuration. */
  config: Record<string, unknown>;
  /** Entity IDs the feed is linked to. */
  entityIds: number[];
  /** Checkpoint from the previous sync; null on the first sync. */
  checkpoint: Record<string, unknown> | null;
  /** OAuth credentials when applicable, otherwise null. */
  credentials: SyncCredentials | null;
}
246
+
/**
 * Credentials passed to a connector; only `provider` and `accessToken` are
 * mandatory, the remaining fields may be omitted or null.
 */
export interface SyncCredentials {
  provider: string;
  accessToken: string;
  refreshToken?: string | null;
  /** Expiry carried as a string — NOTE(review): format is not defined here; confirm with the producer. */
  expiresAt?: string | null;
  scope?: string | null;
}
254
+
/**
 * Output returned by ConnectorRuntime.sync().
 */
export interface SyncResult {
  /** Events to be written to the events table. */
  events: Array<EventEnvelope>;
  /** Updated checkpoint to persist (may be null). */
  checkpoint: Record<string, unknown> | null;
  /** Optional sync metadata; two known counters plus arbitrary extra keys. */
  metadata?: {
    items_found?: number;
    items_skipped?: number;
    [key: string]: unknown;
  };
}
270
+
271
+ // =============================================================================
272
+ // Action Context & Result
273
+ // =============================================================================
274
+
/**
 * Input handed to ConnectorRuntime.execute().
 */
export interface ActionContext {
  /** Key of the action to run. */
  actionKey: string;
  /** Parameters supplied as the action's input. */
  input: Record<string, unknown>;
  /** Configuration of the connection. */
  config: Record<string, unknown>;
  /** OAuth credentials when applicable, otherwise null. */
  credentials: SyncCredentials | null;
}
288
+
/**
 * Output returned by ConnectorRuntime.execute().
 */
export interface ActionResult {
  /** True when the action succeeded. */
  success: boolean;
  /** Error message, present when the action failed. */
  error?: string;
  /** Data produced by the action. */
  output?: Record<string, unknown>;
}
package/src/http.ts ADDED
@@ -0,0 +1,82 @@
1
+ import ky, { type KyInstance, type Options } from 'ky';
2
+
3
+ /**
4
+ * Shared HTTP client configuration for all crawlers
5
+ */
6
+
/**
 * Defaults applied to every client built in this module.
 * - 30 second timeout per request
 * - At most 2 retries, i.e. 3 attempts in total
 * - Retries apply to GET and POST requests
 * - Retried statuses: 408, 429 and the listed 5xx codes (500, 502, 503, 504)
 * - Delay between retries is capped at 5 seconds
 */
const defaultRetryConfig = {
  // Per-request timeout in milliseconds (30 seconds).
  timeout: 30000,
  retry: {
    // Two retries on top of the initial attempt.
    limit: 2,
    methods: ['get', 'post'],
    statusCodes: [
      // Request Timeout
      408,
      // Too Many Requests (rate limit)
      429,
      // Internal Server Error
      500,
      // Bad Gateway
      502,
      // Service Unavailable
      503,
      // Gateway Timeout
      504,
    ],
    // Upper bound, in milliseconds, on the delay between retries.
    backoffLimit: 5000,
  },
};
30
+
/**
 * Create a configured ky instance.
 *
 * Caller options are layered over the shared defaults (retry policy and
 * timeout). The `retry` option is merged with the default retry policy instead
 * of replacing it: a numeric `retry` overrides only `limit`, an object `retry`
 * overrides only the fields it sets.
 *
 * @param options - Optional ky options applied on top of the defaults.
 * @returns A new ky instance carrying the merged configuration.
 */
export function createHttpClient(options?: Options): KyInstance {
  const retryOverride = options?.retry;

  // Test for `undefined` rather than truthiness: `retry: 0` is a legitimate
  // request for "no retries" and must not fall back to the default limit.
  const retry =
    retryOverride === undefined
      ? defaultRetryConfig.retry
      : {
          ...defaultRetryConfig.retry,
          ...(typeof retryOverride === 'number' ? { limit: retryOverride } : retryOverride),
        };

  return ky.create({
    ...defaultRetryConfig,
    ...options,
    // Listed last so the merged policy wins over the raw `options.retry`.
    retry,
  });
}
47
+
/**
 * Shared default client for crawlers.
 * Sends the standard 'UserResearchBot/1.0' User-Agent and otherwise uses the
 * defaults applied by createHttpClient.
 */
export const httpClient = createHttpClient({
  headers: { 'User-Agent': 'UserResearchBot/1.0' },
});
56
+
/**
 * Build an HTTP client that sends an Authorization header on every request.
 *
 * @param authHeader - Value used verbatim for the `Authorization` header.
 * @param additionalHeaders - Extra headers; applied last, so an entry with the
 *   same name replaces the default `User-Agent` or the `Authorization` value.
 * @returns A ky instance created through createHttpClient.
 */
export function createAuthenticatedClient(
  authHeader: string,
  additionalHeaders?: Record<string, string>
): KyInstance {
  const headers: Record<string, string> = {
    'User-Agent': 'UserResearchBot/1.0',
    Authorization: authHeader,
    ...additionalHeaders,
  };

  return createHttpClient({ headers });
}
72
+
/**
 * Shared client for JSON APIs.
 * On top of the standard User-Agent it sets both `Accept` and `Content-Type`
 * to 'application/json'.
 */
export const jsonHttpClient = createHttpClient({
  headers: {
    'User-Agent': 'UserResearchBot/1.0',
    Accept: 'application/json',
    'Content-Type': 'application/json',
  },
});
package/src/index.ts ADDED
@@ -0,0 +1,106 @@
1
+ // =============================================================================
2
+ // V1 Integration Platform — Connector SDK
3
+ // =============================================================================
4
+
5
+ // TypeBox (schema authoring convenience)
6
+ export type { Static } from '@sinclair/typebox';
7
+ export { Type } from '@sinclair/typebox';
8
+ // ky (shared HTTP dependency)
9
+ export type { KyInstance, Options } from 'ky';
10
+ export { default as ky, HTTPError } from 'ky';
11
+ // Connector runtime & types (primary API)
12
+ export { ConnectorRuntime } from './connector-runtime.js';
// FeedMode is an enum with runtime values, so it is re-exported as a value.
// Listing it under `export type` would erase it and leave `FeedMode.sync`
// unusable for SDK consumers; a value export still exposes the type as well.
export { FeedMode } from './connector-types.js';
export type {
  ActionContext,
  ActionDefinition,
  ActionResult,
  ApprovalStatus,
  Connection,
  ConnectorAuthEnvKeys,
  ConnectorAuthMethod,
  ConnectorAuthNone,
  ConnectorAuthOAuth,
  ConnectorAuthSchema,
  ConnectorDefinition,
  EventEnvelope,
  Feed,
  FeedDefinition,
  Run,
  RunStatus,
  RunType,
  SyncContext,
  SyncCredentials,
  SyncResult,
} from './connector-types.js';
36
+
37
+ // HTTP clients
38
+ export {
39
+ createAuthenticatedClient,
40
+ createHttpClient,
41
+ httpClient,
42
+ jsonHttpClient,
43
+ } from './http.js';
44
+
45
+ // Logger
46
+ export { sdkLogger, sdkLogger as logger } from './logger.js';
47
+
48
+ // Retry
49
+ export { withHttpRetry } from './retry.js';
50
+
51
+ // Scoring
52
+ export { calculateEngagementScore } from './scoring.js';
53
+
54
+ // =============================================================================
55
+ // Legacy Crawler SDK (kept for backward compatibility during migration)
56
+ // =============================================================================
57
+
58
+ export type { ApiSessionState } from './api-paginated.js';
59
+ export { ApiPaginatedCrawler } from './api-paginated.js';
60
+ export { BaseCrawler, RateLimitError } from './base.js';
61
+ export type { BrowserLaunchOptions, EnhancedBrowser } from './browser/launcher.js';
62
+ export {
63
+ captureErrorArtifacts,
64
+ launchBrowser,
65
+ withErrorCapture,
66
+ } from './browser/launcher.js';
67
+ export type { StealthBrowser, StealthBrowserOptions } from './browser/stealth.js';
68
+ export {
69
+ getRandomDelay,
70
+ humanWait,
71
+ launchStealthBrowser,
72
+ randomScroll,
73
+ testBotDetection,
74
+ } from './browser/stealth.js';
75
+ export type {
76
+ BrowserCrawlerConfig,
77
+ BrowserPaginationConfig,
78
+ BrowserSessionState,
79
+ CaptchaConfig,
80
+ CookieConsentConfig,
81
+ } from './browser-paginated.js';
82
+ export { BrowserPaginatedCrawler } from './browser-paginated.js';
83
+ export type {
84
+ PageFetchResult,
85
+ PaginatedCheckpoint,
86
+ PaginateResult,
87
+ PaginationConfig,
88
+ } from './paginated.js';
89
+ export { PaginatedCrawler } from './paginated.js';
90
+ export type {
91
+ Checkpoint,
92
+ Content,
93
+ CrawlerAuthEnvField,
94
+ CrawlerAuthEnvKeysMethod,
95
+ CrawlerAuthMethod,
96
+ CrawlerAuthOAuthMethod,
97
+ CrawlerAuthSchema,
98
+ CrawlerOptions,
99
+ CrawlResult,
100
+ Env,
101
+ ICrawler,
102
+ ParentSourceDefinition,
103
+ ScoringConfig,
104
+ SearchResult,
105
+ SessionState,
106
+ } from './types.js';
package/src/logger.ts ADDED
@@ -0,0 +1,10 @@
1
+ import pino from 'pino';
2
+
/**
 * Logger used by the SDK.
 * A dedicated pino instance named 'owletto-sdk', kept separate from the main
 * app logger. The level is read from the LOG_LEVEL environment variable and
 * falls back to 'info' when that variable is unset or empty.
 */
export const sdkLogger = pino({
  level: process.env.LOG_LEVEL || 'info',
  name: 'owletto-sdk',
});