trawl-sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +112 -0
- package/package.json +17 -0
- package/src/client.ts +341 -0
- package/src/errors.ts +42 -0
- package/src/index.ts +4 -0
- package/src/types.ts +82 -0
- package/tests/client.test.ts +329 -0
- package/tsconfig.json +12 -0
package/README.md
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# trawl-sdk
|
|
2
|
+
|
|
3
|
+
The official TypeScript SDK for the [Trawl](https://gettrawl.com) API — extract transcripts from YouTube, podcasts, TikTok, earnings calls, SEC filings, news, and academic papers.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install trawl-sdk
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Quick Start
|
|
12
|
+
|
|
13
|
+
```typescript
|
|
14
|
+
import { TrawlClient } from "trawl-sdk";
|
|
15
|
+
|
|
16
|
+
const client = new TrawlClient({ apiKey: "trawl_your_key" });
|
|
17
|
+
|
|
18
|
+
// Search YouTube
|
|
19
|
+
const results = await client.search.youtube("machine learning", 5);
|
|
20
|
+
results.results.forEach((v) => console.log(`${v.title} — ${v.channel}`));
|
|
21
|
+
|
|
22
|
+
// Extract a transcript (no auth needed)
|
|
23
|
+
const preview = await client.transcripts.preview("https://youtube.com/watch?v=dQw4w9WgXcQ");
|
|
24
|
+
console.log(`${preview.segments.length} segments`);
|
|
25
|
+
|
|
26
|
+
// Bulk download as ZIP
|
|
27
|
+
const zip = await client.bulk.download(["dQw4w9WgXcQ", "9bZkp7q19f0"], "jsonl");
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## All Resources
|
|
31
|
+
|
|
32
|
+
```typescript
|
|
33
|
+
const client = new TrawlClient({ apiKey: "trawl_your_key" });
|
|
34
|
+
|
|
35
|
+
// YouTube
|
|
36
|
+
client.search.youtube(query, maxResults?)
|
|
37
|
+
client.transcripts.extract(url, language?)
|
|
38
|
+
client.transcripts.preview(url, language?)    // No auth
|
|
39
|
+
client.transcripts.download(id, format?)
|
|
40
|
+
client.bulk.download(videoIds, format?) // No auth
|
|
41
|
+
|
|
42
|
+
// Podcasts
|
|
43
|
+
client.search.podcasts(query, maxResults?)
|
|
44
|
+
client.podcasts.episodes(podcastId, maxResults?)
|
|
45
|
+
client.podcasts.transcribe(audioUrl, options?)
|
|
46
|
+
|
|
47
|
+
// Earnings
|
|
48
|
+
client.earnings.search(ticker)
|
|
49
|
+
client.earnings.getTranscript(ticker, year, quarter)
|
|
50
|
+
|
|
51
|
+
// SEC Filings
|
|
52
|
+
client.filings.search(ticker, formType?)
|
|
53
|
+
|
|
54
|
+
// News
|
|
55
|
+
client.news.search(query, language?, country?)
|
|
56
|
+
client.news.getArticleText(url)
|
|
57
|
+
|
|
58
|
+
// Papers
|
|
59
|
+
client.papers.search(query, source?)
|
|
60
|
+
client.papers.extract(arxivId?, doi?)
|
|
61
|
+
|
|
62
|
+
// AI
|
|
63
|
+
client.ai.summarize(transcriptId)
|
|
64
|
+
client.ai.topics(transcriptId)
|
|
65
|
+
client.ai.entities(transcriptId)
|
|
66
|
+
client.ai.previewSummarize(text) // No auth
|
|
67
|
+
|
|
68
|
+
// Jobs
|
|
69
|
+
client.jobs.get(jobId)
|
|
70
|
+
client.jobs.list()
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Error Handling
|
|
74
|
+
|
|
75
|
+
```typescript
|
|
76
|
+
import { AuthenticationError, RateLimitError, NotFoundError } from "trawl-sdk";
|
|
77
|
+
|
|
78
|
+
try {
|
|
79
|
+
await client.transcripts.extract("invalid-url");
|
|
80
|
+
} catch (e) {
|
|
81
|
+
if (e instanceof RateLimitError) {
|
|
82
|
+
console.log(`Retry after ${e.retryAfter}s`);
|
|
83
|
+
} else if (e instanceof AuthenticationError) {
|
|
84
|
+
console.log("Invalid API key");
|
|
85
|
+
}
|
|
86
|
+
}
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
## TypeScript
|
|
90
|
+
|
|
91
|
+
Full TypeScript support with exported interfaces:
|
|
92
|
+
|
|
93
|
+
```typescript
|
|
94
|
+
import type {
|
|
95
|
+
TranscriptResponse,
|
|
96
|
+
SearchResponse,
|
|
97
|
+
SearchResult,
|
|
98
|
+
TranscriptSegment,
|
|
99
|
+
PodcastEpisode,
|
|
100
|
+
} from "trawl-sdk";
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Requirements
|
|
104
|
+
|
|
105
|
+
- Node.js 18+ (native `fetch`)
|
|
106
|
+
- TypeScript 5+ (optional)
|
|
107
|
+
|
|
108
|
+
## Links
|
|
109
|
+
|
|
110
|
+
- [API Documentation](https://gettrawl.com/docs)
|
|
111
|
+
- [RAG Pipeline Tutorial](https://gettrawl.com/blog/youtube-rag-pipeline)
|
|
112
|
+
- [GitHub](https://github.com/trawl/trawl)
|
package/package.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "trawl-sdk",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "TypeScript SDK for the Trawl unified content API",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "src/index.ts",
|
|
7
|
+
"types": "src/index.ts",
|
|
8
|
+
"scripts": {
|
|
9
|
+
"test": "vitest run",
|
|
10
|
+
"build": "tsup src/index.ts --format esm,cjs --dts"
|
|
11
|
+
},
|
|
12
|
+
"devDependencies": {
|
|
13
|
+
"typescript": "^5.5.0",
|
|
14
|
+
"vitest": "^2.0.0",
|
|
15
|
+
"tsup": "^8.0.0"
|
|
16
|
+
}
|
|
17
|
+
}
|
package/src/client.ts
ADDED
|
@@ -0,0 +1,341 @@
|
|
|
1
|
+
import {
|
|
2
|
+
AuthenticationError,
|
|
3
|
+
NotFoundError,
|
|
4
|
+
RateLimitError,
|
|
5
|
+
TrawlError,
|
|
6
|
+
ValidationError,
|
|
7
|
+
} from "./errors";
|
|
8
|
+
import type {
|
|
9
|
+
Job,
|
|
10
|
+
JobListResponse,
|
|
11
|
+
PodcastEpisode,
|
|
12
|
+
PodcastEpisodeListResponse,
|
|
13
|
+
PodcastSearchResponse,
|
|
14
|
+
SearchResponse,
|
|
15
|
+
TranscriptPreviewResponse,
|
|
16
|
+
TranscriptResponse,
|
|
17
|
+
} from "./types";
|
|
18
|
+
|
|
19
|
+
const DEFAULT_BASE_URL = "https://api.gettrawl.com";
|
|
20
|
+
|
|
21
|
+
/**
 * Parses a `Retry-After` header value into a number of seconds.
 *
 * Accepts both forms the header may take: a delay in seconds or an HTTP date.
 * Returns `undefined` when the header is absent or cannot be parsed, so callers
 * never receive `NaN`.
 */
function parseRetryAfter(value: string | null): number | undefined {
  if (!value) return undefined;
  const seconds = parseInt(value, 10);
  if (!Number.isNaN(seconds)) return seconds;
  const when = Date.parse(value);
  if (Number.isNaN(when)) return undefined;
  // A date in the past means "retry now".
  return Math.max(0, Math.ceil((when - Date.now()) / 1000));
}

/**
 * Converts a non-2xx `Response` into the matching SDK error and throws it.
 *
 * The error message is the `detail` field of the JSON body when one is present
 * and truthy, otherwise `response.statusText`. A `detail` that is not a string
 * (for example a list of validation problems) is serialised as JSON instead of
 * being coerced to "[object Object]".
 *
 * @param response - The failed response returned by `fetch`.
 * @throws AuthenticationError on 401.
 * @throws NotFoundError on 404.
 * @throws ValidationError on 422.
 * @throws RateLimitError on 429, carrying the parsed `Retry-After` delay if any.
 * @throws TrawlError (with the status code) for every other status.
 */
async function handleError(response: Response): Promise<never> {
  let detail: string = response.statusText;
  try {
    const body: unknown = await response.json();
    const raw =
      typeof body === "object" && body !== null
        ? (body as { detail?: unknown }).detail
        : undefined;
    if (raw) {
      detail = typeof raw === "string" ? raw : JSON.stringify(raw);
    }
  } catch {
    // Body was not JSON (or not serialisable); fall back to the status text.
    detail = response.statusText;
  }

  switch (response.status) {
    case 401:
      throw new AuthenticationError(detail);
    case 404:
      throw new NotFoundError(detail);
    case 422:
      throw new ValidationError(detail);
    case 429:
      throw new RateLimitError(
        detail,
        parseRetryAfter(response.headers.get("Retry-After")),
      );
    default:
      throw new TrawlError(detail, response.status);
  }
}
|
|
48
|
+
|
|
49
|
+
/**
 * Options accepted by the `TrawlClient` constructor. Both fields are optional.
 */
export interface TrawlClientOptions {
  /** When set, sent as the `X-API-Key` header on requests that use the shared headers. */
  apiKey?: string;
  /** Overrides the default API origin (`https://api.gettrawl.com`). */
  baseUrl?: string;
}
|
|
53
|
+
|
|
54
|
+
/**
 * Client for the `/api/transcripts` endpoints.
 */
class TranscriptsResource {
  constructor(
    private baseUrl: string,
    private headers: Record<string, string>,
  ) {}

  /**
   * POSTs `{ url, language? }` to `/api/transcripts` with the client's headers.
   *
   * @param url - Source URL to extract a transcript from.
   * @param language - Optional language; omitted from the body when falsy.
   * @throws TrawlError (or a subclass) when the response is not OK.
   */
  async extract(
    url: string,
    language?: string,
  ): Promise<TranscriptResponse> {
    const body: Record<string, unknown> = { url };
    if (language) body.language = language;
    const res = await fetch(`${this.baseUrl}/api/transcripts`, {
      method: "POST",
      headers: this.headers,
      body: JSON.stringify(body),
    });
    if (!res.ok) await handleError(res);
    return res.json();
  }

  /**
   * POSTs `{ url, language? }` to `/api/transcripts/preview`.
   *
   * Only a `Content-Type` header is sent, so the API key is never attached to
   * this request.
   *
   * @throws TrawlError (or a subclass) when the response is not OK.
   */
  async preview(
    url: string,
    language?: string,
  ): Promise<TranscriptPreviewResponse> {
    const body: Record<string, unknown> = { url };
    if (language) body.language = language;
    const res = await fetch(`${this.baseUrl}/api/transcripts/preview`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(body),
    });
    if (!res.ok) await handleError(res);
    return res.json();
  }

  /**
   * GETs `/api/transcripts/{transcriptId}/download` and returns the body as text.
   *
   * @param transcriptId - Numeric transcript id placed in the path.
   * @param format - Value of the `format` query parameter (default `"txt"`).
   * @throws TrawlError (or a subclass) when the response is not OK.
   */
  async download(transcriptId: number, format = "txt"): Promise<string> {
    // Build the query with URLSearchParams so `format` is always URL-encoded,
    // matching how the search methods build their query strings.
    const params = new URLSearchParams({ format });
    const res = await fetch(
      `${this.baseUrl}/api/transcripts/${transcriptId}/download?${params}`,
      { headers: this.headers },
    );
    if (!res.ok) await handleError(res);
    return res.text();
  }
}
|
|
99
|
+
|
|
100
|
+
/**
 * Client for the search endpoints (`/api/search` and `/api/podcasts/search`).
 */
class SearchResource {
  constructor(
    private baseUrl: string,
    private headers: Record<string, string>,
  ) {}

  /**
   * GETs `/api/search` with `q` and `max_results` query parameters.
   *
   * @param maxResults - Defaults to 10.
   */
  async youtube(query: string, maxResults = 10): Promise<SearchResponse> {
    return this.runSearch<SearchResponse>("/api/search", query, maxResults);
  }

  /**
   * GETs `/api/podcasts/search` with `q` and `max_results` query parameters.
   *
   * @param maxResults - Defaults to 20.
   */
  async podcasts(query: string, maxResults = 20): Promise<PodcastSearchResponse> {
    return this.runSearch<PodcastSearchResponse>(
      "/api/podcasts/search",
      query,
      maxResults,
    );
  }

  /** Shared GET logic: builds the query string, sends the request, parses the JSON. */
  private async runSearch<T>(
    path: string,
    query: string,
    maxResults: number,
  ): Promise<T> {
    const qs = new URLSearchParams({
      q: query,
      max_results: String(maxResults),
    });
    const endpoint = `${this.baseUrl}${path}?${qs}`;
    const res = await fetch(endpoint, { headers: this.headers });
    if (!res.ok) await handleError(res);
    return res.json();
  }
}
|
|
130
|
+
|
|
131
|
+
/**
 * Client for the `/api/podcasts` endpoints.
 */
class PodcastsResource {
  constructor(
    private baseUrl: string,
    private headers: Record<string, string>,
  ) {}

  /**
   * GETs `/api/podcasts/{podcastId}/episodes` and returns the `episodes` array
   * of the response.
   *
   * @param podcastId - Numeric podcast id placed in the path.
   * @param maxResults - Value of the `max_results` query parameter (default 50).
   * @throws TrawlError (or a subclass) when the response is not OK.
   */
  async episodes(
    podcastId: number,
    maxResults = 50,
  ): Promise<PodcastEpisode[]> {
    const params = new URLSearchParams({
      max_results: String(maxResults),
    });
    const res = await fetch(
      `${this.baseUrl}/api/podcasts/${podcastId}/episodes?${params}`,
      { headers: this.headers },
    );
    if (!res.ok) await handleError(res);
    const data: PodcastEpisodeListResponse = await res.json();
    return data.episodes;
  }

  /**
   * POSTs a transcription request to `/api/podcasts/transcribe` and returns the
   * job described by the response.
   *
   * @param audioUrl - Sent as `audio_url` in the JSON body.
   * @param options - Extra fields merged into the JSON body.
   * @throws TrawlError (or a subclass) when the response is not OK.
   */
  async transcribe(
    audioUrl: string,
    options?: Record<string, unknown>,
  ): Promise<Job> {
    // Spread `options` first so a stray `audio_url` key inside it can never
    // override the explicit `audioUrl` argument.
    const payload = { ...options, audio_url: audioUrl };
    const res = await fetch(`${this.baseUrl}/api/podcasts/transcribe`, {
      method: "POST",
      headers: this.headers,
      body: JSON.stringify(payload),
    });
    if (!res.ok) await handleError(res);
    return res.json();
  }
}
|
|
166
|
+
|
|
167
|
+
/**
 * Client for the `/api/jobs` endpoints.
 */
class JobsResource {
  constructor(
    private baseUrl: string,
    private headers: Record<string, string>,
  ) {}

  /**
   * GETs `/api/jobs/{jobId}`.
   *
   * @param jobId - Job identifier; URL-encoded before being placed in the path.
   * @throws TrawlError (or a subclass) when the response is not OK.
   */
  async get(jobId: string): Promise<Job> {
    // jobId is an arbitrary string: encode it so characters such as "/" or "?"
    // cannot change which endpoint is requested.
    const res = await fetch(
      `${this.baseUrl}/api/jobs/${encodeURIComponent(jobId)}`,
      { headers: this.headers },
    );
    if (!res.ok) await handleError(res);
    return res.json();
  }

  /**
   * GETs `/api/jobs` and returns the `items` array of the response.
   *
   * @throws TrawlError (or a subclass) when the response is not OK.
   */
  async list(): Promise<Job[]> {
    const res = await fetch(`${this.baseUrl}/api/jobs`, {
      headers: this.headers,
    });
    if (!res.ok) await handleError(res);
    const data: JobListResponse = await res.json();
    return data.items;
  }
}
|
|
190
|
+
|
|
191
|
+
/**
 * Client for the `/api/bulk-download` endpoint.
 */
class BulkResource {
  constructor(
    private baseUrl: string,
    private headers: Record<string, string>,
  ) {}

  /**
   * POSTs `{ video_ids, format }` to `/api/bulk-download` and returns the raw
   * response bytes.
   *
   * Only a `Content-Type` header is sent; the client's shared headers (and
   * therefore the API key) are not used here.
   *
   * @param videoIds - Sent as `video_ids`.
   * @param format - Defaults to `"txt"`.
   */
  async download(
    videoIds: string[],
    format = "txt",
  ): Promise<ArrayBuffer> {
    const endpoint = `${this.baseUrl}/api/bulk-download`;
    const payload = JSON.stringify({ video_ids: videoIds, format });
    const res = await fetch(endpoint, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: payload,
    });
    if (!res.ok) await handleError(res);
    return res.arrayBuffer();
  }
}
|
|
210
|
+
|
|
211
|
+
/**
 * Client for the `/api/earnings` endpoints.
 */
class EarningsResource {
  constructor(private baseUrl: string, private headers: Record<string, string>) {}

  /**
   * GETs `/api/earnings/search?ticker=…` (ticker URL-encoded).
   *
   * @throws TrawlError (or a subclass) when the response is not OK.
   */
  async search(ticker: string): Promise<Record<string, unknown>> {
    const res = await fetch(
      `${this.baseUrl}/api/earnings/search?ticker=${encodeURIComponent(ticker)}`,
      { headers: this.headers },
    );
    if (!res.ok) await handleError(res);
    return res.json();
  }

  /**
   * GETs `/api/earnings/{ticker}/{year}/{quarter}`.
   *
   * @param ticker - URL-encoded before being placed in the path, as in `search`.
   * @throws TrawlError (or a subclass) when the response is not OK.
   */
  async getTranscript(ticker: string, year: number, quarter: number): Promise<Record<string, unknown>> {
    // Encode the ticker so symbols containing "/" or other reserved characters
    // cannot alter the request path.
    const res = await fetch(
      `${this.baseUrl}/api/earnings/${encodeURIComponent(ticker)}/${year}/${quarter}`,
      { headers: this.headers },
    );
    if (!res.ok) await handleError(res);
    return res.json();
  }
}
|
|
226
|
+
|
|
227
|
+
/**
 * Client for the `/api/filings` endpoints.
 */
class FilingsResource {
  constructor(private baseUrl: string, private headers: Record<string, string>) {}

  /**
   * GETs `/api/filings/search` with a `ticker` query parameter and, when
   * `formType` is truthy, a `form_type` parameter.
   */
  async search(ticker: string, formType?: string): Promise<Record<string, unknown>> {
    const qs = new URLSearchParams(
      formType ? { ticker, form_type: formType } : { ticker },
    );
    const endpoint = `${this.baseUrl}/api/filings/search?${qs}`;
    const res = await fetch(endpoint, { headers: this.headers });
    if (!res.ok) await handleError(res);
    return res.json();
  }
}
|
|
238
|
+
|
|
239
|
+
/**
 * Client for the `/api/news` endpoints.
 */
class NewsResource {
  constructor(private baseUrl: string, private headers: Record<string, string>) {}

  /**
   * GETs `/api/news/search` with `q` plus, when truthy, `language` and
   * `country` query parameters.
   */
  async search(query: string, language?: string, country?: string): Promise<Record<string, unknown>> {
    const qs = new URLSearchParams({ q: query });
    const optional: Array<[string, string | undefined]> = [
      ["language", language],
      ["country", country],
    ];
    for (const [key, value] of optional) {
      if (value) qs.set(key, value);
    }
    const endpoint = `${this.baseUrl}/api/news/search?${qs}`;
    const res = await fetch(endpoint, { headers: this.headers });
    if (!res.ok) await handleError(res);
    return res.json();
  }

  /** GETs `/api/news/article?url=…` with the article URL percent-encoded. */
  async getArticleText(url: string): Promise<Record<string, unknown>> {
    const encoded = encodeURIComponent(url);
    const endpoint = `${this.baseUrl}/api/news/article?url=${encoded}`;
    const res = await fetch(endpoint, { headers: this.headers });
    if (!res.ok) await handleError(res);
    return res.json();
  }
}
|
|
257
|
+
|
|
258
|
+
/**
 * Client for the `/api/papers` endpoints.
 */
class PapersResourceSDK {
  constructor(private baseUrl: string, private headers: Record<string, string>) {}

  /**
   * GETs `/api/papers/search` with `q` and `source` query parameters.
   *
   * @param source - Defaults to `"all"`.
   */
  async search(query: string, source = "all"): Promise<Record<string, unknown>> {
    const qs = new URLSearchParams({ q: query, source });
    const endpoint = `${this.baseUrl}/api/papers/search?${qs}`;
    const res = await fetch(endpoint, { headers: this.headers });
    if (!res.ok) await handleError(res);
    return res.json();
  }

  /**
   * POSTs `{ arxiv_id, doi }` to `/api/papers/extract`. A missing or empty
   * identifier is sent as `null`.
   */
  async extract(arxivId?: string, doi?: string): Promise<Record<string, unknown>> {
    const payload = {
      arxiv_id: arxivId ? arxivId : null,
      doi: doi ? doi : null,
    };
    const res = await fetch(`${this.baseUrl}/api/papers/extract`, {
      method: "POST",
      headers: this.headers,
      body: JSON.stringify(payload),
    });
    if (!res.ok) await handleError(res);
    return res.json();
  }
}
|
|
277
|
+
|
|
278
|
+
/**
 * Client for the `/api/ai` endpoints.
 */
class AIResource {
  constructor(private baseUrl: string, private headers: Record<string, string>) {}

  /** POSTs (without a body) to `/api/ai/{transcriptId}/summarize`. */
  async summarize(transcriptId: number): Promise<Record<string, unknown>> {
    return this.runOnTranscript(transcriptId, "summarize");
  }

  /** POSTs (without a body) to `/api/ai/{transcriptId}/topics`. */
  async topics(transcriptId: number): Promise<Record<string, unknown>> {
    return this.runOnTranscript(transcriptId, "topics");
  }

  /** POSTs (without a body) to `/api/ai/{transcriptId}/entities`. */
  async entities(transcriptId: number): Promise<Record<string, unknown>> {
    return this.runOnTranscript(transcriptId, "entities");
  }

  /** POSTs `{ text }` to `/api/ai/preview/summarize` using the client's headers. */
  async previewSummarize(text: string): Promise<Record<string, unknown>> {
    const endpoint = `${this.baseUrl}/api/ai/preview/summarize`;
    const res = await fetch(endpoint, {
      method: "POST",
      headers: this.headers,
      body: JSON.stringify({ text }),
    });
    if (!res.ok) await handleError(res);
    return res.json();
  }

  /** Shared request logic for the three per-transcript actions above. */
  private async runOnTranscript(
    transcriptId: number,
    action: string,
  ): Promise<Record<string, unknown>> {
    const endpoint = `${this.baseUrl}/api/ai/${transcriptId}/${action}`;
    const res = await fetch(endpoint, { method: "POST", headers: this.headers });
    if (!res.ok) await handleError(res);
    return res.json();
  }
}
|
|
308
|
+
|
|
309
|
+
/**
 * Entry point of the SDK: groups one resource object per API area.
 *
 * Every resource receives the same base URL and the same headers object
 * (`Content-Type: application/json` plus `X-API-Key` when an API key is given).
 */
export class TrawlClient {
  public transcripts: TranscriptsResource;
  public search: SearchResource;
  public podcasts: PodcastsResource;
  public jobs: JobsResource;
  public bulk: BulkResource;
  public earnings: EarningsResource;
  public filings: FilingsResource;
  public news: NewsResource;
  public papers: PapersResourceSDK;
  public ai: AIResource;

  /**
   * @param options - Optional API key and base URL. An empty or missing
   *   `baseUrl` falls back to the default origin.
   */
  constructor(options: TrawlClientOptions = {}) {
    // Resources append paths that start with "/api/...", so drop any trailing
    // slash here to avoid producing "//api/..." URLs.
    const baseUrl = (options.baseUrl || DEFAULT_BASE_URL).replace(/\/+$/, "");
    const headers: Record<string, string> = {
      "Content-Type": "application/json",
    };
    if (options.apiKey) {
      headers["X-API-Key"] = options.apiKey;
    }

    this.transcripts = new TranscriptsResource(baseUrl, headers);
    this.search = new SearchResource(baseUrl, headers);
    this.podcasts = new PodcastsResource(baseUrl, headers);
    this.jobs = new JobsResource(baseUrl, headers);
    this.bulk = new BulkResource(baseUrl, headers);
    this.earnings = new EarningsResource(baseUrl, headers);
    this.filings = new FilingsResource(baseUrl, headers);
    this.news = new NewsResource(baseUrl, headers);
    this.papers = new PapersResourceSDK(baseUrl, headers);
    this.ai = new AIResource(baseUrl, headers);
  }
}
|
package/src/errors.ts
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
/**
 * Base class of every error thrown by the SDK.
 *
 * `statusCode` holds the HTTP status when the error originates from a response.
 */
export class TrawlError extends Error {
  constructor(
    message: string,
    public statusCode?: number,
  ) {
    super(message);
    // When the build is down-levelled to ES5, `extends Error` loses the subclass
    // prototype and `instanceof` checks fail; restoring it is harmless elsewhere.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = "TrawlError";
  }
}

// Backwards compatibility alias
export const TranscriptaError = TrawlError;
// Type-side twin of the alias so `TranscriptaError` also works in type positions.
export type TranscriptaError = TrawlError;

/** Thrown for HTTP 401 responses. */
export class AuthenticationError extends TrawlError {
  constructor(message: string) {
    super(message, 401);
    this.name = "AuthenticationError";
  }
}

/** Thrown for HTTP 404 responses. */
export class NotFoundError extends TrawlError {
  constructor(message: string) {
    super(message, 404);
    this.name = "NotFoundError";
  }
}

/**
 * Thrown for HTTP 429 responses.
 *
 * `retryAfter` is whatever delay the caller parsed from the response, if any.
 */
export class RateLimitError extends TrawlError {
  retryAfter?: number;
  constructor(message: string, retryAfter?: number) {
    super(message, 429);
    this.name = "RateLimitError";
    this.retryAfter = retryAfter;
  }
}

/** Thrown for HTTP 422 responses. */
export class ValidationError extends TrawlError {
  constructor(message: string) {
    super(message, 422);
    this.name = "ValidationError";
  }
}
|
package/src/index.ts
ADDED
package/src/types.ts
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
/** One entry of a transcript's `segments` array. */
export interface TranscriptSegment {
  /** Text of the segment. */
  text: string;
  /** Start position of the segment (presumably seconds; confirm against the API docs). */
  start: number;
  /** Length of the segment (presumably seconds; confirm against the API docs). */
  duration: number;
}
|
|
6
|
+
|
|
7
|
+
/**
 * Stored transcript: the preview fields plus an `id` and a `created_at` stamp.
 * This is the return type of `transcripts.extract`.
 */
export interface TranscriptResponse {
  id: number;
  video_id: string;
  /** `null` when no title is available. */
  title: string | null;
  /** `null` when no channel is available. */
  channel: string | null;
  /** `null` when no thumbnail is available. */
  thumbnail: string | null;
  language: string;
  is_auto_generated: boolean;
  segments: Array<TranscriptSegment>;
  /** Creation timestamp as a string (format not shown here). */
  created_at: string;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Return type of `transcripts.preview`: transcript data without an `id` or a
 * creation timestamp.
 */
export interface TranscriptPreviewResponse {
  video_id: string;
  /** `null` when no title is available. */
  title: string | null;
  /** `null` when no channel is available. */
  channel: string | null;
  /** `null` when no thumbnail is available. */
  thumbnail: string | null;
  language: string;
  is_auto_generated: boolean;
  segments: Array<TranscriptSegment>;
}
|
|
28
|
+
|
|
29
|
+
/** One entry of `SearchResponse.results`; every field is a required string. */
export interface SearchResult {
  video_id: string;
  title: string;
  channel: string;
  thumbnail: string;
  published_at: string;
  description: string;
}
|
|
37
|
+
|
|
38
|
+
/** Return type of `search.youtube`: a `query` string and its result list. */
export interface SearchResponse {
  query: string;
  results: Array<SearchResult>;
}
|
|
42
|
+
|
|
43
|
+
/** One entry of `PodcastSearchResponse.results`. */
export interface PodcastSearchResult {
  /** Numeric id (presumably the `podcastId` expected by `podcasts.episodes`; confirm). */
  id: number;
  title: string;
  author: string;
  description: string;
  artwork_url: string;
  feed_url: string;
  episode_count: number;
}
|
|
52
|
+
|
|
53
|
+
/** Return type of `search.podcasts`: a `query` string and its result list. */
export interface PodcastSearchResponse {
  query: string;
  results: Array<PodcastSearchResult>;
}
|
|
57
|
+
|
|
58
|
+
/** One podcast episode, as returned in the array from `podcasts.episodes`. */
export interface PodcastEpisode {
  id: number;
  title: string;
  description: string;
  audio_url: string;
  duration_seconds: number;
  published_at: string;
  has_transcript: boolean;
}
|
|
67
|
+
|
|
68
|
+
/**
 * Envelope parsed by `podcasts.episodes`, which returns only its `episodes`
 * array to the caller.
 */
export interface PodcastEpisodeListResponse {
  podcast_id: number;
  episodes: Array<PodcastEpisode>;
}
|
|
72
|
+
|
|
73
|
+
/** A job, as returned by `jobs.get`, `jobs.list` and `podcasts.transcribe`. */
export interface Job {
  id: string;
  /** Free-form status string; the set of possible values is not declared here. */
  status: string;
  created_at: string;
}
|
|
78
|
+
|
|
79
|
+
/** Envelope parsed by `jobs.list`, which returns only its `items` array to the caller. */
export interface JobListResponse {
  items: Array<Job>;
  total: number;
}
|
|
@@ -0,0 +1,329 @@
|
|
|
1
|
+
import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
2
|
+
import { TrawlClient } from "../src/client";
|
|
3
|
+
import {
|
|
4
|
+
AuthenticationError,
|
|
5
|
+
NotFoundError,
|
|
6
|
+
RateLimitError,
|
|
7
|
+
ValidationError,
|
|
8
|
+
TrawlError,
|
|
9
|
+
} from "../src/errors";
|
|
10
|
+
|
|
11
|
+
// Every test talks to this spy instead of the network.
const mockFetch = vi.fn();
globalThis.fetch = mockFetch;

// Drop queued responses and recorded calls so tests cannot leak into each other.
// Keep the braces: returning the mock from the hook would register it as a cleanup callback.
beforeEach(() => {
  mockFetch.mockReset();
});
|
|
17
|
+
|
|
18
|
+
/**
 * Builds a minimal stand-in for a fetch `Response`.
 *
 * @param status - HTTP status; `ok` is derived from it (2xx means `true`).
 * @param body - Value resolved by `json()`; `text()` resolves it verbatim when
 *   it is a string and JSON-serialised otherwise.
 * @param headers - Response headers. They are wrapped in a real `Headers`
 *   object so lookups are case-insensitive, exactly as with a real response.
 * @returns An object exposing the subset of `Response` the client reads.
 */
function mockResponse(status: number, body: unknown = {}, headers: Record<string, string> = {}) {
  const ok = status >= 200 && status < 300;
  const asText = typeof body === "string" ? body : JSON.stringify(body);
  return {
    ok,
    status,
    statusText: status === 200 ? "OK" : "Error",
    json: () => Promise.resolve(body),
    text: () => Promise.resolve(asText),
    arrayBuffer: () => Promise.resolve(new ArrayBuffer(8)),
    headers: new Headers(headers),
  };
}
|
|
29
|
+
|
|
30
|
+
// Canonical transcript payload shared by the transcript tests.
const FIRST_SEGMENT = { text: "Hello", start: 0, duration: 1 };

const TRANSCRIPT = {
  id: 1,
  video_id: "abc123",
  title: "Test",
  channel: "Chan",
  thumbnail: null,
  language: "en",
  is_auto_generated: true,
  segments: [FIRST_SEGMENT],
  created_at: "2026-01-01",
};
|
|
41
|
+
|
|
42
|
+
describe("TrawlClient", () => {
  // The constructor wires up ten resources; check every one of them so a
  // forgotten assignment cannot slip through.
  it("creates client with API key", () => {
    const client = new TrawlClient({ apiKey: "trawl_test", baseUrl: "http://test" });
    expect(client.transcripts).toBeDefined();
    expect(client.search).toBeDefined();
    expect(client.podcasts).toBeDefined();
    expect(client.jobs).toBeDefined();
    expect(client.bulk).toBeDefined();
    expect(client.earnings).toBeDefined();
    expect(client.filings).toBeDefined();
    expect(client.news).toBeDefined();
    expect(client.papers).toBeDefined();
    expect(client.ai).toBeDefined();
  });
});
|
|
52
|
+
|
|
53
|
+
describe("transcripts", () => {
  it("extracts a transcript", async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(200, TRANSCRIPT));
    const client = new TrawlClient({ apiKey: "trawl_test", baseUrl: "http://test" });

    const result = await client.transcripts.extract("https://youtube.com/watch?v=abc");

    expect(mockFetch).toHaveBeenCalledWith(
      "http://test/api/transcripts",
      expect.objectContaining({ method: "POST" }),
    );
    expect(result.video_id).toBe("abc123");
    expect(result.segments).toHaveLength(1);
  });

  it("previews a transcript (no auth)", async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(200, { ...TRANSCRIPT, id: undefined }));
    const client = new TrawlClient({ baseUrl: "http://test" });

    const result = await client.transcripts.preview("https://youtube.com/watch?v=abc");

    expect(result.video_id).toBe("abc123");
  });

  it("downloads a transcript", async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(200, "Hello world"));
    const client = new TrawlClient({ apiKey: "trawl_test", baseUrl: "http://test" });

    const result = await client.transcripts.download(1, "txt");

    expect(result).toContain("Hello");
  });

  it("throws AuthenticationError on 401", async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(401, { detail: "Not authenticated" }));
    const client = new TrawlClient({ baseUrl: "http://test" });

    await expect(client.transcripts.extract("https://youtube.com/watch?v=abc")).rejects.toThrow(
      AuthenticationError,
    );
  });

  it("throws NotFoundError on 404", async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(404, { detail: "Not found" }));
    const client = new TrawlClient({ apiKey: "test", baseUrl: "http://test" });

    await expect(client.transcripts.extract("https://youtube.com/watch?v=abc")).rejects.toThrow(
      NotFoundError,
    );
  });

  it("throws ValidationError on 422", async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(422, { detail: "Invalid URL" }));
    const client = new TrawlClient({ apiKey: "test", baseUrl: "http://test" });

    await expect(client.transcripts.extract("bad-url")).rejects.toThrow(ValidationError);
  });

  it("throws RateLimitError on 429 with retry-after", async () => {
    mockFetch.mockResolvedValueOnce(
      mockResponse(429, { detail: "Rate limited" }, { "Retry-After": "30" }),
    );
    const client = new TrawlClient({ apiKey: "test", baseUrl: "http://test" });

    // Capture the rejection as a value. With try/catch plus expect.fail(), the
    // failure thrown by expect.fail() is itself caught and reported as a
    // confusing "not a RateLimitError" mismatch.
    const outcome: unknown = await client.transcripts
      .extract("https://youtube.com/watch?v=abc")
      .then(
        () => undefined,
        (e: unknown) => e,
      );

    expect(outcome).toBeInstanceOf(RateLimitError);
    expect((outcome as RateLimitError).retryAfter).toBe(30);
  });
});
|
|
126
|
+
|
|
127
|
+
describe("search", () => {
  // Both searches run without an API key.
  const anonymousClient = () => new TrawlClient({ baseUrl: "http://test" });

  it("searches YouTube", async () => {
    const video = { video_id: "v1", title: "Python Tutorial", channel: "Ch", thumbnail: "", published_at: "", description: "" };
    mockFetch.mockResolvedValueOnce(mockResponse(200, { query: "python", results: [video] }));

    const { query, results } = await anonymousClient().search.youtube("python", 5);

    expect(query).toBe("python");
    expect(results).toHaveLength(1);
  });

  it("searches podcasts", async () => {
    const podcast = { id: 1, title: "AI Pod", author: "Host", description: "", artwork_url: "", feed_url: "", episode_count: 10 };
    mockFetch.mockResolvedValueOnce(mockResponse(200, { query: "ai", results: [podcast] }));

    const found = await anonymousClient().search.podcasts("ai");

    expect(found.results[0].title).toBe("AI Pod");
  });
});
|
|
157
|
+
|
|
158
|
+
describe("podcasts", () => {
  it("lists episodes", async () => {
    const episode = { id: 1, title: "Ep 1", description: "", audio_url: "", duration_seconds: 3600, published_at: "", has_transcript: false };
    mockFetch.mockResolvedValueOnce(mockResponse(200, { podcast_id: 1, episodes: [episode] }));
    const sdk = new TrawlClient({ baseUrl: "http://test" });

    const listed = await sdk.podcasts.episodes(1);

    expect(listed).toHaveLength(1);
    expect(listed[0].title).toBe("Ep 1");
  });

  it("submits transcription job", async () => {
    const pendingJob = { id: "job-1", status: "pending", created_at: "2026-01-01" };
    mockFetch.mockResolvedValueOnce(mockResponse(200, pendingJob));
    const sdk = new TrawlClient({ apiKey: "test", baseUrl: "http://test" });

    const { id, status } = await sdk.podcasts.transcribe("https://example.com/ep.mp3");

    expect(id).toBe("job-1");
    expect(status).toBe("pending");
  });
});
|
|
186
|
+
|
|
187
|
+
describe("jobs", () => {
  // Both job endpoints are exercised with an authenticated client.
  const authedClient = () => new TrawlClient({ apiKey: "test", baseUrl: "http://test" });

  it("gets a job", async () => {
    const finished = { id: "j1", status: "completed", created_at: "2026-01-01" };
    mockFetch.mockResolvedValueOnce(mockResponse(200, finished));

    const fetched = await authedClient().jobs.get("j1");

    expect(fetched.status).toBe("completed");
  });

  it("lists jobs", async () => {
    const page = { items: [{ id: "j1", status: "pending", created_at: "2026-01-01" }], total: 1 };
    mockFetch.mockResolvedValueOnce(mockResponse(200, page));

    const all = await authedClient().jobs.list();

    expect(all).toHaveLength(1);
  });
});
|
|
210
|
+
|
|
211
|
+
describe("bulk", () => {
  it("downloads ZIP", async () => {
    mockFetch.mockResolvedValueOnce(mockResponse(200));
    const sdk = new TrawlClient({ baseUrl: "http://test" });

    const archive = await sdk.bulk.download(["v1", "v2"], "jsonl");

    // The mock always resolves arrayBuffer() with an 8-byte buffer.
    expect(archive).toBeInstanceOf(ArrayBuffer);
    const expectedInit = expect.objectContaining({ method: "POST" });
    expect(mockFetch).toHaveBeenCalledWith("http://test/api/bulk-download", expectedInit);
  });
});
|
|
225
|
+
|
|
226
|
+
describe("earnings", () => {
  const anonymousClient = () => new TrawlClient({ baseUrl: "http://test" });

  it("searches earnings by ticker", async () => {
    const payload = { ticker: "AAPL", results: [{ year: 2024, quarter: 4 }] };
    mockFetch.mockResolvedValueOnce(mockResponse(200, payload));

    const found = await anonymousClient().earnings.search("AAPL");

    expect(found.ticker).toBe("AAPL");
  });

  it("gets earnings transcript", async () => {
    const payload = { ticker: "AAPL", sections: [] };
    mockFetch.mockResolvedValueOnce(mockResponse(200, payload));

    const transcript = await anonymousClient().earnings.getTranscript("AAPL", 2024, 4);

    expect(transcript.ticker).toBe("AAPL");
  });
});
|
|
241
|
+
|
|
242
|
+
// Filings resource: a search must return the single mocked filing under `results`.
describe("filings", () => {
  it("searches SEC filings", async () => {
    const filingsPayload = { results: [{ form_type: "10-K" }] };
    mockFetch.mockResolvedValueOnce(mockResponse(200, filingsPayload));

    const sdk = new TrawlClient({ baseUrl: "http://test" });
    const found = await sdk.filings.search("AAPL", "10-K");

    expect(found.results).toHaveLength(1);
  });
});
|
|
250
|
+
|
|
251
|
+
// News resource: search exposes the mocked `articles`, and article-text
// extraction exposes the mocked `word_count`.
describe("news", () => {
  it("searches global news", async () => {
    const newsPayload = { articles: [{ title: "Fed Holds Rates" }] };
    mockFetch.mockResolvedValueOnce(mockResponse(200, newsPayload));

    const sdk = new TrawlClient({ baseUrl: "http://test" });
    const found = await sdk.news.search("Federal Reserve");

    expect(found.articles).toHaveLength(1);
  });

  it("extracts article text", async () => {
    const articlePayload = { text: "Article content", word_count: 50 };
    mockFetch.mockResolvedValueOnce(mockResponse(200, articlePayload));

    const sdk = new TrawlClient({ baseUrl: "http://test" });
    const article = await sdk.news.getArticleText("https://example.com");

    expect(article.word_count).toBe(50);
  });
});
|
|
266
|
+
|
|
267
|
+
// Papers resource: search exposes the mocked `results`, and extract exposes
// the mocked `paper.paper_id`.
describe("papers", () => {
  it("searches papers", async () => {
    const searchPayload = { results: [{ title: "Attention" }] };
    mockFetch.mockResolvedValueOnce(mockResponse(200, searchPayload));

    const sdk = new TrawlClient({ baseUrl: "http://test" });
    const found = await sdk.papers.search("transformer");

    expect(found.results).toHaveLength(1);
  });

  it("extracts paper", async () => {
    const paperPayload = { paper: { paper_id: "2301.08745" } };
    mockFetch.mockResolvedValueOnce(mockResponse(200, paperPayload));

    const sdk = new TrawlClient({ baseUrl: "http://test" });
    const extracted = await sdk.papers.extract("2301.08745");

    expect(extracted.paper.paper_id).toBe("2301.08745");
  });
});
|
|
282
|
+
|
|
283
|
+
// AI resource: summarize/entities are exercised with an apiKey-configured
// client; previewSummarize is exercised with a client built without one.
describe("ai", () => {
  it("summarizes transcript", async () => {
    const summaryPayload = { title: "Summary", key_points: [] };
    mockFetch.mockResolvedValueOnce(mockResponse(200, summaryPayload));

    const sdk = new TrawlClient({ apiKey: "test", baseUrl: "http://test" });
    const summary = await sdk.ai.summarize(1);

    expect(summary.title).toBe("Summary");
  });

  it("extracts entities", async () => {
    const entitiesPayload = { entities: { people: [] } };
    mockFetch.mockResolvedValueOnce(mockResponse(200, entitiesPayload));

    const sdk = new TrawlClient({ apiKey: "test", baseUrl: "http://test" });
    const extracted = await sdk.ai.entities(1);

    expect(extracted.entities).toBeDefined();
  });

  it("preview summarizes text", async () => {
    const previewPayload = { title: "Preview" };
    mockFetch.mockResolvedValueOnce(mockResponse(200, previewPayload));

    const sdk = new TrawlClient({ baseUrl: "http://test" });
    const preview = await sdk.ai.previewSummarize("Some text");

    expect(preview.title).toBe("Preview");
  });
});
|
|
305
|
+
|
|
306
|
+
// Smoke check: a freshly constructed client exposes each resource property.
describe("has all resources", () => {
  it("exposes all 10 resources", () => {
    const sdk = new TrawlClient({ baseUrl: "http://test" });

    // Checked in the same order as the individual assertions this loop replaces.
    const resourceNames = [
      "transcripts",
      "search",
      "podcasts",
      "jobs",
      "bulk",
      "earnings",
      "filings",
      "news",
      "papers",
      "ai",
    ] as const;

    for (const resourceName of resourceNames) {
      expect(sdk[resourceName]).toBeDefined();
    }
  });
});
|
|
321
|
+
|
|
322
|
+
// Error mapping: a mocked HTTP 500 must make the call reject with TrawlError.
describe("error handling", () => {
  it("throws TrawlError on 500", async () => {
    const serverFailure = mockResponse(500, { detail: "Server error" });
    mockFetch.mockResolvedValueOnce(serverFailure);
    const sdk = new TrawlClient({ baseUrl: "http://test" });

    // The promise is handed to expect() immediately so the rejection is observed.
    const pendingSearch = sdk.search.youtube("test");
    await expect(pendingSearch).rejects.toThrow(TrawlError);
  });
});
|
package/tsconfig.json
ADDED