@llm-newsletter-kit/core 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +73 -0
- package/NOTICE +27 -0
- package/README.md +240 -0
- package/dist/index.cjs +1757 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.ts +828 -0
- package/dist/index.js +1755 -0
- package/dist/index.js.map +1 -0
- package/package.json +111 -0
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,828 @@
|
|
|
1
|
+
import { LanguageModel } from 'ai';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Common type aliases.
|
|
5
|
+
*
|
|
6
|
+
* - Provides explicit alias types for date/URL/Markdown/HTML, etc.
|
|
7
|
+
* - All comments are written in English JSDoc style.
|
|
8
|
+
*/
|
|
9
|
+
/**
|
|
10
|
+
* ISO 8601 date string. Use YYYY-MM-DD without time.
|
|
11
|
+
* @example "2025-10-15"
|
|
12
|
+
*/
|
|
13
|
+
type IsoDateString = string;
|
|
14
|
+
/**
|
|
15
|
+
* URL string pointing to external or internal resources.
|
|
16
|
+
* @example "https://example.com/news/123"
|
|
17
|
+
*/
|
|
18
|
+
type UrlString = string;
|
|
19
|
+
/**
|
|
20
|
+
* Markdown-formatted text.
|
|
21
|
+
* @example "# Title\n\nBody content."
|
|
22
|
+
*/
|
|
23
|
+
type MarkdownString = string;
|
|
24
|
+
/**
|
|
25
|
+
* HTML-formatted string.
|
|
26
|
+
* @example "<h1>Title</h1><p>Body</p>"
|
|
27
|
+
*/
|
|
28
|
+
type HtmlString = string;
|
|
29
|
+
/**
|
|
30
|
+
* String-based unique identifier. Used for DOM ids, data record ids, etc.
|
|
31
|
+
* @example "item-42"
|
|
32
|
+
*/
|
|
33
|
+
type UniqueIdentifier = string;
|
|
34
|
+
/**
|
|
35
|
+
* Type for date identifiers.
|
|
36
|
+
*
|
|
37
|
+
* The DateType enum is used to distinguish date-related values.
|
|
38
|
+
* It can be used to differentiate between registered dates and ranges.
|
|
39
|
+
*
|
|
40
|
+
* Enum members:
|
|
41
|
+
* - REGISTERED: indicates a registered date.
|
|
42
|
+
* - DURATION: indicates a duration or time range.
|
|
43
|
+
*
|
|
44
|
+
* @example
|
|
45
|
+
* ```ts
|
|
46
|
+
* const type: DateType = DateType.REGISTERED;
|
|
47
|
+
* ```
|
|
48
|
+
*/
|
|
49
|
+
declare enum DateType {
|
|
50
|
+
REGISTERED = "registered",
|
|
51
|
+
DURATION = "duration"
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/**
|
|
55
|
+
* Structure of an article that has not yet been processed (no importance score, image analysis, or tagging).
|
|
56
|
+
*/
|
|
57
|
+
type UnscoredArticle = {
|
|
58
|
+
/**
|
|
59
|
+
* Identifier. Supports both string and number to accommodate various DB schemas.
|
|
60
|
+
*/
|
|
61
|
+
id: string | number;
|
|
62
|
+
/**
|
|
63
|
+
* Article title.
|
|
64
|
+
* @example "Weekly Tech Newsletter"
|
|
65
|
+
*/
|
|
66
|
+
title: string;
|
|
67
|
+
/**
|
|
68
|
+
* Article body in Markdown format.
|
|
69
|
+
* @example "### Highlights\n- New framework announced"
|
|
70
|
+
*/
|
|
71
|
+
detailContent: MarkdownString;
|
|
72
|
+
/**
|
|
73
|
+
* Whether the article has an attached image.
|
|
74
|
+
* @example true
|
|
75
|
+
*/
|
|
76
|
+
hasAttachedImage: boolean;
|
|
77
|
+
/**
|
|
78
|
+
* Image analysis result. Null if no LLM analysis result exists.
|
|
79
|
+
*/
|
|
80
|
+
imageContextByLlm: string | null;
|
|
81
|
+
/**
|
|
82
|
+
* First classification tag.
|
|
83
|
+
* @example "News"
|
|
84
|
+
*/
|
|
85
|
+
tag1: string | null;
|
|
86
|
+
/**
|
|
87
|
+
* Second classification tag.
|
|
88
|
+
* @example "Jobs"
|
|
89
|
+
*/
|
|
90
|
+
tag2: string | null;
|
|
91
|
+
/**
|
|
92
|
+
* Third classification tag.
|
|
93
|
+
* @example "Announcement"
|
|
94
|
+
*/
|
|
95
|
+
tag3: string | null;
|
|
96
|
+
/**
|
|
97
|
+
* URL for board collection. Not the original article detail URL. Typically matches CrawlingTarget.url.
|
|
98
|
+
* @example "https://example.com/board/notice"
|
|
99
|
+
*/
|
|
100
|
+
targetUrl: UrlString;
|
|
101
|
+
};
|
|
102
|
+
/**
|
|
103
|
+
* Article type used after the analysis phase (image/tagging/score) for updates.
|
|
104
|
+
*/
|
|
105
|
+
type ArticleForUpdateByAnalysis = UnscoredArticle & {
|
|
106
|
+
/**
|
|
107
|
+
* Importance score. Range 1–10.
|
|
108
|
+
* @example 8
|
|
109
|
+
*/
|
|
110
|
+
importanceScore: number;
|
|
111
|
+
};
|
|
112
|
+
/**
|
|
113
|
+
* Article type used in the newsletter content generation phase.
|
|
114
|
+
*/
|
|
115
|
+
type ArticleForGenerateContent = ArticleForUpdateByAnalysis & {
|
|
116
|
+
/**
|
|
117
|
+
* Content type of the article. Typically a group name from CrawlingTargetGroup or similar grouping.
|
|
118
|
+
* @example "News"
|
|
119
|
+
*/
|
|
120
|
+
contentType: string;
|
|
121
|
+
/**
|
|
122
|
+
* Original detail page URL of the article.
|
|
123
|
+
* @example "https://example.com/news/123"
|
|
124
|
+
*/
|
|
125
|
+
url: UrlString;
|
|
126
|
+
};
|
|
127
|
+
|
|
128
|
+
/**
|
|
129
|
+
* Structure returned after parsing a list page (HTML).
|
|
130
|
+
*/
|
|
131
|
+
type ParsedTargetListItem = {
|
|
132
|
+
/**
|
|
133
|
+
* Unique id present in the HTML. If none exists, you may omit it.
|
|
134
|
+
* @example "post-2025-0001"
|
|
135
|
+
*/
|
|
136
|
+
uniqId?: UniqueIdentifier;
|
|
137
|
+
/**
|
|
138
|
+
* Article title.
|
|
139
|
+
* @example "AI Industry Trends Report Released"
|
|
140
|
+
*/
|
|
141
|
+
title: string;
|
|
142
|
+
/**
|
|
143
|
+
* Article date in ISO format (YYYY-MM-DD), no time included.
|
|
144
|
+
* @example "2025-10-15"
|
|
145
|
+
*/
|
|
146
|
+
date: IsoDateString;
|
|
147
|
+
/**
|
|
148
|
+
* Whether it is a registered date or a duration.
|
|
149
|
+
* @example DateType.REGISTERED
|
|
150
|
+
*/
|
|
151
|
+
dateType: DateType;
|
|
152
|
+
/**
|
|
153
|
+
* URL to the linked detail page.
|
|
154
|
+
* @example "https://example.com/board/notice/1234"
|
|
155
|
+
*/
|
|
156
|
+
detailUrl: UrlString;
|
|
157
|
+
};
|
|
158
|
+
/**
|
|
159
|
+
* Structure returned after parsing a detail page (HTML).
|
|
160
|
+
*/
|
|
161
|
+
type ParsedTargetDetail = {
|
|
162
|
+
/**
|
|
163
|
+
* Parsed detail page content in Markdown.
|
|
164
|
+
* Convert HTML to Markdown using libraries such as turndown.
|
|
165
|
+
* @example "## Notice\n\n- Application period: 2025-10-15 ~ 2025-10-31"
|
|
166
|
+
*/
|
|
167
|
+
detailContent: MarkdownString;
|
|
168
|
+
/**
|
|
169
|
+
* Whether there is any file attachment.
|
|
170
|
+
* @example true
|
|
171
|
+
*/
|
|
172
|
+
hasAttachedFile: boolean;
|
|
173
|
+
/**
|
|
174
|
+
* Whether an image is included.
|
|
175
|
+
* @example false
|
|
176
|
+
*/
|
|
177
|
+
hasAttachedImage: boolean;
|
|
178
|
+
};
|
|
179
|
+
/**
|
|
180
|
+
* Fully structured crawling target object combining list and detail parsing results.
|
|
181
|
+
*/
|
|
182
|
+
type ParsedTarget = ParsedTargetListItem & ParsedTargetDetail;
|
|
183
|
+
/**
|
|
184
|
+
* Target to crawl.
|
|
185
|
+
* For example, a board/list page of a website. Parsing methods must be defined.
|
|
186
|
+
*/
|
|
187
|
+
type CrawlingTarget = {
|
|
188
|
+
/**
|
|
189
|
+
* Identifier for the crawling target. Any unique id works; uuid is recommended.
|
|
190
|
+
* @example "crawling-target-001"
|
|
191
|
+
*/
|
|
192
|
+
id: UniqueIdentifier;
|
|
193
|
+
/**
|
|
194
|
+
* Name of the crawling target.
|
|
195
|
+
* @example "Notice Board"
|
|
196
|
+
*/
|
|
197
|
+
name: string;
|
|
198
|
+
/**
|
|
199
|
+
* URL of the crawling target. Should point to a specific board (list) page URL.
|
|
200
|
+
* @example "https://example.com/board/notice"
|
|
201
|
+
*/
|
|
202
|
+
url: UrlString;
|
|
203
|
+
/**
|
|
204
|
+
* Method to structurally parse data from a list page (HTML).
|
|
205
|
+
* Synchronous parsing with clear rules is recommended, but async is supported to allow LLM/external backends.
|
|
206
|
+
*
|
|
207
|
+
* @param html Original HTML string of the list page
|
|
208
|
+
* @returns Parsed list items
|
|
209
|
+
* @example
|
|
210
|
+
* ```ts
|
|
211
|
+
* const items = await target.parseList(html);
|
|
212
|
+
* items[0].title; // "Notice Title"
|
|
213
|
+
* ```
|
|
214
|
+
*/
|
|
215
|
+
parseList: (html: string) => Promise<ParsedTargetListItem[]> | ParsedTargetListItem[];
|
|
216
|
+
/**
|
|
217
|
+
* Method to structurally parse data from a detail page (HTML).
|
|
218
|
+
* Synchronous parsing with clear rules is recommended, but async is supported to allow LLM/external backends.
|
|
219
|
+
*
|
|
220
|
+
* @param html Original HTML string of the detail page
|
|
221
|
+
* @returns Parsed detail information
|
|
222
|
+
*/
|
|
223
|
+
parseDetail: (html: string) => Promise<ParsedTargetDetail> | ParsedTargetDetail;
|
|
224
|
+
};
|
|
225
|
+
/**
|
|
226
|
+
* Grouped type for crawling targets.
|
|
227
|
+
* For example, groups like News, Jobs, Programs/Bids, etc.
|
|
228
|
+
*/
|
|
229
|
+
type CrawlingTargetGroup = {
|
|
230
|
+
/**
|
|
231
|
+
* Identifier for a group. Any unique id works; uuid is recommended.
|
|
232
|
+
* @example "group-news"
|
|
233
|
+
*/
|
|
234
|
+
id: UniqueIdentifier;
|
|
235
|
+
/**
|
|
236
|
+
* Group name.
|
|
237
|
+
* @example "News"
|
|
238
|
+
*/
|
|
239
|
+
name: string;
|
|
240
|
+
/**
|
|
241
|
+
* Targets included in this group.
|
|
242
|
+
*/
|
|
243
|
+
targets: CrawlingTarget[];
|
|
244
|
+
};
|
|
245
|
+
|
|
246
|
+
type ContentOptions = {
|
|
247
|
+
/**
|
|
248
|
+
* Output language for the newsletter. e.g., "English", "Spanish"
|
|
249
|
+
* @example "English"
|
|
250
|
+
*/
|
|
251
|
+
outputLanguage: string;
|
|
252
|
+
/**
|
|
253
|
+
* Target domain(s) for the newsletter (one or many)
|
|
254
|
+
* @example ["AI", "Cloud"]
|
|
255
|
+
*/
|
|
256
|
+
expertField: string | string[];
|
|
257
|
+
};
|
|
258
|
+
type LLMQueryOptions = {
|
|
259
|
+
/**
|
|
260
|
+
* Number of retries when LLM calls fail.
|
|
261
|
+
* @default 5
|
|
262
|
+
*/
|
|
263
|
+
maxRetries?: number;
|
|
264
|
+
};
|
|
265
|
+
type ChainOptions = {
|
|
266
|
+
/**
|
|
267
|
+
* Maximum retry attempts when the chain fails while running.
|
|
268
|
+
* @default 3
|
|
269
|
+
*/
|
|
270
|
+
stopAfterAttempt?: number;
|
|
271
|
+
};
|
|
272
|
+
|
|
273
|
+
/**
|
|
274
|
+
* Token markers in template.
|
|
275
|
+
*/
|
|
276
|
+
type HtmlTemplateMarkers = {
|
|
277
|
+
/**
|
|
278
|
+
* Title token
|
|
279
|
+
*
|
|
280
|
+
* When the title is set to "NEWSLETTER_TITLE", it replaces the "{{NEWSLETTER_TITLE}}" pattern in the template string.
|
|
281
|
+
*
|
|
282
|
+
* @default "NEWSLETTER_TITLE"
|
|
283
|
+
*/
|
|
284
|
+
title?: string;
|
|
285
|
+
/**
|
|
286
|
+
* Content HTML token
|
|
287
|
+
*
|
|
288
|
+
* When content is set to "NEWSLETTER_CONTENT", it replaces the "{{NEWSLETTER_CONTENT}}" pattern in the template string.
|
|
289
|
+
*
|
|
290
|
+
* @default "NEWSLETTER_CONTENT"
|
|
291
|
+
*/
|
|
292
|
+
content?: string;
|
|
293
|
+
};
|
|
294
|
+
/**
|
|
295
|
+
* String template and marker set
|
|
296
|
+
*/
|
|
297
|
+
type HtmlTemplate = {
|
|
298
|
+
/**
|
|
299
|
+
* Original template string
|
|
300
|
+
*/
|
|
301
|
+
html: string;
|
|
302
|
+
/**
|
|
303
|
+
* Uses default markers when not specified
|
|
304
|
+
*/
|
|
305
|
+
markers?: HtmlTemplateMarkers;
|
|
306
|
+
};
|
|
307
|
+
type RequiredHtmlTemplate = Pick<HtmlTemplate, 'html'> & {
|
|
308
|
+
markers: Required<HtmlTemplateMarkers>;
|
|
309
|
+
};
|
|
310
|
+
|
|
311
|
+
/**
|
|
312
|
+
* Type representing an email address.
|
|
313
|
+
* @example "user@example.com"
|
|
314
|
+
*/
|
|
315
|
+
type EmailAddress = string;
|
|
316
|
+
/**
|
|
317
|
+
* Information about an email attachment.
|
|
318
|
+
*/
|
|
319
|
+
type EmailAttachment = {
|
|
320
|
+
/**
|
|
321
|
+
* Attachment file name
|
|
322
|
+
*/
|
|
323
|
+
filename: string;
|
|
324
|
+
/**
|
|
325
|
+
* Attachment content (Buffer or string)
|
|
326
|
+
*/
|
|
327
|
+
content: Buffer | string;
|
|
328
|
+
/**
|
|
329
|
+
* Attachment MIME type (optional)
|
|
330
|
+
* @example 'application/pdf', 'image/png'
|
|
331
|
+
*/
|
|
332
|
+
contentType?: string;
|
|
333
|
+
/**
|
|
334
|
+
* Content-ID for inline images (optional)
|
|
335
|
+
* Can be referenced in HTML as <img src="cid:contentId">
|
|
336
|
+
*/
|
|
337
|
+
contentId?: string;
|
|
338
|
+
};
|
|
339
|
+
/**
|
|
340
|
+
* Email message information.
|
|
341
|
+
*/
|
|
342
|
+
type EmailMessage = {
|
|
343
|
+
/**
|
|
344
|
+
* Sender email address
|
|
345
|
+
*/
|
|
346
|
+
from: EmailAddress;
|
|
347
|
+
/**
|
|
348
|
+
* Recipient email address(es)
|
|
349
|
+
*/
|
|
350
|
+
to: EmailAddress | EmailAddress[];
|
|
351
|
+
/**
|
|
352
|
+
* Email subject
|
|
353
|
+
*/
|
|
354
|
+
subject: string;
|
|
355
|
+
/**
|
|
356
|
+
* HTML body
|
|
357
|
+
*/
|
|
358
|
+
html: HtmlString;
|
|
359
|
+
/**
|
|
360
|
+
* Plain text body (optional)
|
|
361
|
+
*/
|
|
362
|
+
text?: string;
|
|
363
|
+
/**
|
|
364
|
+
* CC address(es) (optional)
|
|
365
|
+
*/
|
|
366
|
+
cc?: EmailAddress | EmailAddress[];
|
|
367
|
+
/**
|
|
368
|
+
* BCC address(es) (optional)
|
|
369
|
+
*/
|
|
370
|
+
bcc?: EmailAddress | EmailAddress[];
|
|
371
|
+
/**
|
|
372
|
+
* Reply-to address (optional)
|
|
373
|
+
* Address for recipients to reply to
|
|
374
|
+
*/
|
|
375
|
+
replyTo?: EmailAddress;
|
|
376
|
+
/**
|
|
377
|
+
* Additional headers (optional)
|
|
378
|
+
* @example { 'X-Priority': '1', 'X-Mailer': 'MyApp' }
|
|
379
|
+
*/
|
|
380
|
+
headers?: Record<string, string>;
|
|
381
|
+
/**
|
|
382
|
+
* Attachments (optional)
|
|
383
|
+
* Supports both regular attachments and inline images.
|
|
384
|
+
*/
|
|
385
|
+
attachments?: EmailAttachment[];
|
|
386
|
+
};
|
|
387
|
+
|
|
388
|
+
/**
|
|
389
|
+
* Global logging level type used across the project.
|
|
390
|
+
*/
|
|
391
|
+
type LogLevel = 'debug' | 'info' | 'error';
|
|
392
|
+
/**
|
|
393
|
+
* Structured log message format.
|
|
394
|
+
* - event: Recommended "domain.action[.state]" style, e.g., "crawl.group.start" | "fetch.success" | "task.error"
|
|
395
|
+
* - level: Usually implied by the called method, but can be set explicitly (debug/info/error).
|
|
396
|
+
* - taskId: Identifier to correlate logs for the same job.
|
|
397
|
+
* - durationMs: Include elapsed time (ms) on ".done"/".error" logs.
|
|
398
|
+
* - data: Additional context. Prefer JSON‑serializable values only.
|
|
399
|
+
* Examples:
|
|
400
|
+
* logger.info({ event: 'task.start', taskId })
|
|
401
|
+
* logger.debug({ event: 'crawl.list.fetch.start', data: { url } })
|
|
402
|
+
*/
|
|
403
|
+
type LogMessage<TaskId = unknown, Extra extends Record<string, unknown> = Record<string, unknown>> = {
|
|
404
|
+
/** Event name, e.g., "crawl.group.start", "fetch.success" */
|
|
405
|
+
event: string;
|
|
406
|
+
/** Log level (optional; implied by the method if omitted) */
|
|
407
|
+
level?: LogLevel;
|
|
408
|
+
/** Associated task identifier */
|
|
409
|
+
taskId?: TaskId;
|
|
410
|
+
/** Elapsed time in milliseconds (typically for done/error) */
|
|
411
|
+
durationMs?: number;
|
|
412
|
+
/** Additional data container */
|
|
413
|
+
data?: Extra;
|
|
414
|
+
};
|
|
415
|
+
|
|
416
|
+
/**
|
|
417
|
+
* Logger interface used across the application.
|
|
418
|
+
* - Accepts structured LogMessage objects; implement this interface to integrate with systems such as Logstash, CloudWatch, Datadog, etc.
|
|
419
|
+
* - Typically, info is for operational events, debug for detailed tracing, and error for exceptions/critical failures.
|
|
420
|
+
* - error accepts either a structured LogMessage or an arbitrary error object (Error, unknown).
|
|
421
|
+
*
|
|
422
|
+
* Usage examples:
|
|
423
|
+
* logger.info({ event: 'task.start', taskId })
|
|
424
|
+
* logger.debug({ event: 'crawl.list.fetch.start', data: { url } })
|
|
425
|
+
* logger.error({ event: 'fetch.failed', data: { url, attempt, error: err.message } })
|
|
426
|
+
*/
|
|
427
|
+
interface AppLogger {
|
|
428
|
+
/** Info-level logs for operational events/state. */
|
|
429
|
+
info: (message: LogMessage) => void;
|
|
430
|
+
/** Debug-level logs for detailed debugging/tracing. */
|
|
431
|
+
debug: (message: LogMessage) => void;
|
|
432
|
+
/** Error-level logs. Accepts structured messages or arbitrary errors (Error/unknown). */
|
|
433
|
+
error: (message: LogMessage | unknown) => void;
|
|
434
|
+
}
|
|
435
|
+
/**
|
|
436
|
+
* Email sending service interface.
|
|
437
|
+
*/
|
|
438
|
+
interface EmailService {
|
|
439
|
+
/**
|
|
440
|
+
* Send an email.
|
|
441
|
+
* @param message Email message to send
|
|
442
|
+
* @throws May throw on delivery failures
|
|
443
|
+
*/
|
|
444
|
+
send: (message: EmailMessage) => Promise<void>;
|
|
445
|
+
}
|
|
446
|
+
/**
|
|
447
|
+
* Service that provides dates for internal use or insertion into the newsletter.
|
|
448
|
+
* Client implementations decide how to handle locale, language, and timezone.
|
|
449
|
+
*/
|
|
450
|
+
interface DateService {
|
|
451
|
+
/**
|
|
452
|
+
* Return current date in ISO format (YYYY-MM-DD).
|
|
453
|
+
* @returns ISO date string
|
|
454
|
+
* @example "2024-10-15"
|
|
455
|
+
*/
|
|
456
|
+
getCurrentISODateString: () => IsoDateString;
|
|
457
|
+
/**
|
|
458
|
+
* Return a localized display date string for use in newsletter content.
|
|
459
|
+
*/
|
|
460
|
+
getDisplayDateString: () => string;
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
/**
|
|
464
|
+
* Publishable final Newsletter entity.
|
|
465
|
+
*/
|
|
466
|
+
type Newsletter = {
|
|
467
|
+
/**
|
|
468
|
+
* Newsletter title.
|
|
469
|
+
* @example "LLM Newsletter #12"
|
|
470
|
+
*/
|
|
471
|
+
title: string;
|
|
472
|
+
/**
|
|
473
|
+
* Newsletter content in Markdown. Will be applied to a template and converted to HTML.
|
|
474
|
+
*/
|
|
475
|
+
content: MarkdownString;
|
|
476
|
+
/**
|
|
477
|
+
* Final HTML body of the newsletter, ready to send.
|
|
478
|
+
*/
|
|
479
|
+
htmlBody: HtmlString;
|
|
480
|
+
/**
|
|
481
|
+
* Issue number of the newsletter.
|
|
482
|
+
* @example 12
|
|
483
|
+
*/
|
|
484
|
+
issueOrder: number;
|
|
485
|
+
/**
|
|
486
|
+
* Publication date of the newsletter in ISO format (YYYY-MM-DD). Time is not included.
|
|
487
|
+
* @example "2025-10-15"
|
|
488
|
+
*/
|
|
489
|
+
date: IsoDateString;
|
|
490
|
+
};
|
|
491
|
+
|
|
492
|
+
/**
|
|
493
|
+
* Task module managed by the client for newsletter generation.
|
|
494
|
+
* e.g., can ensure single execution (prevent duplicates) like "run once a day".
|
|
495
|
+
*/
|
|
496
|
+
interface TaskService<TaskId> {
|
|
497
|
+
/**
|
|
498
|
+
* Start a task. Implementations may reject (throw) if a task is already running.
|
|
499
|
+
* @returns taskId
|
|
500
|
+
*/
|
|
501
|
+
start: () => Promise<TaskId>;
|
|
502
|
+
/**
|
|
503
|
+
* End a task.
|
|
504
|
+
* @returns void
|
|
505
|
+
*/
|
|
506
|
+
end: () => Promise<void>;
|
|
507
|
+
}
|
|
508
|
+
/**
|
|
509
|
+
* Values and methods required for crawling.
|
|
510
|
+
*/
|
|
511
|
+
interface CrawlingProvider {
|
|
512
|
+
/**
|
|
513
|
+
* Maximum number of concurrent jobs.
|
|
514
|
+
* Used to control parallelism for resource management and to prevent overload.
|
|
515
|
+
* @default 5
|
|
516
|
+
*/
|
|
517
|
+
maxConcurrency?: number;
|
|
518
|
+
/**
|
|
519
|
+
* Crawling target groups.
|
|
520
|
+
*/
|
|
521
|
+
crawlingTargetGroups: CrawlingTargetGroup[];
|
|
522
|
+
/**
|
|
523
|
+
* Look up existing stored articles for the given URLs to compare with newly collected ones.
|
|
524
|
+
* @param articleUrls Original article URLs to query
|
|
525
|
+
* @returns Previously stored (parsed) articles
|
|
526
|
+
*/
|
|
527
|
+
fetchExistingArticlesByUrls: (articleUrls: UrlString[]) => Promise<ParsedTarget[]>;
|
|
528
|
+
/**
|
|
529
|
+
* Persist structured results collected for a specific crawling group (batch-save recommended).
|
|
530
|
+
* @param articles Parsed articles
|
|
531
|
+
* @param context Crawling execution context
|
|
532
|
+
* @returns Number of saved articles
|
|
533
|
+
*/
|
|
534
|
+
saveCrawledArticles: <TaskId>(articles: ParsedTarget[], context: {
|
|
535
|
+
taskId: TaskId;
|
|
536
|
+
targetGroup: Omit<CrawlingTargetGroup, 'targets'>;
|
|
537
|
+
target: CrawlingTarget;
|
|
538
|
+
}) => Promise<number>;
|
|
539
|
+
}
|
|
540
|
+
/**
|
|
541
|
+
* Minimum importance score policy for a specific crawling target.
|
|
542
|
+
*/
|
|
543
|
+
type MinimumImportanceScoreRule = {
|
|
544
|
+
/**
|
|
545
|
+
* Target URL the minimum score applies to. Same as CrawlingTarget.url.
|
|
546
|
+
*/
|
|
547
|
+
targetUrl: UrlString;
|
|
548
|
+
/**
|
|
549
|
+
* Minimum importance score to apply.
|
|
550
|
+
* @example 5
|
|
551
|
+
*/
|
|
552
|
+
minScore: number;
|
|
553
|
+
};
|
|
554
|
+
/**
|
|
555
|
+
* Values and methods required for LLM analysis.
|
|
556
|
+
*/
|
|
557
|
+
interface AnalysisProvider {
|
|
558
|
+
/**
|
|
559
|
+
* Options for classification and tag generation.
|
|
560
|
+
*/
|
|
561
|
+
classifyTagOptions: {
|
|
562
|
+
/**
|
|
563
|
+
* Model to use. A relatively light model is acceptable.
|
|
564
|
+
*/
|
|
565
|
+
model: LanguageModel;
|
|
566
|
+
};
|
|
567
|
+
/**
|
|
568
|
+
* Options for image analysis.
|
|
569
|
+
*/
|
|
570
|
+
analyzeImagesOptions: {
|
|
571
|
+
/**
|
|
572
|
+
* Model to use. Must be a multimodal model.
|
|
573
|
+
*/
|
|
574
|
+
model: LanguageModel;
|
|
575
|
+
};
|
|
576
|
+
/**
|
|
577
|
+
* Options for importance score generation.
|
|
578
|
+
*/
|
|
579
|
+
determineScoreOptions: {
|
|
580
|
+
/**
|
|
581
|
+
* Model to use. A relatively light model is acceptable.
|
|
582
|
+
*/
|
|
583
|
+
model: LanguageModel;
|
|
584
|
+
/**
|
|
585
|
+
* Minimum score policies per crawling target.
|
|
586
|
+
* @example
|
|
587
|
+
* ```ts
|
|
588
|
+
* minimumImportanceScoreRules: [
|
|
589
|
+
* { targetUrl: 'https://example.com/board/notice', minScore: 5 }
|
|
590
|
+
* ]
|
|
591
|
+
* ```
|
|
592
|
+
*/
|
|
593
|
+
minimumImportanceScoreRules?: MinimumImportanceScoreRule[];
|
|
594
|
+
};
|
|
595
|
+
/**
|
|
596
|
+
* Fetch articles without an importance score from the DB.
|
|
597
|
+
* @returns List of unscored articles
|
|
598
|
+
*/
|
|
599
|
+
fetchUnscoredArticles: () => Promise<UnscoredArticle[]>;
|
|
600
|
+
/**
|
|
601
|
+
* Fetch existing tag list to classify collected articles before newsletter generation.
|
|
602
|
+
* @returns Array of tag strings
|
|
603
|
+
*/
|
|
604
|
+
fetchTags: () => Promise<string[]>;
|
|
605
|
+
/**
|
|
606
|
+
* Update analysis results and importance scores after all work is done.
|
|
607
|
+
* @param article Article data to update
|
|
608
|
+
*/
|
|
609
|
+
update: (article: ArticleForUpdateByAnalysis) => Promise<void>;
|
|
610
|
+
}
|
|
611
|
+
/**
|
|
612
|
+
* Values and methods required to generate a newsletter.
|
|
613
|
+
*/
|
|
614
|
+
interface ContentGenerateProvider {
|
|
615
|
+
/**
|
|
616
|
+
* Language model to use. A high‑performance model is recommended.
|
|
617
|
+
*/
|
|
618
|
+
model: LanguageModel;
|
|
619
|
+
/**
|
|
620
|
+
* Maximum tokens allowed for generation.
|
|
621
|
+
* Used to prevent excessively long outputs and control token usage.
|
|
622
|
+
*/
|
|
623
|
+
maxOutputTokens?: number;
|
|
624
|
+
/**
|
|
625
|
+
* Temperature controlling randomness (0.0–1.0).
|
|
626
|
+
* Higher values can be more creative but less consistent.
|
|
627
|
+
* @default 0.3
|
|
628
|
+
*/
|
|
629
|
+
temperature?: number;
|
|
630
|
+
/**
|
|
631
|
+
* Controls nucleus sampling (top‑p, 0.0–1.0).
|
|
632
|
+
* Samples from the smallest set of most-likely tokens whose cumulative probability reaches the threshold.
|
|
633
|
+
* @default 0.95
|
|
634
|
+
*/
|
|
635
|
+
topP?: number;
|
|
636
|
+
/**
|
|
637
|
+
* Restrict sampling to top‑K tokens.
|
|
638
|
+
* Helps balance diversity and quality.
|
|
639
|
+
*/
|
|
640
|
+
topK?: number;
|
|
641
|
+
/**
|
|
642
|
+
* Controls penalty for repeating tokens (−2.0–2.0).
|
|
643
|
+
* Higher values discourage repetition.
|
|
644
|
+
*/
|
|
645
|
+
presencePenalty?: number;
|
|
646
|
+
/**
|
|
647
|
+
* Controls penalty based on token frequency (−2.0–2.0).
|
|
648
|
+
* Higher values discourage frequent tokens.
|
|
649
|
+
*/
|
|
650
|
+
frequencyPenalty?: number;
|
|
651
|
+
/**
|
|
652
|
+
* Issue number of the newsletter.
|
|
653
|
+
*/
|
|
654
|
+
issueOrder: number;
|
|
655
|
+
/**
|
|
656
|
+
* Publication criteria for issuing a newsletter.
|
|
657
|
+
* @example
|
|
658
|
+
* ```ts
|
|
659
|
+
* publicationCriteria: {
|
|
660
|
+
* minimumArticleCountForIssue: 5,
|
|
661
|
+
* priorityArticleScoreThreshold: 8,
|
|
662
|
+
* }
|
|
663
|
+
* ```
|
|
664
|
+
*/
|
|
665
|
+
publicationCriteria?: {
|
|
666
|
+
/**
|
|
667
|
+
* Minimum number of articles required to issue a newsletter
|
|
668
|
+
* @default 5
|
|
669
|
+
*/
|
|
670
|
+
minimumArticleCountForIssue: number;
|
|
671
|
+
/**
|
|
672
|
+
* If there exists an article with importance ≥ this score, issue regardless of count
|
|
673
|
+
* @default 8
|
|
674
|
+
*/
|
|
675
|
+
priorityArticleScoreThreshold: number;
|
|
676
|
+
};
|
|
677
|
+
/**
|
|
678
|
+
* Subscription page URL. Can be inserted as a CTA link in the newsletter.
|
|
679
|
+
*/
|
|
680
|
+
subscribePageUrl?: UrlString;
|
|
681
|
+
/**
|
|
682
|
+
* Brand name of the newsletter.
|
|
683
|
+
* @example "Dev Insight"
|
|
684
|
+
*/
|
|
685
|
+
newsletterBrandName: string;
|
|
686
|
+
/**
|
|
687
|
+
* Fetch candidate articles from the DB for newsletter generation.
|
|
688
|
+
*/
|
|
689
|
+
fetchArticleCandidates: () => Promise<ArticleForGenerateContent[]>;
|
|
690
|
+
/**
|
|
691
|
+
* HTML template for the newsletter.
|
|
692
|
+
* Generated content is applied to this template to produce the final HTML.
|
|
693
|
+
*/
|
|
694
|
+
htmlTemplate: HtmlTemplate;
|
|
695
|
+
/**
|
|
696
|
+
* Persist the newsletter (recommend saving relationships together).
|
|
697
|
+
* - Receives `usedArticles` so relations can be handled transactionally.
|
|
698
|
+
*/
|
|
699
|
+
saveNewsletter: (input: {
|
|
700
|
+
newsletter: Newsletter;
|
|
701
|
+
usedArticles: ArticleForGenerateContent[];
|
|
702
|
+
}) => Promise<{
|
|
703
|
+
id: string | number;
|
|
704
|
+
}>;
|
|
705
|
+
}
|
|
706
|
+
/**
|
|
707
|
+
* Options for newsletter generation.
|
|
708
|
+
* - Controls LLM retry counts, logger injection, etc.
|
|
709
|
+
*/
|
|
710
|
+
type GenerateNewsletterOptions = {
|
|
711
|
+
/**
|
|
712
|
+
* Logger implementation. If not provided, a no‑op logger is used.
|
|
713
|
+
*/
|
|
714
|
+
logger?: AppLogger;
|
|
715
|
+
/**
|
|
716
|
+
* LLM behavior configuration.
|
|
717
|
+
*/
|
|
718
|
+
llm?: LLMQueryOptions;
|
|
719
|
+
/**
|
|
720
|
+
* Internal chain behavior configuration.
|
|
721
|
+
*/
|
|
722
|
+
chain?: ChainOptions;
|
|
723
|
+
/**
|
|
724
|
+
* Preview newsletter delivery configuration.
|
|
725
|
+
* When present, a preview email is sent to reviewers.
|
|
726
|
+
*/
|
|
727
|
+
previewNewsletter?: {
|
|
728
|
+
/**
|
|
729
|
+
* Fetch a newsletter entity to use for preview.
|
|
730
|
+
*/
|
|
731
|
+
fetchNewsletterForPreview: () => Promise<Newsletter>;
|
|
732
|
+
/**
|
|
733
|
+
* Email delivery service implementation.
|
|
734
|
+
*/
|
|
735
|
+
emailService: EmailService;
|
|
736
|
+
/**
|
|
737
|
+
* Base configuration for the preview email.
|
|
738
|
+
* subject/html/text are generated automatically and omitted here.
|
|
739
|
+
*/
|
|
740
|
+
emailMessage: Omit<EmailMessage, 'subject' | 'html' | 'text'>;
|
|
741
|
+
};
|
|
742
|
+
};
|
|
743
|
+
/**
|
|
744
|
+
* Configuration object passed to the GenerateNewsletter constructor.
|
|
745
|
+
*/
|
|
746
|
+
type GenerateNewsletterConfig<TaskId> = {
|
|
747
|
+
/**
|
|
748
|
+
* Content generation settings.
|
|
749
|
+
* Defines the output language and target domains.
|
|
750
|
+
*/
|
|
751
|
+
contentOptions: ContentOptions;
|
|
752
|
+
/**
|
|
753
|
+
* Service that supplies date values.
|
|
754
|
+
* Manages publication date and display strings.
|
|
755
|
+
*/
|
|
756
|
+
dateService: DateService;
|
|
757
|
+
/**
|
|
758
|
+
* Task service used to ensure single execution and avoid duplicates.
|
|
759
|
+
*/
|
|
760
|
+
taskService: TaskService<TaskId>;
|
|
761
|
+
/**
|
|
762
|
+
* Provider for crawling (targets, persistence, queries, etc.).
|
|
763
|
+
*/
|
|
764
|
+
crawlingProvider: CrawlingProvider;
|
|
765
|
+
/**
|
|
766
|
+
* Provider for analysis (image analysis, tagging, scoring, etc.).
|
|
767
|
+
*/
|
|
768
|
+
analysisProvider: AnalysisProvider;
|
|
769
|
+
/**
|
|
770
|
+
* Provider for content generation (LLM, template, save/publish, etc.).
|
|
771
|
+
*/
|
|
772
|
+
contentGenerateProvider: ContentGenerateProvider;
|
|
773
|
+
/**
|
|
774
|
+
* Optional behavior/settings.
|
|
775
|
+
*/
|
|
776
|
+
options?: GenerateNewsletterOptions;
|
|
777
|
+
};
|
|
778
|
+
|
|
779
|
+
/**
|
|
780
|
+
* Core class that orchestrates LLM-based newsletter generation.
|
|
781
|
+
* - Responsible for the flow: Crawling → Analysis → Content Generation → Save; external dependencies are injected via DI.
|
|
782
|
+
*/
|
|
783
|
+
declare class GenerateNewsletter<TaskId> {
|
|
784
|
+
/** Internal fields provided via dependency injection */
|
|
785
|
+
private readonly dateService;
|
|
786
|
+
private readonly taskService;
|
|
787
|
+
private readonly crawlingProvider;
|
|
788
|
+
private readonly analysisProvider;
|
|
789
|
+
private readonly contentGenerateProvider;
|
|
790
|
+
private readonly logger;
|
|
791
|
+
private readonly options;
|
|
792
|
+
private readonly previewNewsletterOptions?;
|
|
793
|
+
/** Independent internal field */
|
|
794
|
+
private taskId;
|
|
795
|
+
/**
|
|
796
|
+
* Constructor
|
|
797
|
+
*
|
|
798
|
+
* @param config Injected services, providers, and optional settings (see GenerateNewsletterConfig)
|
|
799
|
+
* @example
|
|
800
|
+
* const generator = new GenerateNewsletter({
|
|
801
|
+
* contentOptions: {
|
|
802
|
+
* outputLanguage: 'English',
|
|
803
|
+
* expertField: ['AI', 'Cloud'],
|
|
804
|
+
* },
|
|
805
|
+
* dateService, taskService,
|
|
806
|
+
* crawlingProvider,
|
|
807
|
+
* analysisProvider,
|
|
808
|
+
* contentGenerateProvider,
|
|
809
|
+
* options: { llm: { maxRetries: 5 } },
|
|
810
|
+
* });
|
|
811
|
+
*/
|
|
812
|
+
constructor(config: GenerateNewsletterConfig<TaskId>);
|
|
813
|
+
/**
|
|
814
|
+
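* Execute the full newsletter generation pipeline. Resolves to `string | number | null` (presumably the saved newsletter id, or null when no newsletter is issued — TODO confirm against the implementation).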
|
|
815
|
+
*/
|
|
816
|
+
generate(): Promise<string | number | null>;
|
|
817
|
+
/**
|
|
818
|
+
* Run the pipeline while managing the task lifecycle.
|
|
819
|
+
*/
|
|
820
|
+
private executeWithTaskManagement;
|
|
821
|
+
private logNewsletterResult;
|
|
822
|
+
private sendPreviewNewsletterIfConfigured;
|
|
823
|
+
private startTask;
|
|
824
|
+
private endTask;
|
|
825
|
+
}
|
|
826
|
+
|
|
827
|
+
export { DateType, GenerateNewsletter };
|
|
828
|
+
export type { AnalysisProvider, AppLogger, ArticleForGenerateContent, ArticleForUpdateByAnalysis, ContentGenerateProvider, CrawlingProvider, CrawlingTarget, CrawlingTargetGroup, DateService, EmailAddress, EmailAttachment, EmailMessage, EmailService, GenerateNewsletterConfig, GenerateNewsletterOptions, HtmlString, HtmlTemplate, HtmlTemplateMarkers, IsoDateString, LogLevel, LogMessage, MarkdownString, MinimumImportanceScoreRule, Newsletter, ParsedTarget, ParsedTargetDetail, ParsedTargetListItem, RequiredHtmlTemplate, TaskService, UniqueIdentifier, UnscoredArticle, UrlString };
|