@llm-newsletter-kit/core 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,1755 @@
1
+ import { RunnablePassthrough, RunnableSequence } from '@langchain/core/runnables';
2
+ import { pick, omit } from 'es-toolkit';
3
+ import { generateObject } from 'ai';
4
+ import { z } from 'zod';
5
+ import juice from 'juice';
6
+ import DOMPurify from 'dompurify';
7
+ import { JSDOM } from 'jsdom';
8
+ import { marked } from 'marked';
9
+ import { randomUUID } from 'node:crypto';
10
+
11
+ /**
12
+ * Common type aliases.
13
+ *
14
+ * - Provides explicit alias types for date/URL/Markdown/HTML, etc.
15
+ * - All comments are written in English JSDoc style.
16
+ */
17
/**
 * Identifier for the kind of date value being described.
 *
 * Members:
 * - REGISTERED (`"registered"`): a registered date.
 * - DURATION (`"duration"`): a duration or time range.
 *
 * @example
 * ```ts
 * const type: DateType = DateType.REGISTERED;
 * ```
 */
const DateType = {
    REGISTERED: 'registered',
    DURATION: 'duration',
};
37
+
38
/**
 * Executor that provides a standardized start/done/error logging pattern.
 * - Uses the injected logger and taskId to attach common fields to every log.
 * - Pass config.event as a prefix like "crawl.group";
 *   ".start"/".done"/".error" are appended automatically.
 */
class LoggingExecutor {
    logger;
    taskId;
    /**
     * @param logger Object exposing one method per log level used (e.g. `debug`) plus `error`.
     * @param taskId Identifier copied onto every emitted log entry.
     */
    constructor(logger, taskId) {
        this.logger = logger;
        this.taskId = taskId;
    }
    /**
     * Runs `fn`, emitting `<event>.start` before it and `<event>.done` or
     * `<event>.error` after it.
     *
     * @param config `event` (prefix), optional `level` (defaults to 'debug'),
     *   optional `startFields` (logged on every entry) and optional
     *   `doneFields(result)` (extra fields for the done entry).
     * @param fn Task to run; may be sync or async.
     * @returns The value `fn` resolved with.
     * @throws Whatever `fn` throws, after it has been logged.
     */
    async executeWithLogging(config, fn) {
        const level = config.level ?? 'debug';
        const startFields = config.startFields ?? {};
        const startedAt = Date.now();
        this.logger[level]({
            event: `${config.event}.start`,
            level,
            taskId: this.taskId,
            data: startFields,
        });
        let result;
        try {
            result = await fn();
        }
        catch (err) {
            this.logger[level]({
                event: `${config.event}.error`,
                level,
                taskId: this.taskId,
                durationMs: Date.now() - startedAt,
                data: { ...startFields },
            });
            this.logger.error(err);
            throw err;
        }
        const durationMs = Date.now() - startedAt;
        // Computing the extra log fields happens outside the task's try/catch:
        // a faulty doneFields callback must not turn a finished task into a
        // failure (and, for retried chains, trigger a re-run of completed work).
        let doneExtra = {};
        if (config.doneFields) {
            try {
                doneExtra = config.doneFields(result) ?? {};
            }
            catch (fieldsError) {
                this.logger.error(fieldsError);
            }
        }
        this.logger[level]({
            event: `${config.event}.done`,
            level,
            taskId: this.taskId,
            durationMs,
            data: { ...startFields, ...doneExtra },
        });
        return result;
    }
}
92
+
93
/**
 * Wraps a lone string in a one-element array; any non-string value is
 * handed back untouched.
 */
const ensureStringArray = (value) => {
    if (typeof value === 'string') {
        return [value];
    }
    return value;
};
96
+
97
/**
 * Holds the state shared by every LLM-backed query: the model, the expert
 * fields (always stored as an array), logger, task id, options and a bound
 * `executeWithLogging` taken from the supplied logging executor.
 */
class BaseLLMQuery {
    model;
    expertFields;
    logger;
    taskId;
    options;
    executeWithLogging;
    constructor(config) {
        const { model, logger, taskId, options, loggingExecutor } = config;
        this.model = model;
        this.expertFields = ensureStringArray(options.content.expertField);
        this.logger = logger;
        this.taskId = taskId;
        this.options = options;
        // Bound so the method can be called detached from the executor.
        this.executeWithLogging = loggingExecutor.executeWithLogging.bind(loggingExecutor);
    }
}
/**
 * An LLM query that operates on one article, kept in `targetArticle`.
 */
class LLMQuery extends BaseLLMQuery {
    targetArticle;
    constructor(config) {
        super(config);
        const { targetArticle } = config;
        this.targetArticle = targetArticle;
    }
}
120
+
121
/**
 * LLM query that describes the images referenced in an article's Markdown body.
 *
 * Image URLs are extracted from `targetArticle.detailContent`, sent to the
 * model together with the article text, and the model's `imageContext`
 * string is returned.
 */
class AnalyzeImages extends LLMQuery {
    schema = z.object({
        imageContext: z
            .string()
            .describe('A comprehensive description of all information extracted from the images'),
    });
    constructor(config) {
        super(config);
    }
    /**
     * @returns The generated image description, or `null` when the article has
     *   no attached image, no detail content, or no usable image URL.
     */
    async execute() {
        const { hasAttachedImage, detailContent } = this.targetArticle;
        if (!hasAttachedImage || !detailContent) {
            return null;
        }
        // The getter re-scans the article on every access, so read it once.
        const imageMessages = this.imageMessages;
        if (imageMessages.length === 0) {
            return null;
        }
        const { object } = await generateObject({
            model: this.model,
            maxRetries: this.options.llm.maxRetries,
            schema: this.schema,
            system: this.systemPrompt,
            messages: [
                {
                    role: 'user',
                    content: [this.textMessage, ...imageMessages],
                },
            ],
        });
        return object.imageContext;
    }
    /** System prompt describing the image-analysis role and output rules. */
    get systemPrompt() {
        return `# Image Analysis Expert System

## Identity & Expertise
You are a specialized image analysis expert in: ${this.expertFields.join(', ')}

## Core Responsibilities
1. Extract visual information unavailable from text alone
2. Identify industry-specific elements, facilities, and stakeholders
3. Accurately read and transcribe text, charts, and data visualizations
4. Synthesize visual information with article context

## Analysis Framework

### Information Categories to Extract
- Industry-relevant visual elements
- Text and numerical data within images
- Key subjects (people, places, objects, infrastructure)
- Contextual relationships to ${this.expertFields.join(', ')}
- Information gaps filled by visual analysis

### Quality Standards
- Accuracy and specificity in descriptions
- Professional relevance for industry practitioners
- Integration with accompanying text content
- Completeness in covering all visual information

## Output Specifications
- Language: ${this.options.content.outputLanguage}
- Format: Single cohesive explanation (not numbered list)
- Focus: Practical insights for industry professionals
- Integration: Seamlessly merge all extracted information`;
    }
    /**
     * Image URLs found in the article's Markdown (`![alt](url)`), limited to
     * the first five accepted entries.
     */
    get imageUrls() {
        // Markdown image pattern: ![alt text](url) or ![](url)
        const imageRegex = /!\[.*?\]\(([^)]+)\)/g;
        // http, https, protocol-relative, absolute path, relative paths, data URI
        const acceptedPrefixes = ['http://', 'https://', '//', '/', './', '../', 'data:image/'];
        const content = String(this.targetArticle.detailContent ?? '');
        const urls = [];
        for (const match of content.matchAll(imageRegex)) {
            const inner = match[1].trim();
            // The parentheses may also hold an optional title
            // (`![alt](url "title")`) or an angle-bracketed destination
            // (`![alt](<url>)`); only the destination is the image URL.
            const url = inner.startsWith('<') && inner.includes('>')
                ? inner.slice(1, inner.indexOf('>')).trim()
                : inner.split(/\s+/)[0];
            if (url && acceptedPrefixes.some((prefix) => url.startsWith(prefix))) {
                urls.push(url);
            }
        }
        // Process max 5 images only (to save cost)
        return urls.slice(0, 5);
    }
    /** One `{ type: 'image', image: url }` message part per extracted URL. */
    get imageMessages() {
        return this.imageUrls.map((url) => ({
            type: 'image',
            image: url,
        }));
    }
    /** Text message part carrying the article title, content and task instructions. */
    get textMessage() {
        return {
            type: 'text',
            text: `## Analysis Task

**Document Context:**
- Title: ${this.targetArticle.title}
- Content: ${this.targetArticle.detailContent}

## Instructions

Analyze the provided images and synthesize your findings into a single comprehensive explanation that:

1. **Identifies Visual Content**: Extract industry-specific elements, infrastructure, and stakeholders relevant to ${this.expertFields.join(', ')}

2. **Captures Text & Data**: Accurately read and include all visible text, numerical data, charts, and graphs

3. **Describes Visual Elements**: Detail important subjects (people, places, objects) and their significance

4. **Establishes Connections**: Link visual information to ${this.expertFields.join(', ')} context and article content

5. **Provides Context**: Explain what industry professionals should understand from these images

6. **Complements Text**: Add visual insights not covered in the article text

**Format**: Present all findings as one flowing narrative without enumeration.`,
        };
    }
}
244
+
245
/**
 * LLM query that asks the model for three tags (`tag1`..`tag3`) for the
 * target article, given the list of tags that already exist.
 */
class ClassifyTags extends LLMQuery {
    schema = z.object({
        tag1: z.string(),
        tag2: z.string(),
        tag3: z.string(),
    });
    existTags = [];
    constructor(config) {
        super(config);
    }
    /**
     * Stores the supplied existing tags on the instance (the user prompt
     * reads them) and returns the object generated by the model.
     */
    async execute({ existTags }) {
        this.existTags = existTags;
        const generation = await generateObject({
            model: this.model,
            maxRetries: this.options.llm.maxRetries,
            schema: this.schema,
            system: this.systemPrompt,
            prompt: this.userPrompt,
        });
        return generation.object;
    }
    /** System prompt: classification rules, naming standards and output language. */
    get systemPrompt() {
        const fieldList = this.expertFields.join(', ');
        const language = this.options.content.outputLanguage;
        const quotedFields = this.expertFields.map((field) => `"${field}"`).join(', ');
        return `You are an AI specializing in analyzing and categorizing articles for professionals in ${fieldList}.

## Core Responsibility
Analyze article titles and content to generate 3 optimal, detailed classifications by evaluating compatibility with existing tags and determining when new tags are justified.

## Output Language
All classifications must be written in ${language}.

## Classification Rules
1. **Reuse Threshold**: Use existing classifications if compatibility is 80% or higher
2. **New Tag Criteria**: Create new classifications only when:
- Best existing match scores below 80% compatibility
- New tag demonstrates versatility across 10+ similar articles
3. **Naming Standards**:
- Length: 3-15 characters
- Style: Clear, intuitive ${language} terms
- Balance industry precision with general reader comprehension
4. **Scope Exclusion**: Avoid broad, general tags like ${quotedFields} (too generic for this expert audience)

## Decision Framework
Prioritize in order:
- Content accuracy and relevance
- Classification system consistency
- User intuitiveness and searchability
- Long-term scalability and maintainability`;
    }
    /** User prompt: the article itself plus the existing tags serialized as JSON. */
    get userPrompt() {
        const { title, detailContent } = this.targetArticle;
        const serializedTags = JSON.stringify(this.existTags, null, 2);
        return `**Task**: Classify this article with 3 optimal detailed tags.

**Article Information**
- Title: ${title}
- Content: ${detailContent}

**Available Existing Tags**
\`\`\`
${serializedTags}
\`\`\`

**Analysis Steps**
1. Extract core concepts, industry sectors, and specific topics from the article
2. Score each existing tag for compatibility (0-100%)
3. Identify which existing tags meet the 80%+ threshold
4. For tags below 80%, determine if a new tag would better serve the content
5. Validate new tag names for clarity, length, and future applicability

**Output Requirements**
Return exactly 3 classifications following the system rules. Each classification should:
- Match the article's core content
- Fit logically within the overall classification system
- Be immediately understandable to your target audience
- Support future similar articles`;
    }
}
320
+
321
/**
 * LLM query that scores the importance of the target article on a 1-10 scale.
 *
 * `minimumImportanceScoreRules` may define a higher floor (`minScore`) for
 * articles whose `targetUrl` matches a rule; the floor is both communicated to
 * the model through the prompts and enforced on the returned score.
 */
class DetermineArticleImportance extends LLMQuery {
    minimumImportanceScoreRules;
    schema = z.object({
        importanceScore: z
            .number()
            .min(1)
            .max(10)
            .describe('Article importance score (1-10, 10 is most important)'),
    });
    dateService;
    constructor(config) {
        super(config);
        this.minimumImportanceScoreRules = config.minimumImportanceScoreRules ?? [];
        this.dateService = config.dateService;
    }
    /**
     * @returns The model's importance score, raised to `minPoint` when the
     *   model answered below the configured minimum.
     */
    async execute() {
        const { object } = await generateObject({
            model: this.model,
            maxRetries: this.options.llm.maxRetries,
            schema: this.schema,
            system: this.systemPrompt,
            prompt: this.userPrompt,
        });
        // The schema only guarantees 1-10; the per-URL minimum is otherwise
        // just a request in the prompt, so enforce it here.
        return Math.max(object.importanceScore, this.minPoint);
    }
    /** Minimum score for this article: the matching rule's `minScore`, else 1. */
    get minPoint() {
        const targetRule = this.minimumImportanceScoreRules.find(({ targetUrl }) => targetUrl === this.targetArticle.targetUrl);
        return targetRule?.minScore ?? 1;
    }
    /** True when a rule raises the minimum above the default of 1. */
    get hasHigherMinimumScore() {
        return this.minPoint > 1;
    }
    /** System prompt: evaluator role, score criteria and evaluation criteria. */
    get systemPrompt() {
        return `You are an expert in importance evaluation in the field of ${this.expertFields.join(', ')}.

Role:
- Analyze titles and content in depth to objectively evaluate the importance of news and announcements.
- Extract the most important insights for industry professionals. Main readers are practitioners from research institutions, local/public officials, graduate students, and field experts in ${this.expertFields.join(', ')}.
- Score based on urgency, impact, and scarcity of information.

Importance Score Criteria (${this.minPoint}-10):
10: Information with immediate and significant impact on entire industry (e.g., major legislation passed, large budget allocation, critical discoveries/events that transform the field)
8-9: Information with important impact on many stakeholders (e.g., major policy changes, major findings/achievements released, large project announcements)
7-8: Very important academic/professional achievements or information in specific fields (e.g., journal publication/release, major research results announcement, professional report publication, important academic events, research database construction/release, designation of important field resources/assets, medium-scale bid information)
5-6: General important information limited to specific fields or regions (e.g., small project permits, general event notices, small-scale bids)
4-5: General industry news or small/medium-scale event information
2-3: Simple information sharing or repetitive daily news
${this.hasHigherMinimumScore ? '' : `1: Information without current significance - Expired support programs, past events, invalid bid notices or recruitment information, notices that have lost practical value, or administrative/simple notices like "membership fee status", "meeting minutes", "internal schedule notices"`}

Evaluation Criteria:
- Academic Value: Journal publications, research reports, academic seminars/symposiums, research output presentations etc. minimum 7 points (knowledge base expansion and long-term reference value)
- Practical Impact: Information requiring immediate response like policies, regulations, bids, recruitment
- Impact Range: How many stakeholders are affected
- Scarcity: How rare and exclusive the information is
- Temporal Context: Practical value at current time considering deadlines, event schedules${this.hasHigherMinimumScore ? '' : ' (However, recent academic achievements maintain high scores)'}

Important Notes:
- Evaluate considering characteristics and context of ${this.expertFields.join(', ')} fields.
- Be sensitive to core keywords, events, policies considered important in the field.`;
    }
    /** User prompt: current date, article title/content/tags and optional image analysis. */
    get userPrompt() {
        return `Please rate the importance of this article from ${this.minPoint} to 10.

**Current Date:** ${this.dateService.getCurrentISODateString()}

**Title:** ${this.targetArticle.title || 'No Title'}

**Content:** ${this.targetArticle.detailContent || 'No Content'}

**Tags:** ${this.targetArticle.tag1 || ''}, ${this.targetArticle.tag2 || ''}, ${this.targetArticle.tag3 || ''}
${this.targetArticle.imageContextByLlm
            ? `
**Image Analysis:** ${this.targetArticle.imageContextByLlm}`
            : ''}`;
    }
}
397
+
398
/**
 * Common state for chains: logger, task id, provider, options and a bound
 * `executeWithLogging` taken from the supplied logging executor.
 */
class BaseChain {
    logger;
    taskId;
    provider;
    options;
    executeWithLogging;
    constructor(config) {
        const { logger, taskId, provider, options, loggingExecutor } = config;
        this.logger = logger;
        this.taskId = taskId;
        this.provider = provider;
        this.options = options;
        // Bound so the method can be called detached from the executor.
        this.executeWithLogging = loggingExecutor.executeWithLogging.bind(loggingExecutor);
    }
}
/** Chain variant; adds nothing to BaseChain at runtime. */
class Chain extends BaseChain {
}
/** Chain variant; adds nothing to BaseChain at runtime. */
class PrivateChain extends BaseChain {
}
416
+
417
/**
 * Chain that enriches `provider.unscoredArticles` with generated tags, image
 * context and an importance score.
 *
 * Pipeline (see `chain`): classify tags + extract image context -> merge both
 * into the articles -> determine importance. `generateInsights` runs the
 * pipeline and re-runs it (up to 5 times) while any article is incomplete.
 */
class ArticleInsightsChain extends PrivateChain {
    dateService;
    constructor(config) {
        super(config);
        this.dateService = config.dateService;
    }
    /** Builds an id -> item map; the first item wins when an id repeats. */
    static #indexById(items) {
        const byId = new Map();
        for (const item of items) {
            if (!byId.has(item.id)) {
                byId.set(item.id, item);
            }
        }
        return byId;
    }
    /* istanbul ignore next - pipeline arrow functions are exercised via higher-level tests */
    get chain() {
        return RunnablePassthrough.assign({
            generatedTags: () => this.classifyArticles(),
            generatedImageContextList: () => this.extractImageContext(),
        })
            .pipe({
            mergedArticles: ({ generatedTags, generatedImageContextList }) => this.mergeTagsAndImageContext(generatedTags, generatedImageContextList),
        })
            .pipe({
            determinedArticles: ({ mergedArticles }) => this.determineImportance(mergedArticles),
        });
    }
    /**
     * Runs the pipeline and retries while articles are missing a tag or an
     * importance score.
     *
     * @returns The articles with tags, image context and importance score
     *   (possibly still incomplete when the retry limit is reached).
     */
    async generateInsights() {
        const maxIterations = 5; // Maximum number of iterations to prevent infinite loop
        const isIncomplete = (article) => !article.tag1 ||
            !article.tag2 ||
            !article.tag3 ||
            !article.importanceScore;
        let { determinedArticles: current } = await this.chain.invoke({});
        for (let iteration = 0; iteration < maxIterations; iteration++) {
            const incompleteArticles = current.filter(isIncomplete);
            if (incompleteArticles.length === 0) {
                this.logger.debug({
                    event: 'insights.complete',
                    taskId: this.taskId,
                });
                return current; // Exit when all posts have been fully processed
            }
            this.logger.debug({
                event: 'insights.incomplete.restart',
                taskId: this.taskId,
                data: {
                    incompleteArticleCount: incompleteArticles.length,
                    iterationCount: iteration,
                },
            });
            const { determinedArticles: reprocessedArticles } = await this.chain.invoke({});
            const incompleteIds = new Set(incompleteArticles.map(({ id }) => id));
            const reprocessedById = ArticleInsightsChain.#indexById(reprocessedArticles);
            // Only incomplete articles take the re-run's output. Articles that
            // were already complete keep their values, so a failure during a
            // retry cannot erase tags produced by an earlier pass.
            current = current.map((article) => incompleteIds.has(article.id)
                ? reprocessedById.get(article.id) || article
                : article);
            this.logger.debug({
                event: 'insights.incomplete.restart.done',
                taskId: this.taskId,
                data: {
                    iterationCount: iteration,
                },
            });
        }
        this.logger.debug({
            event: 'insights.warning.maxIterationsReached',
            taskId: this.taskId,
            data: {
                iterationCount: maxIterations,
                maxIterationCount: maxIterations,
            },
        });
        return current;
    }
    /**
     * Generates tags for every article that does not already have all three.
     * Newly generated tags are appended to `provider.tags` so later articles
     * can reuse them. A failure for one article is logged and skipped.
     *
     * @returns `{ id, tag1, tag2, tag3 }` for each successfully classified article.
     */
    async classifyArticles() {
        return this.executeWithLogging({
            event: 'insights.articles.classify',
            level: 'debug',
            doneFields: (articles) => ({ count: articles.length }),
        }, async () => {
            const pushTag = (tag) => {
                if (tag && !this.provider.tags.includes(tag)) {
                    this.provider.tags.push(tag);
                }
            };
            const articlesWithTags = [];
            for (const [i, article] of this.provider.unscoredArticles.entries()) {
                const existTags = this.provider.tags;
                if (article.tag1 && article.tag2 && article.tag3) {
                    continue;
                }
                this.logger.debug({
                    event: 'insights.articles.classify.start',
                    taskId: this.taskId,
                    data: {
                        count: `${i + 1} / ${this.provider.unscoredArticles.length}`,
                        articleId: article.id,
                        title: article.title?.substring(0, 50) + '...',
                        existingTags: existTags.length,
                    },
                });
                try {
                    const classifyTags = new ClassifyTags(this.getLlmQueryConfig(this.provider.classifyTagOptions.model, article));
                    const generatedTags = await classifyTags.execute({ existTags });
                    pushTag(generatedTags.tag1);
                    pushTag(generatedTags.tag2);
                    pushTag(generatedTags.tag3);
                    this.logger.debug({
                        event: 'insights.articles.classify.end',
                        taskId: this.taskId,
                        data: {
                            count: `${i + 1} / ${this.provider.unscoredArticles.length}`,
                            articleId: article.id,
                            result: `tag1: ${generatedTags.tag1}, tag2: ${generatedTags.tag2}, tag3: ${generatedTags.tag3}`,
                        },
                    });
                    articlesWithTags.push({
                        id: article.id,
                        ...generatedTags,
                    });
                }
                catch (error) {
                    this.logger.debug({
                        event: 'insights.articles.classify.end.error',
                        taskId: this.taskId,
                        data: {
                            count: `${i + 1} / ${this.provider.unscoredArticles.length}`,
                            articleId: article.id,
                            error: error instanceof Error ? error.message : String(error),
                            title: article.title?.substring(0, 50) + '...',
                        },
                    });
                    // NOTE: Despite the error, it does not significantly hinder newsletter generation, so we proceed. Tagging helps produce a better newsletter, but it is not strictly required.
                }
            }
            return articlesWithTags;
        });
    }
    /**
     * Runs image analysis for articles that have an attached image and no
     * image context yet. A failure for one article is logged and skipped.
     *
     * @returns `{ id, imageContextByLlm }` for each article that produced a description.
     */
    async extractImageContext() {
        return this.executeWithLogging({
            event: 'insights.images.extract',
            level: 'debug',
            doneFields: (articles) => ({ articles }),
        }, async () => {
            const articlesWithImageContext = [];
            for (const [i, article] of this.provider.unscoredArticles.entries()) {
                if (!article.hasAttachedImage) {
                    this.logger.debug({
                        event: 'insights.images.extract.pass.noimage',
                        taskId: this.taskId,
                        data: {
                            articleId: article.id,
                        },
                    });
                    continue;
                }
                if (article.imageContextByLlm) {
                    this.logger.debug({
                        event: 'insights.images.extract.pass.exist',
                        taskId: this.taskId,
                        data: {
                            articleId: article.id,
                        },
                    });
                    continue;
                }
                this.logger.debug({
                    event: 'insights.images.extract.start',
                    taskId: this.taskId,
                    data: {
                        count: `${i + 1} / ${this.provider.unscoredArticles.length}`,
                        articleId: article.id,
                    },
                });
                try {
                    const analyzeImages = new AnalyzeImages(this.getLlmQueryConfig(this.provider.analyzeImagesOptions.model, article));
                    const imageContextByLlm = await analyzeImages.execute();
                    if (imageContextByLlm) {
                        articlesWithImageContext.push({
                            id: article.id,
                            imageContextByLlm,
                        });
                        this.logger.debug({
                            event: 'insights.images.extract.end',
                            taskId: this.taskId,
                            data: {
                                count: `${i + 1} / ${this.provider.unscoredArticles.length}`,
                                articleId: article.id,
                            },
                        });
                    }
                    else {
                        this.logger.debug({
                            event: 'insights.images.extract.end.noimage',
                            taskId: this.taskId,
                            data: {
                                count: `${i + 1} / ${this.provider.unscoredArticles.length}`,
                                articleId: article.id,
                            },
                        });
                    }
                }
                catch (error) {
                    this.logger.debug({
                        event: 'insights.images.extract.end.error',
                        taskId: this.taskId,
                        data: {
                            count: `${i + 1} / ${this.provider.unscoredArticles.length}`,
                            articleId: article.id,
                            error: error instanceof Error ? error.message : String(error),
                        },
                    });
                    // NOTE: Image analysis failure should not interrupt the overall process
                }
            }
            return articlesWithImageContext;
        });
    }
    /**
     * Copies generated tags and image context onto the matching articles
     * (matched by `id`). Articles are not mutated; changed ones are replaced
     * by shallow copies.
     *
     * @returns One entry per item of `provider.unscoredArticles`, in the same order.
     */
    async mergeTagsAndImageContext(generatedTags, generatedImageContextList) {
        return this.executeWithLogging({
            event: 'insights.context.merge',
            level: 'debug',
            startFields: {
                generatedTags,
                generatedImageContextList,
            },
            doneFields: (articles) => ({ count: articles.length }),
        }, async () => {
            const tagsById = ArticleInsightsChain.#indexById(generatedTags);
            const imageContextById = ArticleInsightsChain.#indexById(generatedImageContextList);
            return this.provider.unscoredArticles.map((article) => {
                const articleWithTags = tagsById.get(article.id);
                const articleWithImageContext = imageContextById.get(article.id);
                let merged = article;
                if (articleWithTags) {
                    merged = {
                        ...merged,
                        tag1: articleWithTags.tag1,
                        tag2: articleWithTags.tag2,
                        tag3: articleWithTags.tag3,
                    };
                }
                if (articleWithImageContext) {
                    merged = {
                        ...merged,
                        imageContextByLlm: articleWithImageContext.imageContextByLlm,
                    };
                }
                return merged;
            });
        });
    }
    /**
     * Scores every merged article. When scoring fails for an article, it is
     * still returned with the fallback `importanceScore` of 1.
     *
     * @returns The merged articles, each with an `importanceScore`.
     */
    async determineImportance(mergedArticles) {
        return this.executeWithLogging({
            event: 'insights.importance.determine',
            level: 'debug',
            startFields: {
                mergedArticles,
            },
            doneFields: (articles) => ({ articles }),
        }, async () => {
            const determinedArticles = [];
            for (const [i, article] of mergedArticles.entries()) {
                this.logger.debug({
                    event: 'insights.importance.determine.start',
                    taskId: this.taskId,
                    data: {
                        count: `${i + 1} / ${mergedArticles.length}`,
                        articleId: article.id,
                        title: article.title?.substring(0, 50) + '...',
                    },
                });
                try {
                    const determineArticleImportance = new DetermineArticleImportance({
                        ...this.getLlmQueryConfig(this.provider.determineScoreOptions.model, article),
                        minimumImportanceScoreRules: this.provider.determineScoreOptions.minimumImportanceScoreRules,
                        dateService: this.dateService,
                    });
                    const importanceScore = await determineArticleImportance.execute();
                    const processedArticle = {
                        ...article,
                        importanceScore,
                    };
                    // Push result first to avoid losing it if logging fails
                    determinedArticles.push(processedArticle);
                    // Best-effort logging that won't affect the result
                    try {
                        this.logger.debug({
                            event: 'insights.importance.determine.end',
                            taskId: this.taskId,
                            data: {
                                count: `${i + 1} / ${mergedArticles.length}`,
                                articleId: article.id,
                                importanceScore: importanceScore,
                            },
                        });
                    }
                    catch {
                        // ignore logging errors
                    }
                }
                catch (error) {
                    // Log error but ensure we still return a fallback score
                    try {
                        this.logger.debug({
                            event: 'insights.importance.determine.end.error',
                            taskId: this.taskId,
                            data: {
                                count: `${i + 1} / ${mergedArticles.length}`,
                                articleId: article.id,
                                error: error instanceof Error ? error.message : String(error),
                            },
                        });
                    }
                    catch {
                        // ignore logging errors
                    }
                    // NOTE: Importance analysis failure should not stop the pipeline; use a fallback score instead
                    determinedArticles.push({
                        ...article,
                        importanceScore: 1, // Set to minimum importance as a sane default
                    });
                }
            }
            return determinedArticles;
        });
    }
    /**
     * Builds the constructor config shared by the LLM query classes: the given
     * model and article plus this chain's logger, task id, the `content` and
     * `llm` option groups, and a fresh LoggingExecutor.
     */
    getLlmQueryConfig(model, targetArticle) {
        return {
            model,
            logger: this.logger,
            taskId: this.taskId,
            targetArticle: targetArticle,
            options: pick(this.options, ['content', 'llm']),
            loggingExecutor: new LoggingExecutor(this.logger, this.taskId),
        };
    }
}
750
+
751
/**
 * Chain that fetches unscored articles and existing tags from the provider,
 * runs ArticleInsightsChain over them, and writes each resulting article
 * back through `provider.update`.
 */
class AnalysisChain extends Chain {
    dateService;
    constructor(config) {
        super(config);
        const { dateService } = config;
        this.dateService = dateService;
    }
    /** Fetch -> analyze -> persist pipeline, retried as a whole per `options.chain.stopAfterAttempt`. */
    get chain() {
        const loadInputs = RunnablePassthrough.assign({
            unscoredArticles: () => this.fetchUnscoredArticles(),
            tags: () => this.fetchTags(),
        });
        const analyze = RunnablePassthrough.assign({
            determinedArticles: ({ unscoredArticles, tags }) => this.analyzeArticles(unscoredArticles, tags),
        });
        const persist = RunnablePassthrough.assign({
            processedCount: ({ determinedArticles }) => this.updateAnalysisContext(determinedArticles),
        });
        return loadInputs
            .pipe(analyze)
            .pipe(persist)
            .withRetry({ stopAfterAttempt: this.options.chain.stopAfterAttempt });
    }
    /** Returns the provider's unscored articles; the done log records their count. */
    async fetchUnscoredArticles() {
        const logConfig = {
            event: 'analysis.articles.fetch',
            level: 'debug',
            doneFields: (fetched) => ({ count: fetched.length }),
        };
        return this.executeWithLogging(logConfig, async () => this.provider.fetchUnscoredArticles());
    }
    /** Returns the provider's existing tags; the done log records their count. */
    async fetchTags() {
        const logConfig = {
            event: 'analysis.tags.fetch',
            level: 'debug',
            doneFields: (fetched) => ({ count: fetched.length }),
        };
        return this.executeWithLogging(logConfig, async () => this.provider.fetchTags());
    }
    /** Runs an ArticleInsightsChain over the given articles and tags and returns its result. */
    async analyzeArticles(unscoredArticles, tags) {
        const logConfig = {
            event: 'analysis.articles.analyze',
            level: 'debug',
            startFields: {
                unscoredArticles,
                tags,
            },
            doneFields: (analyzed) => ({ count: analyzed.length }),
        };
        return this.executeWithLogging(logConfig, async () => {
            const { classifyTagOptions, analyzeImagesOptions, determineScoreOptions } = this.provider;
            const insights = new ArticleInsightsChain({
                logger: this.logger,
                taskId: this.taskId,
                provider: {
                    unscoredArticles,
                    tags,
                    classifyTagOptions,
                    analyzeImagesOptions,
                    determineScoreOptions,
                },
                options: this.options,
                loggingExecutor: new LoggingExecutor(this.logger, this.taskId),
                dateService: this.dateService,
            });
            return insights.generateInsights();
        });
    }
    /** Passes each article to `provider.update`, one after another, and returns how many were given. */
    async updateAnalysisContext(determinedArticles) {
        const logConfig = {
            event: 'analysis.articles.update',
            level: 'debug',
            startFields: {
                determinedArticles,
            },
            doneFields: (count) => ({ count }),
        };
        return this.executeWithLogging(logConfig, async () => {
            // Sequential on purpose: each update is awaited before the next starts.
            for (const article of determinedArticles) {
                await this.provider.update(article);
            }
            return determinedArticles.length;
        });
    }
}
832
+
833
/**
 * Converts Markdown to HTML: parses with marked, sanitizes with DOMPurify on
 * a fresh JSDOM window, then applies the anchor, <del> and bold fix-ups in
 * that order.
 */
function markdownToHtml(markdown) {
    const rawHtml = marked.parse(markdown);
    const { window: domWindow } = new JSDOM('');
    const sanitizedHtml = DOMPurify(domWindow).sanitize(rawHtml);
    const postProcessors = [
        addTargetBlankToAnchors,
        replaceDelTagsWithTilde,
        correctUnconvertedBoldSyntax,
    ];
    return postProcessors.reduce((html, step) => step(html), sanitizedHtml);
}
842
/**
 * Adds `target="_blank"` to every opening `<a>` tag that has no `target`
 * attribute yet.
 *
 * @param {string} htmlString HTML to process.
 * @returns {string} HTML with the attribute appended to anchors lacking it;
 *   anchors that already declare a target are left exactly as they were.
 */
function addTargetBlankToAnchors(htmlString) {
    // Opening <a> tags with optional attributes. `\s+` after "<a" keeps tags
    // such as <abbr> or <article> from matching.
    const anchorOpenTag = /<a(\s+[^>]*?)?>/gi;
    const quotedValue = /"[^"]*"|'[^']*'/g;
    const targetAttribute = /(?:^|\s)target\s*=/i;
    return htmlString.replace(anchorOpenTag, (match, attributes) => {
        // Handle undefined attributes as empty string
        const currentAttributes = attributes || '';
        // Blank out attribute values before looking for `target`, so text such
        // as href="...?target=x" is not mistaken for the attribute itself. The
        // leading-whitespace requirement likewise excludes `data-target=`.
        const attributeNamesOnly = currentAttributes.replace(quotedValue, '""');
        if (targetAttribute.test(attributeNamesOnly)) {
            return match;
        }
        return `<a${currentAttributes} target="_blank">`;
    });
}
861
/**
 * Swaps every attribute-less `<del>` and `</del>` tag (any letter case) for a
 * single tilde.
 */
function replaceDelTagsWithTilde(htmlString) {
    const delTag = /<\/?del>/gi;
    return htmlString.replace(delTag, '~');
}
865
/**
 * Turns leftover Markdown bold markers into <b> elements: each "**", followed
 * by one or more non-asterisk characters and a closing "**", becomes
 * "<b>…</b>".
 */
function correctUnconvertedBoldSyntax(htmlString) {
    const leftoverBold = /\*\*([^*]+)\*\*/g;
    return htmlString.replace(leftoverBold, (_whole, inner) => `<b>${inner}</b>`);
}
870
+
871
/**
 * LLM query that writes the newsletter issue (title + markdown body) from the
 * selected articles.
 *
 * The model is asked for a structured object that also carries three
 * self-reported checks (output language, copyright, fact accuracy). A result
 * is accepted only when all three are true; otherwise generation is repeated.
 * Repetition is bounded by `maxValidationAttempts` so a model that keeps
 * failing its own checks cannot trigger an endless series of LLM calls.
 */
let GenerateNewsletter$1 = class GenerateNewsletter extends BaseLLMQuery {
    maxOutputTokens;
    temperature;
    topP;
    topK;
    presencePenalty;
    frequencyPenalty;
    targetArticles;
    dateService;
    subscribePageUrl;
    newsletterBrandName;
    /** Upper bound on generate-and-validate rounds performed by `execute()`. */
    maxValidationAttempts;
    schema = z.object({
        title: z
            .string()
            .max(100)
            .min(20)
            .describe('Title of the newsletter email'),
        content: z.string().describe('Email content in markdown format'),
        isWrittenInOutputLanguage: z
            .boolean()
            .describe(`Whether the content is written in ${this.options.content.outputLanguage}`),
        copyrightVerified: z
            .boolean()
            .describe('Verification status of copyright compliance (true: verified, false: potential violation)'),
        factAccuracy: z
            .boolean()
            .describe('Verification of fact-based content from provided data (true: facts only, false: contains unsupported content)'),
    });
    /**
     * @param config Query configuration. Besides what the base class consumes it
     * reads the sampling parameters, `targetArticles`, `dateService`,
     * `subscribePageUrl`, `newsletterBrandName` and the optional
     * `maxValidationAttempts` (defaults to 5, minimum 1).
     */
    constructor(config) {
        super(config);
        this.maxOutputTokens = config.maxOutputTokens;
        this.temperature = config.temperature ?? 0.3;
        this.topP = config.topP;
        this.topK = config.topK;
        this.presencePenalty = config.presencePenalty;
        this.frequencyPenalty = config.frequencyPenalty;
        this.targetArticles = config.targetArticles;
        this.dateService = config.dateService;
        this.subscribePageUrl = config.subscribePageUrl;
        this.newsletterBrandName = config.newsletterBrandName;
        this.maxValidationAttempts = Math.max(1, config.maxValidationAttempts ?? 5);
    }
    /**
     * Generates the newsletter and returns only its `title` and `content`.
     *
     * @returns An object with the `title` and `content` of the accepted result.
     * @throws Error when no attempt passes all three self-reported checks
     * within `maxValidationAttempts` rounds.
     */
    async execute() {
        for (let attempt = 1; attempt <= this.maxValidationAttempts; attempt++) {
            const { object } = await generateObject({
                model: this.model,
                maxRetries: this.options.llm.maxRetries,
                maxOutputTokens: this.maxOutputTokens,
                temperature: this.temperature,
                topP: this.topP,
                topK: this.topK,
                presencePenalty: this.presencePenalty,
                frequencyPenalty: this.frequencyPenalty,
                schema: this.schema,
                system: this.systemPrompt,
                prompt: this.userPrompt,
            });
            const passedAllChecks = object.isWrittenInOutputLanguage &&
                object.copyrightVerified &&
                object.factAccuracy;
            if (passedAllChecks) {
                return pick(object, ['title', 'content']);
            }
            // Otherwise fall through and regenerate, up to the attempt limit.
        }
        throw new Error(`Newsletter generation failed self-validation after ${this.maxValidationAttempts} attempts`);
    }
    /** System prompt: role, anti-hallucination rules and the required output format. */
    get systemPrompt() {
        return `You are a newsletter production expert for "${this.newsletterBrandName}" who analyzes and delivers trends in the fields of ${this.expertFields.join(', ')}. Your goal is to provide in-depth analysis that helps industry professionals easily understand complex information and make informed decisions.

Important rule for displaying date ranges: When displaying date ranges, you must use a hyphen (-) instead of a tilde (~). For example, use 'June 1-2, 2025' instead of 'June 1~2, 2025'. The tilde (~) can be rendered as strikethrough in markdown.

**Key Principles for Preventing Hallucination:**
1. **Fact-Based Writing**: Use only content explicitly stated in the provided sources, do not expand through inference or speculation.
2. **Accurate Citation**: Use expressions directly from the sources without arbitrarily interpreting or elaborating on meanings.
3. **Conservative Approach**: Do not mention uncertain or ambiguous content, or express it very cautiously.
4. **Verifiable Information**: All information must be directly verifiable from the provided sources.
5. **No Speculation**: Do not use speculative expressions like "appears to be" or "is expected to".
6. **No Fictional Standards/Policies**: Do not mention non-existent standards/policies or systems incorrectly reported as planned for future implementation.

Roles:
- Friendly Guide: Deliver information like a trusted colleague rather than a rigid expert. Use appropriate emoticons in titles and section headings to improve readability.
- Information Integrator: Group similar topics or related news to show broader context and trends. Focus on connections between news items rather than individual stories, and explain patterns based on data.
- Credibility Builder: All information must be provided with sources. Whenever specific content or titles are mentioned in the body, links must be provided in [original title](URL) format. Understand that source citation is not just formal but a key element in enhancing newsletter credibility and accessibility.
- Fact Checker: Use only facts from provided source materials. Do not make unsubstantiated claims or speculate beyond the materials.

**Important Prohibitions:**
- Do not bundle or omit structured list items (permits/reports/notices etc.) with "... and n more" etc. (tables must list all items in individual rows).
- Do not describe policies or plans of governments/organizations/companies not explicitly mentioned in sources as facts.
- Do not mention unconfirmed future plans or non-existent standards/policies.
- Do not add details not present in source materials.

**Content Organization Principles:**
- Use only accurate content from provided sources
- No additional details or specific interpretations beyond source materials
- All information must be verifiable and traceable
- Focus on clear facts rather than inference or speculation
- Exclude uncertain content and include only confirmed information
- For tables, include as much identifying information as possible, but mark "—" if not in source
- No arbitrary estimation of domain-specific procedures or schedules (state only confirmed facts)

Importance Score Criteria (1-10 points, expressed as stars):
- ★★★★★ (9-10 points): [Very Important] Laws/regulations that can change industry landscape, large budgets/investments, groundbreaking research/technology announcements that all professionals must know and prepare immediate responses for. These require immediate action and direct changes to business strategy.
- ★★★★☆ (7-8 points): [Important] Major policy changes, large projects/programs, important research/product announcements affecting specific fields or multiple organizations that should be referenced for key decisions. These need action soon and affect mid-term planning.
- ★★★☆☆ (5-6 points): [Reference] Medium-scale projects, services, approvals, major events/campaigns affecting specific regions or organizations. These are changes that professionals in the field should know about.
- ★★☆☆☆ (3-4 points): General industry trends, small events, routine permits/reports that are good to know. No direct action needed but helpful for trend awareness.
- ★☆☆☆☆ (1-2 points): Simple information sharing or repetitive news. Just for reference.

Copyright Protection & Fact-Checking Principles:
- Extract only factual information from sources, completely exclude creative expressions
- When constructing new sentences from extracted facts, do not follow source structure
- Review for remaining source expressions after writing and modify to dry style
- Do not present content not specified in provided materials as fact
- Analysis and insights must be data-based; avoid baseless predictions or claims
- If information is uncertain or requires speculation, clearly use phrases like "is estimated" or "may be possible"

Output Format & Requirements:
1. Language: ${this.options.content.outputLanguage}

2. Start: Specify date (${this.dateService.getDisplayDateString()}) and begin with neutral, objective greeting. Briefly introduce key factual information to be covered in today's newsletter.

3. Overall Briefing: Before the main listing, create a briefing section conveying objective facts about today's news in these aspects:
- Key Trends: Explain major patterns or trends found in this news based on data. Ex: 'Over 00% of today's news relates to 00'.
- Immediate Impact: Emphasize most important changes or decisions affecting industry immediately, specifically mentioning which fields will be most impacted.

4. Category Classification & Content Organization:
- Group news by logical categories based on related tags and content (e.g., Policy/Regulation, Budget/Support, Research/Development, Products/Services, Operations/Process, Recruitment/Events) rather than just listing by importance.
- Use appropriate emoticons for each category for visual distinction.
- Sort by importance within categories, making high-importance items more prominent.
- Add short paragraph at category start summarizing overall trends or changes in that area, specifying important points and areas to focus on.
- Group similar news together for joint analysis when multiple related items exist.
- When content is essentially identical (e.g., same job posting, event notice, announcement) from different sources, integrate around most detailed and accurate information without duplication.
- Use tables when helpful to show commonalities and differences between multiple items at a glance.
- Always provide links in [original title](URL) format whenever article titles or content are mentioned. Do not write as general text like "View" or "Article" or numbered references like [Post3](URL).

5. Detailed Content Writing Guidelines (Importance-Based Length Control):
**ABSOLUTE RULE: The writing length limits below are MAXIMUM constraints. DO NOT EXCEED these limits under any circumstances.**

- **Tier 1 (9-10 points) - Full Detail Allowed:**
- Key Facts: 1-2 sentences in **bold** with source link [original title](URL).
- Related Targets & Scope: Bullet points for different target groups.
- Important Dates & Procedures: Deadlines, methods, required documents by step.
- Related Facts: Budget/scale/participants/scope as factual data.
- Use tables when comparing multiple items.

- **Tier 2 (6-8 points) - ABSOLUTE MAXIMUM 3 SENTENCES:**
- Format: ONE sentence with **bold** key fact + [original title](URL) link. OPTIONALLY add ONE more sentence with critical detail (deadline/budget/target). NEVER EXCEED 3 SENTENCES TOTAL.
- DO NOT write bullet points, DO NOT write multiple paragraphs, DO NOT add subsections.

- **Tier 3 (1-5 points) - ABSOLUTE MAXIMUM 1 SENTENCE:**
- Format: ONE single sentence with core fact + [original title](URL) link. PERIOD. NO ADDITIONAL SENTENCES.
- Multiple low-priority items can be grouped into a single bullet list.

**VIOLATION WARNING: If you write more than the maximum sentences allowed for Tier 2 or Tier 3, the output will be rejected and you must regenerate.**

- **Structured Lists (Permits/Reports/Notices):** Create tables listing every item in individual rows without abbreviation, regardless of importance score.
- Use professional but friendly tone that's easy to understand. (Ex: Use "is notable" instead of "is", "is recommended" instead of "must", "needs to" etc.)
- Can use blockquotes to highlight expert comments or particularly emphasized insights.

6. Closing: Write objective closing including:
- Brief summary of key factual information covered today.
- Objectively list ongoing important schedules or imminent deadlines.
- Maintain neutral and objective tone.
- Do not write preview or anticipatory messages about next newsletter.
- Do not include contact information for inquiries.

7. Title Writing Guidelines:
- Title should objectively convey core facts of 1-2 most important news items today.
- Write with key facts rather than simple "Newsletter", more effective with specific figures or schedules.
- Use neutral and objective terms in title (e.g., 'announced', 'implementing', 'deadline approaching').
- Keep title length 20-50 characters and can include 1-2 relevant emoticons.
- Place most important key facts at beginning of title.
- Write title clearly and factually to maintain professionalism and credibility.

8. Additional Requirements:
- Comprehensively analyze posts to create email containing most important information for ${this.expertFields.join(', ')} field experts.
- Naturally include date at beginning in the format: "${this.dateService.getDisplayDateString()} ${this.expertFields.join(', ')} [News Term]". Replace [News Term] with the word for "News" appropriate for the output language (e.g., "News" for English, "소식" for Korean). Declare this part as \`Heading 1\`(#).
- Write body in markdown format, effectively using headings(#, ##, ###), bold(**), italics(_), bullet points(-, *) etc. to improve readability.
- Group related news to provide broader context, and mention development status if there's continuity with content covered in previous issues.
- **Source citation is most important for ensuring credibility.** Must provide links in [original title](URL) format using source's title. Do not write as "View", "Article", "[Post3](URL)" format.
- Specify source whenever article titles or content are quoted in newsletter, ensure all information is provided with links.
- Discover connections and patterns between news items to provide integrated insights rather than simple listing, and provide data-based insightful analysis.
- Structure entire content so experts can quickly scan and grasp key information, design so busy experts can understand most important content within 2-3 minutes.
- Including simple small data analysis (e.g., "00% of this news budget-related", "30% increase in 00-related news vs last week") adds more valuable insight where possible.
${this.subscribePageUrl ? `- Add \`${this.subscribePageUrl}\`(Subscribe to ${this.newsletterBrandName}) page access link button at appropriate attention-worthy spot for natural recommendation to others.` : ''}`;
    }
    /** User prompt: the article list followed by the generation request. */
    get userPrompt() {
        return `Below is the complete list of newly collected ${this.expertFields.join(', ')} related news:

${this.targetArticles
            .map((post, index) => `## Post ${index + 1}
**Title:** ${post.title}
**Content:** ${post.detailContent}
**Importance:** ${post.importanceScore}/10
**Tags:** ${[post.tag1, post.tag2, post.tag3].filter(Boolean).join(', ')}
**Content Type:** ${post.contentType}
**URL:** ${post.url}
${post.imageContextByLlm ? `**Image Analysis:** ${post.imageContextByLlm}` : ''}
`)
            .join('\n\n')}


---
**Comprehensive Analysis and Daily Newsletter Generation Request:**
Based on all post information provided above, please generate a ${this.expertFields.join(', ')} trends newsletter for ${this.dateService.getDisplayDateString()}. Please note the following:

1. **STRICT LENGTH CONTROL BY IMPORTANCE SCORE:**
- 9-10 points: Full detailed coverage allowed (Key Facts + Targets + Dates + Related Facts)
- 6-8 points: MAXIMUM 3 SENTENCES ONLY. Do not write detailed analysis, subsections, or bullet points.
- 1-5 points: MAXIMUM 1 SENTENCE ONLY. Just core fact + link.

2. Prioritize high importance items (9-10 points) first and structure information by importance and topic.
3. Instead of simply listing news items, group similar topics to strengthen information connectivity. For duplicate content from different sources (e.g., same job posting, same event notice), minimize redundancy by mentioning once or grouping around the most detailed content.
4. Sort by highest importance within categories and analyze category trends and patterns to assess industry-wide impact.
5. Include 1-2 most important news items in the title and use appropriate emoticons to enhance visual readability.
6. Structure the final output so experts can quickly grasp key information and provide insights that aid practical decision-making.
7. Source citation is crucial for credibility. Whenever mentioning any news, always provide links in [original title](URL) format. Do not use formats like "View Details" or "Post3". Content must always be accompanied by source links.
8. Never create content not present in the provided materials. All analysis and insights must be based strictly on the provided post materials, without adding arbitrary information or presenting it as fact.
9. Structured List (Permits/Reports/Notices etc.) Table Formatting Principles (No Omissions/Abbreviations):
- Do not bundle any items in this category; never abbreviate with "... and n more", "others", etc.
- List all items in tables with one item per row. Do not shorten tables regardless of length.
- Each row must include [original title](URL) and when available, list the following fields in order: Organization (or Publisher) | Region/Basin | Number/ID (Permit/Receipt/Report number etc.) | Date (Post/Issue/Permit date as shown in source).
- Mark missing values as "—" rather than leaving blank or omitting columns.
- Maintain table format without using bullet/number lists.

Please follow the roles and output format defined in the system prompt (friendly introduction, overall briefing, category classification, in-depth analysis, polite closing, etc.).`;
    }
};
1098
+
1099
/**
 * Chain that turns stored article candidates into a saved newsletter issue:
 * fetch candidates -> generate title/markdown with the LLM -> render the HTML
 * template -> persist the newsletter through the provider.
 *
 * Every step yields `null` when no issue should be published, and later steps
 * pass that `null` through without doing any work.
 */
class ContentGenerateChain extends Chain {
    dateService;
    minimumArticleCountForIssue;
    priorityArticleScoreThreshold;
    htmlTemplate;
    /**
     * @param config Chain configuration. Reads `dateService` and, from
     * `provider`, the optional `publicationCriteria` (defaults: 5 articles,
     * score threshold 8) and the `htmlTemplate` (marker names default to
     * "title" and "content").
     */
    constructor(config) {
        super(config);
        this.dateService = config.dateService;
        this.minimumArticleCountForIssue =
            config.provider.publicationCriteria?.minimumArticleCountForIssue ?? 5;
        this.priorityArticleScoreThreshold =
            config.provider.publicationCriteria?.priorityArticleScoreThreshold ?? 8;
        this.htmlTemplate = {
            html: config.provider.htmlTemplate.html,
            markers: {
                title: config.provider.htmlTemplate.markers?.title ?? 'title',
                content: config.provider.htmlTemplate.markers?.content ?? 'content',
            },
        };
    }
    /** Runnable pipeline; the whole sequence is retried per `options.chain.stopAfterAttempt`. */
    get chain() {
        return RunnablePassthrough.assign({
            candidateArticles: () => this.fetchArticleCandidates(),
        })
            .pipe(RunnablePassthrough.assign({
            generatedCoreContent: ({ candidateArticles }) => this.generateCoreContent(candidateArticles),
            candidateArticles: ({ candidateArticles }) => candidateArticles,
        }))
            .pipe(RunnablePassthrough.assign({
            html: ({ generatedCoreContent }) => this.renderHtml(generatedCoreContent),
            generatedCoreContent: ({ generatedCoreContent }) => generatedCoreContent,
            candidateArticles: ({ candidateArticles }) => candidateArticles,
        }))
            .pipe(RunnablePassthrough.assign({
            newsletterId: ({ html, generatedCoreContent, candidateArticles }) => this.createNewsletter(html, generatedCoreContent, candidateArticles),
        }))
            .withRetry({ stopAfterAttempt: this.options.chain.stopAfterAttempt });
    }
    /** Loads the article candidates from the provider. */
    async fetchArticleCandidates() {
        return this.executeWithLogging({
            event: 'generate.content.articles.fetch',
            level: 'debug',
            doneFields: (items) => ({ count: items.length }),
        }, async () => {
            return await this.provider.fetchArticleCandidates();
        });
    }
    /**
     * Generates the newsletter title and markdown body.
     *
     * @param candidateArticles Articles to cover in the issue.
     * @returns `{ title, content }`, or `null` when there are no articles or
     * the publication criteria are not met.
     */
    async generateCoreContent(candidateArticles) {
        return this.executeWithLogging({
            event: 'generate.content.core.generate',
            level: 'info',
            startFields: { count: candidateArticles.length },
            doneFields: (result) => ({ title: result?.title }),
        }, async () => {
            if (candidateArticles.length === 0) {
                this.logger.debug({
                    event: 'generate.content.core.generate.noarticle',
                    taskId: this.taskId,
                });
                return null;
            }
            const hasHighImportancePost = candidateArticles.some(({ importanceScore }) => importanceScore >= this.priorityArticleScoreThreshold);
            // NOTE(review): with "<=" an issue needs MORE than
            // minimumArticleCountForIssue articles unless a priority article is
            // present; confirm this is intended rather than "<".
            if (candidateArticles.length <= this.minimumArticleCountForIssue &&
                !hasHighImportancePost) {
                this.logger.debug({
                    event: 'generate.content.core.generate.criteria',
                    taskId: this.taskId,
                    data: {
                        count: candidateArticles.length,
                        hasHighImportancePost,
                    },
                });
                return null;
            }
            const generateNewsletter = new GenerateNewsletter$1({
                model: this.provider.model,
                maxOutputTokens: this.provider.maxOutputTokens,
                temperature: this.provider.temperature,
                topP: this.provider.topP,
                topK: this.provider.topK,
                presencePenalty: this.provider.presencePenalty,
                frequencyPenalty: this.provider.frequencyPenalty,
                logger: this.logger,
                taskId: this.taskId,
                targetArticles: candidateArticles,
                options: pick(this.options, ['content', 'llm']),
                loggingExecutor: new LoggingExecutor(this.logger, this.taskId),
                subscribePageUrl: this.provider.subscribePageUrl,
                newsletterBrandName: this.provider.newsletterBrandName,
                dateService: this.dateService,
            });
            return await generateNewsletter.execute();
        });
    }
    /**
     * Fills the HTML template with the generated title and body.
     *
     * @param coreContent `{ title, content }` from generation, or `null`.
     * @returns The rendered HTML, or `null` when `coreContent` is empty.
     */
    async renderHtml(coreContent) {
        return this.executeWithLogging({
            event: 'generate.content.html.render',
            level: 'debug',
            startFields: { coreContent },
            doneFields: (html) => ({ html }),
        }, async () => {
            if (!coreContent) {
                return null;
            }
            // The title is plain LLM-generated text, so its HTML-special
            // characters are escaped before it is placed into the template
            // (the body is sanitized separately inside markdownToHtml).
            const htmlEntities = {
                '&': '&amp;',
                '<': '&lt;',
                '>': '&gt;',
                '"': '&quot;',
                "'": '&#39;',
            };
            const titleHtml = String(coreContent.title).replace(/[&<>"']/g, (char) => htmlEntities[char]);
            const contentHtml = markdownToHtml(coreContent.content);
            // Replacer functions are used instead of replacement strings so
            // that "$&", "$$", "$`" or "$'" appearing in generated text is
            // inserted literally rather than interpreted as a pattern.
            return this.htmlTemplate.html
                .replaceAll(`{{${this.htmlTemplate.markers.title}}}`, () => titleHtml)
                .replaceAll(`{{${this.htmlTemplate.markers.content}}}`, () => contentHtml);
        });
    }
    /**
     * Persists the issue through the provider.
     *
     * @param html Rendered HTML, or `null`.
     * @param coreContent `{ title, content }`, or `null`.
     * @param candidateArticles Articles recorded as used by this issue.
     * @returns The id returned by `provider.saveNewsletter`, or `null` when
     * there is nothing to save.
     */
    async createNewsletter(html, coreContent, candidateArticles) {
        return this.executeWithLogging({
            event: 'generate.content.newsletter.create',
            level: 'debug',
            startFields: { html, count: candidateArticles.length },
            doneFields: (id) => ({ id }),
        }, async () => {
            if (!html || !coreContent) {
                return null;
            }
            const { id } = await this.provider.saveNewsletter({
                newsletter: {
                    ...coreContent,
                    // juice inlines the template's CSS into the markup
                    htmlBody: juice(html),
                    issueOrder: this.provider.issueOrder,
                    date: this.dateService.getCurrentISODateString(),
                },
                usedArticles: candidateArticles,
            });
            return id;
        });
    }
}
1231
+
1232
// Pool of User-Agent header values copied from real desktop browsers.
const USER_AGENTS = [
    // Windows: Chrome, Edge, Firefox
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36 Edg/124.0.0.0',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0',
    // macOS: Chrome, Safari, Firefox
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:126.0) Gecko/20100101 Firefox/126.0',
    // Linux: Chrome, Firefox
    'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36',
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0',
    // Further common combinations
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
];
/** Returns one entry of USER_AGENTS chosen uniformly at random. */
const getRandomUserAgent = () => {
    const randomIndex = Math.floor(Math.random() * USER_AGENTS.length);
    return USER_AGENTS[randomIndex];
};
1251
/** Resolves (with no value) after roughly `ms` milliseconds. */
const sleep = (ms) => new Promise((done) => {
    setTimeout(done, ms);
});
1252
/** Limits `n` to the range [min, max]; the lower bound is applied last. */
const clamp = (n, min, max) => {
    const cappedAbove = Math.min(max, n);
    return Math.max(min, cappedAbove);
};
1253
/**
 * Converts a Retry-After header value into a wait time in milliseconds.
 *
 * Both header forms are handled: a number of seconds, or a date string
 * parseable by `Date`. The result is limited to the range 0..60000 ms.
 *
 * @param header Raw header value (may be null/empty).
 * @returns Milliseconds to wait, or `null` when the header is missing,
 * unparseable, or names a moment that is not in the future.
 */
function parseRetryAfter(header) {
    const MAX_WAIT_MS = 60_000;
    if (!header) {
        return null;
    }
    // Form 1: delay in seconds
    const asSeconds = Number(header);
    if (!Number.isNaN(asSeconds)) {
        return clamp(asSeconds * 1000, 0, MAX_WAIT_MS);
    }
    // Form 2: absolute date
    const msUntilDate = new Date(header).getTime() - Date.now();
    const isFutureDate = Number.isFinite(msUntilDate) && msUntilDate > 0;
    return isFutureDate ? clamp(msUntilDate, 0, MAX_WAIT_MS) : null;
}
1267
/**
 * Decides whether a failed request is worth retrying.
 *
 * @param status HTTP status of the response, or null/undefined when the
 * request failed before a response was received.
 * @param error The thrown/rejected value, if any. It may be an `Error` or a
 * plain string: `fetch` rejects with the AbortSignal's reason as-is, and an
 * abort reason can be a string.
 * @returns `true` for 429, 5xx and abort/timeout/network/fetch failures;
 * `false` for other 4xx statuses and anything unrecognized.
 */
function shouldRetry(status, error) {
    if (status === 429)
        return true; // Too Many Requests (429)
    if (status && status >= 500)
        return true; // 5xx server error
    if (status && status >= 400 && status < 500)
        return false; // Fatal client error
    // Network error or aborted: classify by message text
    let message = '';
    if (error instanceof Error) {
        message = error.message;
    }
    else if (typeof error === 'string') {
        message = error;
    }
    const msg = message.toLowerCase();
    const transientKeywords = ['aborted', 'timeout', 'network', 'fetch'];
    return transientKeywords.some((keyword) => msg.includes(keyword));
}
1286
/**
 * Downloads a page body as text using browser-like request headers, a
 * per-attempt timeout and up to 5 attempts.
 *
 * - The timeout starts at 10s and grows by 1.3x per attempt (limited to 5s..30s).
 * - Non-OK responses are retried when `shouldRetry` allows it, waiting for the
 *   larger of the Retry-After delay and an exponential backoff with jitter.
 * - Thrown errors (including timeouts) are retried under the same policy.
 * - After a successful download a short random pause (250-750ms) is inserted.
 *
 * @param logger Logger exposing `debug` and `error`.
 * @param url Address to download.
 * @param referer Value sent in the Referer header.
 * @returns The response body as a string (also for non-HTML content types).
 * @throws Error when the request fails with a non-retryable condition or all
 * attempts are exhausted.
 */
async function getHtmlFromUrl(logger, url, referer = 'https://www.google.com/') {
    const maxRetries = 5;
    const baseTimeoutMs = 10_000; // Base 10s, increases per attempt
    let lastError = null;
    for (let attempt = 1; attempt <= maxRetries; attempt++) {
        const controller = new AbortController();
        const timeoutMs = clamp(baseTimeoutMs * Math.pow(1.3, attempt - 1), 5_000, 30_000);
        // Abort with an Error instead of a bare string: fetch rejects with the
        // abort reason as-is, and shouldRetry only inspects Error messages, so
        // a string reason made every timeout look non-retryable.
        const timeout = setTimeout(() => controller.abort(new Error(`timeout after ${timeoutMs}ms`)), timeoutMs);
        try {
            const startedAt = Date.now();
            const response = await fetch(url, {
                // mode: 'cors' // Not applicable in Node, left here for behavioral parity with browsers
                redirect: 'follow',
                // @ts-expect-error Undici/Fetch in Node may allow duplex; safe to ignore
                duplex: 'half',
                signal: controller.signal,
                headers: {
                    'User-Agent': getRandomUserAgent(), // Randomize User-Agent
                    Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
                    'Accept-Language': 'ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7',
                    Referer: referer, // Include previous page information
                    Connection: 'keep-alive',
                    // Compression is handled automatically by undici/node-fetch, no need to set Accept-Encoding explicitly
                },
            });
            clearTimeout(timeout);
            const duration = Date.now() - startedAt;
            const status = response.status;
            if (!response.ok) {
                const retryAfterMs = parseRetryAfter(response.headers.get('retry-after'));
                const canRetry = shouldRetry(status, null);
                logger.debug({
                    event: 'fetch.error',
                    data: { url, status, attempt, canRetry, duration, retryAfterMs },
                });
                if (!canRetry || attempt === maxRetries) {
                    // Leave the loop instead of throwing here so this error
                    // is not re-classified by the catch block below.
                    const msg = `Request failed (status=${status}) - ${url}`;
                    lastError = new Error(msg);
                    break;
                }
                const backoff = Math.pow(2, attempt - 1) * 1000 + Math.random() * 1000;
                const delay = retryAfterMs != null ? Math.max(retryAfterMs, backoff) : backoff;
                await sleep(delay);
                continue;
            }
            const contentType = response.headers.get('content-type') || '';
            if (!contentType.toLowerCase().includes('text/html')) {
                // If not HTML, log a warning and continue (keep I/O compatibility)
                logger.debug({
                    event: 'fetch.nonHtml',
                    data: { url, contentType, attempt },
                });
            }
            const html = await response.text();
            logger.debug({
                event: 'fetch.success',
                data: { url, status, attempt, duration, size: html.length },
            });
            // Short randomized sleep after success to reduce server load during crawling
            const sleepTime = Math.random() * 500 + 250;
            await sleep(sleepTime);
            return html;
        }
        catch (error) {
            clearTimeout(timeout);
            const canRetry = shouldRetry(null, error);
            lastError = error instanceof Error ? error : new Error(String(error));
            logger.debug({
                event: 'fetch.catch',
                data: { url, attempt, canRetry, error: lastError.message },
            });
            if (!canRetry || attempt === maxRetries) {
                logger.error({
                    event: 'fetch.failed',
                    data: { url, attempt, error: lastError.message },
                });
                throw lastError;
            }
            // Wait before the next attempt (exponential backoff + jitter)
            const retryDelay = Math.pow(2, attempt - 1) * 1000 + Math.random() * 1000;
            await sleep(retryDelay);
        }
    }
    // Reached via the `break` above (terminal HTTP failure)
    throw lastError;
}
1372
+
1373
+ class CrawlingChain extends Chain {
1374
+ constructor(config) {
1375
+ const provider = config.provider;
1376
+ provider.maxConcurrency ??= 5;
1377
+ super({ ...config, provider });
1378
+ }
1379
+ get chain() {
1380
+ const mapping = this.provider.crawlingTargetGroups.reduce((result, group) => {
1381
+ return {
1382
+ ...result,
1383
+ [group.name]: () => this.executeGroupPipeline(group),
1384
+ };
1385
+ }, {});
1386
+ return RunnablePassthrough.assign(mapping);
1387
+ }
1388
+ async executeGroupPipeline(group) {
1389
+ const groupLabel = group.name;
1390
+ const chain = RunnablePassthrough.assign({
1391
+ listPageHtml: ({ target }) => this.fetchListPageHtml(target),
1392
+ })
1393
+ .pipe({
1394
+ parsedList: ({ target, listPageHtml }) => this.parseListPageHtml(target, listPageHtml),
1395
+ target: ({ target }) => target,
1396
+ })
1397
+ .pipe({
1398
+ list: ({ target, parsedList }) => this.dedupeListItems(target, parsedList),
1399
+ target: ({ target }) => target,
1400
+ })
1401
+ .pipe({
1402
+ detailPagesHtmlWithPipelineId: ({ target, list }) => this.fetchDetailPagesHtml(target, list),
1403
+ target: ({ target }) => target,
1404
+ list: ({ list }) => list,
1405
+ })
1406
+ .pipe({
1407
+ parsedDetails: ({ target, detailPagesHtmlWithPipelineId }) => this.parseDetailPagesHtml(target, detailPagesHtmlWithPipelineId),
1408
+ target: ({ target }) => target,
1409
+ list: ({ list }) => list,
1410
+ })
1411
+ .pipe({
1412
+ processedArticles: ({ target, list, parsedDetails }) => this.mergeParsedArticles(target, list, parsedDetails),
1413
+ target: ({ target }) => target,
1414
+ })
1415
+ .pipe({
1416
+ count: ({ target, processedArticles }) => this.saveArticles(group, target, processedArticles),
1417
+ })
1418
+ .withRetry({ stopAfterAttempt: this.options.chain.stopAfterAttempt });
1419
+ return this.executeWithLogging({
1420
+ event: 'crawl.group',
1421
+ level: 'debug',
1422
+ startFields: {
1423
+ group: groupLabel,
1424
+ targets: group.targets.length,
1425
+ },
1426
+ doneFields: (total) => ({ totalSaved: total }),
1427
+ }, async () => {
1428
+ const results = await chain.batch(group.targets.map((target) => ({ target })), {
1429
+ maxConcurrency: this.provider.maxConcurrency,
1430
+ });
1431
+ return results.reduce((sum, result) => sum + result.count, 0);
1432
+ });
1433
+ }
1434
+ async fetchListPageHtml(target) {
1435
+ return this.executeWithLogging({
1436
+ event: 'crawl.list.fetch',
1437
+ level: 'debug',
1438
+ startFields: { target: this.describeTarget(target) },
1439
+ }, async () => {
1440
+ return await getHtmlFromUrl(this.logger, target.url);
1441
+ });
1442
+ }
1443
+ async parseListPageHtml(target, listPageHtml) {
1444
+ return this.executeWithLogging({
1445
+ event: 'crawl.list.parse',
1446
+ level: 'debug',
1447
+ startFields: {
1448
+ target: this.describeTarget(target),
1449
+ htmlLength: listPageHtml.length,
1450
+ },
1451
+ doneFields: (items) => ({ count: items.length }),
1452
+ }, async () => {
1453
+ return (await target.parseList(listPageHtml)).map((item) => ({
1454
+ ...item,
1455
+ pipelineId: randomUUID(),
1456
+ }));
1457
+ });
1458
+ }
1459
+ async dedupeListItems(target, parsedList) {
1460
+ return this.executeWithLogging({
1461
+ event: 'crawl.list.dedupe',
1462
+ level: 'debug',
1463
+ startFields: {
1464
+ target: this.describeTarget(target),
1465
+ inCount: parsedList.length,
1466
+ },
1467
+ doneFields: (deduped) => ({
1468
+ outCount: deduped.length,
1469
+ filtered: parsedList.length - deduped.length,
1470
+ }),
1471
+ }, async () => {
1472
+ const existingArticles = await this.provider.fetchExistingArticlesByUrls(parsedList.map(({ detailUrl }) => detailUrl));
1473
+ const existingUrlSet = new Set(existingArticles.map(({ detailUrl }) => detailUrl));
1474
+ return parsedList.filter((item) => !existingUrlSet.has(item.detailUrl));
1475
+ });
1476
+ }
1477
+ async fetchDetailPagesHtml(target, list) {
1478
+ return this.executeWithLogging({
1479
+ event: 'crawl.detail.fetch',
1480
+ level: 'debug',
1481
+ startFields: {
1482
+ target: this.describeTarget(target),
1483
+ count: list.length,
1484
+ },
1485
+ doneFields: (htmlList) => ({ count: htmlList.length }),
1486
+ }, async () => {
1487
+ const htmlList = await Promise.all(list.map((data) => getHtmlFromUrl(this.logger, data.detailUrl)));
1488
+ return htmlList.map((html, index) => ({
1489
+ pipelineId: list[index].pipelineId,
1490
+ html,
1491
+ }));
1492
+ });
1493
+ }
1494
+ async parseDetailPagesHtml(target, detailPagesHtmlWithPipelineId) {
1495
+ return this.executeWithLogging({
1496
+ event: 'crawl.detail.parse',
1497
+ level: 'debug',
1498
+ startFields: {
1499
+ target: this.describeTarget(target),
1500
+ count: detailPagesHtmlWithPipelineId.length,
1501
+ },
1502
+ doneFields: (details) => ({ count: details.length }),
1503
+ }, async () => {
1504
+ const detail = await Promise.all(detailPagesHtmlWithPipelineId.map(({ html }) => target.parseDetail(html)));
1505
+ return detail.map((detail, index) => ({
1506
+ pipelineId: detailPagesHtmlWithPipelineId[index].pipelineId,
1507
+ ...detail,
1508
+ }));
1509
+ });
1510
+ }
1511
+ // Although this is a synchronous method, using async wrapping to maintain consistency with the executeWithLogging interface
1512
+ async mergeParsedArticles(target, list, parsedDetails) {
1513
+ return this.executeWithLogging({
1514
+ event: 'crawl.merge',
1515
+ level: 'debug',
1516
+ startFields: {
1517
+ target: this.describeTarget(target),
1518
+ listCount: list.length,
1519
+ detailCount: parsedDetails.length,
1520
+ },
1521
+ doneFields: (merged) => ({ count: merged.length }),
1522
+ }, async () => {
1523
+ const listItemMap = new Map(list.map((item) => [item.pipelineId, item]));
1524
+ const merged = parsedDetails.map((detail) => {
1525
+ const listItem = listItemMap.get(detail.pipelineId);
1526
+ if (!listItem) {
1527
+ throw new Error(`No matching list item for detail with pipelineId: ${detail.pipelineId}`);
1528
+ }
1529
+ return {
1530
+ ...omit(listItem, ['pipelineId']),
1531
+ ...omit(detail, ['pipelineId']),
1532
+ };
1533
+ });
1534
+ return merged;
1535
+ });
1536
+ }
1537
+ async saveArticles(group, target, processedArticles) {
1538
+ const omittedGroup = omit(group, ['targets']);
1539
+ return this.executeWithLogging({
1540
+ event: 'crawl.save',
1541
+ level: 'debug',
1542
+ startFields: {
1543
+ group: omittedGroup,
1544
+ target: this.describeTarget(target),
1545
+ count: processedArticles.length,
1546
+ },
1547
+ doneFields: (saved) => ({ saved }),
1548
+ }, async () => {
1549
+ return await this.provider.saveCrawledArticles(processedArticles, {
1550
+ taskId: this.taskId,
1551
+ targetGroup: omittedGroup,
1552
+ target,
1553
+ });
1554
+ });
1555
+ }
1556
+ describeTarget(target) {
1557
+ return {
1558
+ name: target.name || 'unknown',
1559
+ listUrl: target.url,
1560
+ };
1561
+ }
1562
+ }
1563
+
1564
/**
 * Core class that orchestrates LLM-based newsletter generation.
 * - Responsible for the flow: Crawling → Analysis → Content Generation → Save;
 *   external dependencies are injected via DI.
 */
class GenerateNewsletter {
    /** Internal fields provided via dependency injection */
    dateService;
    taskService;
    crawlingProvider;
    analysisProvider;
    contentGenerateProvider;
    logger;
    options;
    previewNewsletterOptions;
    /** Independent internal field: id of the running task, set by startTask() */
    taskId = null;
    /**
     * Constructor
     *
     * Merges `config.options` over the defaults (`llm.maxRetries: 5`,
     * `chain.stopAfterAttempt: 3`, `content: config.contentOptions`). The
     * `llm` and `chain` sections are merged key by key, so a partial override
     * keeps the remaining defaults. A no-op logger is used when none is given.
     *
     * @param config
     * @example
     * const generator = new GenerateNewsletter({
     *   outputLanguage: 'English',
     *   expertField: ['AI', 'Cloud'],
     *   dateService,
     *   taskService,
     *   tagProvider,
     *   crawlingProvider,
     *   analysisProvider,
     *   contentGenerateProvider,
     *   options: { llm: { maxRetries: 5 } },
     * });
     */
    constructor(config) {
        const defaultOptions = {
            content: config.contentOptions,
            llm: { maxRetries: 5 },
            chain: { stopAfterAttempt: 3 },
        };
        this.dateService = config.dateService;
        this.taskService = config.taskService;
        this.crawlingProvider = config.crawlingProvider;
        this.analysisProvider = config.analysisProvider;
        this.contentGenerateProvider = config.contentGenerateProvider;
        this.options = {
            ...defaultOptions,
            ...config.options,
            llm: {
                ...defaultOptions.llm,
                ...config.options?.llm,
            },
            chain: {
                ...defaultOptions.chain,
                ...config.options?.chain,
            },
        };
        // Default logger (no-op)
        this.logger = config.options?.logger ?? {
            info: (_msg) => { },
            debug: (_msg) => { },
            error: (_msg) => { },
        };
        // Store preview newsletter options
        this.previewNewsletterOptions = config.options?.previewNewsletter;
    }
    /**
     * Execute the full newsletter generation pipeline.
     *
     * Runs the crawling, analysis and content generation chains in sequence
     * inside a managed task, logs the outcome and, when preview options are
     * configured, sends a preview email.
     *
     * @returns The `newsletterId` produced by the pipeline; `null` is treated
     *   as "no newsletter created" by the result logging and the preview step.
     */
    async generate() {
        const { newsletterId } = await this.executeWithTaskManagement(async () => {
            const loggingExecutor = new LoggingExecutor(this.logger, this.taskId);
            const crawlingChain = new CrawlingChain({
                logger: this.logger,
                taskId: this.taskId,
                provider: this.crawlingProvider,
                options: this.options,
                loggingExecutor,
            });
            const analysisChain = new AnalysisChain({
                logger: this.logger,
                taskId: this.taskId,
                provider: this.analysisProvider,
                options: this.options,
                loggingExecutor,
                dateService: this.dateService,
            });
            const contentGenerateChain = new ContentGenerateChain({
                logger: this.logger,
                taskId: this.taskId,
                provider: this.contentGenerateProvider,
                options: this.options,
                loggingExecutor,
                dateService: this.dateService,
            });
            const taskChain = RunnableSequence.from([
                crawlingChain.chain,
                analysisChain.chain,
                contentGenerateChain.chain,
            ]);
            return await taskChain.invoke({});
        });
        this.logNewsletterResult(newsletterId);
        await this.sendPreviewNewsletterIfConfigured(newsletterId);
        return newsletterId;
    }
    /**
     * Run the pipeline while managing the task lifecycle.
     *
     * The task is started first and always ended afterwards. If the pipeline
     * fails and ending the task fails as well, the pipeline error is the one
     * rethrown (the end-task failure is only logged), so the root cause is
     * never masked by a cleanup error. If the pipeline succeeds, a failure
     * while ending the task still propagates to the caller.
     *
     * @param pipeline Async function performing the actual work.
     * @returns The value the pipeline resolves to.
     */
    async executeWithTaskManagement(pipeline) {
        await this.startTask();
        const executor = new LoggingExecutor(this.logger, this.taskId);
        let result;
        try {
            result = await executor.executeWithLogging({
                event: 'task',
                level: 'info',
            }, async () => {
                return await pipeline();
            });
        }
        catch (pipelineError) {
            try {
                await this.endTask();
            }
            catch (endError) {
                // Report the cleanup failure, but keep the pipeline error as the one thrown.
                this.logger.error({
                    event: 'task.end.error',
                    taskId: this.taskId,
                    data: { reason: 'endTaskFailedAfterPipelineError' },
                });
                this.logger.error(endError);
            }
            throw pipelineError;
        }
        await this.endTask();
        return result;
    }
    /**
     * Log whether a newsletter was created or generation was skipped.
     *
     * @param newsletterId Id of the created newsletter, or `null` when none was created.
     */
    logNewsletterResult(newsletterId) {
        if (newsletterId === null) {
            this.logger.info({
                event: 'generate.result.skipped',
                taskId: this.taskId,
                data: { reason: 'publicationCriteria.notMet' },
            });
            return;
        }
        this.logger.info({
            event: 'generate.result.created',
            taskId: this.taskId,
            data: { newsletterId },
        });
    }
    /**
     * Send a preview email for the generated newsletter when preview options exist.
     *
     * Does nothing without preview options, and only logs a skip when no
     * newsletter was created. Sending is best-effort: any failure is logged
     * and not rethrown, so it never fails the generation run.
     *
     * @param newsletterId Id of the created newsletter, or `null`.
     */
    async sendPreviewNewsletterIfConfigured(newsletterId) {
        const preview = this.previewNewsletterOptions;
        if (!preview) {
            return;
        }
        if (newsletterId === null) {
            this.logger.info({
                event: 'generate.preview.skip',
                taskId: this.taskId,
                data: { reason: 'noNewsletterCreated' },
            });
            return;
        }
        try {
            // Fetch newsletter entity for preview
            const newsletter = await preview.fetchNewsletterForPreview();
            // Compose email subject/html/text
            const subject = `[Preview] ${newsletter.title}`;
            const html = newsletter.htmlBody;
            const text = `${newsletter.title}\n\nIssue #${newsletter.issueOrder} - ${newsletter.date}`;
            await preview.emailService.send({
                ...preview.emailMessage,
                subject,
                html,
                text,
            });
            this.logger.info({
                event: 'generate.preview.sent',
                taskId: this.taskId,
                data: {
                    newsletterId,
                    to: preview.emailMessage.to,
                },
            });
        }
        catch (err) {
            // Deliberate best-effort: record the failure and carry on.
            this.logger.error({
                event: 'generate.preview.error',
                taskId: this.taskId,
                data: { newsletterId },
            });
            this.logger.error(err);
        }
    }
    /** Start a task through the task service and remember its id. */
    async startTask() {
        this.taskId = await this.taskService.start();
    }
    /** End the current task through the task service. */
    async endTask() {
        await this.taskService.end();
    }
}
1753
+
1754
+ export { DateType, GenerateNewsletter };
1755
+ //# sourceMappingURL=index.js.map