@llm-newsletter-kit/core 1.1.6 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -0
- package/dist/index.cjs +84 -86
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.ts +16 -0
- package/dist/index.js +84 -86
- package/dist/index.js.map +1 -1
- package/package.json +14 -8
package/README.md
CHANGED
|
@@ -168,6 +168,49 @@ This kit prioritizes flexibility over rigid tooling. Instead of locking you into
|
|
|
168
168
|
- **Asynchronous Injection:** Parsing logic is injected asynchronously, allowing you to integrate third-party APIs or AI-based parsers effortlessly.
|
|
169
169
|
- **Recommendation:** While the kit supports LLM-based parsing (HTML-to-JSON), we generally recommend **rule-based parsing** (e.g., CSS selectors) for production environments to ensure speed, cost-efficiency, and stability.
|
|
170
170
|
|
|
171
|
+
## Playground
|
|
172
|
+
|
|
173
|
+
Playground scripts let you run individual LLM query classes in isolation — no full pipeline needed. Useful for prompt tuning, testing new options, or debugging output quality.
|
|
174
|
+
|
|
175
|
+
### Setup
|
|
176
|
+
|
|
177
|
+
1. Install playground dependencies:
|
|
178
|
+
```bash
|
|
179
|
+
npm install -D tsx @ai-sdk/openai
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
2. Copy example data files and customize:
|
|
183
|
+
```bash
|
|
184
|
+
mkdir -p playground/data
|
|
185
|
+
cp playground/data-examples/config.example.json playground/data/config.json
|
|
186
|
+
cp playground/data-examples/articles.example.json playground/data/articles.json
|
|
187
|
+
cp playground/data-examples/template.example.html playground/data/template.html
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
3. Edit `playground/data/config.json` with your OpenAI API key and options.
|
|
191
|
+
4. Edit `playground/data/articles.json` with your target articles.
|
|
192
|
+
5. (Optional) Replace `playground/data/template.html` with your actual email template.
|
|
193
|
+
|
|
194
|
+
### Run
|
|
195
|
+
|
|
196
|
+
```bash
|
|
197
|
+
npm run playground:generate-newsletter
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
### Output
|
|
201
|
+
|
|
202
|
+
Results are saved to `playground/output/` (git-ignored):
|
|
203
|
+
- `newsletter.md` — Generated markdown with title in frontmatter
|
|
204
|
+
- `newsletter.html` — Rendered HTML with CSS inlined (juice)
|
|
205
|
+
|
|
206
|
+
### Data Management
|
|
207
|
+
|
|
208
|
+
| Directory | Git | Purpose |
|
|
209
|
+
|---|---|---|
|
|
210
|
+
| `playground/data-examples/` | Tracked | Format reference files (`.example.*`) |
|
|
211
|
+
| `playground/data/` | Ignored | Your actual config, articles, templates |
|
|
212
|
+
| `playground/output/` | Ignored | Generated results |
|
|
213
|
+
|
|
171
214
|
## Development / Build / Test / CI
|
|
172
215
|
|
|
173
216
|
For the full developer guide (environment, scripts, testing/coverage, and CI), see [CONTRIBUTING.md](./CONTRIBUTING.md).
|
package/dist/index.cjs
CHANGED
|
@@ -4,10 +4,9 @@ var runnables = require('@langchain/core/runnables');
|
|
|
4
4
|
var esToolkit = require('es-toolkit');
|
|
5
5
|
var ai = require('ai');
|
|
6
6
|
var zod = require('zod');
|
|
7
|
-
var juice = require('juice');
|
|
8
|
-
var DOMPurify = require('dompurify');
|
|
9
7
|
var jsdom = require('jsdom');
|
|
10
|
-
var marked = require('marked');
|
|
8
|
+
var juice = require('juice');
|
|
9
|
+
var safeMarkdown2Html = require('safe-markdown2html');
|
|
11
10
|
var node_crypto = require('node:crypto');
|
|
12
11
|
|
|
13
12
|
/**
|
|
@@ -120,6 +119,17 @@ class LLMQuery extends BaseLLMQuery {
|
|
|
120
119
|
}
|
|
121
120
|
}
|
|
122
121
|
|
|
122
|
+
const ZERO_USAGE = {
|
|
123
|
+
inputTokens: undefined,
|
|
124
|
+
inputTokenDetails: {
|
|
125
|
+
noCacheTokens: undefined,
|
|
126
|
+
cacheReadTokens: undefined,
|
|
127
|
+
cacheWriteTokens: undefined,
|
|
128
|
+
},
|
|
129
|
+
outputTokens: undefined,
|
|
130
|
+
outputTokenDetails: { textTokens: undefined, reasoningTokens: undefined },
|
|
131
|
+
totalTokens: undefined,
|
|
132
|
+
};
|
|
123
133
|
class AnalyzeImages extends LLMQuery {
|
|
124
134
|
schema = zod.z.object({
|
|
125
135
|
imageContext: zod.z
|
|
@@ -132,12 +142,12 @@ class AnalyzeImages extends LLMQuery {
|
|
|
132
142
|
async execute() {
|
|
133
143
|
if (!this.targetArticle.hasAttachedImage ||
|
|
134
144
|
!this.targetArticle.detailContent) {
|
|
135
|
-
return null;
|
|
145
|
+
return { result: null, usage: ZERO_USAGE };
|
|
136
146
|
}
|
|
137
147
|
if (this.imageMessages.length === 0) {
|
|
138
|
-
return null;
|
|
148
|
+
return { result: null, usage: ZERO_USAGE };
|
|
139
149
|
}
|
|
140
|
-
const { output } = await ai.generateText({
|
|
150
|
+
const { output, usage } = await ai.generateText({
|
|
141
151
|
model: this.model,
|
|
142
152
|
maxRetries: this.options.llm.maxRetries,
|
|
143
153
|
output: ai.Output.object({
|
|
@@ -151,7 +161,7 @@ class AnalyzeImages extends LLMQuery {
|
|
|
151
161
|
},
|
|
152
162
|
],
|
|
153
163
|
});
|
|
154
|
-
return output.imageContext;
|
|
164
|
+
return { result: output.imageContext, usage };
|
|
155
165
|
}
|
|
156
166
|
get systemPrompt() {
|
|
157
167
|
return `# Image Analysis Expert System
|
|
@@ -258,7 +268,7 @@ class ClassifyTags extends LLMQuery {
|
|
|
258
268
|
}
|
|
259
269
|
async execute({ existTags }) {
|
|
260
270
|
this.existTags = existTags;
|
|
261
|
-
const { output } = await ai.generateText({
|
|
271
|
+
const { output, usage } = await ai.generateText({
|
|
262
272
|
model: this.model,
|
|
263
273
|
maxRetries: this.options.llm.maxRetries,
|
|
264
274
|
output: ai.Output.object({
|
|
@@ -267,7 +277,7 @@ class ClassifyTags extends LLMQuery {
|
|
|
267
277
|
system: this.systemPrompt,
|
|
268
278
|
prompt: this.userPrompt,
|
|
269
279
|
});
|
|
270
|
-
return output;
|
|
280
|
+
return { result: output, usage };
|
|
271
281
|
}
|
|
272
282
|
get systemPrompt() {
|
|
273
283
|
return `You are an AI specializing in analyzing and categorizing articles for professionals in ${this.expertFields.join(', ')}.
|
|
@@ -340,7 +350,7 @@ class DetermineArticleImportance extends LLMQuery {
|
|
|
340
350
|
this.dateService = config.dateService;
|
|
341
351
|
}
|
|
342
352
|
async execute() {
|
|
343
|
-
const { output } = await ai.generateText({
|
|
353
|
+
const { output, usage } = await ai.generateText({
|
|
344
354
|
model: this.model,
|
|
345
355
|
maxRetries: this.options.llm.maxRetries,
|
|
346
356
|
output: ai.Output.object({
|
|
@@ -349,7 +359,7 @@ class DetermineArticleImportance extends LLMQuery {
|
|
|
349
359
|
system: this.systemPrompt,
|
|
350
360
|
prompt: this.userPrompt,
|
|
351
361
|
});
|
|
352
|
-
return output.importanceScore;
|
|
362
|
+
return { result: output.importanceScore, usage };
|
|
353
363
|
}
|
|
354
364
|
get minPoint() {
|
|
355
365
|
const targetRule = this.minimumImportanceScoreRules.find(({ targetUrl }) => targetUrl === this.targetArticle.targetUrl);
|
|
@@ -523,7 +533,9 @@ class ArticleInsightsChain extends PrivateChain {
|
|
|
523
533
|
});
|
|
524
534
|
try {
|
|
525
535
|
const classifyTags = new ClassifyTags(this.getLlmQueryConfig(this.provider.classifyTagOptions.model, article));
|
|
526
|
-
const generatedTags = await classifyTags.execute({
|
|
536
|
+
const { result: generatedTags } = await classifyTags.execute({
|
|
537
|
+
existTags,
|
|
538
|
+
});
|
|
527
539
|
pushTag(generatedTags.tag1);
|
|
528
540
|
pushTag(generatedTags.tag2);
|
|
529
541
|
pushTag(generatedTags.tag3);
|
|
@@ -596,7 +608,7 @@ class ArticleInsightsChain extends PrivateChain {
|
|
|
596
608
|
});
|
|
597
609
|
try {
|
|
598
610
|
const analyzeImages = new AnalyzeImages(this.getLlmQueryConfig(this.provider.analyzeImagesOptions.model, article));
|
|
599
|
-
const imageContextByLlm = await analyzeImages.execute();
|
|
611
|
+
const { result: imageContextByLlm } = await analyzeImages.execute();
|
|
600
612
|
if (imageContextByLlm) {
|
|
601
613
|
articlesWithImageContext.push({
|
|
602
614
|
id: article.id,
|
|
@@ -695,7 +707,7 @@ class ArticleInsightsChain extends PrivateChain {
|
|
|
695
707
|
minimumImportanceScoreRules: this.provider.determineScoreOptions.minimumImportanceScoreRules,
|
|
696
708
|
dateService: this.dateService,
|
|
697
709
|
});
|
|
698
|
-
const importanceScore = await determineArticleImportance.execute();
|
|
710
|
+
const { result: importanceScore } = await determineArticleImportance.execute();
|
|
699
711
|
const processedArticle = {
|
|
700
712
|
...article,
|
|
701
713
|
importanceScore,
|
|
@@ -838,55 +850,6 @@ class AnalysisChain extends Chain {
|
|
|
838
850
|
}
|
|
839
851
|
}
|
|
840
852
|
|
|
841
|
-
function preprocessBoldSyntax(markdown) {
|
|
842
|
-
// Convert **text** to <strong>text</strong> before marked parsing
|
|
843
|
-
// This fixes issues where marked doesn't properly handle bold syntax with parentheses
|
|
844
|
-
return markdown.replace(/\*\*(.+?)\*\*/g, '<strong>$1</strong>');
|
|
845
|
-
}
|
|
846
|
-
function markdownToHtml(markdown) {
|
|
847
|
-
const preprocessed = preprocessBoldSyntax(markdown);
|
|
848
|
-
const html = marked.marked.parse(preprocessed);
|
|
849
|
-
const window = new jsdom.JSDOM('').window;
|
|
850
|
-
const purify = DOMPurify(window);
|
|
851
|
-
const sanitized = purify.sanitize(html);
|
|
852
|
-
const withCorrectedUrls = correctMalformedUrls(sanitized);
|
|
853
|
-
const withTargetBlank = addTargetBlankToAnchors(withCorrectedUrls);
|
|
854
|
-
const withDelReplaced = replaceDelTagsWithTilde(withTargetBlank);
|
|
855
|
-
return correctUnconvertedBoldSyntax(withDelReplaced);
|
|
856
|
-
}
|
|
857
|
-
function addTargetBlankToAnchors(htmlString) {
|
|
858
|
-
// DOMPurify removes target attributes, so we can safely add target="_blank" to all anchors
|
|
859
|
-
return htmlString.replace(/<a\s+([^>]*)>/gi, (_match, attributes) => {
|
|
860
|
-
return `<a ${attributes} target="_blank">`;
|
|
861
|
-
});
|
|
862
|
-
}
|
|
863
|
-
function replaceDelTagsWithTilde(htmlString) {
|
|
864
|
-
// Replace opening and closing del tags with tilde (~)
|
|
865
|
-
return htmlString.replace(/<del>/gi, '~').replace(/<\/del>/gi, '~');
|
|
866
|
-
}
|
|
867
|
-
function correctUnconvertedBoldSyntax(htmlString) {
|
|
868
|
-
// Replace unconverted "**text**" markdown syntax with <b> tags
|
|
869
|
-
// Matches "**" followed by one or more non-asterisk characters, followed by "**"
|
|
870
|
-
return htmlString.replace(/\*\*([^*]+)\*\*/g, '<b>$1</b>');
|
|
871
|
-
}
|
|
872
|
-
function correctMalformedUrls(htmlString) {
|
|
873
|
-
// Pattern matches anchors with `)` followed by optional closing markup (</b> or **) and URL-encoded characters
|
|
874
|
-
// Capture groups:
|
|
875
|
-
// 1: attributes before href
|
|
876
|
-
// 2: URL base (before `)`)
|
|
877
|
-
// 3: closing markup (</b> or ** or empty)
|
|
878
|
-
// 4: URL-encoded part (starts with %)
|
|
879
|
-
// 5: attributes after href
|
|
880
|
-
// 6: link text base (before `)`)
|
|
881
|
-
// 7: text after `)` in link text (may include **)
|
|
882
|
-
const regex = /<a\s+([^>]*?)href="([^"]*?)\)((?:<\/b>|\*\*)?)(%[0-9A-Fa-f]{2}[^"]*?)"([^>]*?)>([^<]*?)\)((?:\*\*)?[^<]*?)<\/a>/g;
|
|
883
|
-
return htmlString.replace(regex, (_match, beforeHref, urlBase, closingMarkup, _encodedPart, afterHref, textBase, textAfterClosingParen) => {
|
|
884
|
-
// Remove leading ** from textAfterClosingParen since it's already captured as closingMarkup (</b>)
|
|
885
|
-
const cleanedText = textAfterClosingParen.replace(/^\*\*/, '');
|
|
886
|
-
return `<a ${beforeHref}href="${urlBase}"${afterHref}>${textBase}</a>${closingMarkup})${cleanedText}`;
|
|
887
|
-
});
|
|
888
|
-
}
|
|
889
|
-
|
|
890
853
|
let GenerateNewsletter$1 = class GenerateNewsletter extends BaseLLMQuery {
|
|
891
854
|
maxOutputTokens;
|
|
892
855
|
temperature;
|
|
@@ -929,7 +892,7 @@ let GenerateNewsletter$1 = class GenerateNewsletter extends BaseLLMQuery {
|
|
|
929
892
|
this.newsletterBrandName = config.newsletterBrandName;
|
|
930
893
|
}
|
|
931
894
|
async execute() {
|
|
932
|
-
const { output } = await ai.generateText({
|
|
895
|
+
const { output, usage } = await ai.generateText({
|
|
933
896
|
model: this.model,
|
|
934
897
|
maxRetries: this.options.llm.maxRetries,
|
|
935
898
|
maxOutputTokens: this.maxOutputTokens,
|
|
@@ -944,16 +907,19 @@ let GenerateNewsletter$1 = class GenerateNewsletter extends BaseLLMQuery {
|
|
|
944
907
|
system: this.systemPrompt,
|
|
945
908
|
prompt: this.userPrompt,
|
|
946
909
|
});
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
return
|
|
910
|
+
const needsRetry = !output.isWrittenInOutputLanguage ||
|
|
911
|
+
!output.copyrightVerified ||
|
|
912
|
+
!output.factAccuracy ||
|
|
913
|
+
(this.options.content.titleContext &&
|
|
914
|
+
!output.title.includes(this.options.content.titleContext));
|
|
915
|
+
if (needsRetry) {
|
|
916
|
+
const retryResult = await this.execute();
|
|
917
|
+
return {
|
|
918
|
+
result: retryResult.result,
|
|
919
|
+
usage: addUsage(usage, retryResult.usage),
|
|
920
|
+
};
|
|
955
921
|
}
|
|
956
|
-
return esToolkit.pick(output, ['title', 'content']);
|
|
922
|
+
return { result: esToolkit.pick(output, ['title', 'content']), usage };
|
|
957
923
|
}
|
|
958
924
|
get systemPrompt() {
|
|
959
925
|
return `You are a newsletter production expert for "${this.newsletterBrandName}" who analyzes and delivers trends in the fields of ${this.expertFields.join(', ')}. Your goal is to provide in-depth analysis that helps industry professionals easily understand complex information and make informed decisions.
|
|
@@ -1007,14 +973,14 @@ Copyright Protection & Fact-Checking Principles:
|
|
|
1007
973
|
Output Format & Requirements:
|
|
1008
974
|
1. Language: ${this.options.content.outputLanguage}
|
|
1009
975
|
|
|
1010
|
-
2. Start: Specify date (${this.dateService.getDisplayDateString()}) and begin with neutral, objective greeting. Briefly introduce key factual information to be covered in today's newsletter
|
|
976
|
+
2. Start: ${this.options.content.freeFormIntro ? 'Begin directly with the Overall Briefing section (no separate opening heading or greeting).' : `Specify date (${this.dateService.getDisplayDateString()}) and begin with neutral, objective greeting. Briefly introduce key factual information to be covered in today's newsletter.`}
|
|
1011
977
|
|
|
1012
|
-
3. Overall Briefing: Before the main listing, create a briefing section conveying objective facts about today's news in these aspects:
|
|
978
|
+
3. Overall Briefing: Before the main listing, create a briefing section conveying objective facts about today's news${this.options.content.freeFormIntro ? `. Structure: Start with a Heading 2 (##) briefing section heading in the format "## 📮 ${this.dateService.getDisplayDateString()} [Briefing/Summary word in output language]" (e.g., "## 📮 2월 6일 브리핑" for Korean, "## 📮 Feb 6 Briefing" for English) — do NOT include domain or field names in the heading. Immediately follow with a brief paragraph introducing key factual information to be covered in today's newsletter, then include the following bullet points:` : ' in these aspects:'}
|
|
1013
979
|
- Key Trends: Explain major patterns or trends found in this news based on data. Ex: 'Over 00% of today's news relates to 00'.
|
|
1014
980
|
- Immediate Impact: Emphasize most important changes or decisions affecting industry immediately, specifically mentioning which fields will be most impacted.
|
|
1015
981
|
|
|
1016
982
|
4. Category Classification & Content Organization:
|
|
1017
|
-
- Group news by logical categories based on related tags and content (e.g., Policy/Regulation, Budget/Support, Research/Development, Products/Services, Operations/Process, Recruitment/Events) rather than just listing by importance.
|
|
983
|
+
- Group news by logical categories based on related tags and content (e.g., Policy/Regulation, Budget/Support, Research/Development, Products/Services, Operations/Process, Recruitment/Events) rather than just listing by importance.${this.options.content.freeFormIntro ? '\n - Use Heading 2 (##) for each category heading (same level as the briefing heading). Do NOT use Heading 3 (###) for categories.' : ''}
|
|
1018
984
|
- Use appropriate emoticons for each category for visual distinction.
|
|
1019
985
|
- Sort by importance within categories, making high-importance items more prominent.
|
|
1020
986
|
- Add short paragraph at category start summarizing overall trends or changes in that area, specifying important points and areas to focus on.
|
|
@@ -1054,18 +1020,22 @@ Output Format & Requirements:
|
|
|
1054
1020
|
- Do not write preview or anticipatory messages about next newsletter.
|
|
1055
1021
|
- Do not include contact information for inquiries.
|
|
1056
1022
|
|
|
1057
|
-
7. Title Writing Guidelines
|
|
1023
|
+
7. Title Writing Guidelines:${this.options.content.titleContext
|
|
1024
|
+
? `\n - **Required title keyword**: "${this.options.content.titleContext}". This phrase MUST appear in the title. Combine it with key context from today's newsletter content to form a natural, complete title.
|
|
1025
|
+
- Keep title length 20-100 characters and can include 1-2 relevant emoticons.
|
|
1026
|
+
- Use neutral and objective terms in title (e.g., 'announced', 'implementing', 'deadline approaching').
|
|
1027
|
+
- Write title clearly and factually to maintain professionalism and credibility.`
|
|
1028
|
+
: `
|
|
1058
1029
|
- Title should objectively convey core facts of 1-2 most important news items today.
|
|
1059
1030
|
- Write with key facts rather than simple "Newsletter", more effective with specific figures or schedules.
|
|
1060
1031
|
- Use neutral and objective terms in title (e.g., 'announced', 'implementing', 'deadline approaching').
|
|
1061
1032
|
- Keep title length 20-50 characters and can include 1-2 relevant emoticons.
|
|
1062
1033
|
- Place most important key facts at beginning of title.
|
|
1063
|
-
- Write title clearly and factually to maintain professionalism and credibility
|
|
1034
|
+
- Write title clearly and factually to maintain professionalism and credibility.`}
|
|
1064
1035
|
|
|
1065
1036
|
8. Additional Requirements:
|
|
1066
1037
|
- Comprehensively analyze posts to create email containing most important information for ${this.expertFields.join(', ')} field experts.
|
|
1067
|
-
|
|
1068
|
-
- Write body in markdown format, effectively using headings(#, ##, ###), bold(**), italics(_), bullet points(-, *) etc. to improve readability.
|
|
1038
|
+
${this.options.content.freeFormIntro ? '' : `- Naturally include date at beginning in the format: "${this.dateService.getDisplayDateString()} ${this.expertFields.join(', ')} [News Term]". Replace [News Term] with the word for "News" appropriate for the output language (e.g., "News" for English, "소식" for Korean). Declare this part as \`Heading 1\`(#).\n `}- Write body in markdown format, effectively using headings(#, ##, ###), bold(**), italics(_), bullet points(-, *) etc. to improve readability.
|
|
1069
1039
|
- Group related news to provide broader context, and mention development status if there's continuity with content covered in previous issues.
|
|
1070
1040
|
- **Source citation is most important for ensuring credibility.** Must provide links in [original title](URL) format using source's title. Do not write as "View", "Article", "[Post3](URL)" format.
|
|
1071
1041
|
- Specify source whenever article titles or content are quoted in newsletter, ensure all information is provided with links.
|
|
@@ -1116,6 +1086,27 @@ Based on all post information provided above, please generate a ${this.expertFie
|
|
|
1116
1086
|
Please follow the roles and output format defined in the system prompt (friendly introduction, overall briefing, category classification, in-depth analysis, polite closing, etc.).`;
|
|
1117
1087
|
}
|
|
1118
1088
|
};
|
|
1089
|
+
function addNum(a, b) {
|
|
1090
|
+
if (a == null && b == null)
|
|
1091
|
+
return undefined;
|
|
1092
|
+
return (a ?? 0) + (b ?? 0);
|
|
1093
|
+
}
|
|
1094
|
+
function addUsage(a, b) {
|
|
1095
|
+
return {
|
|
1096
|
+
inputTokens: addNum(a.inputTokens, b.inputTokens),
|
|
1097
|
+
inputTokenDetails: {
|
|
1098
|
+
noCacheTokens: addNum(a.inputTokenDetails?.noCacheTokens, b.inputTokenDetails?.noCacheTokens),
|
|
1099
|
+
cacheReadTokens: addNum(a.inputTokenDetails?.cacheReadTokens, b.inputTokenDetails?.cacheReadTokens),
|
|
1100
|
+
cacheWriteTokens: addNum(a.inputTokenDetails?.cacheWriteTokens, b.inputTokenDetails?.cacheWriteTokens),
|
|
1101
|
+
},
|
|
1102
|
+
outputTokens: addNum(a.outputTokens, b.outputTokens),
|
|
1103
|
+
outputTokenDetails: {
|
|
1104
|
+
textTokens: addNum(a.outputTokenDetails?.textTokens, b.outputTokenDetails?.textTokens),
|
|
1105
|
+
reasoningTokens: addNum(a.outputTokenDetails?.reasoningTokens, b.outputTokenDetails?.reasoningTokens),
|
|
1106
|
+
},
|
|
1107
|
+
totalTokens: addNum(a.totalTokens, b.totalTokens),
|
|
1108
|
+
};
|
|
1109
|
+
}
|
|
1119
1110
|
|
|
1120
1111
|
class ContentGenerateChain extends Chain {
|
|
1121
1112
|
dateService;
|
|
@@ -1208,7 +1199,8 @@ class ContentGenerateChain extends Chain {
|
|
|
1208
1199
|
newsletterBrandName: this.provider.newsletterBrandName,
|
|
1209
1200
|
dateService: this.dateService,
|
|
1210
1201
|
});
|
|
1211
|
-
|
|
1202
|
+
const { result } = await generateNewsletter.execute();
|
|
1203
|
+
return result;
|
|
1212
1204
|
});
|
|
1213
1205
|
}
|
|
1214
1206
|
async renderHtml(coreContent) {
|
|
@@ -1223,7 +1215,13 @@ class ContentGenerateChain extends Chain {
|
|
|
1223
1215
|
}
|
|
1224
1216
|
return this.htmlTemplate.html
|
|
1225
1217
|
.replaceAll(`{{${this.htmlTemplate.markers.title}}}`, coreContent.title)
|
|
1226
|
-
.replaceAll(`{{${this.htmlTemplate.markers.content}}}`,
|
|
1218
|
+
.replaceAll(`{{${this.htmlTemplate.markers.content}}}`, safeMarkdown2Html(coreContent.content, {
|
|
1219
|
+
window: new jsdom.JSDOM('').window,
|
|
1220
|
+
linkTargetBlank: true,
|
|
1221
|
+
fixMalformedUrls: true,
|
|
1222
|
+
fixBoldSyntax: true,
|
|
1223
|
+
convertStrikethrough: true,
|
|
1224
|
+
}));
|
|
1227
1225
|
});
|
|
1228
1226
|
}
|
|
1229
1227
|
async createNewsletter(html, coreContent, candidateArticles) {
|
|
@@ -1304,7 +1302,7 @@ function shouldRetry(status, error) {
|
|
|
1304
1302
|
}
|
|
1305
1303
|
return false;
|
|
1306
1304
|
}
|
|
1307
|
-
async function getHtmlFromUrl(logger, url, referer = 'https://www.google.com/') {
|
|
1305
|
+
async function getHtmlFromUrl(logger, url, referer = 'https://www.google.com/', customFetch) {
|
|
1308
1306
|
const maxRetries = 5;
|
|
1309
1307
|
const baseTimeoutMs = 10_000; // Base 10s, increases per attempt
|
|
1310
1308
|
let lastError = null;
|
|
@@ -1314,7 +1312,7 @@ async function getHtmlFromUrl(logger, url, referer = 'https://www.google.com/')
|
|
|
1314
1312
|
const timeout = setTimeout(() => controller.abort(`timeout after ${timeoutMs}ms`), timeoutMs);
|
|
1315
1313
|
try {
|
|
1316
1314
|
const startedAt = Date.now();
|
|
1317
|
-
const response = await fetch(url, {
|
|
1315
|
+
const response = await (customFetch ?? fetch)(url, {
|
|
1318
1316
|
// mode: 'cors' // Not applicable in Node, left here for behavioral parity with browsers
|
|
1319
1317
|
redirect: 'follow',
|
|
1320
1318
|
// @ts-expect-error Undici/Fetch in Node may allow duplex; safe to ignore
|
|
@@ -1458,7 +1456,7 @@ class CrawlingChain extends Chain {
|
|
|
1458
1456
|
startFields: { target: this.describeTarget(target) },
|
|
1459
1457
|
}, async () => {
|
|
1460
1458
|
try {
|
|
1461
|
-
return await getHtmlFromUrl(this.logger, target.url);
|
|
1459
|
+
return await getHtmlFromUrl(this.logger, target.url, undefined, this.provider.customFetch);
|
|
1462
1460
|
}
|
|
1463
1461
|
catch (error) {
|
|
1464
1462
|
this.logger.error({
|
|
@@ -1537,7 +1535,7 @@ class CrawlingChain extends Chain {
|
|
|
1537
1535
|
failedCount: result.failedCount,
|
|
1538
1536
|
}),
|
|
1539
1537
|
}, async () => {
|
|
1540
|
-
const settled = await Promise.allSettled(list.map((data) => getHtmlFromUrl(this.logger, data.detailUrl)));
|
|
1538
|
+
const settled = await Promise.allSettled(list.map((data) => getHtmlFromUrl(this.logger, data.detailUrl, undefined, this.provider.customFetch)));
|
|
1541
1539
|
const detailPagesHtmlWithPipelineId = [];
|
|
1542
1540
|
const successList = [];
|
|
1543
1541
|
let failedCount = 0;
|
package/dist/index.cjs.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.cjs","sources":[],"sourcesContent":[],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.cjs","sources":[],"sourcesContent":[],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;"}
|
package/dist/index.d.ts
CHANGED
|
@@ -254,6 +254,17 @@ type ContentOptions = {
|
|
|
254
254
|
* @example ["AI", "Cloud"]
|
|
255
255
|
*/
|
|
256
256
|
expertField: string | string[];
|
|
257
|
+
/**
|
|
258
|
+
* When true, removes the fixed date+field heading directive
|
|
259
|
+
* and moves the brief introduction into the briefing section.
|
|
260
|
+
*/
|
|
261
|
+
freeFormIntro?: boolean;
|
|
262
|
+
/**
|
|
263
|
+
* Context string to prioritize when generating the newsletter title.
|
|
264
|
+
* When provided, the LLM will consider this value as the top priority
|
|
265
|
+
* along with the generated newsletter content for title creation.
|
|
266
|
+
*/
|
|
267
|
+
titleContext?: string;
|
|
257
268
|
};
|
|
258
269
|
type LLMQueryOptions = {
|
|
259
270
|
/**
|
|
@@ -515,6 +526,11 @@ interface CrawlingProvider {
|
|
|
515
526
|
* @default 5
|
|
516
527
|
*/
|
|
517
528
|
maxConcurrency?: number;
|
|
529
|
+
/**
|
|
530
|
+
* Optional custom fetch function (e.g., proxy-based fetch).
|
|
531
|
+
* When provided, this function is used instead of the global `fetch` for HTTP requests.
|
|
532
|
+
*/
|
|
533
|
+
customFetch?: typeof fetch;
|
|
518
534
|
/**
|
|
519
535
|
* Crawling target groups.
|
|
520
536
|
*/
|
package/dist/index.js
CHANGED
|
@@ -2,10 +2,9 @@ import { RunnablePassthrough, RunnableSequence } from '@langchain/core/runnables
|
|
|
2
2
|
import { pick, omit } from 'es-toolkit';
|
|
3
3
|
import { generateText, Output } from 'ai';
|
|
4
4
|
import { z } from 'zod';
|
|
5
|
-
import juice from 'juice';
|
|
6
|
-
import DOMPurify from 'dompurify';
|
|
7
5
|
import { JSDOM } from 'jsdom';
|
|
8
|
-
import { marked } from 'marked';
|
|
6
|
+
import juice from 'juice';
|
|
7
|
+
import safeMarkdown2Html from 'safe-markdown2html';
|
|
9
8
|
import { randomUUID } from 'node:crypto';
|
|
10
9
|
|
|
11
10
|
/**
|
|
@@ -118,6 +117,17 @@ class LLMQuery extends BaseLLMQuery {
|
|
|
118
117
|
}
|
|
119
118
|
}
|
|
120
119
|
|
|
120
|
+
const ZERO_USAGE = {
|
|
121
|
+
inputTokens: undefined,
|
|
122
|
+
inputTokenDetails: {
|
|
123
|
+
noCacheTokens: undefined,
|
|
124
|
+
cacheReadTokens: undefined,
|
|
125
|
+
cacheWriteTokens: undefined,
|
|
126
|
+
},
|
|
127
|
+
outputTokens: undefined,
|
|
128
|
+
outputTokenDetails: { textTokens: undefined, reasoningTokens: undefined },
|
|
129
|
+
totalTokens: undefined,
|
|
130
|
+
};
|
|
121
131
|
class AnalyzeImages extends LLMQuery {
|
|
122
132
|
schema = z.object({
|
|
123
133
|
imageContext: z
|
|
@@ -130,12 +140,12 @@ class AnalyzeImages extends LLMQuery {
|
|
|
130
140
|
async execute() {
|
|
131
141
|
if (!this.targetArticle.hasAttachedImage ||
|
|
132
142
|
!this.targetArticle.detailContent) {
|
|
133
|
-
return null;
|
|
143
|
+
return { result: null, usage: ZERO_USAGE };
|
|
134
144
|
}
|
|
135
145
|
if (this.imageMessages.length === 0) {
|
|
136
|
-
return null;
|
|
146
|
+
return { result: null, usage: ZERO_USAGE };
|
|
137
147
|
}
|
|
138
|
-
const { output } = await generateText({
|
|
148
|
+
const { output, usage } = await generateText({
|
|
139
149
|
model: this.model,
|
|
140
150
|
maxRetries: this.options.llm.maxRetries,
|
|
141
151
|
output: Output.object({
|
|
@@ -149,7 +159,7 @@ class AnalyzeImages extends LLMQuery {
|
|
|
149
159
|
},
|
|
150
160
|
],
|
|
151
161
|
});
|
|
152
|
-
return output.imageContext;
|
|
162
|
+
return { result: output.imageContext, usage };
|
|
153
163
|
}
|
|
154
164
|
get systemPrompt() {
|
|
155
165
|
return `# Image Analysis Expert System
|
|
@@ -256,7 +266,7 @@ class ClassifyTags extends LLMQuery {
|
|
|
256
266
|
}
|
|
257
267
|
async execute({ existTags }) {
|
|
258
268
|
this.existTags = existTags;
|
|
259
|
-
const { output } = await generateText({
|
|
269
|
+
const { output, usage } = await generateText({
|
|
260
270
|
model: this.model,
|
|
261
271
|
maxRetries: this.options.llm.maxRetries,
|
|
262
272
|
output: Output.object({
|
|
@@ -265,7 +275,7 @@ class ClassifyTags extends LLMQuery {
|
|
|
265
275
|
system: this.systemPrompt,
|
|
266
276
|
prompt: this.userPrompt,
|
|
267
277
|
});
|
|
268
|
-
return output;
|
|
278
|
+
return { result: output, usage };
|
|
269
279
|
}
|
|
270
280
|
get systemPrompt() {
|
|
271
281
|
return `You are an AI specializing in analyzing and categorizing articles for professionals in ${this.expertFields.join(', ')}.
|
|
@@ -338,7 +348,7 @@ class DetermineArticleImportance extends LLMQuery {
|
|
|
338
348
|
this.dateService = config.dateService;
|
|
339
349
|
}
|
|
340
350
|
async execute() {
|
|
341
|
-
const { output } = await generateText({
|
|
351
|
+
const { output, usage } = await generateText({
|
|
342
352
|
model: this.model,
|
|
343
353
|
maxRetries: this.options.llm.maxRetries,
|
|
344
354
|
output: Output.object({
|
|
@@ -347,7 +357,7 @@ class DetermineArticleImportance extends LLMQuery {
|
|
|
347
357
|
system: this.systemPrompt,
|
|
348
358
|
prompt: this.userPrompt,
|
|
349
359
|
});
|
|
350
|
-
return output.importanceScore;
|
|
360
|
+
return { result: output.importanceScore, usage };
|
|
351
361
|
}
|
|
352
362
|
get minPoint() {
|
|
353
363
|
const targetRule = this.minimumImportanceScoreRules.find(({ targetUrl }) => targetUrl === this.targetArticle.targetUrl);
|
|
@@ -521,7 +531,9 @@ class ArticleInsightsChain extends PrivateChain {
|
|
|
521
531
|
});
|
|
522
532
|
try {
|
|
523
533
|
const classifyTags = new ClassifyTags(this.getLlmQueryConfig(this.provider.classifyTagOptions.model, article));
|
|
524
|
-
const generatedTags = await classifyTags.execute({
|
|
534
|
+
const { result: generatedTags } = await classifyTags.execute({
|
|
535
|
+
existTags,
|
|
536
|
+
});
|
|
525
537
|
pushTag(generatedTags.tag1);
|
|
526
538
|
pushTag(generatedTags.tag2);
|
|
527
539
|
pushTag(generatedTags.tag3);
|
|
@@ -594,7 +606,7 @@ class ArticleInsightsChain extends PrivateChain {
|
|
|
594
606
|
});
|
|
595
607
|
try {
|
|
596
608
|
const analyzeImages = new AnalyzeImages(this.getLlmQueryConfig(this.provider.analyzeImagesOptions.model, article));
|
|
597
|
-
const imageContextByLlm = await analyzeImages.execute();
|
|
609
|
+
const { result: imageContextByLlm } = await analyzeImages.execute();
|
|
598
610
|
if (imageContextByLlm) {
|
|
599
611
|
articlesWithImageContext.push({
|
|
600
612
|
id: article.id,
|
|
@@ -693,7 +705,7 @@ class ArticleInsightsChain extends PrivateChain {
|
|
|
693
705
|
minimumImportanceScoreRules: this.provider.determineScoreOptions.minimumImportanceScoreRules,
|
|
694
706
|
dateService: this.dateService,
|
|
695
707
|
});
|
|
696
|
-
const importanceScore = await determineArticleImportance.execute();
|
|
708
|
+
const { result: importanceScore } = await determineArticleImportance.execute();
|
|
697
709
|
const processedArticle = {
|
|
698
710
|
...article,
|
|
699
711
|
importanceScore,
|
|
@@ -836,55 +848,6 @@ class AnalysisChain extends Chain {
|
|
|
836
848
|
}
|
|
837
849
|
}
|
|
838
850
|
|
|
839
|
-
function preprocessBoldSyntax(markdown) {
|
|
840
|
-
// Convert **text** to <strong>text</strong> before marked parsing
|
|
841
|
-
// This fixes issues where marked doesn't properly handle bold syntax with parentheses
|
|
842
|
-
return markdown.replace(/\*\*(.+?)\*\*/g, '<strong>$1</strong>');
|
|
843
|
-
}
|
|
844
|
-
function markdownToHtml(markdown) {
|
|
845
|
-
const preprocessed = preprocessBoldSyntax(markdown);
|
|
846
|
-
const html = marked.parse(preprocessed);
|
|
847
|
-
const window = new JSDOM('').window;
|
|
848
|
-
const purify = DOMPurify(window);
|
|
849
|
-
const sanitized = purify.sanitize(html);
|
|
850
|
-
const withCorrectedUrls = correctMalformedUrls(sanitized);
|
|
851
|
-
const withTargetBlank = addTargetBlankToAnchors(withCorrectedUrls);
|
|
852
|
-
const withDelReplaced = replaceDelTagsWithTilde(withTargetBlank);
|
|
853
|
-
return correctUnconvertedBoldSyntax(withDelReplaced);
|
|
854
|
-
}
|
|
855
|
-
function addTargetBlankToAnchors(htmlString) {
|
|
856
|
-
// DOMPurify removes target attributes, so we can safely add target="_blank" to all anchors
|
|
857
|
-
return htmlString.replace(/<a\s+([^>]*)>/gi, (_match, attributes) => {
|
|
858
|
-
return `<a ${attributes} target="_blank">`;
|
|
859
|
-
});
|
|
860
|
-
}
|
|
861
|
-
function replaceDelTagsWithTilde(htmlString) {
|
|
862
|
-
// Replace opening and closing del tags with tilde (~)
|
|
863
|
-
return htmlString.replace(/<del>/gi, '~').replace(/<\/del>/gi, '~');
|
|
864
|
-
}
|
|
865
|
-
function correctUnconvertedBoldSyntax(htmlString) {
|
|
866
|
-
// Replace unconverted "**text**" markdown syntax with <b> tags
|
|
867
|
-
// Matches "**" followed by one or more non-asterisk characters, followed by "**"
|
|
868
|
-
return htmlString.replace(/\*\*([^*]+)\*\*/g, '<b>$1</b>');
|
|
869
|
-
}
|
|
870
|
-
function correctMalformedUrls(htmlString) {
|
|
871
|
-
// Pattern matches anchors with `)` followed by optional closing markup (</b> or **) and URL-encoded characters
|
|
872
|
-
// Capture groups:
|
|
873
|
-
// 1: attributes before href
|
|
874
|
-
// 2: URL base (before `)`)
|
|
875
|
-
// 3: closing markup (</b> or ** or empty)
|
|
876
|
-
// 4: URL-encoded part (starts with %)
|
|
877
|
-
// 5: attributes after href
|
|
878
|
-
// 6: link text base (before `)`)
|
|
879
|
-
// 7: text after `)` in link text (may include **)
|
|
880
|
-
const regex = /<a\s+([^>]*?)href="([^"]*?)\)((?:<\/b>|\*\*)?)(%[0-9A-Fa-f]{2}[^"]*?)"([^>]*?)>([^<]*?)\)((?:\*\*)?[^<]*?)<\/a>/g;
|
|
881
|
-
return htmlString.replace(regex, (_match, beforeHref, urlBase, closingMarkup, _encodedPart, afterHref, textBase, textAfterClosingParen) => {
|
|
882
|
-
// Remove leading ** from textAfterClosingParen since it's already captured as closingMarkup (</b>)
|
|
883
|
-
const cleanedText = textAfterClosingParen.replace(/^\*\*/, '');
|
|
884
|
-
return `<a ${beforeHref}href="${urlBase}"${afterHref}>${textBase}</a>${closingMarkup})${cleanedText}`;
|
|
885
|
-
});
|
|
886
|
-
}
|
|
887
|
-
|
|
888
851
|
let GenerateNewsletter$1 = class GenerateNewsletter extends BaseLLMQuery {
|
|
889
852
|
maxOutputTokens;
|
|
890
853
|
temperature;
|
|
@@ -927,7 +890,7 @@ let GenerateNewsletter$1 = class GenerateNewsletter extends BaseLLMQuery {
|
|
|
927
890
|
this.newsletterBrandName = config.newsletterBrandName;
|
|
928
891
|
}
|
|
929
892
|
async execute() {
|
|
930
|
-
const { output } = await generateText({
|
|
893
|
+
const { output, usage } = await generateText({
|
|
931
894
|
model: this.model,
|
|
932
895
|
maxRetries: this.options.llm.maxRetries,
|
|
933
896
|
maxOutputTokens: this.maxOutputTokens,
|
|
@@ -942,16 +905,19 @@ let GenerateNewsletter$1 = class GenerateNewsletter extends BaseLLMQuery {
|
|
|
942
905
|
system: this.systemPrompt,
|
|
943
906
|
prompt: this.userPrompt,
|
|
944
907
|
});
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
return
|
|
908
|
+
const needsRetry = !output.isWrittenInOutputLanguage ||
|
|
909
|
+
!output.copyrightVerified ||
|
|
910
|
+
!output.factAccuracy ||
|
|
911
|
+
(this.options.content.titleContext &&
|
|
912
|
+
!output.title.includes(this.options.content.titleContext));
|
|
913
|
+
if (needsRetry) {
|
|
914
|
+
const retryResult = await this.execute();
|
|
915
|
+
return {
|
|
916
|
+
result: retryResult.result,
|
|
917
|
+
usage: addUsage(usage, retryResult.usage),
|
|
918
|
+
};
|
|
953
919
|
}
|
|
954
|
-
return pick(output, ['title', 'content']);
|
|
920
|
+
return { result: pick(output, ['title', 'content']), usage };
|
|
955
921
|
}
|
|
956
922
|
get systemPrompt() {
|
|
957
923
|
return `You are a newsletter production expert for "${this.newsletterBrandName}" who analyzes and delivers trends in the fields of ${this.expertFields.join(', ')}. Your goal is to provide in-depth analysis that helps industry professionals easily understand complex information and make informed decisions.
|
|
@@ -1005,14 +971,14 @@ Copyright Protection & Fact-Checking Principles:
|
|
|
1005
971
|
Output Format & Requirements:
|
|
1006
972
|
1. Language: ${this.options.content.outputLanguage}
|
|
1007
973
|
|
|
1008
|
-
2. Start: Specify date (${this.dateService.getDisplayDateString()}) and begin with neutral, objective greeting. Briefly introduce key factual information to be covered in today's newsletter
|
|
974
|
+
2. Start: ${this.options.content.freeFormIntro ? 'Begin directly with the Overall Briefing section (no separate opening heading or greeting).' : `Specify date (${this.dateService.getDisplayDateString()}) and begin with neutral, objective greeting. Briefly introduce key factual information to be covered in today's newsletter.`}
|
|
1009
975
|
|
|
1010
|
-
3. Overall Briefing: Before the main listing, create a briefing section conveying objective facts about today's news in these aspects:
|
|
976
|
+
3. Overall Briefing: Before the main listing, create a briefing section conveying objective facts about today's news${this.options.content.freeFormIntro ? `. Structure: Start with a Heading 2 (##) briefing section heading in the format "## 📮 ${this.dateService.getDisplayDateString()} [Briefing/Summary word in output language]" (e.g., "## 📮 2월 6일 브리핑" for Korean, "## 📮 Feb 6 Briefing" for English) — do NOT include domain or field names in the heading. Immediately follow with a brief paragraph introducing key factual information to be covered in today's newsletter, then include the following bullet points:` : ' in these aspects:'}
|
|
1011
977
|
- Key Trends: Explain major patterns or trends found in this news based on data. Ex: 'Over 00% of today's news relates to 00'.
|
|
1012
978
|
- Immediate Impact: Emphasize most important changes or decisions affecting industry immediately, specifically mentioning which fields will be most impacted.
|
|
1013
979
|
|
|
1014
980
|
4. Category Classification & Content Organization:
|
|
1015
|
-
- Group news by logical categories based on related tags and content (e.g., Policy/Regulation, Budget/Support, Research/Development, Products/Services, Operations/Process, Recruitment/Events) rather than just listing by importance.
|
|
981
|
+
- Group news by logical categories based on related tags and content (e.g., Policy/Regulation, Budget/Support, Research/Development, Products/Services, Operations/Process, Recruitment/Events) rather than just listing by importance.${this.options.content.freeFormIntro ? '\n - Use Heading 2 (##) for each category heading (same level as the briefing heading). Do NOT use Heading 3 (###) for categories.' : ''}
|
|
1016
982
|
- Use appropriate emoticons for each category for visual distinction.
|
|
1017
983
|
- Sort by importance within categories, making high-importance items more prominent.
|
|
1018
984
|
- Add short paragraph at category start summarizing overall trends or changes in that area, specifying important points and areas to focus on.
|
|
@@ -1052,18 +1018,22 @@ Output Format & Requirements:
|
|
|
1052
1018
|
- Do not write preview or anticipatory messages about next newsletter.
|
|
1053
1019
|
- Do not include contact information for inquiries.
|
|
1054
1020
|
|
|
1055
|
-
7. Title Writing Guidelines
|
|
1021
|
+
7. Title Writing Guidelines:${this.options.content.titleContext
|
|
1022
|
+
? `\n - **Required title keyword**: "${this.options.content.titleContext}". This phrase MUST appear in the title. Combine it with key context from today's newsletter content to form a natural, complete title.
|
|
1023
|
+
- Keep title length 20-100 characters and can include 1-2 relevant emoticons.
|
|
1024
|
+
- Use neutral and objective terms in title (e.g., 'announced', 'implementing', 'deadline approaching').
|
|
1025
|
+
- Write title clearly and factually to maintain professionalism and credibility.`
|
|
1026
|
+
: `
|
|
1056
1027
|
- Title should objectively convey core facts of 1-2 most important news items today.
|
|
1057
1028
|
- Write with key facts rather than simple "Newsletter", more effective with specific figures or schedules.
|
|
1058
1029
|
- Use neutral and objective terms in title (e.g., 'announced', 'implementing', 'deadline approaching').
|
|
1059
1030
|
- Keep title length 20-50 characters and can include 1-2 relevant emoticons.
|
|
1060
1031
|
- Place most important key facts at beginning of title.
|
|
1061
|
-
- Write title clearly and factually to maintain professionalism and credibility
|
|
1032
|
+
- Write title clearly and factually to maintain professionalism and credibility.`}
|
|
1062
1033
|
|
|
1063
1034
|
8. Additional Requirements:
|
|
1064
1035
|
- Comprehensively analyze posts to create email containing most important information for ${this.expertFields.join(', ')} field experts.
|
|
1065
|
-
|
|
1066
|
-
- Write body in markdown format, effectively using headings(#, ##, ###), bold(**), italics(_), bullet points(-, *) etc. to improve readability.
|
|
1036
|
+
${this.options.content.freeFormIntro ? '' : `- Naturally include date at beginning in the format: "${this.dateService.getDisplayDateString()} ${this.expertFields.join(', ')} [News Term]". Replace [News Term] with the word for "News" appropriate for the output language (e.g., "News" for English, "소식" for Korean). Declare this part as \`Heading 1\`(#).\n `}- Write body in markdown format, effectively using headings(#, ##, ###), bold(**), italics(_), bullet points(-, *) etc. to improve readability.
|
|
1067
1037
|
- Group related news to provide broader context, and mention development status if there's continuity with content covered in previous issues.
|
|
1068
1038
|
- **Source citation is most important for ensuring credibility.** Must provide links in [original title](URL) format using source's title. Do not write as "View", "Article", "[Post3](URL)" format.
|
|
1069
1039
|
- Specify source whenever article titles or content are quoted in newsletter, ensure all information is provided with links.
|
|
@@ -1114,6 +1084,27 @@ Based on all post information provided above, please generate a ${this.expertFie
|
|
|
1114
1084
|
Please follow the roles and output format defined in the system prompt (friendly introduction, overall briefing, category classification, in-depth analysis, polite closing, etc.).`;
|
|
1115
1085
|
}
|
|
1116
1086
|
};
|
|
1087
|
+
/**
 * Adds two optional token counts.
 *
 * @param {number | null | undefined} a - First count.
 * @param {number | null | undefined} b - Second count.
 * @returns {number | undefined} The sum, counting a single missing operand as 0,
 *   or `undefined` when both operands are missing so that "not reported" stays
 *   distinguishable from a real zero.
 */
function addNum(a, b) {
    if (a == null && b == null) {
        return undefined;
    }
    return (a ?? 0) + (b ?? 0);
}
/**
 * Merges two token-usage records by summing every counter field by field.
 *
 * Each counter follows the `addNum` rule: it is `undefined` in the result only
 * when neither record reports it. The nested `inputTokenDetails` and
 * `outputTokenDetails` objects are always present in the result, even when both
 * inputs omit them.
 *
 * @param {object | null | undefined} a - First usage record.
 * @param {object | null | undefined} b - Second usage record.
 * @returns {object} A new usage record; neither input is mutated.
 */
function addUsage(a, b) {
    // Treat a missing record as an empty one instead of throwing on property
    // access; this matches the tolerance addNum and the `?.` lookups already
    // give to missing counters and missing detail objects.
    const first = a ?? {};
    const second = b ?? {};
    const firstInput = first.inputTokenDetails;
    const secondInput = second.inputTokenDetails;
    const firstOutput = first.outputTokenDetails;
    const secondOutput = second.outputTokenDetails;
    return {
        inputTokens: addNum(first.inputTokens, second.inputTokens),
        inputTokenDetails: {
            noCacheTokens: addNum(firstInput?.noCacheTokens, secondInput?.noCacheTokens),
            cacheReadTokens: addNum(firstInput?.cacheReadTokens, secondInput?.cacheReadTokens),
            cacheWriteTokens: addNum(firstInput?.cacheWriteTokens, secondInput?.cacheWriteTokens),
        },
        outputTokens: addNum(first.outputTokens, second.outputTokens),
        outputTokenDetails: {
            textTokens: addNum(firstOutput?.textTokens, secondOutput?.textTokens),
            reasoningTokens: addNum(firstOutput?.reasoningTokens, secondOutput?.reasoningTokens),
        },
        totalTokens: addNum(first.totalTokens, second.totalTokens),
    };
}
|
|
1117
1108
|
|
|
1118
1109
|
class ContentGenerateChain extends Chain {
|
|
1119
1110
|
dateService;
|
|
@@ -1206,7 +1197,8 @@ class ContentGenerateChain extends Chain {
|
|
|
1206
1197
|
newsletterBrandName: this.provider.newsletterBrandName,
|
|
1207
1198
|
dateService: this.dateService,
|
|
1208
1199
|
});
|
|
1209
|
-
|
|
1200
|
+
const { result } = await generateNewsletter.execute();
|
|
1201
|
+
return result;
|
|
1210
1202
|
});
|
|
1211
1203
|
}
|
|
1212
1204
|
async renderHtml(coreContent) {
|
|
@@ -1221,7 +1213,13 @@ class ContentGenerateChain extends Chain {
|
|
|
1221
1213
|
}
|
|
1222
1214
|
return this.htmlTemplate.html
|
|
1223
1215
|
.replaceAll(`{{${this.htmlTemplate.markers.title}}}`, coreContent.title)
|
|
1224
|
-
.replaceAll(`{{${this.htmlTemplate.markers.content}}}`,
|
|
1216
|
+
.replaceAll(`{{${this.htmlTemplate.markers.content}}}`, safeMarkdown2Html(coreContent.content, {
|
|
1217
|
+
window: new JSDOM('').window,
|
|
1218
|
+
linkTargetBlank: true,
|
|
1219
|
+
fixMalformedUrls: true,
|
|
1220
|
+
fixBoldSyntax: true,
|
|
1221
|
+
convertStrikethrough: true,
|
|
1222
|
+
}));
|
|
1225
1223
|
});
|
|
1226
1224
|
}
|
|
1227
1225
|
async createNewsletter(html, coreContent, candidateArticles) {
|
|
@@ -1302,7 +1300,7 @@ function shouldRetry(status, error) {
|
|
|
1302
1300
|
}
|
|
1303
1301
|
return false;
|
|
1304
1302
|
}
|
|
1305
|
-
async function getHtmlFromUrl(logger, url, referer = 'https://www.google.com/') {
|
|
1303
|
+
async function getHtmlFromUrl(logger, url, referer = 'https://www.google.com/', customFetch) {
|
|
1306
1304
|
const maxRetries = 5;
|
|
1307
1305
|
const baseTimeoutMs = 10_000; // Base 10s, increases per attempt
|
|
1308
1306
|
let lastError = null;
|
|
@@ -1312,7 +1310,7 @@ async function getHtmlFromUrl(logger, url, referer = 'https://www.google.com/')
|
|
|
1312
1310
|
const timeout = setTimeout(() => controller.abort(`timeout after ${timeoutMs}ms`), timeoutMs);
|
|
1313
1311
|
try {
|
|
1314
1312
|
const startedAt = Date.now();
|
|
1315
|
-
const response = await fetch(url, {
|
|
1313
|
+
const response = await (customFetch ?? fetch)(url, {
|
|
1316
1314
|
// mode: 'cors' // Not applicable in Node, left here for behavioral parity with browsers
|
|
1317
1315
|
redirect: 'follow',
|
|
1318
1316
|
// @ts-expect-error Undici/Fetch in Node may allow duplex; safe to ignore
|
|
@@ -1456,7 +1454,7 @@ class CrawlingChain extends Chain {
|
|
|
1456
1454
|
startFields: { target: this.describeTarget(target) },
|
|
1457
1455
|
}, async () => {
|
|
1458
1456
|
try {
|
|
1459
|
-
return await getHtmlFromUrl(this.logger, target.url);
|
|
1457
|
+
return await getHtmlFromUrl(this.logger, target.url, undefined, this.provider.customFetch);
|
|
1460
1458
|
}
|
|
1461
1459
|
catch (error) {
|
|
1462
1460
|
this.logger.error({
|
|
@@ -1535,7 +1533,7 @@ class CrawlingChain extends Chain {
|
|
|
1535
1533
|
failedCount: result.failedCount,
|
|
1536
1534
|
}),
|
|
1537
1535
|
}, async () => {
|
|
1538
|
-
const settled = await Promise.allSettled(list.map((data) => getHtmlFromUrl(this.logger, data.detailUrl)));
|
|
1536
|
+
const settled = await Promise.allSettled(list.map((data) => getHtmlFromUrl(this.logger, data.detailUrl, undefined, this.provider.customFetch)));
|
|
1539
1537
|
const detailPagesHtmlWithPipelineId = [];
|
|
1540
1538
|
const successList = [];
|
|
1541
1539
|
let failedCount = 0;
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sources":[],"sourcesContent":[],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"index.js","sources":[],"sourcesContent":[],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;"}
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@llm-newsletter-kit/core",
|
|
3
3
|
"private": false,
|
|
4
4
|
"type": "module",
|
|
5
|
-
"version": "1.1.6",
|
|
5
|
+
"version": "1.3.0",
|
|
6
6
|
"description": "An extensible framework to automate your entire newsletter workflow. Handles data collection, LLM-based content analysis, and email generation, letting you focus on your unique domain logic.",
|
|
7
7
|
"main": "dist/index.cjs",
|
|
8
8
|
"module": "dist/index.js",
|
|
@@ -41,25 +41,30 @@
|
|
|
41
41
|
"lint:fix": "eslint --fix ./src",
|
|
42
42
|
"lint:ci": "eslint --quiet ./src",
|
|
43
43
|
"typecheck": "tsc --noEmit",
|
|
44
|
-
"format": "prettier --write \"src/**/*.{ts,tsx,js,jsx,json,css,md,mdx}\""
|
|
44
|
+
"format": "prettier --write \"src/**/*.{ts,tsx,js,jsx,json,css,md,mdx}\"",
|
|
45
|
+
"playground": "tsx",
|
|
46
|
+
"playground:generate-newsletter": "tsx playground/generate-newsletter.ts"
|
|
45
47
|
},
|
|
46
48
|
"author": "kimhongyeon",
|
|
47
49
|
"license": "Apache-2.0",
|
|
48
50
|
"dependencies": {
|
|
49
51
|
"@langchain/core": "^1.1.19",
|
|
50
|
-
"ai": "^6.0.
|
|
51
|
-
"dompurify": "^3.3.1",
|
|
52
|
+
"ai": "^6.0.77",
|
|
52
53
|
"es-toolkit": "^1.44.0",
|
|
53
|
-
"jsdom": "^
|
|
54
|
-
"juice": "^11.1.
|
|
55
|
-
"
|
|
54
|
+
"jsdom": "^28.0.0",
|
|
55
|
+
"juice": "^11.1.1",
|
|
56
|
+
"safe-markdown2html": "^1.0.0",
|
|
56
57
|
"zod": "^4.3.6"
|
|
57
58
|
},
|
|
58
59
|
"devDependencies": {
|
|
60
|
+
"@ai-sdk/anthropic": "^3.0.38",
|
|
61
|
+
"@ai-sdk/google": "^3.0.22",
|
|
62
|
+
"@ai-sdk/openai": "^3.0.26",
|
|
63
|
+
"@ai-sdk/togetherai": "^2.0.31",
|
|
59
64
|
"@eslint/js": "^9.39.2",
|
|
60
65
|
"@trivago/prettier-plugin-sort-imports": "^6.0.2",
|
|
61
66
|
"@types/jsdom": "^27.0.0",
|
|
62
|
-
"@types/node": "^25.2.
|
|
67
|
+
"@types/node": "^25.2.1",
|
|
63
68
|
"@vitest/coverage-v8": "^3.2.4",
|
|
64
69
|
"@vitest/expect": "^3.2.4",
|
|
65
70
|
"eslint": "^9.39.2",
|
|
@@ -69,6 +74,7 @@
|
|
|
69
74
|
"rollup": "^4.57.1",
|
|
70
75
|
"rollup-plugin-dts": "^6.3.0",
|
|
71
76
|
"rollup-plugin-typescript2": "^0.36.0",
|
|
77
|
+
"tsx": "^4.21.0",
|
|
72
78
|
"typescript": "^5.9.3",
|
|
73
79
|
"typescript-eslint": "^8.54.0",
|
|
74
80
|
"vitest": "^3.2.4"
|