@llm-newsletter-kit/core 1.2.0 → 1.3.1
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- package/dist/index.cjs +4 -4
- package/dist/index.d.ts +5 -0
- package/dist/index.js +4 -4
- package/package.json +10 -10
package/dist/index.cjs
CHANGED
|
@@ -1302,7 +1302,7 @@ function shouldRetry(status, error) {
|
|
|
1302
1302
|
}
|
|
1303
1303
|
return false;
|
|
1304
1304
|
}
|
|
1305
|
-
async function getHtmlFromUrl(logger, url, referer = 'https://www.google.com/') {
|
|
1305
|
+
async function getHtmlFromUrl(logger, url, referer = 'https://www.google.com/', customFetch) {
|
|
1306
1306
|
const maxRetries = 5;
|
|
1307
1307
|
const baseTimeoutMs = 10_000; // Base 10s, increases per attempt
|
|
1308
1308
|
let lastError = null;
|
|
@@ -1312,7 +1312,7 @@ async function getHtmlFromUrl(logger, url, referer = 'https://www.google.com/')
|
|
|
1312
1312
|
const timeout = setTimeout(() => controller.abort(`timeout after ${timeoutMs}ms`), timeoutMs);
|
|
1313
1313
|
try {
|
|
1314
1314
|
const startedAt = Date.now();
|
|
1315
|
-
const response = await fetch(url, {
|
|
1315
|
+
const response = await (customFetch ?? fetch)(url, {
|
|
1316
1316
|
// mode: 'cors' // Not applicable in Node, left here for behavioral parity with browsers
|
|
1317
1317
|
redirect: 'follow',
|
|
1318
1318
|
// @ts-expect-error Undici/Fetch in Node may allow duplex; safe to ignore
|
|
@@ -1456,7 +1456,7 @@ class CrawlingChain extends Chain {
|
|
|
1456
1456
|
startFields: { target: this.describeTarget(target) },
|
|
1457
1457
|
}, async () => {
|
|
1458
1458
|
try {
|
|
1459
|
-
return await getHtmlFromUrl(this.logger, target.url);
|
|
1459
|
+
return await getHtmlFromUrl(this.logger, target.url, undefined, this.provider.customFetch);
|
|
1460
1460
|
}
|
|
1461
1461
|
catch (error) {
|
|
1462
1462
|
this.logger.error({
|
|
@@ -1535,7 +1535,7 @@ class CrawlingChain extends Chain {
|
|
|
1535
1535
|
failedCount: result.failedCount,
|
|
1536
1536
|
}),
|
|
1537
1537
|
}, async () => {
|
|
1538
|
-
const settled = await Promise.allSettled(list.map((data) => getHtmlFromUrl(this.logger, data.detailUrl)));
|
|
1538
|
+
const settled = await Promise.allSettled(list.map((data) => getHtmlFromUrl(this.logger, data.detailUrl, undefined, this.provider.customFetch)));
|
|
1539
1539
|
const detailPagesHtmlWithPipelineId = [];
|
|
1540
1540
|
const successList = [];
|
|
1541
1541
|
let failedCount = 0;
|
package/dist/index.d.ts
CHANGED
|
@@ -526,6 +526,11 @@ interface CrawlingProvider {
|
|
|
526
526
|
* @default 5
|
|
527
527
|
*/
|
|
528
528
|
maxConcurrency?: number;
|
|
529
|
+
/**
|
|
530
|
+
* Optional custom fetch function (e.g., proxy-based fetch).
|
|
531
|
+
* When provided, this function is used instead of the global `fetch` for HTTP requests.
|
|
532
|
+
*/
|
|
533
|
+
customFetch?: typeof fetch;
|
|
529
534
|
/**
|
|
530
535
|
* Crawling target groups.
|
|
531
536
|
*/
|
package/dist/index.js
CHANGED
|
@@ -1300,7 +1300,7 @@ function shouldRetry(status, error) {
|
|
|
1300
1300
|
}
|
|
1301
1301
|
return false;
|
|
1302
1302
|
}
|
|
1303
|
-
async function getHtmlFromUrl(logger, url, referer = 'https://www.google.com/') {
|
|
1303
|
+
async function getHtmlFromUrl(logger, url, referer = 'https://www.google.com/', customFetch) {
|
|
1304
1304
|
const maxRetries = 5;
|
|
1305
1305
|
const baseTimeoutMs = 10_000; // Base 10s, increases per attempt
|
|
1306
1306
|
let lastError = null;
|
|
@@ -1310,7 +1310,7 @@ async function getHtmlFromUrl(logger, url, referer = 'https://www.google.com/')
|
|
|
1310
1310
|
const timeout = setTimeout(() => controller.abort(`timeout after ${timeoutMs}ms`), timeoutMs);
|
|
1311
1311
|
try {
|
|
1312
1312
|
const startedAt = Date.now();
|
|
1313
|
-
const response = await fetch(url, {
|
|
1313
|
+
const response = await (customFetch ?? fetch)(url, {
|
|
1314
1314
|
// mode: 'cors' // Not applicable in Node, left here for behavioral parity with browsers
|
|
1315
1315
|
redirect: 'follow',
|
|
1316
1316
|
// @ts-expect-error Undici/Fetch in Node may allow duplex; safe to ignore
|
|
@@ -1454,7 +1454,7 @@ class CrawlingChain extends Chain {
|
|
|
1454
1454
|
startFields: { target: this.describeTarget(target) },
|
|
1455
1455
|
}, async () => {
|
|
1456
1456
|
try {
|
|
1457
|
-
return await getHtmlFromUrl(this.logger, target.url);
|
|
1457
|
+
return await getHtmlFromUrl(this.logger, target.url, undefined, this.provider.customFetch);
|
|
1458
1458
|
}
|
|
1459
1459
|
catch (error) {
|
|
1460
1460
|
this.logger.error({
|
|
@@ -1533,7 +1533,7 @@ class CrawlingChain extends Chain {
|
|
|
1533
1533
|
failedCount: result.failedCount,
|
|
1534
1534
|
}),
|
|
1535
1535
|
}, async () => {
|
|
1536
|
-
const settled = await Promise.allSettled(list.map((data) => getHtmlFromUrl(this.logger, data.detailUrl)));
|
|
1536
|
+
const settled = await Promise.allSettled(list.map((data) => getHtmlFromUrl(this.logger, data.detailUrl, undefined, this.provider.customFetch)));
|
|
1537
1537
|
const detailPagesHtmlWithPipelineId = [];
|
|
1538
1538
|
const successList = [];
|
|
1539
1539
|
let failedCount = 0;
|
package/package.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"name": "@llm-newsletter-kit/core",
|
|
3
3
|
"private": false,
|
|
4
4
|
"type": "module",
|
|
5
|
-
"version": "1.2.0",
|
|
5
|
+
"version": "1.3.1",
|
|
6
6
|
"description": "An extensible framework to automate your entire newsletter workflow. Handles data collection, LLM-based content analysis, and email generation, letting you focus on your unique domain logic.",
|
|
7
7
|
"main": "dist/index.cjs",
|
|
8
8
|
"module": "dist/index.js",
|
|
@@ -48,8 +48,8 @@
|
|
|
48
48
|
"author": "kimhongyeon",
|
|
49
49
|
"license": "Apache-2.0",
|
|
50
50
|
"dependencies": {
|
|
51
|
-
"@langchain/core": "^1.1.
|
|
52
|
-
"ai": "^6.0.
|
|
51
|
+
"@langchain/core": "^1.1.24",
|
|
52
|
+
"ai": "^6.0.84",
|
|
53
53
|
"es-toolkit": "^1.44.0",
|
|
54
54
|
"jsdom": "^28.0.0",
|
|
55
55
|
"juice": "^11.1.1",
|
|
@@ -57,18 +57,18 @@
|
|
|
57
57
|
"zod": "^4.3.6"
|
|
58
58
|
},
|
|
59
59
|
"devDependencies": {
|
|
60
|
-
"@ai-sdk/anthropic": "^3.0.
|
|
61
|
-
"@ai-sdk/google": "^3.0.
|
|
62
|
-
"@ai-sdk/openai": "^3.0.
|
|
63
|
-
"@ai-sdk/togetherai": "^2.0.
|
|
60
|
+
"@ai-sdk/anthropic": "^3.0.43",
|
|
61
|
+
"@ai-sdk/google": "^3.0.29",
|
|
62
|
+
"@ai-sdk/openai": "^3.0.28",
|
|
63
|
+
"@ai-sdk/togetherai": "^2.0.33",
|
|
64
64
|
"@eslint/js": "^9.39.2",
|
|
65
65
|
"@trivago/prettier-plugin-sort-imports": "^6.0.2",
|
|
66
66
|
"@types/jsdom": "^27.0.0",
|
|
67
|
-
"@types/node": "^25.2.
|
|
67
|
+
"@types/node": "^25.2.3",
|
|
68
68
|
"@vitest/coverage-v8": "^3.2.4",
|
|
69
69
|
"@vitest/expect": "^3.2.4",
|
|
70
70
|
"eslint": "^9.39.2",
|
|
71
|
-
"eslint-plugin-unused-imports": "^4.
|
|
71
|
+
"eslint-plugin-unused-imports": "^4.4.1",
|
|
72
72
|
"prettier": "^3.8.1",
|
|
73
73
|
"rimraf": "^6.1.2",
|
|
74
74
|
"rollup": "^4.57.1",
|
|
@@ -76,7 +76,7 @@
|
|
|
76
76
|
"rollup-plugin-typescript2": "^0.36.0",
|
|
77
77
|
"tsx": "^4.21.0",
|
|
78
78
|
"typescript": "^5.9.3",
|
|
79
|
-
"typescript-eslint": "^8.
|
|
79
|
+
"typescript-eslint": "^8.55.0",
|
|
80
80
|
"vitest": "^3.2.4"
|
|
81
81
|
},
|
|
82
82
|
"repository": {
|