@synstack/web 1.1.6 → 1.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +0 -3
- package/dist/web.index.cjs +0 -1
- package/dist/web.index.cjs.map +1 -1
- package/dist/web.index.d.cts +5 -5
- package/dist/web.index.d.ts +5 -5
- package/dist/web.index.js +0 -1
- package/dist/web.index.js.map +1 -1
- package/package.json +7 -7
- package/src/web.lib.ts +1 -1
package/README.md
CHANGED
@@ -2,9 +2,6 @@
|
|
2
2
|
|
3
3
|
Web utilities for fetching and parsing web content
|
4
4
|
|
5
|
-
> [!WARNING]
|
6
|
-
> This package is included in the [@synstack/synscript](../synscript/README.md) package. It is not recommended to install both packages at the same time.
|
7
|
-
|
8
5
|
## What is it for?
|
9
6
|
|
10
7
|
This package provides utilities for fetching and parsing web content, including JSON data, plain text, and article extraction:
|
package/dist/web.index.cjs
CHANGED
@@ -39,7 +39,6 @@ __export(web_bundle_exports, {
|
|
39
39
|
// src/web.lib.ts
|
40
40
|
var import_readability = require("@mozilla/readability");
|
41
41
|
var import_linkedom = require("linkedom");
|
42
|
-
var import_zod = require("zod");
|
43
42
|
var fetchJson = (url, options = {}) => fetch(url).then((response) => response.json()).then((data) => options.schema ? options.schema.parse(data) : data);
|
44
43
|
var fetchText = (url) => fetch(url).then((response) => response.text());
|
45
44
|
var fetchArticle = async (url) => {
|
package/dist/web.index.cjs.map
CHANGED
@@ -1 +1 @@
|
|
1
|
-
{"version":3,"sources":["../src/web.index.ts","../src/web.bundle.ts","../src/web.lib.ts"],"sourcesContent":["export * from \"./web.bundle.ts\";\nexport * as web from \"./web.bundle.ts\";\nexport { ArticleNotFoundException } from \"./web.lib.ts\";\n","export { fetchArticle, fetchJson, fetchText } from \"./web.lib.ts\";\n","import { Readability } from \"@mozilla/readability\";\nimport { parseHTML } from \"linkedom\";\nimport {
|
1
|
+
{"version":3,"sources":["../src/web.index.ts","../src/web.bundle.ts","../src/web.lib.ts"],"sourcesContent":["export * from \"./web.bundle.ts\";\nexport * as web from \"./web.bundle.ts\";\nexport { ArticleNotFoundException } from \"./web.lib.ts\";\n","export { fetchArticle, fetchJson, fetchText } from \"./web.lib.ts\";\n","import { Readability } from \"@mozilla/readability\";\nimport { parseHTML } from \"linkedom\";\nimport type { ZodSchema } from \"zod\";\n\n/**\n * Retrieves an URL as JSON\n * @param url\n * @param options.schema an optional Zod schema to validate the data against\n * @returns the JSON as a JS object\n */\nexport const fetchJson = <T>(\n url: string,\n options: { schema?: ZodSchema<T> } = {},\n): Promise<T> =>\n fetch(url)\n .then((response) => response.json())\n .then((data) => (options.schema ? options.schema.parse(data) : data));\n\n/**\n * Retrieves an URL as a string\n * @param url\n * @returns the plain text content of the URL\n */\nexport const fetchText = (url: string): Promise<string> =>\n fetch(url).then((response) => response.text());\n\n/**\n * Extract an article from a URL\n * @param url\n * @returns the article content as a JS object\n */\nexport const fetchArticle = async (url: string) => {\n const content = await fetchText(url);\n const doc = parseHTML(content, { url });\n const reader = new Readability(doc.window.document);\n const article = reader.parse();\n\n if (!article?.content) throw new ArticleNotFoundException(url);\n\n return {\n url,\n content: article.content,\n title: article.title,\n byline: article.byline,\n siteName: article.siteName,\n lang: article.lang,\n publishedTime: article.publishedTime,\n };\n};\n\nexport class ArticleNotFoundException extends Error {\n constructor(url: string) {\n super(\n `\nNo article found at the URL\nURL: ${url}\n`.trim(),\n );\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,yBAA4B;AAC5B,sBAA0B;AASnB,IAAM,YAAY,CACvB,KACA,UAAqC,CAAC,MAEtC,MAAM,GAAG,EACN,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC,EAClC,KAAK,CAAC,SAAU,QAAQ,SAAS,QAAQ,OAAO,MAAM,IAAI,IAAI,IAAK;AAOjE,IAAM,YAAY,CAAC,QACxB,MAAM,GAAG,EAAE,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC;AAOxC,IAAM,eAAe,OAAO,QAAgB;AACjD,QAAM,UAAU,MAAM,UAAU,GAAG;AACnC,QAAM,UAAM,2BAAU,SAAS,EAAE,IAAI,CAAC;AACtC,QAAM,SAAS,IAAI,+BAAY,IAAI,OAAO,QAAQ;AAClD,QAAM,UAAU,OAAO,MAAM;AAE7B,MAAI,CAAC,SAAS,QAAS,OAAM,IAAI,yBAAyB,GAAG;AAE7D,SAAO;AAAA,IACL;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,OAAO,QAAQ;AAAA,IACf,QAAQ,QAAQ;AAAA,IAChB,UAAU,QAAQ;AAAA,IAClB,MAAM,QAAQ;AAAA,IACd,eAAe,QAAQ;AAAA,EACzB;AACF;AAEO,IAAM,2BAAN,cAAuC,MAAM;AAAA,EAClD,YAAY,KAAa;AACvB;AAAA,MACE;AAAA;AAAA,OAEC,GAAG;AAAA,EACR,KAAK;AAAA,IACH;AAAA,EACF;AACF;","names":[]}
|
package/dist/web.index.d.cts
CHANGED
@@ -23,11 +23,11 @@ declare const fetchText: (url: string) => Promise<string>;
|
|
23
23
|
declare const fetchArticle: (url: string) => Promise<{
|
24
24
|
url: string;
|
25
25
|
content: string;
|
26
|
-
title: string;
|
27
|
-
byline: string;
|
28
|
-
siteName: string;
|
29
|
-
lang: string;
|
30
|
-
publishedTime: string;
|
26
|
+
title: string | null | undefined;
|
27
|
+
byline: string | null | undefined;
|
28
|
+
siteName: string | null | undefined;
|
29
|
+
lang: string | null | undefined;
|
30
|
+
publishedTime: string | null | undefined;
|
31
31
|
}>;
|
32
32
|
declare class ArticleNotFoundException extends Error {
|
33
33
|
constructor(url: string);
|
package/dist/web.index.d.ts
CHANGED
@@ -23,11 +23,11 @@ declare const fetchText: (url: string) => Promise<string>;
|
|
23
23
|
declare const fetchArticle: (url: string) => Promise<{
|
24
24
|
url: string;
|
25
25
|
content: string;
|
26
|
-
title: string;
|
27
|
-
byline: string;
|
28
|
-
siteName: string;
|
29
|
-
lang: string;
|
30
|
-
publishedTime: string;
|
26
|
+
title: string | null | undefined;
|
27
|
+
byline: string | null | undefined;
|
28
|
+
siteName: string | null | undefined;
|
29
|
+
lang: string | null | undefined;
|
30
|
+
publishedTime: string | null | undefined;
|
31
31
|
}>;
|
32
32
|
declare class ArticleNotFoundException extends Error {
|
33
33
|
constructor(url: string);
|
package/dist/web.index.js
CHANGED
@@ -15,7 +15,6 @@ __export(web_bundle_exports, {
|
|
15
15
|
// src/web.lib.ts
|
16
16
|
import { Readability } from "@mozilla/readability";
|
17
17
|
import { parseHTML } from "linkedom";
|
18
|
-
import "zod";
|
19
18
|
var fetchJson = (url, options = {}) => fetch(url).then((response) => response.json()).then((data) => options.schema ? options.schema.parse(data) : data);
|
20
19
|
var fetchText = (url) => fetch(url).then((response) => response.text());
|
21
20
|
var fetchArticle = async (url) => {
|
package/dist/web.index.js.map
CHANGED
@@ -1 +1 @@
|
|
1
|
-
{"version":3,"sources":["../src/web.bundle.ts","../src/web.lib.ts"],"sourcesContent":["export { fetchArticle, fetchJson, fetchText } from \"./web.lib.ts\";\n","import { Readability } from \"@mozilla/readability\";\nimport { parseHTML } from \"linkedom\";\nimport {
|
1
|
+
{"version":3,"sources":["../src/web.bundle.ts","../src/web.lib.ts"],"sourcesContent":["export { fetchArticle, fetchJson, fetchText } from \"./web.lib.ts\";\n","import { Readability } from \"@mozilla/readability\";\nimport { parseHTML } from \"linkedom\";\nimport type { ZodSchema } from \"zod\";\n\n/**\n * Retrieves an URL as JSON\n * @param url\n * @param options.schema an optional Zod schema to validate the data against\n * @returns the JSON as a JS object\n */\nexport const fetchJson = <T>(\n url: string,\n options: { schema?: ZodSchema<T> } = {},\n): Promise<T> =>\n fetch(url)\n .then((response) => response.json())\n .then((data) => (options.schema ? options.schema.parse(data) : data));\n\n/**\n * Retrieves an URL as a string\n * @param url\n * @returns the plain text content of the URL\n */\nexport const fetchText = (url: string): Promise<string> =>\n fetch(url).then((response) => response.text());\n\n/**\n * Extract an article from a URL\n * @param url\n * @returns the article content as a JS object\n */\nexport const fetchArticle = async (url: string) => {\n const content = await fetchText(url);\n const doc = parseHTML(content, { url });\n const reader = new Readability(doc.window.document);\n const article = reader.parse();\n\n if (!article?.content) throw new ArticleNotFoundException(url);\n\n return {\n url,\n content: article.content,\n title: article.title,\n byline: article.byline,\n siteName: article.siteName,\n lang: article.lang,\n publishedTime: article.publishedTime,\n };\n};\n\nexport class ArticleNotFoundException extends Error {\n constructor(url: string) {\n super(\n `\nNo article found at the URL\nURL: ${url}\n`.trim(),\n );\n 
}\n}\n"],"mappings":";;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,SAAS,mBAAmB;AAC5B,SAAS,iBAAiB;AASnB,IAAM,YAAY,CACvB,KACA,UAAqC,CAAC,MAEtC,MAAM,GAAG,EACN,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC,EAClC,KAAK,CAAC,SAAU,QAAQ,SAAS,QAAQ,OAAO,MAAM,IAAI,IAAI,IAAK;AAOjE,IAAM,YAAY,CAAC,QACxB,MAAM,GAAG,EAAE,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC;AAOxC,IAAM,eAAe,OAAO,QAAgB;AACjD,QAAM,UAAU,MAAM,UAAU,GAAG;AACnC,QAAM,MAAM,UAAU,SAAS,EAAE,IAAI,CAAC;AACtC,QAAM,SAAS,IAAI,YAAY,IAAI,OAAO,QAAQ;AAClD,QAAM,UAAU,OAAO,MAAM;AAE7B,MAAI,CAAC,SAAS,QAAS,OAAM,IAAI,yBAAyB,GAAG;AAE7D,SAAO;AAAA,IACL;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,OAAO,QAAQ;AAAA,IACf,QAAQ,QAAQ;AAAA,IAChB,UAAU,QAAQ;AAAA,IAClB,MAAM,QAAQ;AAAA,IACd,eAAe,QAAQ;AAAA,EACzB;AACF;AAEO,IAAM,2BAAN,cAAuC,MAAM;AAAA,EAClD,YAAY,KAAa;AACvB;AAAA,MACE;AAAA;AAAA,OAEC,GAAG;AAAA,EACR,KAAK;AAAA,IACH;AAAA,EACF;AACF;","names":[]}
|
package/package.json
CHANGED
@@ -4,7 +4,7 @@
|
|
4
4
|
"publishConfig": {
|
5
5
|
"access": "public"
|
6
6
|
},
|
7
|
-
"version": "1.1.6",
|
7
|
+
"version": "1.1.8",
|
8
8
|
"description": "Web scraping utilities",
|
9
9
|
"keywords": [
|
10
10
|
"web",
|
@@ -46,14 +46,14 @@
|
|
46
46
|
}
|
47
47
|
},
|
48
48
|
"dependencies": {
|
49
|
-
"@mozilla/readability": "^0.
|
50
|
-
"linkedom": "^0.18.
|
49
|
+
"@mozilla/readability": "^0.6.0",
|
50
|
+
"linkedom": "^0.18.10"
|
51
51
|
},
|
52
52
|
"devDependencies": {
|
53
|
-
"@types/node": "^22.
|
53
|
+
"@types/node": "^22.15.17",
|
54
54
|
"tsup": "^8.4.0",
|
55
|
-
"typescript": "^5.
|
56
|
-
"zod": "^3.24.
|
55
|
+
"typescript": "^5.8.3",
|
56
|
+
"zod": "^3.24.4"
|
57
57
|
},
|
58
58
|
"peerDependencies": {
|
59
59
|
"zod": "*"
|
@@ -63,5 +63,5 @@
|
|
63
63
|
"!src/**/*.test.ts",
|
64
64
|
"dist/**/*"
|
65
65
|
],
|
66
|
-
"gitHead": "
|
66
|
+
"gitHead": "4bf0bcaecdadd27ce3fe78ae0ce32ae1d09f0a24"
|
67
67
|
}
|