@synstack/web 1.1.5 → 1.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,9 +2,6 @@
2
2
 
3
3
  Web utilities for fetching and parsing web content
4
4
 
5
- > [!WARNING]
6
- > This package is included in the [@synstack/synscript](../synscript/README.md) package. It is not recommended to install both packages at the same time.
7
-
8
5
  ## What is it for?
9
6
 
10
7
  This package provides utilities for fetching and parsing web content, including JSON data, plain text, and article extraction:
@@ -39,7 +39,6 @@ __export(web_bundle_exports, {
39
39
  // src/web.lib.ts
40
40
  var import_readability = require("@mozilla/readability");
41
41
  var import_linkedom = require("linkedom");
42
- var import_zod = require("zod");
43
42
  var fetchJson = (url, options = {}) => fetch(url).then((response) => response.json()).then((data) => options.schema ? options.schema.parse(data) : data);
44
43
  var fetchText = (url) => fetch(url).then((response) => response.text());
45
44
  var fetchArticle = async (url) => {
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/web.index.ts","../src/web.bundle.ts","../src/web.lib.ts"],"sourcesContent":["export * from \"./web.bundle.ts\";\nexport * as web from \"./web.bundle.ts\";\nexport { ArticleNotFoundException } from \"./web.lib.ts\";\n","export { fetchArticle, fetchJson, fetchText } from \"./web.lib.ts\";\n","import { Readability } from \"@mozilla/readability\";\nimport { parseHTML } from \"linkedom\";\nimport { type ZodSchema } from \"zod\";\n\n/**\n * Retrieves an URL as JSON\n * @param url\n * @param options.schema an optional Zod schema to validate the data against\n * @returns the JSON as a JS object\n */\nexport const fetchJson = <T>(\n url: string,\n options: { schema?: ZodSchema<T> } = {},\n): Promise<T> =>\n fetch(url)\n .then((response) => response.json())\n .then((data) => (options.schema ? options.schema.parse(data) : data));\n\n/**\n * Retrieves an URL as a string\n * @param url\n * @returns the plain text content of the URL\n */\nexport const fetchText = (url: string): Promise<string> =>\n fetch(url).then((response) => response.text());\n\n/**\n * Extract an article from a URL\n * @param url\n * @returns the article content as a JS object\n */\nexport const fetchArticle = async (url: string) => {\n const content = await fetchText(url);\n const doc = parseHTML(content, { url });\n const reader = new Readability(doc.window.document);\n const article = reader.parse();\n\n if (!article?.content) throw new ArticleNotFoundException(url);\n\n return {\n url,\n content: article.content,\n title: article.title,\n byline: article.byline,\n siteName: article.siteName,\n lang: article.lang,\n publishedTime: article.publishedTime,\n };\n};\n\nexport class ArticleNotFoundException extends Error {\n constructor(url: string) {\n super(\n `\nNo article found at the URL\nURL: ${url}\n`.trim(),\n );\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,yBAA4B;AAC5B,sBAA0B;AAC1B,iBAA+B;AAQxB,IAAM,YAAY,CACvB,KACA,UAAqC,CAAC,MAEtC,MAAM,GAAG,EACN,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC,EAClC,KAAK,CAAC,SAAU,QAAQ,SAAS,QAAQ,OAAO,MAAM,IAAI,IAAI,IAAK;AAOjE,IAAM,YAAY,CAAC,QACxB,MAAM,GAAG,EAAE,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC;AAOxC,IAAM,eAAe,OAAO,QAAgB;AACjD,QAAM,UAAU,MAAM,UAAU,GAAG;AACnC,QAAM,UAAM,2BAAU,SAAS,EAAE,IAAI,CAAC;AACtC,QAAM,SAAS,IAAI,+BAAY,IAAI,OAAO,QAAQ;AAClD,QAAM,UAAU,OAAO,MAAM;AAE7B,MAAI,CAAC,SAAS,QAAS,OAAM,IAAI,yBAAyB,GAAG;AAE7D,SAAO;AAAA,IACL;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,OAAO,QAAQ;AAAA,IACf,QAAQ,QAAQ;AAAA,IAChB,UAAU,QAAQ;AAAA,IAClB,MAAM,QAAQ;AAAA,IACd,eAAe,QAAQ;AAAA,EACzB;AACF;AAEO,IAAM,2BAAN,cAAuC,MAAM;AAAA,EAClD,YAAY,KAAa;AACvB;AAAA,MACE;AAAA;AAAA,OAEC,GAAG;AAAA,EACR,KAAK;AAAA,IACH;AAAA,EACF;AACF;","names":[]}
1
+ {"version":3,"sources":["../src/web.index.ts","../src/web.bundle.ts","../src/web.lib.ts"],"sourcesContent":["export * from \"./web.bundle.ts\";\nexport * as web from \"./web.bundle.ts\";\nexport { ArticleNotFoundException } from \"./web.lib.ts\";\n","export { fetchArticle, fetchJson, fetchText } from \"./web.lib.ts\";\n","import { Readability } from \"@mozilla/readability\";\nimport { parseHTML } from \"linkedom\";\nimport type { ZodSchema } from \"zod\";\n\n/**\n * Retrieves an URL as JSON\n * @param url\n * @param options.schema an optional Zod schema to validate the data against\n * @returns the JSON as a JS object\n */\nexport const fetchJson = <T>(\n url: string,\n options: { schema?: ZodSchema<T> } = {},\n): Promise<T> =>\n fetch(url)\n .then((response) => response.json())\n .then((data) => (options.schema ? options.schema.parse(data) : data));\n\n/**\n * Retrieves an URL as a string\n * @param url\n * @returns the plain text content of the URL\n */\nexport const fetchText = (url: string): Promise<string> =>\n fetch(url).then((response) => response.text());\n\n/**\n * Extract an article from a URL\n * @param url\n * @returns the article content as a JS object\n */\nexport const fetchArticle = async (url: string) => {\n const content = await fetchText(url);\n const doc = parseHTML(content, { url });\n const reader = new Readability(doc.window.document);\n const article = reader.parse();\n\n if (!article?.content) throw new ArticleNotFoundException(url);\n\n return {\n url,\n content: article.content,\n title: article.title,\n byline: article.byline,\n siteName: article.siteName,\n lang: article.lang,\n publishedTime: article.publishedTime,\n };\n};\n\nexport class ArticleNotFoundException extends Error {\n constructor(url: string) {\n super(\n `\nNo article found at the URL\nURL: ${url}\n`.trim(),\n );\n }\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,yBAA4B;AAC5B,sBAA0B;AASnB,IAAM,YAAY,CACvB,KACA,UAAqC,CAAC,MAEtC,MAAM,GAAG,EACN,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC,EAClC,KAAK,CAAC,SAAU,QAAQ,SAAS,QAAQ,OAAO,MAAM,IAAI,IAAI,IAAK;AAOjE,IAAM,YAAY,CAAC,QACxB,MAAM,GAAG,EAAE,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC;AAOxC,IAAM,eAAe,OAAO,QAAgB;AACjD,QAAM,UAAU,MAAM,UAAU,GAAG;AACnC,QAAM,UAAM,2BAAU,SAAS,EAAE,IAAI,CAAC;AACtC,QAAM,SAAS,IAAI,+BAAY,IAAI,OAAO,QAAQ;AAClD,QAAM,UAAU,OAAO,MAAM;AAE7B,MAAI,CAAC,SAAS,QAAS,OAAM,IAAI,yBAAyB,GAAG;AAE7D,SAAO;AAAA,IACL;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,OAAO,QAAQ;AAAA,IACf,QAAQ,QAAQ;AAAA,IAChB,UAAU,QAAQ;AAAA,IAClB,MAAM,QAAQ;AAAA,IACd,eAAe,QAAQ;AAAA,EACzB;AACF;AAEO,IAAM,2BAAN,cAAuC,MAAM;AAAA,EAClD,YAAY,KAAa;AACvB;AAAA,MACE;AAAA;AAAA,OAEC,GAAG;AAAA,EACR,KAAK;AAAA,IACH;AAAA,EACF;AACF;","names":[]}
@@ -23,11 +23,11 @@ declare const fetchText: (url: string) => Promise<string>;
23
23
  declare const fetchArticle: (url: string) => Promise<{
24
24
  url: string;
25
25
  content: string;
26
- title: string;
27
- byline: string;
28
- siteName: string;
29
- lang: string;
30
- publishedTime: string;
26
+ title: string | null | undefined;
27
+ byline: string | null | undefined;
28
+ siteName: string | null | undefined;
29
+ lang: string | null | undefined;
30
+ publishedTime: string | null | undefined;
31
31
  }>;
32
32
  declare class ArticleNotFoundException extends Error {
33
33
  constructor(url: string);
@@ -23,11 +23,11 @@ declare const fetchText: (url: string) => Promise<string>;
23
23
  declare const fetchArticle: (url: string) => Promise<{
24
24
  url: string;
25
25
  content: string;
26
- title: string;
27
- byline: string;
28
- siteName: string;
29
- lang: string;
30
- publishedTime: string;
26
+ title: string | null | undefined;
27
+ byline: string | null | undefined;
28
+ siteName: string | null | undefined;
29
+ lang: string | null | undefined;
30
+ publishedTime: string | null | undefined;
31
31
  }>;
32
32
  declare class ArticleNotFoundException extends Error {
33
33
  constructor(url: string);
package/dist/web.index.js CHANGED
@@ -15,7 +15,6 @@ __export(web_bundle_exports, {
15
15
  // src/web.lib.ts
16
16
  import { Readability } from "@mozilla/readability";
17
17
  import { parseHTML } from "linkedom";
18
- import "zod";
19
18
  var fetchJson = (url, options = {}) => fetch(url).then((response) => response.json()).then((data) => options.schema ? options.schema.parse(data) : data);
20
19
  var fetchText = (url) => fetch(url).then((response) => response.text());
21
20
  var fetchArticle = async (url) => {
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/web.bundle.ts","../src/web.lib.ts"],"sourcesContent":["export { fetchArticle, fetchJson, fetchText } from \"./web.lib.ts\";\n","import { Readability } from \"@mozilla/readability\";\nimport { parseHTML } from \"linkedom\";\nimport { type ZodSchema } from \"zod\";\n\n/**\n * Retrieves an URL as JSON\n * @param url\n * @param options.schema an optional Zod schema to validate the data against\n * @returns the JSON as a JS object\n */\nexport const fetchJson = <T>(\n url: string,\n options: { schema?: ZodSchema<T> } = {},\n): Promise<T> =>\n fetch(url)\n .then((response) => response.json())\n .then((data) => (options.schema ? options.schema.parse(data) : data));\n\n/**\n * Retrieves an URL as a string\n * @param url\n * @returns the plain text content of the URL\n */\nexport const fetchText = (url: string): Promise<string> =>\n fetch(url).then((response) => response.text());\n\n/**\n * Extract an article from a URL\n * @param url\n * @returns the article content as a JS object\n */\nexport const fetchArticle = async (url: string) => {\n const content = await fetchText(url);\n const doc = parseHTML(content, { url });\n const reader = new Readability(doc.window.document);\n const article = reader.parse();\n\n if (!article?.content) throw new ArticleNotFoundException(url);\n\n return {\n url,\n content: article.content,\n title: article.title,\n byline: article.byline,\n siteName: article.siteName,\n lang: article.lang,\n publishedTime: article.publishedTime,\n };\n};\n\nexport class ArticleNotFoundException extends Error {\n constructor(url: string) {\n super(\n `\nNo article found at the URL\nURL: ${url}\n`.trim(),\n );\n }\n}\n"],"mappings":";;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,SAAS,mBAAmB;AAC5B,SAAS,iBAAiB;AAC1B,OAA+B;AAQxB,IAAM,YAAY,CACvB,KACA,UAAqC,CAAC,MAEtC,MAAM,GAAG,EACN,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC,EAClC,KAAK,CAAC,SAAU,QAAQ,SAAS,QAAQ,OAAO,MAAM,IAAI,IAAI,IAAK;AAOjE,IAAM,YAAY,CAAC,QACxB,MAAM,GAAG,EAAE,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC;AAOxC,IAAM,eAAe,OAAO,QAAgB;AACjD,QAAM,UAAU,MAAM,UAAU,GAAG;AACnC,QAAM,MAAM,UAAU,SAAS,EAAE,IAAI,CAAC;AACtC,QAAM,SAAS,IAAI,YAAY,IAAI,OAAO,QAAQ;AAClD,QAAM,UAAU,OAAO,MAAM;AAE7B,MAAI,CAAC,SAAS,QAAS,OAAM,IAAI,yBAAyB,GAAG;AAE7D,SAAO;AAAA,IACL;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,OAAO,QAAQ;AAAA,IACf,QAAQ,QAAQ;AAAA,IAChB,UAAU,QAAQ;AAAA,IAClB,MAAM,QAAQ;AAAA,IACd,eAAe,QAAQ;AAAA,EACzB;AACF;AAEO,IAAM,2BAAN,cAAuC,MAAM;AAAA,EAClD,YAAY,KAAa;AACvB;AAAA,MACE;AAAA;AAAA,OAEC,GAAG;AAAA,EACR,KAAK;AAAA,IACH;AAAA,EACF;AACF;","names":[]}
1
+ {"version":3,"sources":["../src/web.bundle.ts","../src/web.lib.ts"],"sourcesContent":["export { fetchArticle, fetchJson, fetchText } from \"./web.lib.ts\";\n","import { Readability } from \"@mozilla/readability\";\nimport { parseHTML } from \"linkedom\";\nimport type { ZodSchema } from \"zod\";\n\n/**\n * Retrieves an URL as JSON\n * @param url\n * @param options.schema an optional Zod schema to validate the data against\n * @returns the JSON as a JS object\n */\nexport const fetchJson = <T>(\n url: string,\n options: { schema?: ZodSchema<T> } = {},\n): Promise<T> =>\n fetch(url)\n .then((response) => response.json())\n .then((data) => (options.schema ? options.schema.parse(data) : data));\n\n/**\n * Retrieves an URL as a string\n * @param url\n * @returns the plain text content of the URL\n */\nexport const fetchText = (url: string): Promise<string> =>\n fetch(url).then((response) => response.text());\n\n/**\n * Extract an article from a URL\n * @param url\n * @returns the article content as a JS object\n */\nexport const fetchArticle = async (url: string) => {\n const content = await fetchText(url);\n const doc = parseHTML(content, { url });\n const reader = new Readability(doc.window.document);\n const article = reader.parse();\n\n if (!article?.content) throw new ArticleNotFoundException(url);\n\n return {\n url,\n content: article.content,\n title: article.title,\n byline: article.byline,\n siteName: article.siteName,\n lang: article.lang,\n publishedTime: article.publishedTime,\n };\n};\n\nexport class ArticleNotFoundException extends Error {\n constructor(url: string) {\n super(\n `\nNo article found at the URL\nURL: ${url}\n`.trim(),\n );\n }\n}\n"],"mappings":";;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,SAAS,mBAAmB;AAC5B,SAAS,iBAAiB;AASnB,IAAM,YAAY,CACvB,KACA,UAAqC,CAAC,MAEtC,MAAM,GAAG,EACN,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC,EAClC,KAAK,CAAC,SAAU,QAAQ,SAAS,QAAQ,OAAO,MAAM,IAAI,IAAI,IAAK;AAOjE,IAAM,YAAY,CAAC,QACxB,MAAM,GAAG,EAAE,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC;AAOxC,IAAM,eAAe,OAAO,QAAgB;AACjD,QAAM,UAAU,MAAM,UAAU,GAAG;AACnC,QAAM,MAAM,UAAU,SAAS,EAAE,IAAI,CAAC;AACtC,QAAM,SAAS,IAAI,YAAY,IAAI,OAAO,QAAQ;AAClD,QAAM,UAAU,OAAO,MAAM;AAE7B,MAAI,CAAC,SAAS,QAAS,OAAM,IAAI,yBAAyB,GAAG;AAE7D,SAAO;AAAA,IACL;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,OAAO,QAAQ;AAAA,IACf,QAAQ,QAAQ;AAAA,IAChB,UAAU,QAAQ;AAAA,IAClB,MAAM,QAAQ;AAAA,IACd,eAAe,QAAQ;AAAA,EACzB;AACF;AAEO,IAAM,2BAAN,cAAuC,MAAM;AAAA,EAClD,YAAY,KAAa;AACvB;AAAA,MACE;AAAA;AAAA,OAEC,GAAG;AAAA,EACR,KAAK;AAAA,IACH;AAAA,EACF;AACF;","names":[]}
package/package.json CHANGED
@@ -4,7 +4,7 @@
4
4
  "publishConfig": {
5
5
  "access": "public"
6
6
  },
7
- "version": "1.1.5",
7
+ "version": "1.1.7",
8
8
  "description": "Web scraping utilities",
9
9
  "keywords": [
10
10
  "web",
@@ -46,14 +46,14 @@
46
46
  }
47
47
  },
48
48
  "dependencies": {
49
- "@mozilla/readability": "^0.5.0",
50
- "linkedom": "^0.18.5"
49
+ "@mozilla/readability": "^0.6.0",
50
+ "linkedom": "^0.18.9"
51
51
  },
52
52
  "devDependencies": {
53
53
  "@types/node": "^22.10.1",
54
- "tsup": "^8.3.5",
55
- "typescript": "^5.7.2",
56
- "zod": "^3.23.8"
54
+ "tsup": "^8.4.0",
55
+ "typescript": "^5.8.2",
56
+ "zod": "^3.24.2"
57
57
  },
58
58
  "peerDependencies": {
59
59
  "zod": "*"
@@ -63,5 +63,5 @@
63
63
  "!src/**/*.test.ts",
64
64
  "dist/**/*"
65
65
  ],
66
- "gitHead": "886036553ab02c6c1b98289b1de64240de866521"
66
+ "gitHead": "4aae6e86f7664e5b928ecb3a492ec035307168d1"
67
67
  }
package/src/web.lib.ts CHANGED
@@ -1,6 +1,6 @@
1
1
  import { Readability } from "@mozilla/readability";
2
2
  import { parseHTML } from "linkedom";
3
- import { type ZodSchema } from "zod";
3
+ import type { ZodSchema } from "zod";
4
4
 
5
5
  /**
6
6
  * Retrieves an URL as JSON