@synstack/web 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1 @@
1
+ # @synstack/web
@@ -0,0 +1,78 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+
20
+ // src/web.index.ts
21
+ var web_index_exports = {};
22
+ __export(web_index_exports, {
23
+ ArticleNotFoundException: () => ArticleNotFoundException,
24
+ fetchArticle: () => fetchArticle,
25
+ fetchJson: () => fetchJson,
26
+ fetchText: () => fetchText,
27
+ web: () => web_bundle_exports
28
+ });
29
+ module.exports = __toCommonJS(web_index_exports);
30
+
31
+ // src/web.bundle.ts
32
+ var web_bundle_exports = {};
33
+ __export(web_bundle_exports, {
34
+ fetchArticle: () => fetchArticle,
35
+ fetchJson: () => fetchJson,
36
+ fetchText: () => fetchText
37
+ });
38
+
39
+ // src/web.lib.ts
40
+ var import_readability = require("@mozilla/readability");
41
+ var import_linkedom = require("linkedom");
42
+ var fetchJson = (url, options = {}) => fetch(url).then((response) => response.json()).then((data) => options.schema ? options.schema.parse(data) : data);
43
+ var fetchText = (url) => fetch(url).then((response) => response.text());
44
+ var fetchArticle = async (url) => {
45
+ const content = await fetchText(url);
46
+ const doc = (0, import_linkedom.parseHTML)(content, { url });
47
+ const reader = new import_readability.Readability(doc.window.document);
48
+ const article = reader.parse();
49
+ if (!article?.content) throw new ArticleNotFoundException(url);
50
+ return {
51
+ url,
52
+ content: article.content,
53
+ title: article.title,
54
+ byline: article.byline,
55
+ siteName: article.siteName,
56
+ lang: article.lang,
57
+ publishedTime: article.publishedTime
58
+ };
59
+ };
60
+ var ArticleNotFoundException = class extends Error {
61
+ constructor(url) {
62
+ super(
63
+ `
64
+ No article found at the URL
65
+ URL: ${url}
66
+ `.trim()
67
+ );
68
+ }
69
+ };
70
+ // Annotate the CommonJS export names for ESM import in node:
71
+ 0 && (module.exports = {
72
+ ArticleNotFoundException,
73
+ fetchArticle,
74
+ fetchJson,
75
+ fetchText,
76
+ web
77
+ });
78
+ //# sourceMappingURL=web.index.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/web.index.ts","../src/web.bundle.ts","../src/web.lib.ts"],"sourcesContent":["export * from \"./web.bundle\";\nexport * as web from \"./web.bundle\";\nexport { ArticleNotFoundException } from \"./web.lib\";\n","export { fetchArticle, fetchJson, fetchText } from \"./web.lib\";\n","import { Readability } from \"@mozilla/readability\";\nimport { parseHTML } from \"linkedom\";\nimport { type ZodSchema } from \"zod\";\n\n/**\n * Retrieves an URL as JSON\n * @param url\n * @param options.schema an optional Zod schema to validate the data against\n * @returns the JSON as a JS object\n */\nexport const fetchJson = <T>(\n url: string,\n options: { schema?: ZodSchema<T> } = {},\n): Promise<T> =>\n fetch(url)\n .then((response) => response.json())\n .then((data) => (options.schema ? options.schema.parse(data) : data));\n\n/**\n * Retrieves an URL as a string\n * @param url\n * @returns the plain text content of the URL\n */\nexport const fetchText = (url: string): Promise<string> =>\n fetch(url).then((response) => response.text());\n\n/**\n * Extract an article from a URL\n * @param url\n * @returns the article content as a JS object\n */\nexport const fetchArticle = async (url: string) => {\n const content = await fetchText(url);\n const doc = parseHTML(content, { url });\n const reader = new Readability(doc.window.document);\n const article = reader.parse();\n\n if (!article?.content) throw new ArticleNotFoundException(url);\n\n return {\n url,\n content: article.content,\n title: article.title,\n byline: article.byline,\n siteName: article.siteName,\n lang: article.lang,\n publishedTime: article.publishedTime,\n };\n};\n\nexport class ArticleNotFoundException extends Error {\n constructor(url: string) {\n super(\n `\nNo article found at the URL\nURL: ${url}\n`.trim(),\n );\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,yBAA4B;AAC5B,sBAA0B;AASnB,IAAM,YAAY,CACvB,KACA,UAAqC,CAAC,MAEtC,MAAM,GAAG,EACN,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC,EAClC,KAAK,CAAC,SAAU,QAAQ,SAAS,QAAQ,OAAO,MAAM,IAAI,IAAI,IAAK;AAOjE,IAAM,YAAY,CAAC,QACxB,MAAM,GAAG,EAAE,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC;AAOxC,IAAM,eAAe,OAAO,QAAgB;AACjD,QAAM,UAAU,MAAM,UAAU,GAAG;AACnC,QAAM,UAAM,2BAAU,SAAS,EAAE,IAAI,CAAC;AACtC,QAAM,SAAS,IAAI,+BAAY,IAAI,OAAO,QAAQ;AAClD,QAAM,UAAU,OAAO,MAAM;AAE7B,MAAI,CAAC,SAAS,QAAS,OAAM,IAAI,yBAAyB,GAAG;AAE7D,SAAO;AAAA,IACL;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,OAAO,QAAQ;AAAA,IACf,QAAQ,QAAQ;AAAA,IAChB,UAAU,QAAQ;AAAA,IAClB,MAAM,QAAQ;AAAA,IACd,eAAe,QAAQ;AAAA,EACzB;AACF;AAEO,IAAM,2BAAN,cAAuC,MAAM;AAAA,EAClD,YAAY,KAAa;AACvB;AAAA,MACE;AAAA;AAAA,OAEC,GAAG;AAAA,EACR,KAAK;AAAA,IACH;AAAA,EACF;AACF;","names":[]}
@@ -0,0 +1,43 @@
1
+ import { ZodSchema } from 'zod';
2
+
3
+ /**
4
+ * Retrieves a URL as JSON
5
+ * @param url
6
+ * @param options.schema an optional Zod schema to validate the data against
7
+ * @returns the JSON as a JS object
8
+ */
9
+ declare const fetchJson: <T>(url: string, options?: {
10
+ schema?: ZodSchema<T>;
11
+ }) => Promise<T>;
12
+ /**
13
+ * Retrieves a URL as a string
14
+ * @param url
15
+ * @returns the plain text content of the URL
16
+ */
17
+ declare const fetchText: (url: string) => Promise<string>;
18
+ /**
19
+ * Extract an article from a URL
20
+ * @param url
21
+ * @returns the article content as a JS object
22
+ */
23
+ declare const fetchArticle: (url: string) => Promise<{
24
+ url: string;
25
+ content: string;
26
+ title: string;
27
+ byline: string;
28
+ siteName: string;
29
+ lang: string;
30
+ publishedTime: string;
31
+ }>;
32
+ declare class ArticleNotFoundException extends Error {
33
+ constructor(url: string);
34
+ }
35
+
36
+ declare const web_bundle_fetchArticle: typeof fetchArticle;
37
+ declare const web_bundle_fetchJson: typeof fetchJson;
38
+ declare const web_bundle_fetchText: typeof fetchText;
39
+ declare namespace web_bundle {
40
+ export { web_bundle_fetchArticle as fetchArticle, web_bundle_fetchJson as fetchJson, web_bundle_fetchText as fetchText };
41
+ }
42
+
43
+ export { ArticleNotFoundException, fetchArticle, fetchJson, fetchText, web_bundle as web };
@@ -0,0 +1,43 @@
1
+ import { ZodSchema } from 'zod';
2
+
3
+ /**
4
+ * Retrieves a URL as JSON
5
+ * @param url
6
+ * @param options.schema an optional Zod schema to validate the data against
7
+ * @returns the JSON as a JS object
8
+ */
9
+ declare const fetchJson: <T>(url: string, options?: {
10
+ schema?: ZodSchema<T>;
11
+ }) => Promise<T>;
12
+ /**
13
+ * Retrieves a URL as a string
14
+ * @param url
15
+ * @returns the plain text content of the URL
16
+ */
17
+ declare const fetchText: (url: string) => Promise<string>;
18
+ /**
19
+ * Extract an article from a URL
20
+ * @param url
21
+ * @returns the article content as a JS object
22
+ */
23
+ declare const fetchArticle: (url: string) => Promise<{
24
+ url: string;
25
+ content: string;
26
+ title: string;
27
+ byline: string;
28
+ siteName: string;
29
+ lang: string;
30
+ publishedTime: string;
31
+ }>;
32
+ declare class ArticleNotFoundException extends Error {
33
+ constructor(url: string);
34
+ }
35
+
36
+ declare const web_bundle_fetchArticle: typeof fetchArticle;
37
+ declare const web_bundle_fetchJson: typeof fetchJson;
38
+ declare const web_bundle_fetchText: typeof fetchText;
39
+ declare namespace web_bundle {
40
+ export { web_bundle_fetchArticle as fetchArticle, web_bundle_fetchJson as fetchJson, web_bundle_fetchText as fetchText };
41
+ }
42
+
43
+ export { ArticleNotFoundException, fetchArticle, fetchJson, fetchText, web_bundle as web };
@@ -0,0 +1,53 @@
1
+ var __defProp = Object.defineProperty;
2
+ var __export = (target, all) => {
3
+ for (var name in all)
4
+ __defProp(target, name, { get: all[name], enumerable: true });
5
+ };
6
+
7
+ // src/web.bundle.ts
8
+ var web_bundle_exports = {};
9
+ __export(web_bundle_exports, {
10
+ fetchArticle: () => fetchArticle,
11
+ fetchJson: () => fetchJson,
12
+ fetchText: () => fetchText
13
+ });
14
+
15
+ // src/web.lib.ts
16
+ import { Readability } from "@mozilla/readability";
17
+ import { parseHTML } from "linkedom";
18
+ var fetchJson = (url, options = {}) => fetch(url).then((response) => response.json()).then((data) => options.schema ? options.schema.parse(data) : data);
19
+ var fetchText = (url) => fetch(url).then((response) => response.text());
20
+ var fetchArticle = async (url) => {
21
+ const content = await fetchText(url);
22
+ const doc = parseHTML(content, { url });
23
+ const reader = new Readability(doc.window.document);
24
+ const article = reader.parse();
25
+ if (!article?.content) throw new ArticleNotFoundException(url);
26
+ return {
27
+ url,
28
+ content: article.content,
29
+ title: article.title,
30
+ byline: article.byline,
31
+ siteName: article.siteName,
32
+ lang: article.lang,
33
+ publishedTime: article.publishedTime
34
+ };
35
+ };
36
+ var ArticleNotFoundException = class extends Error {
37
+ constructor(url) {
38
+ super(
39
+ `
40
+ No article found at the URL
41
+ URL: ${url}
42
+ `.trim()
43
+ );
44
+ }
45
+ };
46
+ export {
47
+ ArticleNotFoundException,
48
+ fetchArticle,
49
+ fetchJson,
50
+ fetchText,
51
+ web_bundle_exports as web
52
+ };
53
+ //# sourceMappingURL=web.index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/web.bundle.ts","../src/web.lib.ts"],"sourcesContent":["export { fetchArticle, fetchJson, fetchText } from \"./web.lib\";\n","import { Readability } from \"@mozilla/readability\";\nimport { parseHTML } from \"linkedom\";\nimport { type ZodSchema } from \"zod\";\n\n/**\n * Retrieves an URL as JSON\n * @param url\n * @param options.schema an optional Zod schema to validate the data against\n * @returns the JSON as a JS object\n */\nexport const fetchJson = <T>(\n url: string,\n options: { schema?: ZodSchema<T> } = {},\n): Promise<T> =>\n fetch(url)\n .then((response) => response.json())\n .then((data) => (options.schema ? options.schema.parse(data) : data));\n\n/**\n * Retrieves an URL as a string\n * @param url\n * @returns the plain text content of the URL\n */\nexport const fetchText = (url: string): Promise<string> =>\n fetch(url).then((response) => response.text());\n\n/**\n * Extract an article from a URL\n * @param url\n * @returns the article content as a JS object\n */\nexport const fetchArticle = async (url: string) => {\n const content = await fetchText(url);\n const doc = parseHTML(content, { url });\n const reader = new Readability(doc.window.document);\n const article = reader.parse();\n\n if (!article?.content) throw new ArticleNotFoundException(url);\n\n return {\n url,\n content: article.content,\n title: article.title,\n byline: article.byline,\n siteName: article.siteName,\n lang: article.lang,\n publishedTime: article.publishedTime,\n };\n};\n\nexport class ArticleNotFoundException extends Error {\n constructor(url: string) {\n super(\n `\nNo article found at the URL\nURL: ${url}\n`.trim(),\n );\n 
}\n}\n"],"mappings":";;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,SAAS,mBAAmB;AAC5B,SAAS,iBAAiB;AASnB,IAAM,YAAY,CACvB,KACA,UAAqC,CAAC,MAEtC,MAAM,GAAG,EACN,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC,EAClC,KAAK,CAAC,SAAU,QAAQ,SAAS,QAAQ,OAAO,MAAM,IAAI,IAAI,IAAK;AAOjE,IAAM,YAAY,CAAC,QACxB,MAAM,GAAG,EAAE,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC;AAOxC,IAAM,eAAe,OAAO,QAAgB;AACjD,QAAM,UAAU,MAAM,UAAU,GAAG;AACnC,QAAM,MAAM,UAAU,SAAS,EAAE,IAAI,CAAC;AACtC,QAAM,SAAS,IAAI,YAAY,IAAI,OAAO,QAAQ;AAClD,QAAM,UAAU,OAAO,MAAM;AAE7B,MAAI,CAAC,SAAS,QAAS,OAAM,IAAI,yBAAyB,GAAG;AAE7D,SAAO;AAAA,IACL;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,OAAO,QAAQ;AAAA,IACf,QAAQ,QAAQ;AAAA,IAChB,UAAU,QAAQ;AAAA,IAClB,MAAM,QAAQ;AAAA,IACd,eAAe,QAAQ;AAAA,EACzB;AACF;AAEO,IAAM,2BAAN,cAAuC,MAAM;AAAA,EAClD,YAAY,KAAa;AACvB;AAAA,MACE;AAAA;AAAA,OAEC,GAAG;AAAA,EACR,KAAK;AAAA,IACH;AAAA,EACF;AACF;","names":[]}
package/package.json ADDED
@@ -0,0 +1,67 @@
1
+ {
2
+ "name": "@synstack/web",
3
+ "type": "module",
4
+ "publishConfig": {
5
+ "access": "public"
6
+ },
7
+ "packageManager": "yarn@4.4.0",
8
+ "version": "1.0.0",
9
+ "description": "Web scraping utilities",
10
+ "keywords": [
11
+ "web",
12
+ "typescript",
13
+ "ts",
14
+ "scraping",
15
+ "article"
16
+ ],
17
+ "author": {
18
+ "name": "pAIrprog",
19
+ "url": "https://pairprog.io"
20
+ },
21
+ "homepage": "https://github.com/pAIrprogio/synscript/tree/main/packages/web",
22
+ "repository": {
23
+ "type": "git",
24
+ "url": "https://github.com/pAIrprogio/synscript.git",
25
+ "directory": "packages/web"
26
+ },
27
+ "license": "Apache-2.0",
28
+ "scripts": {
29
+ "publish": "yarn npm publish --access public",
30
+ "prepublish": "yarn test && yarn build",
31
+ "build": "tsup",
32
+ "build:watch": "tsup --watch",
33
+ "test:types": "tsc --noEmit",
34
+ "test:unit": "node --import tsx --test src/**/*.test.ts",
35
+ "test:unit:watch": "node --import tsx --watch --test src/**/*.test.ts",
36
+ "test": "yarn test:types && yarn test:unit"
37
+ },
38
+ "exports": {
39
+ ".": {
40
+ "import": {
41
+ "types": "./dist/web.index.d.ts",
42
+ "default": "./dist/web.index.js"
43
+ },
44
+ "require": {
45
+ "types": "./dist/web.index.d.cts",
46
+ "default": "./dist/web.index.cjs"
47
+ }
48
+ }
49
+ },
50
+ "dependencies": {
51
+ "@mozilla/readability": "^0.5.0",
52
+ "linkedom": "^0.18.5",
53
+ "zod": "^3.23.8"
54
+ },
55
+ "devDependencies": {
56
+ "@types/node": "^22.7.0",
57
+ "tsup": "^8.3.0",
58
+ "tsx": "^4.19.1",
59
+ "typescript": "^5.6.2"
60
+ },
61
+ "files": [
62
+ "src/**/*.ts",
63
+ "!src/**/*.test.ts",
64
+ "dist/**/*"
65
+ ],
66
+ "gitHead": "c668ecfd7fe387e978a68324e760d1ed13182d11"
67
+ }
@@ -0,0 +1 @@
1
+ export { fetchArticle, fetchJson, fetchText } from "./web.lib";
@@ -0,0 +1,3 @@
1
+ export * from "./web.bundle";
2
+ export * as web from "./web.bundle";
3
+ export { ArticleNotFoundException } from "./web.lib";
package/src/web.lib.ts ADDED
@@ -0,0 +1,60 @@
1
+ import { Readability } from "@mozilla/readability";
2
+ import { parseHTML } from "linkedom";
3
+ import { type ZodSchema } from "zod";
4
+
5
/**
 * Retrieves a URL and decodes the response body as JSON.
 * @param url the address to request (issued through the global `fetch` with default options)
 * @param options.schema an optional Zod schema; when provided, the decoded payload is passed
 * through `schema.parse` and the parsed result is returned
 * @returns the decoded JSON payload (validated when a schema is supplied)
 * @throws Error when the server answers with a non-2xx status
 * @throws whatever `schema.parse` throws when the payload does not match the schema
 */
export const fetchJson = async <T>(
  url: string,
  options: { schema?: ZodSchema<T> } = {},
): Promise<T> => {
  const response = await fetch(url);

  // `fetch` only rejects on network failures; HTTP error statuses must be checked
  // explicitly, otherwise an error payload would be returned as if it were valid data.
  if (!response.ok)
    throw new Error(
      `Failed to fetch JSON (${response.status} ${response.statusText})\nURL: ${url}`,
    );

  const data: unknown = await response.json();

  // Without a schema there is nothing to validate against: the payload is handed back
  // as-is and the caller is trusted on `T`.
  return options.schema ? options.schema.parse(data) : (data as T);
};
18
+
19
/**
 * Retrieves a URL and returns the response body as a string.
 * @param url the address to request (issued through the global `fetch` with default options)
 * @returns the text content of the response body
 * @throws Error when the server answers with a non-2xx status
 */
export const fetchText = async (url: string): Promise<string> => {
  const response = await fetch(url);

  // `fetch` resolves for 4xx/5xx answers too; without this guard an error page
  // would be returned as if it were the requested content.
  if (!response.ok)
    throw new Error(
      `Failed to fetch text (${response.status} ${response.statusText})\nURL: ${url}`,
    );

  return response.text();
};
26
+
27
/**
 * Downloads a page and runs Readability over its parsed DOM to pull out the article.
 * @param url address of the page to download; also forwarded to the HTML parser
 * @returns the requested URL together with the extracted content and its metadata
 * (title, byline, site name, language and published time)
 * @throws ArticleNotFoundException when Readability produces no article content
 */
export const fetchArticle = async (url: string) => {
  const html = await fetchText(url);
  const parsed = parseHTML(html, { url });
  const article = new Readability(parsed.window.document).parse();

  // Covers both "nothing extracted" and "extracted but empty content".
  if (article == null || !article.content) {
    throw new ArticleNotFoundException(url);
  }

  const { content, title, byline, siteName, lang, publishedTime } = article;
  return { url, content, title, byline, siteName, lang, publishedTime };
};
50
+
51
/**
 * Raised when no article content could be extracted from a page.
 *
 * The message keeps the two-line format `No article found at the URL` / `URL: <url>`.
 */
export class ArticleNotFoundException extends Error {
  /** The URL for which no article could be extracted. */
  public readonly url: string;

  /**
   * @param url the address of the page that yielded no article
   */
  constructor(url: string) {
    super(`No article found at the URL\nURL: ${url}`);
    // Without an explicit name, logs and stack traces label this as a generic "Error".
    this.name = "ArticleNotFoundException";
    this.url = url;
  }
}