@synstack/web 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
package/README.md ADDED
@@ -0,0 +1 @@
1
+ # @synstack/web
@@ -0,0 +1,78 @@
1
+ "use strict";
2
+ var __defProp = Object.defineProperty;
3
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
4
+ var __getOwnPropNames = Object.getOwnPropertyNames;
5
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
6
+ var __export = (target, all) => {
7
+ for (var name in all)
8
+ __defProp(target, name, { get: all[name], enumerable: true });
9
+ };
10
+ var __copyProps = (to, from, except, desc) => {
11
+ if (from && typeof from === "object" || typeof from === "function") {
12
+ for (let key of __getOwnPropNames(from))
13
+ if (!__hasOwnProp.call(to, key) && key !== except)
14
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
15
+ }
16
+ return to;
17
+ };
18
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
19
+
20
+ // src/web.index.ts
21
+ var web_index_exports = {};
22
+ __export(web_index_exports, {
23
+ ArticleNotFoundException: () => ArticleNotFoundException,
24
+ fetchArticle: () => fetchArticle,
25
+ fetchJson: () => fetchJson,
26
+ fetchText: () => fetchText,
27
+ web: () => web_bundle_exports
28
+ });
29
+ module.exports = __toCommonJS(web_index_exports);
30
+
31
+ // src/web.bundle.ts
32
+ var web_bundle_exports = {};
33
+ __export(web_bundle_exports, {
34
+ fetchArticle: () => fetchArticle,
35
+ fetchJson: () => fetchJson,
36
+ fetchText: () => fetchText
37
+ });
38
+
39
+ // src/web.lib.ts
40
+ var import_readability = require("@mozilla/readability");
41
+ var import_linkedom = require("linkedom");
42
+ var fetchJson = (url, options = {}) => fetch(url).then((response) => response.json()).then((data) => options.schema ? options.schema.parse(data) : data);
43
+ var fetchText = (url) => fetch(url).then((response) => response.text());
44
+ var fetchArticle = async (url) => {
45
+ const content = await fetchText(url);
46
+ const doc = (0, import_linkedom.parseHTML)(content, { url });
47
+ const reader = new import_readability.Readability(doc.window.document);
48
+ const article = reader.parse();
49
+ if (!article?.content) throw new ArticleNotFoundException(url);
50
+ return {
51
+ url,
52
+ content: article.content,
53
+ title: article.title,
54
+ byline: article.byline,
55
+ siteName: article.siteName,
56
+ lang: article.lang,
57
+ publishedTime: article.publishedTime
58
+ };
59
+ };
60
+ var ArticleNotFoundException = class extends Error {
61
+ constructor(url) {
62
+ super(
63
+ `
64
+ No article found at the URL
65
+ URL: ${url}
66
+ `.trim()
67
+ );
68
+ }
69
+ };
70
+ // Annotate the CommonJS export names for ESM import in node:
71
+ 0 && (module.exports = {
72
+ ArticleNotFoundException,
73
+ fetchArticle,
74
+ fetchJson,
75
+ fetchText,
76
+ web
77
+ });
78
+ //# sourceMappingURL=web.index.cjs.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/web.index.ts","../src/web.bundle.ts","../src/web.lib.ts"],"sourcesContent":["export * from \"./web.bundle\";\nexport * as web from \"./web.bundle\";\nexport { ArticleNotFoundException } from \"./web.lib\";\n","export { fetchArticle, fetchJson, fetchText } from \"./web.lib\";\n","import { Readability } from \"@mozilla/readability\";\nimport { parseHTML } from \"linkedom\";\nimport { type ZodSchema } from \"zod\";\n\n/**\n * Retrieves an URL as JSON\n * @param url\n * @param options.schema an optional Zod schema to validate the data against\n * @returns the JSON as a JS object\n */\nexport const fetchJson = <T>(\n url: string,\n options: { schema?: ZodSchema<T> } = {},\n): Promise<T> =>\n fetch(url)\n .then((response) => response.json())\n .then((data) => (options.schema ? options.schema.parse(data) : data));\n\n/**\n * Retrieves an URL as a string\n * @param url\n * @returns the plain text content of the URL\n */\nexport const fetchText = (url: string): Promise<string> =>\n fetch(url).then((response) => response.text());\n\n/**\n * Extract an article from a URL\n * @param url\n * @returns the article content as a JS object\n */\nexport const fetchArticle = async (url: string) => {\n const content = await fetchText(url);\n const doc = parseHTML(content, { url });\n const reader = new Readability(doc.window.document);\n const article = reader.parse();\n\n if (!article?.content) throw new ArticleNotFoundException(url);\n\n return {\n url,\n content: article.content,\n title: article.title,\n byline: article.byline,\n siteName: article.siteName,\n lang: article.lang,\n publishedTime: article.publishedTime,\n };\n};\n\nexport class ArticleNotFoundException extends Error {\n constructor(url: string) {\n super(\n `\nNo article found at the URL\nURL: ${url}\n`.trim(),\n );\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,yBAA4B;AAC5B,sBAA0B;AASnB,IAAM,YAAY,CACvB,KACA,UAAqC,CAAC,MAEtC,MAAM,GAAG,EACN,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC,EAClC,KAAK,CAAC,SAAU,QAAQ,SAAS,QAAQ,OAAO,MAAM,IAAI,IAAI,IAAK;AAOjE,IAAM,YAAY,CAAC,QACxB,MAAM,GAAG,EAAE,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC;AAOxC,IAAM,eAAe,OAAO,QAAgB;AACjD,QAAM,UAAU,MAAM,UAAU,GAAG;AACnC,QAAM,UAAM,2BAAU,SAAS,EAAE,IAAI,CAAC;AACtC,QAAM,SAAS,IAAI,+BAAY,IAAI,OAAO,QAAQ;AAClD,QAAM,UAAU,OAAO,MAAM;AAE7B,MAAI,CAAC,SAAS,QAAS,OAAM,IAAI,yBAAyB,GAAG;AAE7D,SAAO;AAAA,IACL;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,OAAO,QAAQ;AAAA,IACf,QAAQ,QAAQ;AAAA,IAChB,UAAU,QAAQ;AAAA,IAClB,MAAM,QAAQ;AAAA,IACd,eAAe,QAAQ;AAAA,EACzB;AACF;AAEO,IAAM,2BAAN,cAAuC,MAAM;AAAA,EAClD,YAAY,KAAa;AACvB;AAAA,MACE;AAAA;AAAA,OAEC,GAAG;AAAA,EACR,KAAK;AAAA,IACH;AAAA,EACF;AACF;","names":[]}
@@ -0,0 +1,43 @@
1
+ import { ZodSchema } from 'zod';
2
+
3
+ /**
4
+ * Retrieves an URL as JSON
5
+ * @param url
6
+ * @param options.schema an optional Zod schema to validate the data against
7
+ * @returns the JSON as a JS object
8
+ */
9
+ declare const fetchJson: <T>(url: string, options?: {
10
+ schema?: ZodSchema<T>;
11
+ }) => Promise<T>;
12
+ /**
13
+ * Retrieves an URL as a string
14
+ * @param url
15
+ * @returns the plain text content of the URL
16
+ */
17
+ declare const fetchText: (url: string) => Promise<string>;
18
+ /**
19
+ * Extract an article from a URL
20
+ * @param url
21
+ * @returns the article content as a JS object
22
+ */
23
+ declare const fetchArticle: (url: string) => Promise<{
24
+ url: string;
25
+ content: string;
26
+ title: string;
27
+ byline: string;
28
+ siteName: string;
29
+ lang: string;
30
+ publishedTime: string;
31
+ }>;
32
+ declare class ArticleNotFoundException extends Error {
33
+ constructor(url: string);
34
+ }
35
+
36
+ declare const web_bundle_fetchArticle: typeof fetchArticle;
37
+ declare const web_bundle_fetchJson: typeof fetchJson;
38
+ declare const web_bundle_fetchText: typeof fetchText;
39
+ declare namespace web_bundle {
40
+ export { web_bundle_fetchArticle as fetchArticle, web_bundle_fetchJson as fetchJson, web_bundle_fetchText as fetchText };
41
+ }
42
+
43
+ export { ArticleNotFoundException, fetchArticle, fetchJson, fetchText, web_bundle as web };
@@ -0,0 +1,43 @@
1
+ import { ZodSchema } from 'zod';
2
+
3
+ /**
4
+ * Retrieves an URL as JSON
5
+ * @param url
6
+ * @param options.schema an optional Zod schema to validate the data against
7
+ * @returns the JSON as a JS object
8
+ */
9
+ declare const fetchJson: <T>(url: string, options?: {
10
+ schema?: ZodSchema<T>;
11
+ }) => Promise<T>;
12
+ /**
13
+ * Retrieves an URL as a string
14
+ * @param url
15
+ * @returns the plain text content of the URL
16
+ */
17
+ declare const fetchText: (url: string) => Promise<string>;
18
+ /**
19
+ * Extract an article from a URL
20
+ * @param url
21
+ * @returns the article content as a JS object
22
+ */
23
+ declare const fetchArticle: (url: string) => Promise<{
24
+ url: string;
25
+ content: string;
26
+ title: string;
27
+ byline: string;
28
+ siteName: string;
29
+ lang: string;
30
+ publishedTime: string;
31
+ }>;
32
+ declare class ArticleNotFoundException extends Error {
33
+ constructor(url: string);
34
+ }
35
+
36
+ declare const web_bundle_fetchArticle: typeof fetchArticle;
37
+ declare const web_bundle_fetchJson: typeof fetchJson;
38
+ declare const web_bundle_fetchText: typeof fetchText;
39
+ declare namespace web_bundle {
40
+ export { web_bundle_fetchArticle as fetchArticle, web_bundle_fetchJson as fetchJson, web_bundle_fetchText as fetchText };
41
+ }
42
+
43
+ export { ArticleNotFoundException, fetchArticle, fetchJson, fetchText, web_bundle as web };
@@ -0,0 +1,53 @@
1
+ var __defProp = Object.defineProperty;
2
+ var __export = (target, all) => {
3
+ for (var name in all)
4
+ __defProp(target, name, { get: all[name], enumerable: true });
5
+ };
6
+
7
+ // src/web.bundle.ts
8
+ var web_bundle_exports = {};
9
+ __export(web_bundle_exports, {
10
+ fetchArticle: () => fetchArticle,
11
+ fetchJson: () => fetchJson,
12
+ fetchText: () => fetchText
13
+ });
14
+
15
+ // src/web.lib.ts
16
+ import { Readability } from "@mozilla/readability";
17
+ import { parseHTML } from "linkedom";
18
+ var fetchJson = (url, options = {}) => fetch(url).then((response) => response.json()).then((data) => options.schema ? options.schema.parse(data) : data);
19
+ var fetchText = (url) => fetch(url).then((response) => response.text());
20
+ var fetchArticle = async (url) => {
21
+ const content = await fetchText(url);
22
+ const doc = parseHTML(content, { url });
23
+ const reader = new Readability(doc.window.document);
24
+ const article = reader.parse();
25
+ if (!article?.content) throw new ArticleNotFoundException(url);
26
+ return {
27
+ url,
28
+ content: article.content,
29
+ title: article.title,
30
+ byline: article.byline,
31
+ siteName: article.siteName,
32
+ lang: article.lang,
33
+ publishedTime: article.publishedTime
34
+ };
35
+ };
36
+ var ArticleNotFoundException = class extends Error {
37
+ constructor(url) {
38
+ super(
39
+ `
40
+ No article found at the URL
41
+ URL: ${url}
42
+ `.trim()
43
+ );
44
+ }
45
+ };
46
+ export {
47
+ ArticleNotFoundException,
48
+ fetchArticle,
49
+ fetchJson,
50
+ fetchText,
51
+ web_bundle_exports as web
52
+ };
53
+ //# sourceMappingURL=web.index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"sources":["../src/web.bundle.ts","../src/web.lib.ts"],"sourcesContent":["export { fetchArticle, fetchJson, fetchText } from \"./web.lib\";\n","import { Readability } from \"@mozilla/readability\";\nimport { parseHTML } from \"linkedom\";\nimport { type ZodSchema } from \"zod\";\n\n/**\n * Retrieves an URL as JSON\n * @param url\n * @param options.schema an optional Zod schema to validate the data against\n * @returns the JSON as a JS object\n */\nexport const fetchJson = <T>(\n url: string,\n options: { schema?: ZodSchema<T> } = {},\n): Promise<T> =>\n fetch(url)\n .then((response) => response.json())\n .then((data) => (options.schema ? options.schema.parse(data) : data));\n\n/**\n * Retrieves an URL as a string\n * @param url\n * @returns the plain text content of the URL\n */\nexport const fetchText = (url: string): Promise<string> =>\n fetch(url).then((response) => response.text());\n\n/**\n * Extract an article from a URL\n * @param url\n * @returns the article content as a JS object\n */\nexport const fetchArticle = async (url: string) => {\n const content = await fetchText(url);\n const doc = parseHTML(content, { url });\n const reader = new Readability(doc.window.document);\n const article = reader.parse();\n\n if (!article?.content) throw new ArticleNotFoundException(url);\n\n return {\n url,\n content: article.content,\n title: article.title,\n byline: article.byline,\n siteName: article.siteName,\n lang: article.lang,\n publishedTime: article.publishedTime,\n };\n};\n\nexport class ArticleNotFoundException extends Error {\n constructor(url: string) {\n super(\n `\nNo article found at the URL\nURL: ${url}\n`.trim(),\n );\n 
}\n}\n"],"mappings":";;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,SAAS,mBAAmB;AAC5B,SAAS,iBAAiB;AASnB,IAAM,YAAY,CACvB,KACA,UAAqC,CAAC,MAEtC,MAAM,GAAG,EACN,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC,EAClC,KAAK,CAAC,SAAU,QAAQ,SAAS,QAAQ,OAAO,MAAM,IAAI,IAAI,IAAK;AAOjE,IAAM,YAAY,CAAC,QACxB,MAAM,GAAG,EAAE,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC;AAOxC,IAAM,eAAe,OAAO,QAAgB;AACjD,QAAM,UAAU,MAAM,UAAU,GAAG;AACnC,QAAM,MAAM,UAAU,SAAS,EAAE,IAAI,CAAC;AACtC,QAAM,SAAS,IAAI,YAAY,IAAI,OAAO,QAAQ;AAClD,QAAM,UAAU,OAAO,MAAM;AAE7B,MAAI,CAAC,SAAS,QAAS,OAAM,IAAI,yBAAyB,GAAG;AAE7D,SAAO;AAAA,IACL;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,OAAO,QAAQ;AAAA,IACf,QAAQ,QAAQ;AAAA,IAChB,UAAU,QAAQ;AAAA,IAClB,MAAM,QAAQ;AAAA,IACd,eAAe,QAAQ;AAAA,EACzB;AACF;AAEO,IAAM,2BAAN,cAAuC,MAAM;AAAA,EAClD,YAAY,KAAa;AACvB;AAAA,MACE;AAAA;AAAA,OAEC,GAAG;AAAA,EACR,KAAK;AAAA,IACH;AAAA,EACF;AACF;","names":[]}
package/package.json ADDED
@@ -0,0 +1,67 @@
1
+ {
2
+ "name": "@synstack/web",
3
+ "type": "module",
4
+ "publishConfig": {
5
+ "access": "public"
6
+ },
7
+ "packageManager": "yarn@4.4.0",
8
+ "version": "1.0.0",
9
+ "description": "Web scraping utilities",
10
+ "keywords": [
11
+ "web",
12
+ "typescript",
13
+ "ts",
14
+ "scraping",
15
+ "article"
16
+ ],
17
+ "author": {
18
+ "name": "pAIrprog",
19
+ "url": "https://pairprog.io"
20
+ },
21
+ "homepage": "https://github.com/pAIrprogio/synscript/tree/main/packages/web",
22
+ "repository": {
23
+ "type": "git",
24
+ "url": "https://github.com/pAIrprogio/syn-stack.git",
25
+ "directory": "packages/web"
26
+ },
27
+ "license": "Apache-2.0",
28
+ "scripts": {
29
+ "publish": "yarn npm publish --access public",
30
+ "prepublish": "yarn test && yarn build",
31
+ "build": "tsup",
32
+ "build:watch": "tsup --watch",
33
+ "test:types": "tsc --noEmit",
34
+ "test:unit": "node --import tsx --test src/**/*.test.ts",
35
+ "test:unit:watch": "node --import tsx --watch --test src/**/*.test.ts",
36
+ "test": "yarn test:types && yarn test:unit"
37
+ },
38
+ "exports": {
39
+ ".": {
40
+ "import": {
41
+ "types": "./dist/web.index.d.ts",
42
+ "default": "./dist/web.index.js"
43
+ },
44
+ "require": {
45
+ "types": "./dist/web.index.d.cts",
46
+ "default": "./dist/web.index.cjs"
47
+ }
48
+ }
49
+ },
50
+ "dependencies": {
51
+ "@mozilla/readability": "^0.5.0",
52
+ "linkedom": "^0.18.5",
53
+ "zod": "^3.23.8"
54
+ },
55
+ "devDependencies": {
56
+ "@types/node": "^22.7.0",
57
+ "tsup": "^8.3.0",
58
+ "tsx": "^4.19.1",
59
+ "typescript": "^5.6.2"
60
+ },
61
+ "files": [
62
+ "src/**/*.ts",
63
+ "!src/**/*.test.ts",
64
+ "dist/**/*"
65
+ ],
66
+ "gitHead": "c668ecfd7fe387e978a68324e760d1ed13182d11"
67
+ }
@@ -0,0 +1 @@
1
+ export { fetchArticle, fetchJson, fetchText } from "./web.lib";
@@ -0,0 +1,3 @@
1
+ export * from "./web.bundle";
2
+ export * as web from "./web.bundle";
3
+ export { ArticleNotFoundException } from "./web.lib";
package/src/web.lib.ts ADDED
@@ -0,0 +1,60 @@
1
+ import { Readability } from "@mozilla/readability";
2
+ import { parseHTML } from "linkedom";
3
+ import { type ZodSchema } from "zod";
4
+
/**
 * Requests a URL and decodes the response body as JSON.
 *
 * The HTTP status of the response is not inspected: whatever body comes back
 * is handed to `response.json()`.
 *
 * @param url the address to request
 * @param options.schema an optional Zod schema; when provided, the decoded
 * value is passed through `schema.parse` and that result is returned instead
 * @returns the decoded JSON value, or the output of `schema.parse` when a
 * schema is provided. Errors raised by `fetch`, `response.json()` or
 * `schema.parse` are not caught and surface as a rejected promise.
 */
export const fetchJson = async <T>(
  url: string,
  options: { schema?: ZodSchema<T> } = {},
): Promise<T> => {
  const response = await fetch(url);
  const payload = await response.json();

  // No schema: hand back the decoded value untouched (it is not validated).
  if (!options.schema) return payload;

  return options.schema.parse(payload);
};
18
+
/**
 * Requests a URL and reads the response body as text.
 *
 * The HTTP status of the response is not inspected; the body is returned
 * as-is whatever the status.
 *
 * @param url the address to request
 * @returns the response body as a string
 */
export const fetchText = async (url: string): Promise<string> => {
  const response = await fetch(url);
  return response.text();
};
26
+
/**
 * Downloads a page and extracts its main article with Readability.
 *
 * The page is fetched as text with `fetchText`, parsed into a DOM with
 * linkedom's `parseHTML`, and the resulting document is given to
 * `Readability`.
 *
 * @param url the address of the page to extract
 * @returns the requested `url` together with the `content`, `title`,
 * `byline`, `siteName`, `lang` and `publishedTime` reported by Readability
 * @throws ArticleNotFoundException when Readability returns no result or a
 * result whose `content` is empty
 */
export const fetchArticle = async (url: string) => {
  const html = await fetchText(url);
  const { window } = parseHTML(html, { url });
  const article = new Readability(window.document).parse();

  if (!article?.content) throw new ArticleNotFoundException(url);

  const { content, title, byline, siteName, lang, publishedTime } = article;
  return { url, content, title, byline, siteName, lang, publishedTime };
};
50
+
/**
 * Error thrown when no article could be extracted from a page.
 *
 * The message has two lines: a fixed description followed by `URL: <url>`,
 * with surrounding whitespace trimmed.
 */
export class ArticleNotFoundException extends Error {
  /**
   * @param url the address for which no article was found; it is embedded in
   * the error message
   */
  constructor(url: string) {
    super(`No article found at the URL\nURL: ${url}`.trim());
    // Without this assignment `name` is inherited from Error and reads
    // "Error", so logs and stack traces cannot tell this failure apart from a
    // generic one.
    this.name = "ArticleNotFoundException";
  }
}