npm - @synstack/web - Versions diffs - 1.0.0 - Mend

@synstack/web 1.0.0

Files changed (11) hide show

package/README.md ADDED Viewed

	@@ -0,0 +1 @@
1	+ # @synstack/web

package/dist/web.index.cjs ADDED Viewed

@@ -0,0 +1,78 @@
+"use strict";
+var __defProp = Object.defineProperty;
+var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
+var __getOwnPropNames = Object.getOwnPropertyNames;
+var __hasOwnProp = Object.prototype.hasOwnProperty;
+var __export = (target, all) => {
+  for (var name in all)
+    __defProp(target, name, { get: all[name], enumerable: true });
+};
+var __copyProps = (to, from, except, desc) => {
+  if (from && typeof from === "object" || typeof from === "function") {
+    for (let key of __getOwnPropNames(from))
+      if (!__hasOwnProp.call(to, key) && key !== except)
+        __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
+  }
+  return to;
+};
+var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
+// src/web.index.ts
+var web_index_exports = {};
+__export(web_index_exports, {
+  ArticleNotFoundException: () => ArticleNotFoundException,
+  fetchArticle: () => fetchArticle,
+  fetchJson: () => fetchJson,
+  fetchText: () => fetchText,
+  web: () => web_bundle_exports
+});
+module.exports = __toCommonJS(web_index_exports);
+// src/web.bundle.ts
+var web_bundle_exports = {};
+__export(web_bundle_exports, {
+  fetchArticle: () => fetchArticle,
+  fetchJson: () => fetchJson,
+  fetchText: () => fetchText
+});
+// src/web.lib.ts
+var import_readability = require("@mozilla/readability");
+var import_linkedom = require("linkedom");
+var fetchJson = (url, options = {}) => fetch(url).then((response) => response.json()).then((data) => options.schema ? options.schema.parse(data) : data);
+var fetchText = (url) => fetch(url).then((response) => response.text());
+var fetchArticle = async (url) => {
+  const content = await fetchText(url);
+  const doc = (0, import_linkedom.parseHTML)(content, { url });
+  const reader = new import_readability.Readability(doc.window.document);
+  const article = reader.parse();
+  if (!article?.content) throw new ArticleNotFoundException(url);
+  return {
+    url,
+    content: article.content,
+    title: article.title,
+    byline: article.byline,
+    siteName: article.siteName,
+    lang: article.lang,
+    publishedTime: article.publishedTime
+  };
+};
+var ArticleNotFoundException = class extends Error {
+  constructor(url) {
+    super(
+      `
+No article found at the URL
+URL: ${url}
+`.trim()
+    );
+  }
+};
+// Annotate the CommonJS export names for ESM import in node:
+0 && (module.exports = {
+  ArticleNotFoundException,
+  fetchArticle,
+  fetchJson,
+  fetchText,
+  web
+});
+//# sourceMappingURL=web.index.cjs.map

package/dist/web.index.cjs.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"sources":["../src/web.index.ts","../src/web.bundle.ts","../src/web.lib.ts"],"sourcesContent":["export * from \"./web.bundle\";\nexport * as web from \"./web.bundle\";\nexport { ArticleNotFoundException } from \"./web.lib\";\n","export { fetchArticle, fetchJson, fetchText } from \"./web.lib\";\n","import { Readability } from \"@mozilla/readability\";\nimport { parseHTML } from \"linkedom\";\nimport { type ZodSchema } from \"zod\";\n\n/**\n * Retrieves an URL as JSON\n * @param url\n * @param options.schema an optional Zod schema to validate the data against\n * @returns the JSON as a JS object\n */\nexport const fetchJson = <T>(\n url: string,\n options: { schema?: ZodSchema<T> } = {},\n): Promise<T> =>\n fetch(url)\n .then((response) => response.json())\n .then((data) => (options.schema ? options.schema.parse(data) : data));\n\n/**\n * Retrieves an URL as a string\n * @param url\n * @returns the plain text content of the URL\n */\nexport const fetchText = (url: string): Promise<string> =>\n fetch(url).then((response) => response.text());\n\n/**\n * Extract an article from a URL\n * @param url\n * @returns the article content as a JS object\n */\nexport const fetchArticle = async (url: string) => {\n const content = await fetchText(url);\n const doc = parseHTML(content, { url });\n const reader = new Readability(doc.window.document);\n const article = reader.parse();\n\n if (!article?.content) throw new ArticleNotFoundException(url);\n\n return {\n url,\n content: article.content,\n title: article.title,\n byline: article.byline,\n siteName: article.siteName,\n lang: article.lang,\n publishedTime: article.publishedTime,\n };\n};\n\nexport class ArticleNotFoundException extends Error {\n constructor(url: string) {\n super(\n `\nNo article found at the URL\nURL: ${url}\n`.trim(),\n );\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,yBAA4B;AAC5B,sBAA0B;AASnB,IAAM,YAAY,CACvB,KACA,UAAqC,CAAC,MAEtC,MAAM,GAAG,EACN,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC,EAClC,KAAK,CAAC,SAAU,QAAQ,SAAS,QAAQ,OAAO,MAAM,IAAI,IAAI,IAAK;AAOjE,IAAM,YAAY,CAAC,QACxB,MAAM,GAAG,EAAE,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC;AAOxC,IAAM,eAAe,OAAO,QAAgB;AACjD,QAAM,UAAU,MAAM,UAAU,GAAG;AACnC,QAAM,UAAM,2BAAU,SAAS,EAAE,IAAI,CAAC;AACtC,QAAM,SAAS,IAAI,+BAAY,IAAI,OAAO,QAAQ;AAClD,QAAM,UAAU,OAAO,MAAM;AAE7B,MAAI,CAAC,SAAS,QAAS,OAAM,IAAI,yBAAyB,GAAG;AAE7D,SAAO;AAAA,IACL;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,OAAO,QAAQ;AAAA,IACf,QAAQ,QAAQ;AAAA,IAChB,UAAU,QAAQ;AAAA,IAClB,MAAM,QAAQ;AAAA,IACd,eAAe,QAAQ;AAAA,EACzB;AACF;AAEO,IAAM,2BAAN,cAAuC,MAAM;AAAA,EAClD,YAAY,KAAa;AACvB;AAAA,MACE;AAAA;AAAA,OAEC,GAAG;AAAA,EACR,KAAK;AAAA,IACH;AAAA,EACF;AACF;","names":[]}

package/dist/web.index.d.cts ADDED Viewed

@@ -0,0 +1,43 @@
+import { ZodSchema } from 'zod';
+/**
+ * Retrieves an URL as JSON
+ * @param url
+ * @param options.schema an optional Zod schema to validate the data against
+ * @returns the JSON as a JS object
+ */
+declare const fetchJson: <T>(url: string, options?: {
+    schema?: ZodSchema<T>;
+}) => Promise<T>;
+/**
+ * Retrieves an URL as a string
+ * @param url
+ * @returns the plain text content of the URL
+ */
+declare const fetchText: (url: string) => Promise<string>;
+/**
+ * Extract an article from a URL
+ * @param url
+ * @returns the article content as a JS object
+ */
+declare const fetchArticle: (url: string) => Promise<{
+    url: string;
+    content: string;
+    title: string;
+    byline: string;
+    siteName: string;
+    lang: string;
+    publishedTime: string;
+}>;
+declare class ArticleNotFoundException extends Error {
+    constructor(url: string);
+}
+declare const web_bundle_fetchArticle: typeof fetchArticle;
+declare const web_bundle_fetchJson: typeof fetchJson;
+declare const web_bundle_fetchText: typeof fetchText;
+declare namespace web_bundle {
+  export { web_bundle_fetchArticle as fetchArticle, web_bundle_fetchJson as fetchJson, web_bundle_fetchText as fetchText };
+}
+export { ArticleNotFoundException, fetchArticle, fetchJson, fetchText, web_bundle as web };

package/dist/web.index.d.ts ADDED Viewed

@@ -0,0 +1,43 @@
+import { ZodSchema } from 'zod';
+/**
+ * Retrieves an URL as JSON
+ * @param url
+ * @param options.schema an optional Zod schema to validate the data against
+ * @returns the JSON as a JS object
+ */
+declare const fetchJson: <T>(url: string, options?: {
+    schema?: ZodSchema<T>;
+}) => Promise<T>;
+/**
+ * Retrieves an URL as a string
+ * @param url
+ * @returns the plain text content of the URL
+ */
+declare const fetchText: (url: string) => Promise<string>;
+/**
+ * Extract an article from a URL
+ * @param url
+ * @returns the article content as a JS object
+ */
+declare const fetchArticle: (url: string) => Promise<{
+    url: string;
+    content: string;
+    title: string;
+    byline: string;
+    siteName: string;
+    lang: string;
+    publishedTime: string;
+}>;
+declare class ArticleNotFoundException extends Error {
+    constructor(url: string);
+}
+declare const web_bundle_fetchArticle: typeof fetchArticle;
+declare const web_bundle_fetchJson: typeof fetchJson;
+declare const web_bundle_fetchText: typeof fetchText;
+declare namespace web_bundle {
+  export { web_bundle_fetchArticle as fetchArticle, web_bundle_fetchJson as fetchJson, web_bundle_fetchText as fetchText };
+}
+export { ArticleNotFoundException, fetchArticle, fetchJson, fetchText, web_bundle as web };

package/dist/web.index.js ADDED Viewed

@@ -0,0 +1,53 @@
+var __defProp = Object.defineProperty;
+var __export = (target, all) => {
+  for (var name in all)
+    __defProp(target, name, { get: all[name], enumerable: true });
+};
+// src/web.bundle.ts
+var web_bundle_exports = {};
+__export(web_bundle_exports, {
+  fetchArticle: () => fetchArticle,
+  fetchJson: () => fetchJson,
+  fetchText: () => fetchText
+});
+// src/web.lib.ts
+import { Readability } from "@mozilla/readability";
+import { parseHTML } from "linkedom";
+var fetchJson = (url, options = {}) => fetch(url).then((response) => response.json()).then((data) => options.schema ? options.schema.parse(data) : data);
+var fetchText = (url) => fetch(url).then((response) => response.text());
+var fetchArticle = async (url) => {
+  const content = await fetchText(url);
+  const doc = parseHTML(content, { url });
+  const reader = new Readability(doc.window.document);
+  const article = reader.parse();
+  if (!article?.content) throw new ArticleNotFoundException(url);
+  return {
+    url,
+    content: article.content,
+    title: article.title,
+    byline: article.byline,
+    siteName: article.siteName,
+    lang: article.lang,
+    publishedTime: article.publishedTime
+  };
+};
+var ArticleNotFoundException = class extends Error {
+  constructor(url) {
+    super(
+      `
+No article found at the URL
+URL: ${url}
+`.trim()
+    );
+  }
+};
+export {
+  ArticleNotFoundException,
+  fetchArticle,
+  fetchJson,
+  fetchText,
+  web_bundle_exports as web
+};
+//# sourceMappingURL=web.index.js.map

package/dist/web.index.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"sources":["../src/web.bundle.ts","../src/web.lib.ts"],"sourcesContent":["export { fetchArticle, fetchJson, fetchText } from \"./web.lib\";\n","import { Readability } from \"@mozilla/readability\";\nimport { parseHTML } from \"linkedom\";\nimport { type ZodSchema } from \"zod\";\n\n/**\n * Retrieves an URL as JSON\n * @param url\n * @param options.schema an optional Zod schema to validate the data against\n * @returns the JSON as a JS object\n */\nexport const fetchJson = <T>(\n url: string,\n options: { schema?: ZodSchema<T> } = {},\n): Promise<T> =>\n fetch(url)\n .then((response) => response.json())\n .then((data) => (options.schema ? options.schema.parse(data) : data));\n\n/**\n * Retrieves an URL as a string\n * @param url\n * @returns the plain text content of the URL\n */\nexport const fetchText = (url: string): Promise<string> =>\n fetch(url).then((response) => response.text());\n\n/**\n * Extract an article from a URL\n * @param url\n * @returns the article content as a JS object\n */\nexport const fetchArticle = async (url: string) => {\n const content = await fetchText(url);\n const doc = parseHTML(content, { url });\n const reader = new Readability(doc.window.document);\n const article = reader.parse();\n\n if (!article?.content) throw new ArticleNotFoundException(url);\n\n return {\n url,\n content: article.content,\n title: article.title,\n byline: article.byline,\n siteName: article.siteName,\n lang: article.lang,\n publishedTime: article.publishedTime,\n };\n};\n\nexport class ArticleNotFoundException extends Error {\n constructor(url: string) {\n super(\n `\nNo article found at the URL\nURL: ${url}\n`.trim(),\n );\n 
}\n}\n"],"mappings":";;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACAA,SAAS,mBAAmB;AAC5B,SAAS,iBAAiB;AASnB,IAAM,YAAY,CACvB,KACA,UAAqC,CAAC,MAEtC,MAAM,GAAG,EACN,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC,EAClC,KAAK,CAAC,SAAU,QAAQ,SAAS,QAAQ,OAAO,MAAM,IAAI,IAAI,IAAK;AAOjE,IAAM,YAAY,CAAC,QACxB,MAAM,GAAG,EAAE,KAAK,CAAC,aAAa,SAAS,KAAK,CAAC;AAOxC,IAAM,eAAe,OAAO,QAAgB;AACjD,QAAM,UAAU,MAAM,UAAU,GAAG;AACnC,QAAM,MAAM,UAAU,SAAS,EAAE,IAAI,CAAC;AACtC,QAAM,SAAS,IAAI,YAAY,IAAI,OAAO,QAAQ;AAClD,QAAM,UAAU,OAAO,MAAM;AAE7B,MAAI,CAAC,SAAS,QAAS,OAAM,IAAI,yBAAyB,GAAG;AAE7D,SAAO;AAAA,IACL;AAAA,IACA,SAAS,QAAQ;AAAA,IACjB,OAAO,QAAQ;AAAA,IACf,QAAQ,QAAQ;AAAA,IAChB,UAAU,QAAQ;AAAA,IAClB,MAAM,QAAQ;AAAA,IACd,eAAe,QAAQ;AAAA,EACzB;AACF;AAEO,IAAM,2BAAN,cAAuC,MAAM;AAAA,EAClD,YAAY,KAAa;AACvB;AAAA,MACE;AAAA;AAAA,OAEC,GAAG;AAAA,EACR,KAAK;AAAA,IACH;AAAA,EACF;AACF;","names":[]}

package/package.json ADDED Viewed

@@ -0,0 +1,67 @@
+{
+  "name": "@synstack/web",
+  "type": "module",
+  "publishConfig": {
+    "access": "public"
+  },
+  "packageManager": "yarn@4.4.0",
+  "version": "1.0.0",
+  "description": "Web scraping utilities",
+  "keywords": [
+    "web",
+    "typescript",
+    "ts",
+    "scraping",
+    "article"
+  ],
+  "author": {
+    "name": "pAIrprog",
+    "url": "https://pairprog.io"
+  },
+  "homepage": "https://github.com/pAIrprogio/synscript/tree/main/packages/web",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/pAIrprogio/syn-stack.git",
+    "directory": "packages/web"
+  },
+  "license": "Apache-2.0",
+  "scripts": {
+    "publish": "yarn npm publish --access public",
+    "prepublish": "yarn test && yarn build",
+    "build": "tsup",
+    "build:watch": "tsup --watch",
+    "test:types": "tsc --noEmit",
+    "test:unit": "node --import tsx --test src/**/*.test.ts",
+    "test:unit:watch": "node --import tsx --watch --test src/**/*.test.ts",
+    "test": "yarn test:types && yarn test:unit"
+  },
+  "exports": {
+    ".": {
+      "import": {
+        "types": "./dist/web.index.d.ts",
+        "default": "./dist/web.index.js"
+      },
+      "require": {
+        "types": "./dist/web.index.d.cts",
+        "default": "./dist/web.index.cjs"
+      }
+    }
+  },
+  "dependencies": {
+    "@mozilla/readability": "^0.5.0",
+    "linkedom": "^0.18.5",
+    "zod": "^3.23.8"
+  },
+  "devDependencies": {
+    "@types/node": "^22.7.0",
+    "tsup": "^8.3.0",
+    "tsx": "^4.19.1",
+    "typescript": "^5.6.2"
+  },
+  "files": [
+    "src/**/*.ts",
+    "!src/**/*.test.ts",
+    "dist/**/*"
+  ],
+  "gitHead": "c668ecfd7fe387e978a68324e760d1ed13182d11"
+}

package/src/web.bundle.ts ADDED Viewed

	@@ -0,0 +1 @@
1	+ export { fetchArticle, fetchJson, fetchText } from "./web.lib";

package/src/web.index.ts ADDED Viewed

@@ -0,0 +1,3 @@
+export * from "./web.bundle";
+export * as web from "./web.bundle";
+export { ArticleNotFoundException } from "./web.lib";

package/src/web.lib.ts ADDED Viewed

@@ -0,0 +1,60 @@
+import { Readability } from "@mozilla/readability";
+import { parseHTML } from "linkedom";
+import { type ZodSchema } from "zod";
+/**
+ * Fetch `url` and decode the response body as JSON.
+ *
+ * The HTTP status is not inspected: whatever body comes back is handed to
+ * `response.json()`.
+ *
+ * @param url address passed as-is to the global `fetch`
+ * @param options.schema optional Zod schema; when provided, the decoded value
+ * is run through `schema.parse` and the parsed result is returned
+ * @returns the decoded value. Without a schema it is returned unchecked and is
+ * only typed as `T`, not validated against it
+ * @throws whatever `fetch`, `response.json()` or `schema.parse` reject or throw with
+ */
+export const fetchJson = async <T>(
+  url: string,
+  options: { schema?: ZodSchema<T> } = {},
+): Promise<T> => {
+  const response = await fetch(url);
+  const payload = await response.json();
+  // No schema: hand the raw decoded value back untouched.
+  if (!options.schema) return payload;
+  return options.schema.parse(payload);
+};
+/**
+ * Fetch `url` and read the whole response body as a string.
+ *
+ * The HTTP status is not inspected, so the body of an error response is
+ * returned like any other.
+ *
+ * @param url address passed as-is to the global `fetch`
+ * @returns the response body as produced by `response.text()`
+ */
+export const fetchText = async (url: string): Promise<string> => {
+  const response = await fetch(url);
+  return response.text();
+};
+/**
+ * Download a page and extract its main article with Readability.
+ *
+ * The page is fetched with `fetchText`, parsed into a DOM by linkedom's
+ * `parseHTML`, and then handed to `Readability#parse`.
+ *
+ * @param url address of the page; it is also echoed back in the result
+ * @returns the requested `url` together with the `content`, `title`,
+ * `byline`, `siteName`, `lang` and `publishedTime` reported by Readability
+ * @throws ArticleNotFoundException when Readability returns nothing or an
+ * empty `content`
+ */
+export const fetchArticle = async (url: string) => {
+  const html = await fetchText(url);
+  const parsed = parseHTML(html, { url });
+  const article = new Readability(parsed.window.document).parse();
+
+  // Treat both "no result" and "result without content" as not found.
+  if (article == null || !article.content) {
+    throw new ArticleNotFoundException(url);
+  }
+
+  const { content, title, byline, siteName, lang, publishedTime } = article;
+  return { url, content, title, byline, siteName, lang, publishedTime };
+};
+/**
+ * Error thrown by `fetchArticle` when no article content could be extracted
+ * from the fetched page.
+ */
+export class ArticleNotFoundException extends Error {
+  /**
+   * @param url address that was fetched; it is embedded in the error message
+   */
+  constructor(url: string) {
+    // The template's leading and trailing newlines are stripped by trim().
+    const message = `
+No article found at the URL
+URL: ${url}
+`.trim();
+    super(message);
+  }
+}