mdream 0.6.1 → 0.7.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  import { BLOCKQUOTE_SPACING, DEFAULT_BLOCK_SPACING, ELEMENT_NODE, HTML_ENTITIES, LIST_ITEM_SPACING, MARKDOWN_CODE_BLOCK, MARKDOWN_EMPHASIS, MARKDOWN_HORIZONTAL_RULE, MARKDOWN_INLINE_CODE, MARKDOWN_STRIKETHROUGH, MARKDOWN_STRONG, MAX_TAG_ID, NO_SPACING, NodeEventEnter, NodeEventExit, TABLE_ROW_SPACING, TAG_A, TAG_ABBR, TAG_ADDRESS, TAG_AREA, TAG_ASIDE, TAG_AUDIO, TAG_B, TAG_BASE, TAG_BDO, TAG_BLOCKQUOTE, TAG_BODY, TAG_BR, TAG_BUTTON, TAG_CANVAS, TAG_CENTER, TAG_CITE, TAG_CODE, TAG_COL, TAG_DD, TAG_DEL, TAG_DETAILS, TAG_DFN, TAG_DIALOG, TAG_DIV, TAG_DL, TAG_DT, TAG_EM, TAG_EMBED, TAG_FIELDSET, TAG_FOOTER, TAG_FORM, TAG_H1, TAG_H2, TAG_H3, TAG_H4, TAG_H5, TAG_H6, TAG_HEAD, TAG_HR, TAG_I, TAG_IFRAME, TAG_IMG, TAG_INPUT, TAG_INS, TAG_KBD, TAG_KEYGEN, TAG_LABEL, TAG_LEGEND, TAG_LI, TAG_LINK, TAG_MAP, TAG_MARK, TAG_META, TAG_METER, TAG_NAV, TAG_NOFRAMES, TAG_NOSCRIPT, TAG_OL, TAG_OPTION, TAG_P, TAG_PARAM, TAG_PLAINTEXT, TAG_PRE, TAG_PROGRESS, TAG_Q, TAG_RP, TAG_RT, TAG_RUBY, TAG_SAMP, TAG_SCRIPT, TAG_SELECT, TAG_SMALL, TAG_SOURCE, TAG_SPAN, TAG_STRONG, TAG_STYLE, TAG_SUB, TAG_SUMMARY, TAG_SUP, TAG_SVG, TAG_TABLE, TAG_TBODY, TAG_TD, TAG_TEMPLATE, TAG_TEXTAREA, TAG_TFOOT, TAG_TH, TAG_THEAD, TAG_TIME, TAG_TITLE, TAG_TR, TAG_TRACK, TAG_U, TAG_UL, TAG_VAR, TAG_VIDEO, TAG_WBR, TAG_XMP, TEXT_NODE, TagIdMap, assembleBufferedContent, collectNodeContent, extractionPlugin } from "./extraction-D28Kr1J3.mjs";
2
2
  import { readFile } from "node:fs/promises";
3
- import { basename, dirname, relative } from "node:path";
3
+ import { basename, dirname, relative, sep } from "pathe";
4
4
  import { glob } from "tinyglobby";
5
5
 
6
6
  //#region src/tags.ts
@@ -1431,6 +1431,7 @@ function extractMetadata(html, url) {
1431
1431
  */
1432
1432
  function pathToUrl(filePath, baseDir) {
1433
1433
  let url = relative(baseDir, filePath);
1434
+ url = url.split(sep).join("/");
1434
1435
  if (url.endsWith(".html")) url = url.slice(0, -5);
1435
1436
  if (url.endsWith("/index")) url = url.slice(0, -6);
1436
1437
  if (url === "index") return "/";
@@ -1514,7 +1515,8 @@ function generateLlmsFullTxtContent(files, options) {
1514
1515
  function generateMarkdownFilesContent(files) {
1515
1516
  const markdownFiles = [];
1516
1517
  for (const file of files) {
1517
- const mdPath = file.url === "/" ? "md/index.md" : `md${file.url}.md`;
1518
+ const urlPath = file.url === "/" ? "index" : file.url.replace(/^\//, "").replace(/\/$/, "");
1519
+ const mdPath = `md/${urlPath}.md`;
1518
1520
  markdownFiles.push({
1519
1521
  path: mdPath,
1520
1522
  content: file.content
package/dist/cli.mjs CHANGED
@@ -1,9 +1,9 @@
1
1
  import "./_chunks/extraction-D28Kr1J3.mjs";
2
- import { generateLlmsTxtArtifacts, streamHtmlToMarkdown } from "./_chunks/src-Eo8j0-9L.mjs";
2
+ import { generateLlmsTxtArtifacts, streamHtmlToMarkdown } from "./_chunks/src-Dbe3WLUq.mjs";
3
3
  import "./_chunks/plugins-DXY-fo9h.mjs";
4
4
  import { withMinimalPreset } from "./_chunks/minimal-CCnrG7a1.mjs";
5
5
  import { mkdir, writeFile } from "node:fs/promises";
6
- import { dirname, join } from "node:path";
6
+ import { dirname, join, resolve } from "pathe";
7
7
  import { readFileSync } from "node:fs";
8
8
  import { Readable } from "node:stream";
9
9
  import { fileURLToPath } from "node:url";
@@ -24,6 +24,7 @@ async function generateLlms(patterns, options) {
24
24
  "llms-full.txt",
25
25
  "markdown"
26
26
  ];
27
+ const outputDir = resolve(options.output);
27
28
  const result = await generateLlmsTxtArtifacts({
28
29
  patterns,
29
30
  siteName: options.siteName,
@@ -32,18 +33,19 @@ async function generateLlms(patterns, options) {
32
33
  generateFull: artifacts.includes("llms-full.txt"),
33
34
  generateMarkdown: artifacts.includes("markdown")
34
35
  });
35
- await mkdir(options.output, { recursive: true });
36
- const llmsPath = join(options.output, "llms.txt");
36
+ await mkdir(outputDir, { recursive: true });
37
+ const llmsPath = join(outputDir, "llms.txt");
37
38
  await writeFile(llmsPath, result.llmsTxt, "utf-8");
38
39
  if (artifacts.includes("llms-full.txt") && result.llmsFullTxt) {
39
- const fullPath = join(options.output, "llms-full.txt");
40
+ const fullPath = join(outputDir, "llms-full.txt");
40
41
  await writeFile(fullPath, result.llmsFullTxt, "utf-8");
41
42
  }
42
43
  if (artifacts.includes("markdown") && result.markdownFiles) for (const mdFile of result.markdownFiles) {
43
- const fullPath = join(options.output, mdFile.path);
44
+ const fullPath = join(outputDir, mdFile.path);
44
45
  await mkdir(dirname(fullPath), { recursive: true });
45
46
  await writeFile(fullPath, mdFile.content, "utf-8");
46
47
  }
48
+ console.log(`✅ Generated llms.txt artifacts in: ${outputDir}`);
47
49
  } catch (error) {
48
50
  console.error("❌ Error generating llms.txt:", error);
49
51
  process.exit(1);
@@ -57,7 +59,7 @@ const cli = cac();
57
59
  cli.command("[options]", "Convert HTML from stdin to Markdown on stdout").option("--origin <url>", "Origin URL for resolving relative image paths").option("--preset <preset>", "Conversion presets: minimal").action(async (_, opts) => {
58
60
  await streamingConvert(opts);
59
61
  });
60
- cli.command("llms <patterns...>", "Generate llms.txt artifacts from HTML files").option("--site-name <name>", "Name of the site for llms.txt header").option("--description <desc>", "Description of the site for llms.txt").option("--origin <url>", "Origin URL for resolving relative paths and generating absolute URLs").option("-o, --output <dir>", "Output directory for generated files", { default: "." }).option("--artifacts <list>", "Comma-separated list of artifacts to generate: llms.txt,llms-full.txt,markdown", { default: "llms.txt,llms-full.txt,markdown" }).action(async (patterns, opts) => {
62
+ cli.command("llms <patterns...>", "Generate llms.txt artifacts from HTML files").option("--site-name <name>", "Name of the site for llms.txt header").option("--description <desc>", "Description of the site for llms.txt").option("--origin <url>", "Origin URL for resolving relative paths and generating absolute URLs").option("-o, --output <dir>", "Output directory for generated files", { default: process.cwd() }).option("--artifacts <list>", "Comma-separated list of artifacts to generate: llms.txt,llms-full.txt,markdown", { default: "llms.txt,llms-full.txt,markdown" }).action(async (patterns, opts) => {
61
63
  await generateLlms(patterns, {
62
64
  patterns,
63
65
  ...opts
package/dist/index.mjs CHANGED
@@ -1,4 +1,4 @@
1
1
  import { TagIdMap, createPlugin } from "./_chunks/extraction-D28Kr1J3.mjs";
2
- import { MarkdownProcessor, generateLlmsTxtArtifacts, htmlToMarkdown, parseHtml, streamHtmlToMarkdown } from "./_chunks/src-Eo8j0-9L.mjs";
2
+ import { MarkdownProcessor, generateLlmsTxtArtifacts, htmlToMarkdown, parseHtml, streamHtmlToMarkdown } from "./_chunks/src-Dbe3WLUq.mjs";
3
3
 
4
4
  export { MarkdownProcessor, TagIdMap, createPlugin, generateLlmsTxtArtifacts, htmlToMarkdown, parseHtml, streamHtmlToMarkdown };
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "mdream",
3
3
  "type": "module",
4
- "version": "0.6.1",
4
+ "version": "0.7.1",
5
5
  "description": "Ultra-performant HTML to Markdown Convertor Optimized for LLMs and llm.txt artifacts.",
6
6
  "author": {
7
7
  "name": "Harlan Wilton",
@@ -54,7 +54,8 @@
54
54
  ],
55
55
  "dependencies": {
56
56
  "cac": "^6.7.14",
57
- "tinyglobby": "^0.2.10"
57
+ "pathe": "^2.0.3",
58
+ "tinyglobby": "^0.2.14"
58
59
  },
59
60
  "scripts": {
60
61
  "flame": "pnpm build && unbuild bench/bundle && clinic flame -- node bench/bundle/dist/string.mjs 10",