markit-ai 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,4 +2,5 @@ import type { OutputOptions } from "../utils/output.js";
2
2
  export declare function convert(source: string, options: OutputOptions & {
3
3
  output?: string;
4
4
  prompt?: string;
5
+ imageDir?: string;
5
6
  }): Promise<void>;
@@ -1,4 +1,6 @@
1
- import { writeFileSync } from "node:fs";
1
+ import { mkdtempSync, writeFileSync } from "node:fs";
2
+ import { tmpdir } from "node:os";
3
+ import { join } from "node:path";
2
4
  import { loadConfig } from "../config.js";
3
5
  import { Markit } from "../markit.js";
4
6
  import { loadAllPlugins } from "../plugins/loader.js";
@@ -23,6 +25,8 @@ export async function convert(source, options) {
23
25
  }
24
26
  const llmFunctions = createLlmFunctions(config, options.prompt);
25
27
  const markit = new Markit(llmFunctions, plugins);
28
+ // Auto-create a temp dir for images if not explicitly provided
29
+ const imageDir = options.imageDir || mkdtempSync(join(tmpdir(), "markit-images-"));
26
30
  try {
27
31
  let result;
28
32
  const isStdin = source === "-";
@@ -36,7 +40,7 @@ export async function convert(source, options) {
36
40
  process.exit(EXIT_ERROR);
37
41
  }
38
42
  const buffer = await readStdin();
39
- result = await markit.convert(buffer, {});
43
+ result = await markit.convert(buffer, { imageDir });
40
44
  }
41
45
  else if (isUrl) {
42
46
  // Progress hint for URL fetches (stderr so it doesn't pollute piped output)
@@ -46,7 +50,7 @@ export async function convert(source, options) {
46
50
  result = await markit.convertUrl(source);
47
51
  }
48
52
  else {
49
- result = await markit.convertFile(source);
53
+ result = await markit.convertFile(source, { imageDir });
50
54
  }
51
55
  const label = isStdin ? "stdin" : source;
52
56
  // Write to file or stdout
@@ -23,6 +23,11 @@ const BUILTIN_FORMATS = [
23
23
  extensions: [".mp3", ".wav", ".m4a", ".flac"],
24
24
  builtin: true,
25
25
  },
26
+ {
27
+ name: "GitHub",
28
+ extensions: ["github.com/*", "gist.github.com/*"],
29
+ builtin: true,
30
+ },
26
31
  { name: "ZIP", extensions: [".zip"], builtin: true },
27
32
  {
28
33
  name: "Plain text",
@@ -2,5 +2,5 @@ import type { ConversionResult, Converter, StreamInfo } from "../types.js";
2
2
  export declare class DocxConverter implements Converter {
3
3
  name: string;
4
4
  accepts(streamInfo: StreamInfo): boolean;
5
- convert(input: Buffer, _streamInfo: StreamInfo): Promise<ConversionResult>;
5
+ convert(input: Buffer, streamInfo: StreamInfo): Promise<ConversionResult>;
6
6
  }
@@ -1,3 +1,5 @@
1
+ import { mkdirSync, writeFileSync } from "node:fs";
2
+ import { join } from "node:path";
1
3
  import mammoth from "mammoth";
2
4
  import { createTurndown, normalizeTablesHtml } from "../utils/turndown.js";
3
5
  const EXTENSIONS = [".docx"];
@@ -16,10 +18,40 @@ export class DocxConverter {
16
18
  }
17
19
  return false;
18
20
  }
19
- async convert(input, _streamInfo) {
20
- const { value: html } = await mammoth.convertToHtml({ buffer: input });
21
+ async convert(input, streamInfo) {
22
+ const imageDir = streamInfo.imageDir;
23
+ if (imageDir) {
24
+ mkdirSync(imageDir, { recursive: true });
25
+ }
26
+ let imageCount = 0;
27
+ const convertImage = imageDir
28
+ ? mammoth.images.imgElement((image) => {
29
+ imageCount++;
30
+ const ext = (image.contentType?.split("/")[1] || "png").replace("jpeg", "jpg");
31
+ const filename = `image_${imageCount}.${ext}`;
32
+ const filepath = join(imageDir, filename);
33
+ return image.read("base64").then((base64) => {
34
+ writeFileSync(filepath, Buffer.from(base64, "base64"));
35
+ return { src: filepath, alt: `image_${imageCount}` };
36
+ });
37
+ })
38
+ : mammoth.images.imgElement((image) => {
39
+ imageCount++;
40
+ const contentType = image.contentType || "image/png";
41
+ return image.read("base64").then((base64) => {
42
+ return {
43
+ src: `data:${contentType};base64,${base64.slice(0, 0)}`,
44
+ alt: `image_${imageCount}`,
45
+ };
46
+ });
47
+ });
48
+ const { value: html } = await mammoth.convertToHtml({ buffer: input }, { convertImage });
21
49
  const turndown = createTurndown();
22
- const markdown = turndown.turndown(normalizeTablesHtml(html));
50
+ let markdown = turndown.turndown(normalizeTablesHtml(html));
51
+ // Replace data URI images with comment placeholders when no imageDir
52
+ if (!imageDir) {
53
+ markdown = markdown.replace(/!\[([^\]]*)\]\(data:[^)]*\)/g, "<!-- image: $1 -->");
54
+ }
23
55
  return { markdown: markdown.trim() };
24
56
  }
25
57
  }
@@ -23,6 +23,7 @@ export class EpubConverter {
23
23
  ignoreAttributes: false,
24
24
  attributeNamePrefix: "@_",
25
25
  textNodeName: "#text",
26
+ processEntities: { maxTotalExpansions: 1_000_000 },
26
27
  });
27
28
  // Find content.opf path from container.xml
28
29
  const containerXml = await zip
@@ -0,0 +1,18 @@
1
+ import type { ConversionResult, Converter, MarkitOptions, StreamInfo } from "../types.js";
2
+ /**
3
+ * Matches GitHub URLs and fetches clean markdown content directly
4
+ * from raw endpoints or the GitHub API — no HTML scraping needed.
5
+ *
6
+ * Supported patterns:
7
+ * - Repos: github.com/owner/repo → raw README.md
8
+ * - Files: github.com/owner/repo/blob/… → raw file content
9
+ * - Gists: gist.github.com/owner/id → raw gist content
10
+ * - Issues: github.com/owner/repo/issues/N → API (title + body)
11
+ * - PRs: github.com/owner/repo/pull/N → API (title + body)
12
+ */
13
+ export declare class GitHubConverter implements Converter {
14
+ name: string;
15
+ accepts(streamInfo: StreamInfo): boolean;
16
+ convertUrl(url: string, _options?: MarkitOptions): Promise<ConversionResult>;
17
+ convert(_input: Buffer, streamInfo: StreamInfo): Promise<ConversionResult>;
18
+ }
@@ -0,0 +1,148 @@
1
+ const GITHUB_HOSTS = new Set([
2
+ "github.com",
3
+ "www.github.com",
4
+ "gist.github.com",
5
+ ]);
6
+ /**
7
+ * Matches GitHub URLs and fetches clean markdown content directly
8
+ * from raw endpoints or the GitHub API — no HTML scraping needed.
9
+ *
10
+ * Supported patterns:
11
+ * - Repos: github.com/owner/repo → raw README.md
12
+ * - Files: github.com/owner/repo/blob/… → raw file content
13
+ * - Gists: gist.github.com/owner/id → raw gist content
14
+ * - Issues: github.com/owner/repo/issues/N → API (title + body)
15
+ * - PRs: github.com/owner/repo/pull/N → API (title + body)
16
+ */
17
+ export class GitHubConverter {
18
+ name = "github";
19
+ accepts(streamInfo) {
20
+ if (!streamInfo.url)
21
+ return false;
22
+ try {
23
+ const { hostname } = new URL(streamInfo.url);
24
+ return GITHUB_HOSTS.has(hostname);
25
+ }
26
+ catch {
27
+ return false;
28
+ }
29
+ }
30
+ async convertUrl(url, _options) {
31
+ const parsed = new URL(url);
32
+ if (parsed.hostname === "gist.github.com") {
33
+ return fetchGist(parsed);
34
+ }
35
+ const segments = parsed.pathname.split("/").filter(Boolean);
36
+ // Need at least owner/repo
37
+ if (segments.length < 2) {
38
+ throw new Error(`Unsupported GitHub URL: ${url}`);
39
+ }
40
+ const [owner, repo, type, ...rest] = segments;
41
+ // github.com/owner/repo/blob/ref/path → raw file
42
+ if (type === "blob" && rest.length >= 2) {
43
+ const ref = rest[0];
44
+ const filePath = rest.slice(1).join("/");
45
+ return fetchRawFile(owner, repo, ref, filePath);
46
+ }
47
+ // github.com/owner/repo/issues/N or /pull/N
48
+ if ((type === "issues" || type === "pull") && rest[0]) {
49
+ const number = Number.parseInt(rest[0], 10);
50
+ if (!Number.isNaN(number)) {
51
+ return fetchIssueOrPr(owner, repo, number);
52
+ }
53
+ }
54
+ // github.com/owner/repo (no subpath or tree/wiki/etc) → README
55
+ if (!type) {
56
+ return fetchReadme(owner, repo);
57
+ }
58
+ throw new Error(`Unsupported GitHub URL pattern: ${url}`);
59
+ }
60
+ async convert(_input, streamInfo) {
61
+ // GitHub URLs are handled entirely via convertUrl.
62
+ // If we end up here, the URL was already fetched by the default path —
63
+ // just delegate to convertUrl.
64
+ if (streamInfo.url) {
65
+ return this.convertUrl(streamInfo.url);
66
+ }
67
+ throw new Error("GitHub converter requires a URL");
68
+ }
69
+ }
70
+ // ---------------------------------------------------------------------------
71
+ // Fetchers
72
+ // ---------------------------------------------------------------------------
73
+ async function fetchReadme(owner, repo) {
74
+ const url = `https://raw.githubusercontent.com/${owner}/${repo}/HEAD/README.md`;
75
+ const res = await fetch(url);
76
+ if (!res.ok) {
77
+ throw new Error(`Failed to fetch README: ${res.status} ${res.statusText}`);
78
+ }
79
+ const markdown = (await res.text()).trim();
80
+ const title = extractFirstHeading(markdown) ?? `${owner}/${repo}`;
81
+ return { markdown, title };
82
+ }
83
+ async function fetchRawFile(owner, repo, ref, filePath) {
84
+ const url = `https://raw.githubusercontent.com/${owner}/${repo}/${ref}/${filePath}`;
85
+ const res = await fetch(url);
86
+ if (!res.ok) {
87
+ throw new Error(`Failed to fetch file: ${res.status} ${res.statusText}`);
88
+ }
89
+ const content = (await res.text()).trim();
90
+ const filename = filePath.split("/").pop() ?? filePath;
91
+ // If it's markdown, return as-is. Otherwise wrap in a code block.
92
+ if (filePath.endsWith(".md") || filePath.endsWith(".mdx")) {
93
+ const title = extractFirstHeading(content) ?? filename;
94
+ return { markdown: content, title };
95
+ }
96
+ const ext = filename.includes(".") ? filename.split(".").pop() : "";
97
+ const markdown = `# ${filename}\n\n\`\`\`${ext}\n${content}\n\`\`\``;
98
+ return { markdown, title: filename };
99
+ }
100
+ async function fetchGist(parsed) {
101
+ const segments = parsed.pathname.split("/").filter(Boolean);
102
+ // gist.github.com/owner/id
103
+ const [owner, id] = segments;
104
+ if (!owner || !id) {
105
+ throw new Error(`Unsupported gist URL: ${parsed.href}`);
106
+ }
107
+ const url = `https://gist.githubusercontent.com/${owner}/${id}/raw`;
108
+ const res = await fetch(url);
109
+ if (!res.ok) {
110
+ throw new Error(`Failed to fetch gist: ${res.status} ${res.statusText}`);
111
+ }
112
+ const content = (await res.text()).trim();
113
+ const title = `gist:${id}`;
114
+ return { markdown: content, title };
115
+ }
116
+ async function fetchIssueOrPr(owner, repo, number) {
117
+ const url = `https://api.github.com/repos/${owner}/${repo}/issues/${number}`;
118
+ const res = await fetch(url, {
119
+ headers: { Accept: "application/vnd.github.v3+json" },
120
+ });
121
+ if (!res.ok) {
122
+ throw new Error(`Failed to fetch issue/PR: ${res.status} ${res.statusText}`);
123
+ }
124
+ const data = (await res.json());
125
+ const title = data.title ?? `#${number}`;
126
+ const parts = [`# ${title}`];
127
+ // Metadata line
128
+ const meta = [];
129
+ if (data.user?.login)
130
+ meta.push(`@${data.user.login}`);
131
+ if (data.state)
132
+ meta.push(data.state);
133
+ if (data.labels?.length) {
134
+ meta.push(data.labels.map((l) => l.name).join(", "));
135
+ }
136
+ if (meta.length > 0)
137
+ parts.push(meta.join(" · "));
138
+ if (data.body?.trim())
139
+ parts.push(data.body.trim());
140
+ return { markdown: parts.join("\n\n"), title };
141
+ }
142
+ // ---------------------------------------------------------------------------
143
+ // Helpers
144
+ // ---------------------------------------------------------------------------
145
+ function extractFirstHeading(markdown) {
146
+ const match = markdown.match(/^#\s+(.+)$/m);
147
+ return match?.[1]?.trim();
148
+ }
@@ -2,7 +2,7 @@ import type { ConversionResult, Converter, StreamInfo } from "../types.js";
2
2
  export declare class PptxConverter implements Converter {
3
3
  name: string;
4
4
  accepts(streamInfo: StreamInfo): boolean;
5
- convert(input: Buffer, _streamInfo: StreamInfo): Promise<ConversionResult>;
5
+ convert(input: Buffer, streamInfo: StreamInfo): Promise<ConversionResult>;
6
6
  private extractText;
7
7
  private extractTable;
8
8
  }
@@ -1,3 +1,5 @@
1
+ import { mkdirSync, writeFileSync } from "node:fs";
2
+ import { join } from "node:path";
1
3
  import { XMLParser } from "fast-xml-parser";
2
4
  import JSZip from "jszip";
3
5
  const EXTENSIONS = [".pptx"];
@@ -14,12 +16,13 @@ export class PptxConverter {
14
16
  return true;
15
17
  return false;
16
18
  }
17
- async convert(input, _streamInfo) {
19
+ async convert(input, streamInfo) {
18
20
  const zip = await JSZip.loadAsync(input);
19
21
  const parser = new XMLParser({
20
22
  ignoreAttributes: false,
21
23
  attributeNamePrefix: "@_",
22
24
  textNodeName: "#text",
25
+ processEntities: { maxTotalExpansions: 1_000_000 },
23
26
  });
24
27
  // Get slide order from presentation.xml
25
28
  const presXml = await zip.file("ppt/presentation.xml")?.async("string");
@@ -66,7 +69,12 @@ export class PptxConverter {
66
69
  });
67
70
  slidePaths.push(...slideFiles);
68
71
  }
72
+ const imageDir = streamInfo.imageDir;
73
+ if (imageDir) {
74
+ mkdirSync(imageDir, { recursive: true });
75
+ }
69
76
  const sections = [];
77
+ let imageCount = 0;
70
78
  for (let i = 0; i < slidePaths.length; i++) {
71
79
  const slideXml = await zip.file(slidePaths[i])?.async("string");
72
80
  if (!slideXml)
@@ -75,6 +83,17 @@ export class PptxConverter {
75
83
  const spTree = slide["p:sld"]?.["p:cSld"]?.["p:spTree"];
76
84
  if (!spTree)
77
85
  continue;
86
+ // Parse slide-level rels for image references
87
+ const slideRelsPath = `${slidePaths[i].replace("slides/slide", "slides/_rels/slide")}.rels`;
88
+ const slideRelsXml = await zip.file(slideRelsPath)?.async("string");
89
+ const slideRelMap = new Map();
90
+ if (slideRelsXml) {
91
+ const slideRels = parser.parse(slideRelsXml);
92
+ const relItems = toList(slideRels?.Relationships?.Relationship);
93
+ for (const r of relItems) {
94
+ slideRelMap.set(r["@_Id"], r["@_Target"]);
95
+ }
96
+ }
78
97
  const slideLines = [`<!-- Slide ${i + 1} -->`];
79
98
  const shapes = spTree["p:sp"];
80
99
  const shapeList = Array.isArray(shapes) ? shapes : shapes ? [shapes] : [];
@@ -91,6 +110,55 @@ export class PptxConverter {
91
110
  slideLines.push(text);
92
111
  }
93
112
  }
113
+ // Extract embedded images
114
+ const pics = toList(spTree["p:pic"]);
115
+ for (const pic of pics) {
116
+ const blipFill = pic["p:blipFill"];
117
+ const rEmbed = blipFill?.["a:blip"]?.["@_r:embed"];
118
+ if (!rEmbed)
119
+ continue;
120
+ const target = slideRelMap.get(rEmbed);
121
+ if (!target)
122
+ continue;
123
+ // Resolve relative target against slide directory
124
+ const imagePath = target.startsWith("/")
125
+ ? target.slice(1)
126
+ : `ppt/slides/${target}`;
127
+ // Normalize path (e.g. ppt/slides/../media/image1.png → ppt/media/image1.png)
128
+ const normalizedPath = imagePath
129
+ .split("/")
130
+ .reduce((parts, seg) => {
131
+ if (seg === "..")
132
+ parts.pop();
133
+ else
134
+ parts.push(seg);
135
+ return parts;
136
+ }, [])
137
+ .join("/");
138
+ const imageFile = zip.file(normalizedPath);
139
+ if (!imageFile)
140
+ continue;
141
+ imageCount++;
142
+ const name = pic["p:nvSpPr"]?.["p:cNvPr"]?.["@_name"] ||
143
+ pic["p:nvPicPr"]?.["p:cNvPr"]?.["@_name"] ||
144
+ `image_${imageCount}`;
145
+ if (imageDir) {
146
+ try {
147
+ const ext = normalizedPath.split(".").pop() || "png";
148
+ const filename = `slide${i + 1}_${imageCount}.${ext}`;
149
+ const filepath = join(imageDir, filename);
150
+ const buf = await imageFile.async("nodebuffer");
151
+ writeFileSync(filepath, buf);
152
+ slideLines.push(`![${name}](${filepath})`);
153
+ }
154
+ catch {
155
+ slideLines.push(`<!-- image: ${name} (slide ${i + 1}) -->`);
156
+ }
157
+ }
158
+ else {
159
+ slideLines.push(`<!-- image: ${name} (slide ${i + 1}) -->`);
160
+ }
161
+ }
94
162
  // Tables
95
163
  const graphicFrames = spTree["p:graphicFrame"];
96
164
  const gfList = Array.isArray(graphicFrames)
@@ -214,3 +282,8 @@ export class PptxConverter {
214
282
  return lines.join("\n");
215
283
  }
216
284
  }
285
+ function toList(val) {
286
+ if (!val)
287
+ return [];
288
+ return Array.isArray(val) ? val : [val];
289
+ }
@@ -20,6 +20,7 @@ export class XlsxConverter {
20
20
  ignoreAttributes: false,
21
21
  attributeNamePrefix: "@_",
22
22
  textNodeName: "#text",
23
+ processEntities: { maxTotalExpansions: 1_000_000 },
23
24
  });
24
25
  // Parse shared strings
25
26
  const ssXml = await zip.file("xl/sharedStrings.xml")?.async("string");
package/dist/index.d.ts CHANGED
@@ -3,6 +3,7 @@ export { AudioConverter } from "./converters/audio.js";
3
3
  export { CsvConverter } from "./converters/csv.js";
4
4
  export { DocxConverter } from "./converters/docx.js";
5
5
  export { EpubConverter } from "./converters/epub.js";
6
+ export { GitHubConverter } from "./converters/github.js";
6
7
  export { HtmlConverter } from "./converters/html.js";
7
8
  export { ImageConverter } from "./converters/image.js";
8
9
  export { IpynbConverter } from "./converters/ipynb.js";
package/dist/index.js CHANGED
@@ -2,6 +2,7 @@ export { AudioConverter } from "./converters/audio.js";
2
2
  export { CsvConverter } from "./converters/csv.js";
3
3
  export { DocxConverter } from "./converters/docx.js";
4
4
  export { EpubConverter } from "./converters/epub.js";
5
+ export { GitHubConverter } from "./converters/github.js";
5
6
  export { HtmlConverter } from "./converters/html.js";
6
7
  export { ImageConverter } from "./converters/image.js";
7
8
  export { IpynbConverter } from "./converters/ipynb.js";
package/dist/main.js CHANGED
@@ -18,6 +18,7 @@ program
18
18
  .option("-q, --quiet", "Raw markdown only, no decoration")
19
19
  .option("-p, --prompt <text>", "Extra instructions for image description")
20
20
  .option("-o, --output <file>", "Write to file instead of stdout")
21
+ .option("-i, --image-dir <dir>", "Extract images to this directory")
21
22
  .addHelpText("after", `
22
23
  Examples:
23
24
  $ markit report.pdf Convert a PDF to markdown
@@ -42,7 +43,8 @@ program
42
43
  json: globals.json,
43
44
  quiet: globals.quiet,
44
45
  output: opts.output,
45
- prompt: opts.prompt,
46
+ prompt: globals.prompt,
47
+ imageDir: globals.imageDir,
46
48
  });
47
49
  });
48
50
  program
@@ -144,6 +146,7 @@ program.on("command:*", async (args) => {
144
146
  quiet: globals.quiet,
145
147
  output: globals.output,
146
148
  prompt: globals.prompt,
149
+ imageDir: globals.imageDir,
147
150
  });
148
151
  });
149
152
  // No args → show concise help
package/dist/markit.d.ts CHANGED
@@ -7,7 +7,7 @@ export declare class Markit {
7
7
  /**
8
8
  * Convert a local file to markdown.
9
9
  */
10
- convertFile(path: string): Promise<ConversionResult>;
10
+ convertFile(path: string, extra?: Partial<StreamInfo>): Promise<ConversionResult>;
11
11
  /**
12
12
  * Convert a URL to markdown.
13
13
  */
package/dist/markit.js CHANGED
@@ -4,6 +4,7 @@ import { AudioConverter } from "./converters/audio.js";
4
4
  import { CsvConverter } from "./converters/csv.js";
5
5
  import { DocxConverter } from "./converters/docx.js";
6
6
  import { EpubConverter } from "./converters/epub.js";
7
+ import { GitHubConverter } from "./converters/github.js";
7
8
  import { HtmlConverter } from "./converters/html.js";
8
9
  import { ImageConverter } from "./converters/image.js";
9
10
  import { IpynbConverter } from "./converters/ipynb.js";
@@ -32,6 +33,7 @@ export class Markit {
32
33
  new XlsxConverter(),
33
34
  new EpubConverter(),
34
35
  new IpynbConverter(),
36
+ new GitHubConverter(),
35
37
  new WikipediaConverter(),
36
38
  new RssConverter(),
37
39
  new CsvConverter(),
@@ -56,12 +58,13 @@ export class Markit {
56
58
  /**
57
59
  * Convert a local file to markdown.
58
60
  */
59
- async convertFile(path) {
61
+ async convertFile(path, extra) {
60
62
  const buffer = readFileSync(path);
61
63
  const streamInfo = {
62
64
  localPath: path,
63
65
  extension: extname(path).toLowerCase(),
64
66
  filename: basename(path),
67
+ ...extra,
65
68
  };
66
69
  return this.convert(buffer, streamInfo);
67
70
  }
@@ -69,6 +72,18 @@ export class Markit {
69
72
  * Convert a URL to markdown.
70
73
  */
71
74
  async convertUrl(url) {
75
+ // Let converters with a URL-specific hook handle it first
76
+ const streamInfo = { url };
77
+ for (const converter of this.converters) {
78
+ if (!converter.convertUrl || !converter.accepts(streamInfo))
79
+ continue;
80
+ try {
81
+ return await converter.convertUrl(url, this.options);
82
+ }
83
+ catch {
84
+ // Fall through to default fetch path
85
+ }
86
+ }
72
87
  const response = await fetch(url, {
73
88
  headers: {
74
89
  Accept: "text/markdown, text/html;q=0.9, text/plain;q=0.8, */*;q=0.1",
@@ -84,13 +99,13 @@ export class Markit {
84
99
  const urlPath = new URL(url).pathname;
85
100
  const ext = extname(urlPath).toLowerCase();
86
101
  const buffer = Buffer.from(await response.arrayBuffer());
87
- const streamInfo = {
102
+ const fetchedInfo = {
88
103
  url,
89
104
  mimetype: mimetype.trim(),
90
105
  extension: ext || undefined,
91
106
  filename: basename(urlPath) || undefined,
92
107
  };
93
- return this.convert(buffer, streamInfo);
108
+ return this.convert(buffer, fetchedInfo);
94
109
  }
95
110
  /**
96
111
  * Convert a buffer with stream info to markdown.
package/dist/types.d.ts CHANGED
@@ -25,6 +25,12 @@ export interface Converter {
25
25
  name: string;
26
26
  /** Quick check: can this converter handle the given stream? */
27
27
  accepts(streamInfo: StreamInfo): boolean;
28
+ /**
29
+ * Optional URL-first hook. When present, called before the default fetch
30
+ * so the converter can handle URL fetching itself (e.g. rewrite to a raw
31
+ * content URL or call an API).
32
+ */
33
+ convertUrl?(url: string, options?: MarkitOptions): Promise<ConversionResult>;
28
34
  /** Convert the source to markdown */
29
35
  convert(input: Buffer, streamInfo: StreamInfo, options?: MarkitOptions): Promise<ConversionResult>;
30
36
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "markit-ai",
3
- "version": "0.3.0",
3
+ "version": "0.4.0",
4
4
  "description": "Convert anything to markdown. PDF, DOCX, PPTX, XLSX, HTML, EPUB, Jupyter, RSS, images, audio, URLs, and more. Pluggable converters, built-in LLM providers for image description and audio transcription. Works as a CLI and as a library.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",