wikimem 0.3.0 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +138 -29
- package/README.md +171 -309
- package/dist/cli/commands/ask.d.ts +3 -0
- package/dist/cli/commands/ask.d.ts.map +1 -0
- package/dist/cli/commands/ask.js +63 -0
- package/dist/cli/commands/ask.js.map +1 -0
- package/dist/cli/commands/export.d.ts +3 -0
- package/dist/cli/commands/export.d.ts.map +1 -0
- package/dist/cli/commands/export.js +108 -0
- package/dist/cli/commands/export.js.map +1 -0
- package/dist/cli/commands/history.d.ts +3 -0
- package/dist/cli/commands/history.d.ts.map +1 -0
- package/dist/cli/commands/history.js +61 -0
- package/dist/cli/commands/history.js.map +1 -0
- package/dist/cli/commands/improve.d.ts.map +1 -1
- package/dist/cli/commands/improve.js +4 -3
- package/dist/cli/commands/improve.js.map +1 -1
- package/dist/cli/commands/ingest.d.ts.map +1 -1
- package/dist/cli/commands/ingest.js +5 -4
- package/dist/cli/commands/ingest.js.map +1 -1
- package/dist/cli/commands/init.d.ts.map +1 -1
- package/dist/cli/commands/init.js +337 -81
- package/dist/cli/commands/init.js.map +1 -1
- package/dist/cli/commands/lint.d.ts.map +1 -1
- package/dist/cli/commands/lint.js +4 -3
- package/dist/cli/commands/lint.js.map +1 -1
- package/dist/cli/commands/mcp.d.ts +3 -0
- package/dist/cli/commands/mcp.d.ts.map +1 -0
- package/dist/cli/commands/mcp.js +11 -0
- package/dist/cli/commands/mcp.js.map +1 -0
- package/dist/cli/commands/open.d.ts +3 -0
- package/dist/cli/commands/open.d.ts.map +1 -0
- package/dist/cli/commands/open.js +36 -0
- package/dist/cli/commands/open.js.map +1 -0
- package/dist/cli/commands/query.d.ts.map +1 -1
- package/dist/cli/commands/query.js +5 -4
- package/dist/cli/commands/query.js.map +1 -1
- package/dist/cli/commands/search.d.ts +3 -0
- package/dist/cli/commands/search.d.ts.map +1 -0
- package/dist/cli/commands/search.js +61 -0
- package/dist/cli/commands/search.js.map +1 -0
- package/dist/cli/commands/serve.d.ts.map +1 -1
- package/dist/cli/commands/serve.js +41 -2
- package/dist/cli/commands/serve.js.map +1 -1
- package/dist/cli/commands/watch.d.ts.map +1 -1
- package/dist/cli/commands/watch.js +4 -3
- package/dist/cli/commands/watch.js.map +1 -1
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +27 -1
- package/dist/cli/index.js.map +1 -1
- package/dist/core/audit-trail.d.ts +15 -0
- package/dist/core/audit-trail.d.ts.map +1 -0
- package/dist/core/audit-trail.js +43 -0
- package/dist/core/audit-trail.js.map +1 -0
- package/dist/core/claude-code.d.ts +10 -0
- package/dist/core/claude-code.d.ts.map +1 -0
- package/dist/core/claude-code.js +81 -0
- package/dist/core/claude-code.js.map +1 -0
- package/dist/core/config.d.ts +23 -0
- package/dist/core/config.d.ts.map +1 -1
- package/dist/core/config.js.map +1 -1
- package/dist/core/connectors.d.ts +58 -0
- package/dist/core/connectors.d.ts.map +1 -0
- package/dist/core/connectors.js +189 -0
- package/dist/core/connectors.js.map +1 -0
- package/dist/core/folder-scanner.d.ts +10 -0
- package/dist/core/folder-scanner.d.ts.map +1 -0
- package/dist/core/folder-scanner.js +84 -0
- package/dist/core/folder-scanner.js.map +1 -0
- package/dist/core/git.d.ts +137 -0
- package/dist/core/git.d.ts.map +1 -0
- package/dist/core/git.js +520 -0
- package/dist/core/git.js.map +1 -0
- package/dist/core/history.d.ts +21 -0
- package/dist/core/history.d.ts.map +1 -0
- package/dist/core/history.js +107 -0
- package/dist/core/history.js.map +1 -0
- package/dist/core/improve.d.ts.map +1 -1
- package/dist/core/improve.js +9 -0
- package/dist/core/improve.js.map +1 -1
- package/dist/core/ingest.d.ts +1 -0
- package/dist/core/ingest.d.ts.map +1 -1
- package/dist/core/ingest.js +151 -7
- package/dist/core/ingest.js.map +1 -1
- package/dist/core/lint.d.ts.map +1 -1
- package/dist/core/lint.js +23 -4
- package/dist/core/lint.js.map +1 -1
- package/dist/core/oauth-defaults.d.ts +31 -0
- package/dist/core/oauth-defaults.d.ts.map +1 -0
- package/dist/core/oauth-defaults.js +77 -0
- package/dist/core/oauth-defaults.js.map +1 -0
- package/dist/core/observer.d.ts +94 -0
- package/dist/core/observer.d.ts.map +1 -0
- package/dist/core/observer.js +492 -0
- package/dist/core/observer.js.map +1 -0
- package/dist/core/pipeline-events.d.ts +63 -0
- package/dist/core/pipeline-events.d.ts.map +1 -0
- package/dist/core/pipeline-events.js +109 -0
- package/dist/core/pipeline-events.js.map +1 -0
- package/dist/core/query.d.ts.map +1 -1
- package/dist/core/query.js +16 -8
- package/dist/core/query.js.map +1 -1
- package/dist/core/scraper.d.ts +41 -0
- package/dist/core/scraper.d.ts.map +1 -0
- package/dist/core/scraper.js +277 -0
- package/dist/core/scraper.js.map +1 -0
- package/dist/core/sync/gdrive.d.ts +14 -0
- package/dist/core/sync/gdrive.d.ts.map +1 -0
- package/dist/core/sync/gdrive.js +205 -0
- package/dist/core/sync/gdrive.js.map +1 -0
- package/dist/core/sync/github.d.ts +20 -0
- package/dist/core/sync/github.d.ts.map +1 -0
- package/dist/core/sync/github.js +206 -0
- package/dist/core/sync/github.js.map +1 -0
- package/dist/core/sync/gmail.d.ts +15 -0
- package/dist/core/sync/gmail.d.ts.map +1 -0
- package/dist/core/sync/gmail.js +159 -0
- package/dist/core/sync/gmail.js.map +1 -0
- package/dist/core/sync/index.d.ts +47 -0
- package/dist/core/sync/index.d.ts.map +1 -0
- package/dist/core/sync/index.js +100 -0
- package/dist/core/sync/index.js.map +1 -0
- package/dist/core/sync/jira.d.ts +15 -0
- package/dist/core/sync/jira.d.ts.map +1 -0
- package/dist/core/sync/jira.js +176 -0
- package/dist/core/sync/jira.js.map +1 -0
- package/dist/core/sync/linear.d.ts +15 -0
- package/dist/core/sync/linear.d.ts.map +1 -0
- package/dist/core/sync/linear.js +111 -0
- package/dist/core/sync/linear.js.map +1 -0
- package/dist/core/sync/notion.d.ts +14 -0
- package/dist/core/sync/notion.d.ts.map +1 -0
- package/dist/core/sync/notion.js +168 -0
- package/dist/core/sync/notion.js.map +1 -0
- package/dist/core/sync/rss.d.ts +20 -0
- package/dist/core/sync/rss.d.ts.map +1 -0
- package/dist/core/sync/rss.js +165 -0
- package/dist/core/sync/rss.js.map +1 -0
- package/dist/core/sync/scheduler.d.ts +31 -0
- package/dist/core/sync/scheduler.d.ts.map +1 -0
- package/dist/core/sync/scheduler.js +129 -0
- package/dist/core/sync/scheduler.js.map +1 -0
- package/dist/core/sync/slack.d.ts +16 -0
- package/dist/core/sync/slack.d.ts.map +1 -0
- package/dist/core/sync/slack.js +173 -0
- package/dist/core/sync/slack.js.map +1 -0
- package/dist/core/vault.d.ts +22 -0
- package/dist/core/vault.d.ts.map +1 -1
- package/dist/core/vault.js +65 -0
- package/dist/core/vault.js.map +1 -1
- package/dist/core/webhooks.d.ts +13 -0
- package/dist/core/webhooks.d.ts.map +1 -0
- package/dist/core/webhooks.js +206 -0
- package/dist/core/webhooks.js.map +1 -0
- package/dist/index.js +3 -2
- package/dist/index.js.map +1 -1
- package/dist/mcp-entry.d.ts +10 -0
- package/dist/mcp-entry.d.ts.map +1 -0
- package/dist/mcp-entry.js +21 -0
- package/dist/mcp-entry.js.map +1 -0
- package/dist/mcp-server.d.ts +20 -0
- package/dist/mcp-server.d.ts.map +1 -0
- package/dist/mcp-server.js +483 -0
- package/dist/mcp-server.js.map +1 -0
- package/dist/mcp-tools-extended.d.ts +15 -0
- package/dist/mcp-tools-extended.d.ts.map +1 -0
- package/dist/mcp-tools-extended.js +277 -0
- package/dist/mcp-tools-extended.js.map +1 -0
- package/dist/processors/audio.d.ts.map +1 -1
- package/dist/processors/audio.js +42 -4
- package/dist/processors/audio.js.map +1 -1
- package/dist/processors/csv.d.ts +18 -0
- package/dist/processors/csv.d.ts.map +1 -0
- package/dist/processors/csv.js +230 -0
- package/dist/processors/csv.js.map +1 -0
- package/dist/processors/image.d.ts.map +1 -1
- package/dist/processors/image.js +55 -27
- package/dist/processors/image.js.map +1 -1
- package/dist/processors/pdf.d.ts.map +1 -1
- package/dist/processors/pdf.js +6 -3
- package/dist/processors/pdf.js.map +1 -1
- package/dist/processors/pptx.d.ts +3 -1
- package/dist/processors/pptx.d.ts.map +1 -1
- package/dist/processors/pptx.js +236 -95
- package/dist/processors/pptx.js.map +1 -1
- package/dist/processors/url.js +4 -1
- package/dist/processors/url.js.map +1 -1
- package/dist/processors/xlsx.d.ts +2 -0
- package/dist/processors/xlsx.d.ts.map +1 -1
- package/dist/processors/xlsx.js +182 -46
- package/dist/processors/xlsx.js.map +1 -1
- package/dist/providers/claude.d.ts +1 -0
- package/dist/providers/claude.d.ts.map +1 -1
- package/dist/providers/claude.js +5 -3
- package/dist/providers/claude.js.map +1 -1
- package/dist/providers/index.d.ts +17 -1
- package/dist/providers/index.d.ts.map +1 -1
- package/dist/providers/index.js +144 -0
- package/dist/providers/index.js.map +1 -1
- package/dist/providers/openai.d.ts +1 -0
- package/dist/providers/openai.d.ts.map +1 -1
- package/dist/providers/openai.js +5 -3
- package/dist/providers/openai.js.map +1 -1
- package/dist/providers/types.d.ts +18 -0
- package/dist/providers/types.d.ts.map +1 -1
- package/dist/templates/config-yaml.d.ts.map +1 -1
- package/dist/templates/config-yaml.js +12 -1
- package/dist/templates/config-yaml.js.map +1 -1
- package/dist/templates/source-types.d.ts +33 -0
- package/dist/templates/source-types.d.ts.map +1 -0
- package/dist/templates/source-types.js +178 -0
- package/dist/templates/source-types.js.map +1 -0
- package/dist/web/public/index.html +9836 -742
- package/dist/web/server.d.ts.map +1 -1
- package/dist/web/server.js +2823 -43
- package/dist/web/server.js.map +1 -1
- package/package.json +10 -4
- package/scripts/install.sh +54 -0
- package/src/web/public/index.html +9836 -742
- package/templates/mcp-config.json +9 -0
- package/templates/source-types/article.md +21 -0
- package/templates/source-types/book.md +21 -0
- package/templates/source-types/paper.md +23 -0
- package/templates/source-types/podcast.md +21 -0
- package/templates/source-types/raw-notes.md +17 -0
- package/templates/source-types/tweet-thread.md +19 -0
- package/templates/source-types/video.md +21 -0
- package/dist/web/public/public/index.html +0 -946
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"csv.js","sourceRoot":"","sources":["../../src/processors/csv.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAW9C,MAAM,gBAAgB,GAAG,GAAG,CAAC;AAC7B,MAAM,gBAAgB,GAAG,EAAE,CAAC;AAE5B,oDAAoD;AACpD,MAAM,UAAU,SAAS,CAAC,QAAgB;IACxC,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,OAAO,GAAG,KAAK,MAAM,IAAI,GAAG,KAAK,MAAM,CAAC;AAC1C,CAAC;AAED,4EAA4E;AAC5E,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC/C,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAEtC,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAC5C,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC;IAE9B,MAAM,SAAS,GAAG,GAAG,KAAK,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;IACnE,MAAM,IAAI,GAAG,SAAS,CAAC,OAAO,EAAE,SAAS,CAAC,CAAC;IAE3C,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtB,OAAO;YACL,KAAK;YACL,OAAO,EAAE,iCAAiC,QAAQ,CAAC,QAAQ,CAAC,GAAG;YAC/D,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;YACtD,QAAQ,EAAE,CAAC;YACX,WAAW,EAAE,CAAC;YACd,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAED,MAAM,WAAW,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC;IAC3D,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,qBAAqB;IACvD,MAAM,WAAW,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC;IAC5C,MAAM,YAAY,GAAG,UAAU,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;IAEnD,OAAO;QACL,KAAK;QACL,OAAO,EAAE,YAAY;QACrB,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,YAAY,EAAE,QAAQ,EAAE,WAAW,EAAE,WAAW,CAAC;QAC1F,QAAQ;QACR,WAAW;QACX,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC;AAED,8EAA8E;AAC9E,0EAA0E;AAC1E,8EAA8E;AAE9E,SAAS,QAAQ,CAAC,IAAY;IAC5B,OAAO,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,KAAK,MAAM,CAAC,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;AAC9D,CAAC;AAED,gEAAgE;AAChE,SAAS,eAAe,CAAC,IAAY;IACnC,MAAM,MAAM,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;IACvC,MAAM,MAAM,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAA
C,CAAC,MAAM,CAAC;IACjD,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IAChD,MAAM,UAAU,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IACrD,MAAM,KAAK,GAAG,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IAEjD,MAAM,MAAM,GAA4B;QACtC,CAAC,GAAG,EAAE,MAAM,CAAC;QACb,CAAC,IAAI,EAAE,IAAI,CAAC;QACZ,CAAC,GAAG,EAAE,UAAU,CAAC;QACjB,CAAC,GAAG,EAAE,KAAK,CAAC;KACb,CAAC;IACF,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;IACnC,MAAM,IAAI,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;IACvB,OAAO,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;AAC7C,CAAC;AAED,4EAA4E;AAC5E,SAAS,SAAS,CAAC,IAAY,EAAE,SAAiB;IAChD,MAAM,IAAI,GAAe,EAAE,CAAC;IAC5B,IAAI,UAAU,GAAa,EAAE,CAAC;IAC9B,IAAI,KAAK,GAAG,EAAE,CAAC;IACf,IAAI,QAAQ,GAAG,KAAK,CAAC;IACrB,IAAI,CAAC,GAAG,CAAC,CAAC;IAEV,OAAO,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACvB,MAAM,EAAE,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QAEnB,IAAI,QAAQ,EAAE,CAAC;YACb,IAAI,EAAE,KAAK,GAAG,EAAE,CAAC;gBACf,qBAAqB;gBACrB,IAAI,CAAC,GAAG,CAAC,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,GAAG,EAAE,CAAC;oBAC/C,KAAK,IAAI,GAAG,CAAC;oBACb,CAAC,IAAI,CAAC,CAAC;oBACP,SAAS;gBACX,CAAC;gBACD,sBAAsB;gBACtB,QAAQ,GAAG,KAAK,CAAC;gBACjB,CAAC,EAAE,CAAC;gBACJ,SAAS;YACX,CAAC;YACD,KAAK,IAAI,EAAE,CAAC;YACZ,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QAED,oBAAoB;QACpB,IAAI,EAAE,KAAK,GAAG,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACrC,QAAQ,GAAG,IAAI,CAAC;YAChB,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QAED,IAAI,EAAE,KAAK,SAAS,EAAE,CAAC;YACrB,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;YAC9B,KAAK,GAAG,EAAE,CAAC;YACX,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QAED,IAAI,EAAE,KAAK,IAAI,EAAE,CAAC;YAChB,aAAa;YACb,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;YAC9B,KAAK,GAAG,EAAE,CAAC;YACX,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC;gBACzC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YACxB,CAAC;YACD,UAAU,GAAG,EAA
E,CAAC;YAChB,CAAC,EAAE,CAAC;YACJ,IAAI,CAAC,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,CAAC,CAAC,KAAK,IAAI;gBAAE,CAAC,EAAE,CAAC;YAC7C,SAAS;QACX,CAAC;QAED,IAAI,EAAE,KAAK,IAAI,EAAE,CAAC;YAChB,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;YAC9B,KAAK,GAAG,EAAE,CAAC;YACX,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC;gBACzC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YACxB,CAAC;YACD,UAAU,GAAG,EAAE,CAAC;YAChB,CAAC,EAAE,CAAC;YACJ,SAAS;QACX,CAAC;QAED,KAAK,IAAI,EAAE,CAAC;QACZ,CAAC,EAAE,CAAC;IACN,CAAC;IAED,uBAAuB;IACvB,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC9C,UAAU,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,CAAC;QAC9B,IAAI,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,EAAE,CAAC;YACzC,IAAI,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QACxB,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAWD,SAAS,iBAAiB,CAAC,IAAgB;IACzC,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,EAAE,CAAC;IAE/B,MAAM,OAAO,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;IACxB,IAAI,CAAC,OAAO;QAAE,OAAO,EAAE,CAAC;IAExB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,4BAA4B;IAEvF,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,MAAM,EAAc,EAAE;QAChD,MAAM,MAAM,GAAG,QAAQ;aACpB,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;aAC/B,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QAE/B,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,OAAO,MAAM,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;QAC/D,CAAC;QAED,eAAe;QACf,MAAM,WAAW,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;QAC9F,IAAI,WAAW,GAAG,MAAM,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACtC,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,OAAO,MAAM,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;QACjE,CAAC;QAED,gBAAgB;QAChB,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CACpC,CAAC,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,GAAG,EAAE,GAAG,CA
AC,CAAC,QAAQ,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CACnE,CAAC,MAAM,CAAC;QACT,IAAI,SAAS,GAAG,MAAM,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACpC,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,OAAO,MAAM,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC;QAClE,CAAC;QAED,aAAa;QACb,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CACpC,8BAA8B,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,gCAAgC,CAAC,IAAI,CAAC,CAAC,CAAC,CACnF,CAAC,MAAM,CAAC;QACT,IAAI,SAAS,GAAG,MAAM,CAAC,MAAM,GAAG,GAAG,EAAE,CAAC;YACpC,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,OAAO,MAAM,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;QAC/D,CAAC;QAED,OAAO,EAAE,IAAI,EAAE,MAAM,IAAI,OAAO,MAAM,GAAG,CAAC,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE,CAAC;IAC/D,CAAC,CAAC,CAAC;AACL,CAAC;AAED,8EAA8E;AAC9E,yBAAyB;AACzB,8EAA8E;AAE9E,SAAS,UAAU,CAAC,IAAgB,EAAE,SAAiB;IACrD,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEjC,MAAM,YAAY,GAAG,SAAS,GAAG,gBAAgB,CAAC;IAClD,MAAM,WAAW,GAAG,YAAY,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC,CAAC,SAAS,CAAC;IAEhE,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,MAAM,EAAE,gBAAgB,GAAG,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;QACrE,MAAM,GAAG,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;QACpB,IAAI,CAAC,GAAG;YAAE,SAAS;QAEnB,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,EAAE,CAAC,EAAE,EAAE,CAAC;YACrC,MAAM,IAAI,GAAG,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YAC1B,sDAAsD;YACtD,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,CAAC;QAC7D,CAAC;QACD,IAAI,YAAY,EAAE,CAAC;YACjB,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACpB,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,KAAK,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEvC,6BAA6B;QAC7B,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YACZ,MAAM,GAAG,GAAG,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC;YACnC,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACvC,CAAC;IACH,CAAC;IAED,IAAI,IAAI,CAAC,MAAM,GAAG,gBAAgB,GAAG,CAAC,EAAE,CAAC;QACvC,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,GAAG,gBAAgB,GAAG,CAAC,CAAC;QACrD,KAAK,CAAC,IAAI,CAAC,eAAe,SAAS,yBAAyB,CAAC,CAAC;IAChE,C
AAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED,8EAA8E;AAC9E,uBAAuB;AACvB,8EAA8E;AAE9E,SAAS,aAAa,CACpB,KAAa,EACb,QAAgB,EAChB,OAAe,EACf,QAAgB,EAChB,WAAmB,EACnB,WAAyB;IAEzB,MAAM,YAAY,GAChB,WAAW,CAAC,MAAM,GAAG,CAAC;QACpB,CAAC,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,OAAO,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;QAChE,CAAC,CAAC,KAAK,CAAC;IAEZ,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;;cAElC,QAAQ;iBACL,WAAW;sBACN,YAAY;mBACf,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;;;EAIvD,OAAO;CACR,CAAC;AACF,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"image.d.ts","sourceRoot":"","sources":["../../src/processors/image.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;CACpB;AAID,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAErD;AAED,wBAAsB,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,
|
|
1
|
+
{"version":3,"file":"image.d.ts","sourceRoot":"","sources":["../../src/processors/image.ts"],"names":[],"mappings":"AAIA,MAAM,WAAW,WAAW;IAC1B,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;CACpB;AAID,wBAAgB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAErD;AAED,wBAAsB,YAAY,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,WAAW,CAAC,CAgCzE"}
|
package/dist/processors/image.js
CHANGED
|
@@ -25,42 +25,70 @@ export async function processImage(filePath) {
|
|
|
25
25
|
sourcePath: filePath,
|
|
26
26
|
};
|
|
27
27
|
}
|
|
28
|
+
const description = await describeWithVision(apiKey, base64, mediaType);
|
|
29
|
+
return {
|
|
30
|
+
title,
|
|
31
|
+
description,
|
|
32
|
+
markdown: buildMarkdown(title, filePath, description),
|
|
33
|
+
sourcePath: filePath,
|
|
34
|
+
};
|
|
35
|
+
}
|
|
36
|
+
async function describeWithVision(apiKey, base64, mediaType) {
|
|
28
37
|
const client = new Anthropic({ apiKey });
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
{
|
|
37
|
-
type: 'image',
|
|
38
|
-
source: { type: 'base64', media_type: mediaType, data: base64 },
|
|
39
|
-
},
|
|
38
|
+
// Try Claude Vision — retry once on transient failures
|
|
39
|
+
for (let attempt = 0; attempt < 2; attempt++) {
|
|
40
|
+
try {
|
|
41
|
+
const response = await client.messages.create({
|
|
42
|
+
model: 'claude-sonnet-4-20250514',
|
|
43
|
+
max_tokens: 2048,
|
|
44
|
+
messages: [
|
|
40
45
|
{
|
|
41
|
-
|
|
42
|
-
|
|
46
|
+
role: 'user',
|
|
47
|
+
content: [
|
|
48
|
+
{
|
|
49
|
+
type: 'image',
|
|
50
|
+
source: {
|
|
51
|
+
type: 'base64',
|
|
52
|
+
media_type: mediaType,
|
|
53
|
+
data: base64,
|
|
54
|
+
},
|
|
55
|
+
},
|
|
56
|
+
{
|
|
57
|
+
type: 'text',
|
|
58
|
+
text: `Describe this image in detail for a knowledge base. Include:
|
|
43
59
|
1. What the image shows (objects, people, text, diagrams, charts)
|
|
44
60
|
2. Key information or data visible
|
|
45
|
-
3. Any text content (OCR)
|
|
61
|
+
3. Any text content (OCR — extract ALL visible text verbatim)
|
|
46
62
|
4. Context and significance
|
|
47
63
|
|
|
48
64
|
Be thorough but concise. This description will represent the image in a markdown wiki where agents need to understand its content without seeing it directly.`,
|
|
65
|
+
},
|
|
66
|
+
],
|
|
49
67
|
},
|
|
50
68
|
],
|
|
51
|
-
}
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
69
|
+
});
|
|
70
|
+
return response.content
|
|
71
|
+
.filter((block) => block.type === 'text')
|
|
72
|
+
.map((block) => block.text)
|
|
73
|
+
.join('');
|
|
74
|
+
}
|
|
75
|
+
catch (err) {
|
|
76
|
+
const isRetryable = err instanceof Error &&
|
|
77
|
+
(err.message.includes('rate_limit') ||
|
|
78
|
+
err.message.includes('overloaded') ||
|
|
79
|
+
err.message.includes('529') ||
|
|
80
|
+
err.message.includes('timeout'));
|
|
81
|
+
if (isRetryable && attempt === 0) {
|
|
82
|
+
// Wait 2s and retry once
|
|
83
|
+
await new Promise((r) => setTimeout(r, 2000));
|
|
84
|
+
continue;
|
|
85
|
+
}
|
|
86
|
+
// Non-retryable or second failure — return fallback description
|
|
87
|
+
const sizeKB = Math.round(Buffer.from(base64, 'base64').length / 1024);
|
|
88
|
+
return `[Image — Claude Vision analysis failed: ${err instanceof Error ? err.message : 'unknown error'}]\n\n_File size: ${sizeKB} KB. Set ANTHROPIC_API_KEY and ensure API access to enable image description._`;
|
|
89
|
+
}
|
|
90
|
+
}
|
|
91
|
+
return '[Image — description unavailable]';
|
|
64
92
|
}
|
|
65
93
|
function buildMarkdown(title, filePath, description) {
|
|
66
94
|
return `# ${title}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"image.js","sourceRoot":"","sources":["../../src/processors/image.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC9C,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAS1C,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC;AAEzF,MAAM,UAAU,WAAW,CAAC,QAAgB;IAC1C,OAAO,oBAAoB,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;AACnE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,QAAgB;IACjD,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAEtC,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;QACnC,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,gBAAgB,CAAC,GAAG,oBAAoB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC1G,CAAC;IAED,MAAM,SAAS,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAC5C,MAAM,SAAS,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;IAEpC,0CAA0C;IAC1C,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IAChD,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,qEAAqE;QACrE,OAAO;YACL,KAAK;YACL,WAAW,EAAE,eAAe,QAAQ,CAAC,QAAQ,CAAC,EAAE;YAChD,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,yDAAyD,CAAC;YACnG,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IAEzC,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"image.js","sourceRoot":"","sources":["../../src/processors/image.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,WAAW,CAAC;AAC9C,OAAO,SAAS,MAAM,mBAAmB,CAAC;AAS1C,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC;AAEzF,MAAM,UAAU,WAAW,CAAC,QAAgB;IAC1C,OAAO,oBAAoB,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;AACnE,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY,CAAC,QAAgB;IACjD,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAC;IAC5C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAEtC,IAAI,CAAC,oBAAoB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;QACnC,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,gBAAgB,CAAC,GAAG,oBAAoB,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC1G,CAAC;IAED,MAAM,SAAS,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,SAAS,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAC5C,MAAM,SAAS,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;IAEpC,0CAA0C;IAC1C,MAAM,MAAM,GAAG,OAAO,CAAC,GAAG,CAAC,mBAAmB,CAAC,CAAC;IAChD,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,qEAAqE;QACrE,OAAO;YACL,KAAK;YACL,WAAW,EAAE,eAAe,QAAQ,CAAC,QAAQ,CAAC,EAAE;YAChD,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,yDAAyD,CAAC;YACnG,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAED,MAAM,WAAW,GAAG,MAAM,kBAAkB,CAAC,MAAM,EAAE,MAAM,EAAE,SAAS,CAAC,CAAC;IAExE,OAAO;QACL,KAAK;QACL,WAAW;QACX,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,WAAW,CAAC;QACrD,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC;AAED,KAAK,UAAU,kBAAkB,CAAC,MAAc,EAAE,MAAc,EAAE,SAAiB;IACjF,MAAM,MAAM,GAAG,IAAI,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC;IAEzC,uDAAuD;IACvD,KAAK,IAAI,OAAO,GAAG,CAAC,EAAE,OAAO,GAAG,CAAC,EAAE,OAAO,EAAE,EAAE,CAAC;QAC7C,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAC5C,KAAK,EAAE,0BAA0B;gBACjC,UAAU,EAAE,IAAI;gBAChB,QAAQ,EAAE;oBACR;wBACE,IAAI,EAAE,MAAM;wBACZ,OAAO,EAAE;4BACP;gCACE,IAAI,EAAE,OAAO;gCACb,MAAM,EAAE;oCACN,IAAI,EAAE,QAAQ;oCACd,UAAU,EAAE,SAAoE;oCAChF,IAAI,EAAE,MAAM;iCACb;6BACF;4BACD;gCACE,IAAI,EAAE,MAAM;gCACZ,IAAI,EAAE;;;;;;8JAMwI;6BAC/I;yBACF
;qBACF;iBACF;aACF,CAAC,CAAC;YAEH,OAAO,QAAQ,CAAC,OAAO;iBACpB,MAAM,CAAC,CAAC,KAAK,EAAgC,EAAE,CAAC,KAAK,CAAC,IAAI,KAAK,MAAM,CAAC;iBACtE,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC;iBAC1B,IAAI,CAAC,EAAE,CAAC,CAAC;QACd,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,MAAM,WAAW,GACf,GAAG,YAAY,KAAK;gBACpB,CAAC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC;oBACjC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC;oBAClC,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC;oBAC3B,GAAG,CAAC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC;YAErC,IAAI,WAAW,IAAI,OAAO,KAAK,CAAC,EAAE,CAAC;gBACjC,yBAAyB;gBACzB,MAAM,IAAI,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,UAAU,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;gBAC9C,SAAS;YACX,CAAC;YAED,gEAAgE;YAChE,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,MAAM,EAAE,QAAQ,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,CAAC;YACvE,OAAO,2CAA2C,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,oBAAoB,MAAM,gFAAgF,CAAC;QACnN,CAAC;IACH,CAAC;IAED,OAAO,mCAAmC,CAAC;AAC7C,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,WAAmB;IACzE,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;;mBAE7B,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;IAErD,KAAK,KAAK,QAAQ;;;;EAIpB,WAAW;CACZ,CAAC;AACF,CAAC;AAED,SAAS,YAAY,CAAC,GAAW;IAC/B,QAAQ,GAAG,EAAE,CAAC;QACZ,KAAK,MAAM,CAAC;QACZ,KAAK,OAAO;YACV,OAAO,YAAY,CAAC;QACtB,KAAK,MAAM;YACT,OAAO,WAAW,CAAC;QACrB,KAAK,MAAM;YACT,OAAO,WAAW,CAAC;QACrB,KAAK,OAAO;YACV,OAAO,YAAY,CAAC;QACtB;YACE,OAAO,YAAY,CAAC;IACxB,CAAC;AACH,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pdf.d.ts","sourceRoot":"","sources":["../../src/processors/pdf.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,SAAS;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAgB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAEnD;AAED,wBAAsB,UAAU,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,
|
|
1
|
+
{"version":3,"file":"pdf.d.ts","sourceRoot":"","sources":["../../src/processors/pdf.ts"],"names":[],"mappings":"AAGA,MAAM,WAAW,SAAS;IACxB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAED,wBAAgB,SAAS,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAEnD;AAED,wBAAsB,UAAU,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,CA6BrE"}
|
package/dist/processors/pdf.js
CHANGED
|
@@ -7,9 +7,12 @@ export async function processPdf(filePath) {
|
|
|
7
7
|
const title = basename(filePath, '.pdf');
|
|
8
8
|
const buffer = readFileSync(filePath);
|
|
9
9
|
try {
|
|
10
|
-
// pdf-parse
|
|
11
|
-
//
|
|
12
|
-
|
|
10
|
+
// Import from lib/ directly to avoid pdf-parse's index.js self-test bug
|
|
11
|
+
// (index.js tries to open ./test/data/05-versions-space.pdf on import)
|
|
12
|
+
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
|
|
13
|
+
// @ts-expect-error — pdf-parse/lib has no type declarations
|
|
14
|
+
const pdfParseModule = await import('pdf-parse/lib/pdf-parse.js');
|
|
15
|
+
const pdfParse = (pdfParseModule.default ?? pdfParseModule);
|
|
13
16
|
const data = await pdfParse(buffer);
|
|
14
17
|
const content = data.text.trim();
|
|
15
18
|
return {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pdf.js","sourceRoot":"","sources":["../../src/processors/pdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAUrC,MAAM,UAAU,SAAS,CAAC,QAAgB;IACxC,OAAO,QAAQ,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;AACjD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC/C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IAEtC,IAAI,CAAC;QACH,
|
|
1
|
+
{"version":3,"file":"pdf.js","sourceRoot":"","sources":["../../src/processors/pdf.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAUrC,MAAM,UAAU,SAAS,CAAC,QAAgB;IACxC,OAAO,QAAQ,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;AACjD,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,QAAgB;IAC/C,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;IACzC,MAAM,MAAM,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IAEtC,IAAI,CAAC;QACH,wEAAwE;QACxE,uEAAuE;QACvE,6DAA6D;QAC7D,4DAA4D;QAC5D,MAAM,cAAc,GAAG,MAAM,MAAM,CAAC,4BAA4B,CAAC,CAAC;QAClE,MAAM,QAAQ,GAAG,CAAC,cAAc,CAAC,OAAO,IAAI,cAAc,CAAgG,CAAC;QAC3J,MAAM,IAAI,GAAG,MAAM,QAAQ,CAAC,MAAM,CAAC,CAAC;QACpC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC;QAEjC,OAAO;YACL,KAAK;YACL,OAAO;YACP,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,IAAI,CAAC,QAAQ,CAAC;YAChE,SAAS,EAAE,IAAI,CAAC,QAAQ;YACxB,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,KAAK;YACL,OAAO,EAAE,SAAS,KAAK,4BAA4B;YACnD,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,0DAA0D,CAAC;YACpG,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,OAAe,EAAE,SAAkB;IACzF,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;iBAC/B,SAAS,CAAC,CAAC,CAAC,kBAAkB,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE;mBAC5C,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;;;EAIvD,OAAO;CACR,CAAC;AACF,CAAC"}
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* PowerPoint (.pptx) processor.
|
|
3
|
-
* Extracts
|
|
3
|
+
* Extracts slides via zip (adm-zip) + XML parsing.
|
|
4
|
+
* Features: slide titles, body text with bullet/numbered list structure,
|
|
5
|
+
* speaker notes, image alt-text, proper paragraph grouping.
|
|
4
6
|
*/
|
|
5
7
|
export interface PptxResult {
|
|
6
8
|
title: string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pptx.d.ts","sourceRoot":"","sources":["../../src/processors/pptx.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"pptx.d.ts","sourceRoot":"","sources":["../../src/processors/pptx.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAKH,MAAM,WAAW,UAAU;IACzB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,MAAM,CAAC;IAChB,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,UAAU,EAAE,MAAM,CAAC;CACpB;AAiBD,wBAAsB,WAAW,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,UAAU,CAAC,CAgCvE"}
|
package/dist/processors/pptx.js
CHANGED
|
@@ -1,18 +1,30 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* PowerPoint (.pptx) processor.
|
|
3
|
-
* Extracts
|
|
3
|
+
* Extracts slides via zip (adm-zip) + XML parsing.
|
|
4
|
+
* Features: slide titles, body text with bullet/numbered list structure,
|
|
5
|
+
* speaker notes, image alt-text, proper paragraph grouping.
|
|
4
6
|
*/
|
|
5
|
-
import { readFileSync } from 'node:fs';
|
|
6
7
|
import { basename } from 'node:path';
|
|
8
|
+
import AdmZip from 'adm-zip';
|
|
7
9
|
export async function processPptx(filePath) {
|
|
8
10
|
const title = basename(filePath, '.pptx');
|
|
9
|
-
|
|
11
|
+
let slides;
|
|
12
|
+
try {
|
|
13
|
+
slides = extractSlides(filePath);
|
|
14
|
+
}
|
|
15
|
+
catch {
|
|
16
|
+
return {
|
|
17
|
+
title,
|
|
18
|
+
content: `[PowerPoint — extraction failed for ${basename(filePath)}]`,
|
|
19
|
+
markdown: buildMarkdown(title, filePath, '[Extraction failed — file may be corrupted]', 0),
|
|
20
|
+
slideCount: 0,
|
|
21
|
+
sourcePath: filePath,
|
|
22
|
+
};
|
|
23
|
+
}
|
|
10
24
|
const slideCount = slides.length;
|
|
11
25
|
let content;
|
|
12
26
|
if (slides.length > 0) {
|
|
13
|
-
content = slides
|
|
14
|
-
.map((slide) => formatSlide(slide))
|
|
15
|
-
.join('\n\n---\n\n');
|
|
27
|
+
content = slides.map(formatSlide).join('\n\n---\n\n');
|
|
16
28
|
}
|
|
17
29
|
else {
|
|
18
30
|
content = `[PowerPoint — no text content extracted from ${basename(filePath)}]`;
|
|
@@ -26,109 +38,203 @@ export async function processPptx(filePath) {
|
|
|
26
38
|
};
|
|
27
39
|
}
|
|
28
40
|
function extractSlides(filePath) {
|
|
29
|
-
const
|
|
30
|
-
const
|
|
31
|
-
const
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
continue;
|
|
43
|
-
const texts = extractTextElements(chunk);
|
|
44
|
-
if (texts.length > 0) {
|
|
45
|
-
slides.push({
|
|
46
|
-
slideNumber: i + 1,
|
|
47
|
-
texts,
|
|
48
|
-
notes: [], // Notes extraction below
|
|
49
|
-
});
|
|
41
|
+
const zip = new AdmZip(filePath);
|
|
42
|
+
const entries = zip.getEntries();
|
|
43
|
+
const slideEntries = new Map();
|
|
44
|
+
const noteEntries = new Map();
|
|
45
|
+
for (const entry of entries) {
|
|
46
|
+
const name = entry.entryName;
|
|
47
|
+
const slideMatch = name.match(/^ppt\/slides\/slide(\d+)\.xml$/);
|
|
48
|
+
if (slideMatch?.[1]) {
|
|
49
|
+
slideEntries.set(parseInt(slideMatch[1], 10), entry.getData().toString('utf-8'));
|
|
50
|
+
}
|
|
51
|
+
const noteMatch = name.match(/^ppt\/notesSlides\/notesSlide(\d+)\.xml$/);
|
|
52
|
+
if (noteMatch?.[1]) {
|
|
53
|
+
noteEntries.set(parseInt(noteMatch[1], 10), entry.getData().toString('utf-8'));
|
|
50
54
|
}
|
|
51
55
|
}
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
56
|
+
const slideNumbers = [...slideEntries.keys()].sort((a, b) => a - b);
|
|
57
|
+
const slides = [];
|
|
58
|
+
for (const num of slideNumbers) {
|
|
59
|
+
const slideXml = slideEntries.get(num) ?? '';
|
|
60
|
+
const noteXml = noteEntries.get(num) ?? '';
|
|
61
|
+
const title = extractSlideTitle(slideXml);
|
|
62
|
+
const paragraphs = extractBodyParagraphs(slideXml, title);
|
|
63
|
+
const notes = noteXml ? extractNotesText(noteXml) : [];
|
|
64
|
+
const imageAlts = extractImageAltTexts(slideXml);
|
|
65
|
+
slides.push({ slideNumber: num, title, paragraphs, notes, imageAlts });
|
|
66
|
+
}
|
|
67
|
+
return slides;
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
* Extract slide title from <p:sp> with <p:ph type="title"> or type="ctrTitle".
|
|
71
|
+
*/
|
|
72
|
+
function extractSlideTitle(xml) {
|
|
73
|
+
const titleTypes = ['title', 'ctrTitle'];
|
|
74
|
+
for (const phType of titleTypes) {
|
|
75
|
+
const phPattern = new RegExp(`<p:sp>([\\s\\S]*?)<p:ph[^>]*type="${phType}"[^>]*/?>([\\s\\S]*?)</p:sp>`, 'g');
|
|
76
|
+
let match;
|
|
77
|
+
while ((match = phPattern.exec(xml)) !== null) {
|
|
78
|
+
const texts = extractRawTextFromXml(match[0]);
|
|
79
|
+
if (texts.length > 0)
|
|
80
|
+
return texts.join(' ');
|
|
66
81
|
}
|
|
67
82
|
}
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
83
|
+
return '';
|
|
84
|
+
}
|
|
85
|
+
/**
|
|
86
|
+
* Extract body paragraphs with bullet/list detection.
|
|
87
|
+
* Parses <a:p> elements within shapes, detecting:
|
|
88
|
+
* - <a:buChar char="•"/> or similar → bullet point
|
|
89
|
+
* - <a:buAutoNum type="arabicPeriod"/> → numbered list
|
|
90
|
+
* - <a:buNone/> → no bullet (plain paragraph)
|
|
91
|
+
* - <a:pPr lvl="N"/> → indentation level
|
|
92
|
+
* - <a:rPr b="1"/> → bold text
|
|
93
|
+
*/
|
|
94
|
+
function extractBodyParagraphs(xml, titleText) {
|
|
95
|
+
const paragraphs = [];
|
|
96
|
+
// Extract all shape bodies, skipping title/subtitle placeholders
|
|
97
|
+
const shapeRegex = /<p:sp>([\s\S]*?)<\/p:sp>/g;
|
|
98
|
+
let shapeMatch;
|
|
99
|
+
while ((shapeMatch = shapeRegex.exec(xml)) !== null) {
|
|
100
|
+
const shapeXml = shapeMatch[1] ?? '';
|
|
101
|
+
// Skip title/subtitle placeholder shapes
|
|
102
|
+
if (/<p:ph[^>]*type="(title|ctrTitle|subTitle)"/.test(shapeXml))
|
|
103
|
+
continue;
|
|
104
|
+
// Skip slide number, date, footer placeholders
|
|
105
|
+
if (/<p:ph[^>]*type="(sldNum|dt|ftr)"/.test(shapeXml))
|
|
73
106
|
continue;
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
107
|
+
// Extract <p:txBody> content
|
|
108
|
+
const txBodyMatch = shapeXml.match(/<p:txBody>([\s\S]*?)<\/p:txBody>/);
|
|
109
|
+
if (!txBodyMatch?.[1])
|
|
110
|
+
continue;
|
|
111
|
+
const txBody = txBodyMatch[1];
|
|
112
|
+
const parsedParagraphs = parseParagraphs(txBody);
|
|
113
|
+
for (const p of parsedParagraphs) {
|
|
114
|
+
// Skip if text matches title or is a pure number/date
|
|
115
|
+
if (p.text === titleText)
|
|
116
|
+
continue;
|
|
117
|
+
if (/^\d+$/.test(p.text) || /^\d{1,2}\/\d{1,2}\/\d{2,4}$/.test(p.text))
|
|
118
|
+
continue;
|
|
119
|
+
paragraphs.push(p);
|
|
79
120
|
}
|
|
80
121
|
}
|
|
81
|
-
return
|
|
122
|
+
return paragraphs;
|
|
82
123
|
}
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
const
|
|
124
|
+
/**
|
|
125
|
+
* Parse <a:p> elements from a txBody, detecting bullets and levels.
|
|
126
|
+
*/
|
|
127
|
+
function parseParagraphs(txBodyXml) {
|
|
128
|
+
const result = [];
|
|
129
|
+
const paraRegex = /<a:p>([\s\S]*?)<\/a:p>/g;
|
|
88
130
|
let match;
|
|
89
|
-
while ((match =
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
const
|
|
94
|
-
|
|
95
|
-
|
|
131
|
+
while ((match = paraRegex.exec(txBodyXml)) !== null) {
|
|
132
|
+
const paraXml = match[1] ?? '';
|
|
133
|
+
// Extract indentation level from <a:pPr lvl="N">
|
|
134
|
+
const levelMatch = paraXml.match(/<a:pPr[^>]*\blvl="(\d+)"/);
|
|
135
|
+
const level = levelMatch?.[1] ? parseInt(levelMatch[1], 10) : 0;
|
|
136
|
+
// Detect bullet type
|
|
137
|
+
let bullet = 'none';
|
|
138
|
+
if (/<a:buChar\b/.test(paraXml)) {
|
|
139
|
+
bullet = 'bullet';
|
|
140
|
+
}
|
|
141
|
+
else if (/<a:buAutoNum\b/.test(paraXml)) {
|
|
142
|
+
bullet = 'numbered';
|
|
143
|
+
}
|
|
144
|
+
else if (/<a:buNone\s*\/>/.test(paraXml)) {
|
|
145
|
+
bullet = 'none';
|
|
146
|
+
}
|
|
147
|
+
else if (/<a:pPr\b/.test(paraXml) && !/<a:buNone/.test(paraXml) && level > 0) {
|
|
148
|
+
// If there's a pPr with a level but no explicit buNone, it's likely a bullet
|
|
149
|
+
bullet = 'bullet';
|
|
150
|
+
}
|
|
151
|
+
// Check for bold
|
|
152
|
+
const isBold = /<a:rPr[^>]*\bb="1"/.test(paraXml);
|
|
153
|
+
// Extract text runs
|
|
154
|
+
const texts = [];
|
|
155
|
+
const runRegex = /<a:r>([\s\S]*?)<\/a:r>/g;
|
|
156
|
+
let runMatch;
|
|
157
|
+
while ((runMatch = runRegex.exec(paraXml)) !== null) {
|
|
158
|
+
const tMatch = (runMatch[1] ?? '').match(/<a:t>([\s\S]*?)<\/a:t>/);
|
|
159
|
+
if (tMatch?.[1]?.trim()) {
|
|
160
|
+
texts.push(decodeXmlEntities(tMatch[1].trim()));
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
// Also check for <a:fld> (field) text runs (e.g., slide numbers in body)
|
|
164
|
+
const fldRegex = /<a:fld[^>]*>([\s\S]*?)<\/a:fld>/g;
|
|
165
|
+
let fldMatch;
|
|
166
|
+
while ((fldMatch = fldRegex.exec(paraXml)) !== null) {
|
|
167
|
+
const tMatch = (fldMatch[1] ?? '').match(/<a:t>([\s\S]*?)<\/a:t>/);
|
|
168
|
+
if (tMatch?.[1]?.trim()) {
|
|
169
|
+
texts.push(decodeXmlEntities(tMatch[1].trim()));
|
|
170
|
+
}
|
|
171
|
+
}
|
|
172
|
+
const text = texts.join(' ');
|
|
173
|
+
if (text.length > 0) {
|
|
174
|
+
result.push({ text, level, bullet, isBold });
|
|
175
|
+
}
|
|
96
176
|
}
|
|
97
|
-
return
|
|
177
|
+
return result;
|
|
98
178
|
}
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
179
|
+
/**
|
|
180
|
+
* Extract image alt-text from <p:pic> elements.
|
|
181
|
+
* Alt text is stored in: <p:cNvPr id="..." name="..." descr="Alt text here"/>
|
|
182
|
+
*/
|
|
183
|
+
function extractImageAltTexts(xml) {
|
|
184
|
+
const alts = [];
|
|
185
|
+
// Look for p:pic shapes with descr attribute
|
|
186
|
+
const picRegex = /<p:pic>([\s\S]*?)<\/p:pic>/g;
|
|
103
187
|
let match;
|
|
104
|
-
while ((match =
|
|
105
|
-
|
|
188
|
+
while ((match = picRegex.exec(xml)) !== null) {
|
|
189
|
+
const picXml = match[1] ?? '';
|
|
190
|
+
// descr attribute on cNvPr holds alt text
|
|
191
|
+
const descrMatch = picXml.match(/descr="([^"]+)"/);
|
|
192
|
+
if (descrMatch?.[1]) {
|
|
193
|
+
alts.push(decodeXmlEntities(descrMatch[1]));
|
|
194
|
+
}
|
|
195
|
+
// Also check for <a:hlinkClick> tooltip as fallback
|
|
196
|
+
const tooltipMatch = picXml.match(/tooltip="([^"]+)"/);
|
|
197
|
+
if (tooltipMatch?.[1] && !descrMatch) {
|
|
198
|
+
alts.push(decodeXmlEntities(tooltipMatch[1]));
|
|
199
|
+
}
|
|
106
200
|
}
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
201
|
+
// Also check <wsp> (WordArt/shapes) and <p:sp> with images
|
|
202
|
+
const spRegex = /<p:sp>([\s\S]*?)<\/p:sp>/g;
|
|
203
|
+
while ((match = spRegex.exec(xml)) !== null) {
|
|
204
|
+
const spXml = match[1] ?? '';
|
|
205
|
+
if (/<a:blipFill>/.test(spXml)) {
|
|
206
|
+
const descrMatch = spXml.match(/<p:cNvPr[^>]*descr="([^"]+)"/);
|
|
207
|
+
if (descrMatch?.[1]) {
|
|
208
|
+
alts.push(decodeXmlEntities(descrMatch[1]));
|
|
209
|
+
}
|
|
210
|
+
}
|
|
111
211
|
}
|
|
112
|
-
return
|
|
212
|
+
return alts;
|
|
213
|
+
}
|
|
214
|
+
/**
|
|
215
|
+
* Extract speaker notes text from notesSlide XML.
|
|
216
|
+
* Parses at paragraph level for better structure.
|
|
217
|
+
*/
|
|
218
|
+
function extractNotesText(noteXml) {
|
|
219
|
+
const paragraphs = parseParagraphs(noteXml);
|
|
220
|
+
return paragraphs
|
|
221
|
+
.map((p) => p.text)
|
|
222
|
+
.filter((t) => {
|
|
223
|
+
if (/^\d+$/.test(t))
|
|
224
|
+
return false;
|
|
225
|
+
if (t.length < 2)
|
|
226
|
+
return false;
|
|
227
|
+
return true;
|
|
228
|
+
});
|
|
113
229
|
}
|
|
114
|
-
function
|
|
230
|
+
function extractRawTextFromXml(xml) {
|
|
115
231
|
const texts = [];
|
|
116
|
-
// <a:t> elements contain text in Office OpenXML
|
|
117
232
|
const textRegex = /<a:t>([\s\S]*?)<\/a:t>/g;
|
|
118
233
|
let match;
|
|
119
234
|
while ((match = textRegex.exec(xml)) !== null) {
|
|
120
235
|
const text = match[1]?.trim();
|
|
121
|
-
if (text && text.length > 0)
|
|
122
|
-
texts.push(decodeXmlEntities(text));
|
|
123
|
-
}
|
|
124
|
-
}
|
|
125
|
-
// Also check for <a:fld> (field codes that may contain text)
|
|
126
|
-
const fldRegex = /<a:fld[^>]*>[\s\S]*?<a:t>([\s\S]*?)<\/a:t>[\s\S]*?<\/a:fld>/g;
|
|
127
|
-
while ((match = fldRegex.exec(xml)) !== null) {
|
|
128
|
-
const text = match[1]?.trim();
|
|
129
|
-
if (text && text.length > 0 && !texts.includes(text)) {
|
|
236
|
+
if (text && text.length > 0)
|
|
130
237
|
texts.push(decodeXmlEntities(text));
|
|
131
|
-
}
|
|
132
238
|
}
|
|
133
239
|
return texts;
|
|
134
240
|
}
|
|
@@ -139,15 +245,52 @@ function decodeXmlEntities(text) {
|
|
|
139
245
|
.replace(/&gt;/g, '>')
|
|
140
246
|
.replace(/&quot;/g, '"')
|
|
141
247
|
.replace(/&apos;/g, "'")
|
|
142
|
-
.replace(/&#(\d+);/g, (_, code) => String.fromCharCode(parseInt(code, 10)))
|
|
248
|
+
.replace(/&#(\d+);/g, (_, code) => String.fromCharCode(parseInt(code, 10)))
|
|
249
|
+
.replace(/&#x([0-9A-Fa-f]+);/g, (_, code) => String.fromCharCode(parseInt(code, 16)));
|
|
143
250
|
}
|
|
144
251
|
function formatSlide(slide) {
|
|
145
|
-
|
|
146
|
-
|
|
252
|
+
const parts = [];
|
|
253
|
+
// Heading
|
|
254
|
+
if (slide.title) {
|
|
255
|
+
parts.push(`## Slide ${slide.slideNumber}: ${slide.title}`);
|
|
256
|
+
}
|
|
257
|
+
else {
|
|
258
|
+
parts.push(`## Slide ${slide.slideNumber}`);
|
|
259
|
+
}
|
|
260
|
+
// Body paragraphs with proper bullet/list formatting
|
|
261
|
+
if (slide.paragraphs.length > 0) {
|
|
262
|
+
let numberedCounter = 0;
|
|
263
|
+
const bodyLines = [];
|
|
264
|
+
for (const p of slide.paragraphs) {
|
|
265
|
+
const indent = ' '.repeat(p.level);
|
|
266
|
+
const text = p.isBold ? `**${p.text}**` : p.text;
|
|
267
|
+
if (p.bullet === 'numbered') {
|
|
268
|
+
numberedCounter++;
|
|
269
|
+
bodyLines.push(`${indent}${numberedCounter}. ${text}`);
|
|
270
|
+
}
|
|
271
|
+
else if (p.bullet === 'bullet') {
|
|
272
|
+
numberedCounter = 0;
|
|
273
|
+
bodyLines.push(`${indent}- ${text}`);
|
|
274
|
+
}
|
|
275
|
+
else {
|
|
276
|
+
numberedCounter = 0;
|
|
277
|
+
bodyLines.push(`${indent}${text}`);
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
parts.push(bodyLines.join('\n'));
|
|
281
|
+
}
|
|
282
|
+
else {
|
|
283
|
+
parts.push('_[No body text]_');
|
|
284
|
+
}
|
|
285
|
+
// Image alt-texts
|
|
286
|
+
if (slide.imageAlts.length > 0) {
|
|
287
|
+
parts.push(`**Images:** ${slide.imageAlts.map((a) => `_${a}_`).join(', ')}`);
|
|
288
|
+
}
|
|
289
|
+
// Speaker notes
|
|
147
290
|
if (slide.notes.length > 0) {
|
|
148
|
-
|
|
291
|
+
parts.push(`**Speaker Notes:**\n\n> ${slide.notes.join('\n> ')}`);
|
|
149
292
|
}
|
|
150
|
-
return
|
|
293
|
+
return parts.join('\n\n');
|
|
151
294
|
}
|
|
152
295
|
function buildMarkdown(title, filePath, content, slideCount) {
|
|
153
296
|
return `# ${title}
|
|
@@ -157,8 +300,6 @@ function buildMarkdown(title, filePath, content, slideCount) {
|
|
|
157
300
|
> **Slides:** ${slideCount}
|
|
158
301
|
> **Processed:** ${new Date().toISOString().split('T')[0]}
|
|
159
302
|
|
|
160
|
-
## Slides
|
|
161
|
-
|
|
162
303
|
${content}
|
|
163
304
|
`;
|
|
164
305
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pptx.js","sourceRoot":"","sources":["../../src/processors/pptx.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AACvC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAgBrC,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,QAAgB;IAChD,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAE1C,MAAM,MAAM,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;IACvC,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC;IAEjC,IAAI,OAAe,CAAC;IACpB,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,OAAO,GAAG,MAAM;aACb,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC;aAClC,IAAI,CAAC,aAAa,CAAC,CAAC;IACzB,CAAC;SAAM,CAAC;QACN,OAAO,GAAG,gDAAgD,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;IAClF,CAAC;IAED,OAAO;QACL,KAAK;QACL,OAAO;QACP,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,CAAC;QAC7D,UAAU;QACV,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CAAC,QAAgB;IACrC,MAAM,MAAM,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IACtC,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAE1C,MAAM,MAAM,GAAmB,EAAE,CAAC;IAElC,uCAAuC;IACvC,iDAAiD;IACjD,0DAA0D;IAC1D,oEAAoE;IAEpE,yEAAyE;IACzE,gEAAgE;IAChE,MAAM,WAAW,GAAG,aAAa,CAAC,OAAO,CAAC,CAAC;IAE3C,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC5C,MAAM,KAAK,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAC7B,IAAI,CAAC,KAAK;YAAE,SAAS;QAErB,MAAM,KAAK,GAAG,mBAAmB,CAAC,KAAK,CAAC,CAAC;QACzC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACrB,MAAM,CAAC,IAAI,CAAC;gBACV,WAAW,EAAE,CAAC,GAAG,CAAC;gBAClB,KAAK;gBACL,KAAK,EAAE,EAAE,EAAE,yBAAyB;aACrC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,+DAA+D;IAC/D,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxB,MAAM,QAAQ,GAAG,mBAAmB,CAAC,OAAO,CAAC,CAAC;QAC9C,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACxB,kEAAkE;YAClE,MAAM,SAAS,GAAG,CAAC,CAAC;YACpB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;gBACpD,MAAM,UAAU,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;gBACpD,MAAM,CAAC,IAAI,CAAC;oBACV,WAAW,EAAE,MAAM,CAAC,MAAM,GAAG,CAAC;oBAC9B,KAAK,EAAE,UAAU;oBACjB,KAAK,EAAE,EAAE;iBACV,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IAC
H,CAAC;IAED,uDAAuD;IACvD,MAAM,UAAU,GAAG,YAAY,CAAC,OAAO,CAAC,CAAC;IACzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC3C,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;QAC5B,IAAI,CAAC,KAAK;YAAE,SAAS;QAErB,MAAM,KAAK,GAAG,mBAAmB,CAAC,KAAK,CAAC,CAAC;QACzC,iCAAiC;QACjC,MAAM,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QACxB,IAAI,KAAK,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9B,KAAK,CAAC,KAAK,GAAG,KAAK,CAAC;QACtB,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,aAAa,CAAC,OAAe;IACpC,8CAA8C;IAC9C,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,WAAW,GAAG,gBAAgB,CAAC;IACrC,MAAM,SAAS,GAAa,EAAE,CAAC;IAE/B,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACpD,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAC9B,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC1C,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAChC,MAAM,GAAG,GAAG,SAAS,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,OAAO,CAAC,MAAM,CAAC;QAC/C,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IACtE,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,YAAY,CAAC,OAAe;IACnC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,MAAM,UAAU,GAAG,qBAAqB,CAAC;IACzC,MAAM,SAAS,GAAa,EAAE,CAAC;IAE/B,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACnD,SAAS,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;IAC9B,CAAC;IAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAC1C,MAAM,KAAK,GAAG,SAAS,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAChC,MAAM,GAAG,GAAG,SAAS,CAAC,CAAC,GAAG,CAAC,CAAC,IAAI,OAAO,CAAC,MAAM,CAAC;QAC/C,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,CAAC,KAAK,EAAE,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;IACtE,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAS,mBAAmB,CAAC,GAAW;IACtC,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,gDAAgD;IAChD,MAAM,SAAS,GAAG,yBAAyB,CAAC;IAC5C,IAAI,KAA6B,CAAC;IAElC,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,C
AAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC9C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;QAC9B,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAED,6DAA6D;IAC7D,MAAM,QAAQ,GAAG,8DAA8D,CAAC;IAChF,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC7C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;QAC9B,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,IAAI,CAAC,EAAE,CAAC;YACrD,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC;AACf,CAAC;AAED,SAAS,iBAAiB,CAAC,IAAY;IACrC,OAAO,IAAI;SACR,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;AAC1F,CAAC;AAED,SAAS,WAAW,CAAC,KAAmB;IACtC,IAAI,EAAE,GAAG,aAAa,KAAK,CAAC,WAAW,MAAM,CAAC;IAC9C,EAAE,IAAI,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAE/B,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3B,EAAE,IAAI,+BAA+B,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;IAC/D,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,OAAe,EAAE,UAAkB;IACzF,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;;gBAEhC,UAAU;mBACP,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;;;EAIvD,OAAO;CACR,CAAC;AACF,CAAC"}
|
|
1
|
+
{"version":3,"file":"pptx.js","sourceRoot":"","sources":["../../src/processors/pptx.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AACrC,OAAO,MAAM,MAAM,SAAS,CAAC;AAyB7B,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,QAAgB;IAChD,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;IAE1C,IAAI,MAAsB,CAAC;IAC3B,IAAI,CAAC;QACH,MAAM,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;IACnC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO;YACL,KAAK;YACL,OAAO,EAAE,uCAAuC,QAAQ,CAAC,QAAQ,CAAC,GAAG;YACrE,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,6CAA6C,EAAE,CAAC,CAAC;YAC1F,UAAU,EAAE,CAAC;YACb,UAAU,EAAE,QAAQ;SACrB,CAAC;IACJ,CAAC;IAED,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC;IACjC,IAAI,OAAe,CAAC;IAEpB,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,OAAO,GAAG,MAAM,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IACxD,CAAC;SAAM,CAAC;QACN,OAAO,GAAG,gDAAgD,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;IAClF,CAAC;IAED,OAAO;QACL,KAAK;QACL,OAAO;QACP,QAAQ,EAAE,aAAa,CAAC,KAAK,EAAE,QAAQ,EAAE,OAAO,EAAE,UAAU,CAAC;QAC7D,UAAU;QACV,UAAU,EAAE,QAAQ;KACrB,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CAAC,QAAgB;IACrC,MAAM,GAAG,GAAG,IAAI,MAAM,CAAC,QAAQ,CAAC,CAAC;IACjC,MAAM,OAAO,GAAG,GAAG,CAAC,UAAU,EAAE,CAAC;IAEjC,MAAM,YAAY,GAAwB,IAAI,GAAG,EAAE,CAAC;IACpD,MAAM,WAAW,GAAwB,IAAI,GAAG,EAAE,CAAC;IAEnD,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;QAC5B,MAAM,IAAI,GAAG,KAAK,CAAC,SAAS,CAAC;QAE7B,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;QAChE,IAAI,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACpB,YAAY,CAAC,GAAG,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,KAAK,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;QACnF,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,0CAA0C,CAAC,CAAC;QACzE,IAAI,SAAS,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACnB,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,KAAK,CAAC,OAAO,EAAE,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC;QACjF,CAAC;IACH,CAAC;IAED,MAAM,YAAY,GAAG,CAAC,GAAG,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;IACpE,MAAM,MAAM,GAAmB,EAAE,CAAC;IAElC,KAAK,MAAM,GAAG,IAAI,YAAY,EAAE,C
AAC;QAC/B,MAAM,QAAQ,GAAG,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QAC7C,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,IAAI,EAAE,CAAC;QAE3C,MAAM,KAAK,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAC1C,MAAM,UAAU,GAAG,qBAAqB,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAC1D,MAAM,KAAK,GAAG,OAAO,CAAC,CAAC,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QACvD,MAAM,SAAS,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;QAEjD,MAAM,CAAC,IAAI,CAAC,EAAE,WAAW,EAAE,GAAG,EAAE,KAAK,EAAE,UAAU,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC,CAAC;IACzE,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,GAAW;IACpC,MAAM,UAAU,GAAG,CAAC,OAAO,EAAE,UAAU,CAAC,CAAC;IAEzC,KAAK,MAAM,MAAM,IAAI,UAAU,EAAE,CAAC;QAChC,MAAM,SAAS,GAAG,IAAI,MAAM,CAC1B,qCAAqC,MAAM,8BAA8B,EACzE,GAAG,CACJ,CAAC;QACF,IAAI,KAA6B,CAAC;QAClC,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YAC9C,MAAM,KAAK,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9C,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;gBAAE,OAAO,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC;IAED,OAAO,EAAE,CAAC;AACZ,CAAC;AAED;;;;;;;;GAQG;AACH,SAAS,qBAAqB,CAAC,GAAW,EAAE,SAAiB;IAC3D,MAAM,UAAU,GAAgB,EAAE,CAAC;IAEnC,iEAAiE;IACjE,MAAM,UAAU,GAAG,2BAA2B,CAAC;IAC/C,IAAI,UAAkC,CAAC;IAEvC,OAAO,CAAC,UAAU,GAAG,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACpD,MAAM,QAAQ,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAErC,yCAAyC;QACzC,IAAI,4CAA4C,CAAC,IAAI,CAAC,QAAQ,CAAC;YAAE,SAAS;QAC1E,+CAA+C;QAC/C,IAAI,kCAAkC,CAAC,IAAI,CAAC,QAAQ,CAAC;YAAE,SAAS;QAEhE,6BAA6B;QAC7B,MAAM,WAAW,GAAG,QAAQ,CAAC,KAAK,CAAC,kCAAkC,CAAC,CAAC;QACvE,IAAI,CAAC,WAAW,EAAE,CAAC,CAAC,CAAC;YAAE,SAAS;QAEhC,MAAM,MAAM,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC;QAC9B,MAAM,gBAAgB,GAAG,eAAe,CAAC,MAAM,CAAC,CAAC;QAEjD,KAAK,MAAM,CAAC,IAAI,gBAAgB,EAAE,CAAC;YACjC,sDAAsD;YACtD,IAAI,CAAC,CAAC,IAAI,KAAK,SAAS;gBAAE,SAAS;YACnC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,6BAA6B,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;gBAAE,SAAS;YACjF,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACrB,CAAC;IACH,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;GAEG;AACH,SAAS,eAAe,CAA
C,SAAiB;IACxC,MAAM,MAAM,GAAgB,EAAE,CAAC;IAC/B,MAAM,SAAS,GAAG,yBAAyB,CAAC;IAC5C,IAAI,KAA6B,CAAC;IAElC,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACpD,MAAM,OAAO,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAE/B,iDAAiD;QACjD,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,0BAA0B,CAAC,CAAC;QAC7D,MAAM,KAAK,GAAG,UAAU,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAEhE,qBAAqB;QACrB,IAAI,MAAM,GAAwB,MAAM,CAAC;QACzC,IAAI,aAAa,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAChC,MAAM,GAAG,QAAQ,CAAC;QACpB,CAAC;aAAM,IAAI,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC1C,MAAM,GAAG,UAAU,CAAC;QACtB,CAAC;aAAM,IAAI,iBAAiB,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC;YAC3C,MAAM,GAAG,MAAM,CAAC;QAClB,CAAC;aAAM,IAAI,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,KAAK,GAAG,CAAC,EAAE,CAAC;YAC/E,6EAA6E;YAC7E,MAAM,GAAG,QAAQ,CAAC;QACpB,CAAC;QAED,iBAAiB;QACjB,MAAM,MAAM,GAAG,oBAAoB,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAElD,oBAAoB;QACpB,MAAM,KAAK,GAAa,EAAE,CAAC;QAC3B,MAAM,QAAQ,GAAG,yBAAyB,CAAC;QAC3C,IAAI,QAAgC,CAAC;QACrC,OAAO,CAAC,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACpD,MAAM,MAAM,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC;YACnE,IAAI,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC;gBACxB,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;YAClD,CAAC;QACH,CAAC;QAED,yEAAyE;QACzE,MAAM,QAAQ,GAAG,kCAAkC,CAAC;QACpD,IAAI,QAAgC,CAAC;QACrC,OAAO,CAAC,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACpD,MAAM,MAAM,GAAG,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC;YACnE,IAAI,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC;gBACxB,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;YAClD,CAAC;QACH,CAAC;QAED,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAC7B,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACpB,MAAM,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,KAAK,EAAE,MAAM,EAAE,MAAM,EA
AE,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED;;;GAGG;AACH,SAAS,oBAAoB,CAAC,GAAW;IACvC,MAAM,IAAI,GAAa,EAAE,CAAC;IAE1B,6CAA6C;IAC7C,MAAM,QAAQ,GAAG,6BAA6B,CAAC;IAC/C,IAAI,KAA6B,CAAC;IAElC,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC7C,MAAM,MAAM,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC9B,0CAA0C;QAC1C,MAAM,UAAU,GAAG,MAAM,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;QACnD,IAAI,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YACpB,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAC9C,CAAC;QACD,oDAAoD;QACpD,MAAM,YAAY,GAAG,MAAM,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;QACvD,IAAI,YAAY,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC;YACrC,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QAChD,CAAC;IACH,CAAC;IAED,2DAA2D;IAC3D,MAAM,OAAO,GAAG,2BAA2B,CAAC;IAC5C,OAAO,CAAC,KAAK,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC5C,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC7B,IAAI,cAAc,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC;YAC/B,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC;YAC/D,IAAI,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;gBACpB,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YAC9C,CAAC;QACH,CAAC;IACH,CAAC;IAED,OAAO,IAAI,CAAC;AACd,CAAC;AAED;;;GAGG;AACH,SAAS,gBAAgB,CAAC,OAAe;IACvC,MAAM,UAAU,GAAG,eAAe,CAAC,OAAO,CAAC,CAAC;IAC5C,OAAO,UAAU;SACd,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;SAClB,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;QACZ,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC;YAAE,OAAO,KAAK,CAAC;QAClC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,KAAK,CAAC;QAC/B,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACP,CAAC;AAED,SAAS,qBAAqB,CAAC,GAAW;IACxC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,MAAM,SAAS,GAAG,yBAAyB,CAAC;IAC5C,IAAI,KAA6B,CAAC;IAClC,OAAO,CAAC,KAAK,GAAG,SAAS,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QAC9C,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;QAC9B,IAAI,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC;YAAE,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC;IACnE,CAAC;IACD,OAAO,KAAK,CAAC;AACf,CAAC
;AAED,SAAS,iBAAiB,CAAC,IAAY;IACrC,OAAO,IAAI;SACR,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;SACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;SACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;SACvB,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC;SACpF,OAAO,CAAC,qBAAqB,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAc,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;AACpG,CAAC;AAED,SAAS,WAAW,CAAC,KAAmB;IACtC,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,UAAU;IACV,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;QAChB,KAAK,CAAC,IAAI,CAAC,YAAY,KAAK,CAAC,WAAW,KAAK,KAAK,CAAC,KAAK,EAAE,CAAC,CAAC;IAC9D,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,YAAY,KAAK,CAAC,WAAW,EAAE,CAAC,CAAC;IAC9C,CAAC;IAED,qDAAqD;IACrD,IAAI,KAAK,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAChC,IAAI,eAAe,GAAG,CAAC,CAAC;QACxB,MAAM,SAAS,GAAa,EAAE,CAAC;QAE/B,KAAK,MAAM,CAAC,IAAI,KAAK,CAAC,UAAU,EAAE,CAAC;YACjC,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YACpC,MAAM,IAAI,GAAG,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC;YAEjD,IAAI,CAAC,CAAC,MAAM,KAAK,UAAU,EAAE,CAAC;gBAC5B,eAAe,EAAE,CAAC;gBAClB,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,GAAG,eAAe,KAAK,IAAI,EAAE,CAAC,CAAC;YACzD,CAAC;iBAAM,IAAI,CAAC,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;gBACjC,eAAe,GAAG,CAAC,CAAC;gBACpB,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,KAAK,IAAI,EAAE,CAAC,CAAC;YACvC,CAAC;iBAAM,CAAC;gBACN,eAAe,GAAG,CAAC,CAAC;gBACpB,SAAS,CAAC,IAAI,CAAC,GAAG,MAAM,GAAG,IAAI,EAAE,CAAC,CAAC;YACrC,CAAC;QACH,CAAC;QAED,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC;IACnC,CAAC;SAAM,CAAC;QACN,KAAK,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;IACjC,CAAC;IAED,kBAAkB;IAClB,IAAI,KAAK,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC/B,KAAK,CAAC,IAAI,CAAC,eAAe,KAAK,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC/E,CAAC;IAED,gBAAgB;IAChB,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC3B,KAAK,CAAC,IAAI,CAAC,2BAA2B,K
AAK,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IACpE,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC5B,CAAC;AAED,SAAS,aAAa,CAAC,KAAa,EAAE,QAAgB,EAAE,OAAe,EAAE,UAAkB;IACzF,OAAO,KAAK,KAAK;;iBAEF,QAAQ,CAAC,QAAQ,CAAC,KAAK,QAAQ;;gBAEhC,UAAU;mBACP,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;;EAEvD,OAAO;CACR,CAAC;AACF,CAAC"}
|
package/dist/processors/url.js
CHANGED
|
@@ -23,8 +23,11 @@ async function processWithFirecrawl(url, apiKey) {
|
|
|
23
23
|
throw new Error(`Firecrawl API error: ${response.status}`);
|
|
24
24
|
}
|
|
25
25
|
const data = (await response.json());
|
|
26
|
+
if (!data.data?.markdown) {
|
|
27
|
+
throw new Error(`Firecrawl returned no content for ${url}`);
|
|
28
|
+
}
|
|
26
29
|
return {
|
|
27
|
-
title: data.data.metadata
|
|
30
|
+
title: data.data.metadata?.title ?? new URL(url).hostname,
|
|
28
31
|
content: data.data.markdown,
|
|
29
32
|
url,
|
|
30
33
|
};
|