@staticn0va/wigolo 0.6.3 → 0.6.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/SKILL.md +30 -8
- package/dist/cli/index.d.ts +1 -1
- package/dist/cli/index.d.ts.map +1 -1
- package/dist/cli/index.js +12 -1
- package/dist/cli/index.js.map +1 -1
- package/dist/cli/init.d.ts +2 -0
- package/dist/cli/init.d.ts.map +1 -0
- package/dist/cli/init.js +162 -0
- package/dist/cli/init.js.map +1 -0
- package/dist/cli/setup-mcp.d.ts +2 -0
- package/dist/cli/setup-mcp.d.ts.map +1 -0
- package/dist/cli/setup-mcp.js +116 -0
- package/dist/cli/setup-mcp.js.map +1 -0
- package/dist/cli/status.d.ts +2 -0
- package/dist/cli/status.d.ts.map +1 -0
- package/dist/cli/status.js +32 -0
- package/dist/cli/status.js.map +1 -0
- package/dist/cli/tui/agents-types.d.ts +28 -0
- package/dist/cli/tui/agents-types.d.ts.map +1 -0
- package/dist/cli/tui/agents-types.js +2 -0
- package/dist/cli/tui/agents-types.js.map +1 -0
- package/dist/cli/tui/agents.d.ts +11 -0
- package/dist/cli/tui/agents.d.ts.map +1 -0
- package/dist/cli/tui/agents.js +101 -0
- package/dist/cli/tui/agents.js.map +1 -0
- package/dist/cli/tui/banner.d.ts +3 -0
- package/dist/cli/tui/banner.d.ts.map +1 -0
- package/dist/cli/tui/banner.js +25 -0
- package/dist/cli/tui/banner.js.map +1 -0
- package/dist/cli/tui/config-writer-cli.d.ts +12 -0
- package/dist/cli/tui/config-writer-cli.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-cli.js +33 -0
- package/dist/cli/tui/config-writer-cli.js.map +1 -0
- package/dist/cli/tui/config-writer-json.d.ts +16 -0
- package/dist/cli/tui/config-writer-json.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-json.js +89 -0
- package/dist/cli/tui/config-writer-json.js.map +1 -0
- package/dist/cli/tui/config-writer-toml.d.ts +16 -0
- package/dist/cli/tui/config-writer-toml.d.ts.map +1 -0
- package/dist/cli/tui/config-writer-toml.js +88 -0
- package/dist/cli/tui/config-writer-toml.js.map +1 -0
- package/dist/cli/tui/config-writer.d.ts +25 -0
- package/dist/cli/tui/config-writer.d.ts.map +1 -0
- package/dist/cli/tui/config-writer.js +98 -0
- package/dist/cli/tui/config-writer.js.map +1 -0
- package/dist/cli/tui/detect-helpers.d.ts +6 -0
- package/dist/cli/tui/detect-helpers.d.ts.map +1 -0
- package/dist/cli/tui/detect-helpers.js +44 -0
- package/dist/cli/tui/detect-helpers.js.map +1 -0
- package/dist/cli/tui/flags-types.d.ts +19 -0
- package/dist/cli/tui/flags-types.d.ts.map +1 -0
- package/dist/cli/tui/flags-types.js +19 -0
- package/dist/cli/tui/flags-types.js.map +1 -0
- package/dist/cli/tui/flags.d.ts +5 -0
- package/dist/cli/tui/flags.d.ts.map +1 -0
- package/dist/cli/tui/flags.js +124 -0
- package/dist/cli/tui/flags.js.map +1 -0
- package/dist/cli/tui/format.d.ts +14 -0
- package/dist/cli/tui/format.d.ts.map +1 -0
- package/dist/cli/tui/format.js +28 -0
- package/dist/cli/tui/format.js.map +1 -0
- package/dist/cli/tui/reporter-auto.d.ts +7 -0
- package/dist/cli/tui/reporter-auto.d.ts.map +1 -0
- package/dist/cli/tui/reporter-auto.js +15 -0
- package/dist/cli/tui/reporter-auto.js.map +1 -0
- package/dist/cli/tui/reporter.d.ts +26 -0
- package/dist/cli/tui/reporter.d.ts.map +1 -0
- package/dist/cli/tui/reporter.js +31 -0
- package/dist/cli/tui/reporter.js.map +1 -0
- package/dist/cli/tui/run-command.d.ts +14 -0
- package/dist/cli/tui/run-command.d.ts.map +1 -0
- package/dist/cli/tui/run-command.js +73 -0
- package/dist/cli/tui/run-command.js.map +1 -0
- package/dist/cli/tui/select-agents.d.ts +6 -0
- package/dist/cli/tui/select-agents.d.ts.map +1 -0
- package/dist/cli/tui/select-agents.js +28 -0
- package/dist/cli/tui/select-agents.js.map +1 -0
- package/dist/cli/tui/status-agents.d.ts +11 -0
- package/dist/cli/tui/status-agents.d.ts.map +1 -0
- package/dist/cli/tui/status-agents.js +53 -0
- package/dist/cli/tui/status-agents.js.map +1 -0
- package/dist/cli/tui/status-cache.d.ts +6 -0
- package/dist/cli/tui/status-cache.d.ts.map +1 -0
- package/dist/cli/tui/status-cache.js +39 -0
- package/dist/cli/tui/status-cache.js.map +1 -0
- package/dist/cli/tui/status-format.d.ts +15 -0
- package/dist/cli/tui/status-format.d.ts.map +1 -0
- package/dist/cli/tui/status-format.js +45 -0
- package/dist/cli/tui/status-format.js.map +1 -0
- package/dist/cli/tui/status-python.d.ts +7 -0
- package/dist/cli/tui/status-python.d.ts.map +1 -0
- package/dist/cli/tui/status-python.js +24 -0
- package/dist/cli/tui/status-python.js.map +1 -0
- package/dist/cli/tui/system-check.d.ts +24 -0
- package/dist/cli/tui/system-check.d.ts.map +1 -0
- package/dist/cli/tui/system-check.js +101 -0
- package/dist/cli/tui/system-check.js.map +1 -0
- package/dist/cli/tui/tui-reporter.d.ts +19 -0
- package/dist/cli/tui/tui-reporter.d.ts.map +1 -0
- package/dist/cli/tui/tui-reporter.js +94 -0
- package/dist/cli/tui/tui-reporter.js.map +1 -0
- package/dist/cli/tui/verify-suggestions.d.ts +5 -0
- package/dist/cli/tui/verify-suggestions.d.ts.map +1 -0
- package/dist/cli/tui/verify-suggestions.js +25 -0
- package/dist/cli/tui/verify-suggestions.js.map +1 -0
- package/dist/cli/tui/verify.d.ts +19 -0
- package/dist/cli/tui/verify.d.ts.map +1 -0
- package/dist/cli/tui/verify.js +138 -0
- package/dist/cli/tui/verify.js.map +1 -0
- package/dist/cli/tui/version.d.ts +2 -0
- package/dist/cli/tui/version.d.ts.map +1 -0
- package/dist/cli/tui/version.js +12 -0
- package/dist/cli/tui/version.js.map +1 -0
- package/dist/cli/warmup.d.ts +2 -1
- package/dist/cli/warmup.d.ts.map +1 -1
- package/dist/cli/warmup.js +147 -208
- package/dist/cli/warmup.js.map +1 -1
- package/dist/daemon/http-server.js +1 -1
- package/dist/daemon/http-server.js.map +1 -1
- package/dist/embedding/embed.d.ts +0 -1
- package/dist/embedding/embed.d.ts.map +1 -1
- package/dist/embedding/embed.js +0 -3
- package/dist/embedding/embed.js.map +1 -1
- package/dist/extraction/extract.d.ts.map +1 -1
- package/dist/extraction/extract.js +6 -0
- package/dist/extraction/extract.js.map +1 -1
- package/dist/extraction/markdown.d.ts +2 -0
- package/dist/extraction/markdown.d.ts.map +1 -1
- package/dist/extraction/markdown.js +70 -0
- package/dist/extraction/markdown.js.map +1 -1
- package/dist/extraction/pipeline.d.ts.map +1 -1
- package/dist/extraction/pipeline.js +32 -7
- package/dist/extraction/pipeline.js.map +1 -1
- package/dist/extraction/readability.d.ts +1 -1
- package/dist/extraction/readability.d.ts.map +1 -1
- package/dist/extraction/readability.js +1 -1
- package/dist/extraction/readability.js.map +1 -1
- package/dist/extraction/site-extractors/github.js +1 -1
- package/dist/extraction/site-extractors/github.js.map +1 -1
- package/dist/extraction/site-extractors/mdn.js +1 -1
- package/dist/extraction/site-extractors/mdn.js.map +1 -1
- package/dist/extraction/site-extractors/stackoverflow.js +1 -1
- package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
- package/dist/extraction/structured.d.ts +4 -0
- package/dist/extraction/structured.d.ts.map +1 -0
- package/dist/extraction/structured.js +206 -0
- package/dist/extraction/structured.js.map +1 -0
- package/dist/fetch/lightpanda.js +1 -1
- package/dist/fetch/lightpanda.js.map +1 -1
- package/dist/index.js +18 -0
- package/dist/index.js.map +1 -1
- package/dist/instructions.d.ts +6 -6
- package/dist/instructions.d.ts.map +1 -1
- package/dist/instructions.js +54 -51
- package/dist/instructions.js.map +1 -1
- package/dist/research/brief.d.ts +3 -0
- package/dist/research/brief.d.ts.map +1 -0
- package/dist/research/brief.js +79 -0
- package/dist/research/brief.js.map +1 -0
- package/dist/research/pipeline.d.ts.map +1 -1
- package/dist/research/pipeline.js +8 -0
- package/dist/research/pipeline.js.map +1 -1
- package/dist/research/synthesize.js +1 -1
- package/dist/research/synthesize.js.map +1 -1
- package/dist/search/find-similar.d.ts.map +1 -1
- package/dist/search/find-similar.js +41 -1
- package/dist/search/find-similar.js.map +1 -1
- package/dist/search/highlights.d.ts +10 -0
- package/dist/search/highlights.d.ts.map +1 -0
- package/dist/search/highlights.js +103 -0
- package/dist/search/highlights.js.map +1 -0
- package/dist/searxng/docker.d.ts.map +1 -1
- package/dist/searxng/docker.js +6 -2
- package/dist/searxng/docker.js.map +1 -1
- package/dist/server.d.ts.map +1 -1
- package/dist/server.js +8 -4
- package/dist/server.js.map +1 -1
- package/dist/tools/agent.d.ts +2 -2
- package/dist/tools/agent.d.ts.map +1 -1
- package/dist/tools/agent.js +1 -1
- package/dist/tools/agent.js.map +1 -1
- package/dist/tools/extract.d.ts.map +1 -1
- package/dist/tools/extract.js +19 -1
- package/dist/tools/extract.js.map +1 -1
- package/dist/tools/research.d.ts +1 -1
- package/dist/tools/research.d.ts.map +1 -1
- package/dist/tools/research.js +1 -1
- package/dist/tools/research.js.map +1 -1
- package/dist/tools/search.d.ts.map +1 -1
- package/dist/tools/search.js +55 -27
- package/dist/tools/search.js.map +1 -1
- package/dist/types.d.ts +54 -4
- package/dist/types.d.ts.map +1 -1
- package/package.json +7 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"embed.js","sourceRoot":"","sources":["../../src/embedding/embed.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AACtD,OAAO,EAAE,WAAW,EAAsB,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,gBAAgB,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACzF,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,GAAG,GAAG,YAAY,CAAC,WAAW,CAAC,CAAC;AAEtC,MAAM,OAAO,gBAAgB;IACnB,UAAU,CAAsB;IAChC,KAAK,CAAc;IACnB,SAAS,GAAG,KAAK,CAAC;
|
|
1
|
+
{"version":3,"file":"embed.js","sourceRoot":"","sources":["../../src/embedding/embed.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AACtD,OAAO,EAAE,WAAW,EAAsB,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,gBAAgB,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACzF,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,GAAG,GAAG,YAAY,CAAC,WAAW,CAAC,CAAC;AAEtC,MAAM,OAAO,gBAAgB;IACnB,UAAU,CAAsB;IAChC,KAAK,CAAc;IACnB,SAAS,GAAG,KAAK,CAAC;IAE1B;QACE,IAAI,CAAC,UAAU,GAAG,IAAI,mBAAmB,EAAE,CAAC;QAC5C,IAAI,CAAC,KAAK,GAAG,IAAI,WAAW,EAAE,CAAC;IACjC,CAAC;IAED,KAAK,CAAC,IAAI;QACR,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,gBAAgB,EAAE,CAAC;YAClC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACtB,MAAM,OAAO,GAAG,MAAM;qBACnB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC;qBACtC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;oBACT,GAAG,EAAE,CAAC,CAAC,aAAa;oBACpB,SAAS,EAAE,CAAC,CAAC,SAAS;oBACtB,IAAI,EAAE,CAAC,CAAC,IAAI;iBACb,CAAC,CAAC,CAAC;gBACN,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;gBACnD,GAAG,CAAC,IAAI,CAAC,8BAA8B,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;YAC9D,CAAC;YAED,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QAExB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,KAAK,CAAC,8BAA8B,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAClE,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;QACzB,CAAC;IACH,CAAC;IAED,WAAW;QACT,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED,YAAY,CAAC,KAAc;QACzB,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;IACzB,CAAC;IAED,QAAQ;QACN,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;IAED,KAAK,CAAC,aAAa,CAAC,GAAW,EAAE,QAAgB;QAC/C,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,GAAG,CAAC,KAAK,CAAC,0CAA0C,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;YAC/D,OAAO;QACT,CAAC;QAED,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,UAAU,EAAE,CAAC;YAC/B,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;YAElE,IAAI,CAAC,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;gBACvC,GAAG,CAAC,IAAI,CAAC,0BAA0B,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;gBACrE,OAAO;YACT,CAAC;YAED,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACjD,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YAC1C,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,IAAI,SAAS,EAAE,CAAC,cAAc,CAAC;YACvE,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,IAAI,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC;YAEjE,IAAI,aAAqB,CAAC;YAC1B,IAAI,CAAC;gBACH,aAAa,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;YACpC,CAAC;YAAC,MAAM,CAAC;gBACP,aAAa,GAAG,GAAG,CAAC;YACtB,CAAC;YAED,oBAAoB,CAAC,aAAa,EAAE,MAAM,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC;YACzD,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;YAEtC,GAAG,CAAC,KAAK,CAAC,qBAAqB,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QACjE,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,IAAI,CAAC,sBAAsB,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAChE,CAAC;IACH,CAAC;IAED,UAAU,CAAC,GAAW,EAAE,QAAgB;QACtC,IAAI,CAAC,IAAI,CAAC,SAAS;YAAE,OAAO;QAE5B,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE;YAC5C,GAAG,CAAC,IAAI,CAAC,wBAAwB,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAClE,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,WAAW,CACf,SAAiB,EACjB,IAAY,EACZ,WAAyB;QAEzB,IAAI,CAAC,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,CAAC;YAC/C,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,UAAU,EAAE,CAAC;YAC/B,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;YAEnE,IAAI,CAAC,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;gBACvC,GAAG,CAAC,IAAI,CAAC,wBAAwB,EAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;gBAC9D,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,MAAM,WAAW,GAAG,IAAI,YAAY,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACtD,OAAO,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,WAAW,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC;QAChE,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,IAAI,CAAC,oBAAoB,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YACvD,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAED,QAAQ;QACN,IAAI,CAAC;YACH,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,CAAC;YAC3B,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;YACnB,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;YACvB,GAAG,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;QACzC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,KAAK,CAAC,iCAAiC,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACvE,CAAC;IACH,CAAC;CACF;AAED,IAAI,cAAc,GAA4B,IAAI,CAAC;AAEnD,MAAM,UAAU,mBAAmB;IACjC,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,cAAc,GAAG,IAAI,gBAAgB,EAAE,CAAC;IAC1C,CAAC;IACD,OAAO,cAAc,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,qBAAqB;IACnC,IAAI,cAAc,EAAE,CAAC;QACnB,cAAc,CAAC,QAAQ,EAAE,CAAC;QAC1B,cAAc,GAAG,IAAI,CAAC;IACxB,CAAC;AACH,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"extract.d.ts","sourceRoot":"","sources":["../../src/extraction/extract.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAS3D,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY,
|
|
1
|
+
{"version":3,"file":"extract.d.ts","sourceRoot":"","sources":["../../src/extraction/extract.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAS3D,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY,CAiC1D;AAED,wBAAgB,eAAe,CAC7B,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,OAAO,GAChB,MAAM,GAAG,MAAM,EAAE,CAUnB;AAED,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE,CA6CvD"}
|
|
@@ -26,6 +26,12 @@ export function extractMetadata(html) {
|
|
|
26
26
|
const ogImage = getMetaContent(doc, 'og:image');
|
|
27
27
|
if (ogImage)
|
|
28
28
|
result.og_image = ogImage;
|
|
29
|
+
const ogType = getMetaContent(doc, 'og:type');
|
|
30
|
+
if (ogType)
|
|
31
|
+
result.og_type = ogType;
|
|
32
|
+
const canonical = doc.querySelector('link[rel="canonical"]')?.getAttribute('href');
|
|
33
|
+
if (canonical)
|
|
34
|
+
result.canonical_url = canonical;
|
|
29
35
|
return result;
|
|
30
36
|
}
|
|
31
37
|
export function extractSelector(html, selector, multiple) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"extract.js","sourceRoot":"","sources":["../../src/extraction/extract.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AAGrC,SAAS,cAAc,CAAC,GAAa,EAAE,cAAsB;IAC3D,MAAM,EAAE,GACN,GAAG,CAAC,aAAa,CAAC,cAAc,cAAc,IAAI,CAAC;QACnD,GAAG,CAAC,aAAa,CAAC,kBAAkB,cAAc,IAAI,CAAC,CAAC;IAC1D,OAAO,EAAE,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,SAAS,CAAC;AAClD,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAiB,EAAE,CAAC;IAEhC,MAAM,KAAK,GAAG,GAAG,CAAC,aAAa,CAAC,OAAO,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;IAC9D,IAAI,KAAK;QAAE,MAAM,CAAC,KAAK,GAAG,KAAK,CAAC;IAEhC,MAAM,WAAW,GACf,cAAc,CAAC,GAAG,EAAE,aAAa,CAAC,IAAI,cAAc,CAAC,GAAG,EAAE,gBAAgB,CAAC,CAAC;IAC9E,IAAI,WAAW;QAAE,MAAM,CAAC,WAAW,GAAG,WAAW,CAAC;IAElD,MAAM,MAAM,GAAG,cAAc,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IAC7C,IAAI,MAAM;QAAE,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC;IAEnC,MAAM,IAAI,GACR,cAAc,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,cAAc,CAAC,GAAG,EAAE,wBAAwB,CAAC,CAAC;IAC/E,IAAI,IAAI;QAAE,MAAM,CAAC,IAAI,GAAG,IAAI,CAAC;IAE7B,MAAM,QAAQ,GAAG,cAAc,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;IACjD,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAC7E,CAAC;IAED,MAAM,OAAO,GAAG,cAAc,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;IAChD,IAAI,OAAO;QAAE,MAAM,CAAC,QAAQ,GAAG,OAAO,CAAC;IAEvC,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,IAAY,EACZ,QAAgB,EAChB,QAAiB;IAEjB,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAE1C,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,QAAQ,GAAG,GAAG,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC;QAChD,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACzE,CAAC;IAED,MAAM,EAAE,GAAG,GAAG,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;IACvC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;AACjD,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAG,GAAG,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;IAC7C,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEnC,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE;QACtC,MAAM,OAAO,GAAG,KAAK,CAAC,aAAa,CAAC,SAAS,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,SAAS,CAAC;QAEjF,MAAM,UAAU,GAAG,KAAK,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC;QACtD,IAAI,OAAiB,CAAC;QACtB,IAAI,QAAmB,CAAC;QAExB,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YAC5E,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC,CAAC;YAC1D,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC1B,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;gBACzD,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YACzD,MAAM,QAAQ,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;YAC5B,MAAM,WAAW,GAAG,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAEhF,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3B,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;gBACjE,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC9B,CAAC;iBAAM,CAAC;gBACN,MAAM,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;gBACxE,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBACtE,QAAQ,GAAG,OAAO,CAAC;YACrB,CAAC;QACH,CAAC;QAED,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;YAChC,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YACrD,MAAM,GAAG,GAA2B,EAAE,CAAC;YACvC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC5B,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YACrD,CAAC,CAAC,CAAC;YACH,OAAO,GAAG,CAAC;QACb,CAAC,CAAC,CAAC;QAEH,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;IACpC,CAAC,CAAC,CAAC;AACL,CAAC"}
|
|
1
|
+
{"version":3,"file":"extract.js","sourceRoot":"","sources":["../../src/extraction/extract.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AAGrC,SAAS,cAAc,CAAC,GAAa,EAAE,cAAsB;IAC3D,MAAM,EAAE,GACN,GAAG,CAAC,aAAa,CAAC,cAAc,cAAc,IAAI,CAAC;QACnD,GAAG,CAAC,aAAa,CAAC,kBAAkB,cAAc,IAAI,CAAC,CAAC;IAC1D,OAAO,EAAE,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,SAAS,CAAC;AAClD,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAiB,EAAE,CAAC;IAEhC,MAAM,KAAK,GAAG,GAAG,CAAC,aAAa,CAAC,OAAO,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;IAC9D,IAAI,KAAK;QAAE,MAAM,CAAC,KAAK,GAAG,KAAK,CAAC;IAEhC,MAAM,WAAW,GACf,cAAc,CAAC,GAAG,EAAE,aAAa,CAAC,IAAI,cAAc,CAAC,GAAG,EAAE,gBAAgB,CAAC,CAAC;IAC9E,IAAI,WAAW;QAAE,MAAM,CAAC,WAAW,GAAG,WAAW,CAAC;IAElD,MAAM,MAAM,GAAG,cAAc,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IAC7C,IAAI,MAAM;QAAE,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC;IAEnC,MAAM,IAAI,GACR,cAAc,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,cAAc,CAAC,GAAG,EAAE,wBAAwB,CAAC,CAAC;IAC/E,IAAI,IAAI;QAAE,MAAM,CAAC,IAAI,GAAG,IAAI,CAAC;IAE7B,MAAM,QAAQ,GAAG,cAAc,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;IACjD,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAC7E,CAAC;IAED,MAAM,OAAO,GAAG,cAAc,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;IAChD,IAAI,OAAO;QAAE,MAAM,CAAC,QAAQ,GAAG,OAAO,CAAC;IAEvC,MAAM,MAAM,GAAG,cAAc,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;IAC9C,IAAI,MAAM;QAAE,MAAM,CAAC,OAAO,GAAG,MAAM,CAAC;IAEpC,MAAM,SAAS,GAAG,GAAG,CAAC,aAAa,CAAC,uBAAuB,CAAC,EAAE,YAAY,CAAC,MAAM,CAAC,CAAC;IACnF,IAAI,SAAS;QAAE,MAAM,CAAC,aAAa,GAAG,SAAS,CAAC;IAEhD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,IAAY,EACZ,QAAgB,EAChB,QAAiB;IAEjB,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAE1C,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,QAAQ,GAAG,GAAG,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC;QAChD,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACzE,CAAC;IAED,MAAM,EAAE,GAAG,GAAG,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;IACvC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;AACjD,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAG,GAAG,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;IAC7C,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEnC,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE;QACtC,MAAM,OAAO,GAAG,KAAK,CAAC,aAAa,CAAC,SAAS,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,SAAS,CAAC;QAEjF,MAAM,UAAU,GAAG,KAAK,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC;QACtD,IAAI,OAAiB,CAAC;QACtB,IAAI,QAAmB,CAAC;QAExB,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YAC5E,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC,CAAC;YAC1D,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC1B,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;gBACzD,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YACzD,MAAM,QAAQ,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;YAC5B,MAAM,WAAW,GAAG,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAEhF,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3B,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;gBACjE,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC9B,CAAC;iBAAM,CAAC;gBACN,MAAM,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;gBACxE,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBACtE,QAAQ,GAAG,OAAO,CAAC;YACrB,CAAC;QACH,CAAC;QAED,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;YAChC,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YACrD,MAAM,GAAG,GAA2B,EAAE,CAAC;YACvC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC5B,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YACrD,CAAC,CAAC,CAAC;YACH,OAAO,GAAG,CAAC;QACb,CAAC,CAAC,CAAC;QAEH,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;IACpC,CAAC,CAAC,CAAC;AACL,CAAC"}
|
|
@@ -7,4 +7,6 @@ export declare function extractLinksAndImages(markdown: string): {
|
|
|
7
7
|
links: string[];
|
|
8
8
|
images: string[];
|
|
9
9
|
};
|
|
10
|
+
export declare function filterDecorativeImages(markdown: string): string;
|
|
11
|
+
export declare function resolveRelativeUrls(markdown: string, baseUrl: string): string;
|
|
10
12
|
//# sourceMappingURL=markdown.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAkDA,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAGnD;AAmCD,wBAAgB,cAAc,CAC5B,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,SAAI,GACf;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,OAAO,CAAA;CAAE,CA2BvC;AAED,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,MAAM,GAAG;IAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,CAoB7F"}
|
|
1
|
+
{"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAkDA,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAGnD;AAmCD,wBAAgB,cAAc,CAC5B,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,SAAI,GACf;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,OAAO,CAAA;CAAE,CA2BvC;AAED,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,MAAM,GAAG;IAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,CAoB7F;AAkBD,wBAAgB,sBAAsB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAsB/D;AAID,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAkC7E"}
|
|
@@ -104,4 +104,74 @@ export function extractLinksAndImages(markdown) {
|
|
|
104
104
|
}
|
|
105
105
|
return { links: Array.from(links), images: Array.from(images) };
|
|
106
106
|
}
|
|
107
|
+
const DECORATIVE_URL_MARKERS = [
|
|
108
|
+
'avatar',
|
|
109
|
+
'icon',
|
|
110
|
+
'logo',
|
|
111
|
+
'badge',
|
|
112
|
+
'shield',
|
|
113
|
+
'tracking',
|
|
114
|
+
'pixel',
|
|
115
|
+
'sprite',
|
|
116
|
+
'emoji',
|
|
117
|
+
'favicon',
|
|
118
|
+
];
|
|
119
|
+
// Drop `` tokens that look decorative. Heuristic only -- keep
|
|
120
|
+
// images that have alt text unless the URL clearly marks them decorative.
|
|
121
|
+
// Tracking pixels (tiny data-URI gifs) and empty-alt icons are removed.
|
|
122
|
+
export function filterDecorativeImages(markdown) {
|
|
123
|
+
if (!markdown)
|
|
124
|
+
return markdown;
|
|
125
|
+
return markdown.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (match, alt, src) => {
|
|
126
|
+
const trimmedAlt = alt.trim();
|
|
127
|
+
const lowerSrc = src.toLowerCase();
|
|
128
|
+
// Tiny animated-GIF tracking pixel / 1x1 beacons
|
|
129
|
+
if (lowerSrc.startsWith('data:image/gif;base64,'))
|
|
130
|
+
return '';
|
|
131
|
+
// Inline SVG icon data URIs (short = tiny, likely decorative glyph)
|
|
132
|
+
if (lowerSrc.startsWith('data:image/svg+xml') && src.length < 200)
|
|
133
|
+
return '';
|
|
134
|
+
// URL marks it as decorative regardless of alt
|
|
135
|
+
for (const marker of DECORATIVE_URL_MARKERS) {
|
|
136
|
+
if (lowerSrc.includes(marker))
|
|
137
|
+
return '';
|
|
138
|
+
}
|
|
139
|
+
// No alt text + no title = decorative
|
|
140
|
+
if (!trimmedAlt)
|
|
141
|
+
return '';
|
|
142
|
+
return match;
|
|
143
|
+
});
|
|
144
|
+
}
|
|
145
|
+
// Resolve relative `[text](path)` and `` targets against baseUrl.
|
|
146
|
+
// Leaves absolute URLs, mailto:, tel:, javascript:, and #fragments untouched.
|
|
147
|
+
export function resolveRelativeUrls(markdown, baseUrl) {
|
|
148
|
+
if (!markdown || !baseUrl)
|
|
149
|
+
return markdown;
|
|
150
|
+
const rewrite = (path) => {
|
|
151
|
+
const trimmed = path.trim();
|
|
152
|
+
if (!trimmed)
|
|
153
|
+
return path;
|
|
154
|
+
if (/^(?:https?:|mailto:|tel:|javascript:|data:|#)/i.test(trimmed))
|
|
155
|
+
return path;
|
|
156
|
+
if (trimmed.startsWith('//')) {
|
|
157
|
+
try {
|
|
158
|
+
const base = new URL(baseUrl);
|
|
159
|
+
return `${base.protocol}${trimmed}`;
|
|
160
|
+
}
|
|
161
|
+
catch {
|
|
162
|
+
return path;
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
try {
|
|
166
|
+
return new URL(trimmed, baseUrl).href;
|
|
167
|
+
}
|
|
168
|
+
catch {
|
|
169
|
+
return path;
|
|
170
|
+
}
|
|
171
|
+
};
|
|
172
|
+
// Image links first so the shared link regex does not rewrite them twice.
|
|
173
|
+
let result = markdown.replace(/(!\[[^\]]*\]\()([^)\s]+)(\s*(?:"[^"]*")?\))/g, (_m, open, path, close) => `${open}${rewrite(path)}${close}`);
|
|
174
|
+
result = result.replace(/(^|[^!])(\[[^\]]*\]\()([^)\s]+)(\s*(?:"[^"]*")?\))/g, (_m, pre, open, path, close) => `${pre}${open}${rewrite(path)}${close}`);
|
|
175
|
+
return result;
|
|
176
|
+
}
|
|
107
177
|
//# sourceMappingURL=markdown.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAEvC,SAAS,aAAa;IACpB,MAAM,EAAE,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;IAElF,wCAAwC;IACxC,EAAE,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;IAE/B,iDAAiD;IACjD,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE;QAClB,MAAM,EAAE,OAAO;QACf,WAAW,CAAC,QAAQ,EAAE,IAAI;YACxB,MAAM,EAAE,GAAG,IAAe,CAAC;YAC3B,MAAM,IAAI,GAAc,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YAC9D,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,EAAE,CAAC;YAEjC,MAAM,SAAS,GAAG,CAAC,GAAY,EAAU,EAAE;gBACzC,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;gBACzD,OAAO,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;YACnG,CAAC,CAAC;YAEF,MAAM,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YAC1B,MAAM,WAAW,GAAG,SAAS,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;YAChE,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;YACrE,MAAM,SAAS,GAAG,IAAI,GAAG,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;YAEzE,IAAI,WAAW,EAAE,CAAC;gBAChB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC/B,MAAM,KAAK,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,GAAG,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC;gBAC5E,OAAO,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;YAC5C,CAAC;YAED,MAAM,KAAK,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC;YACjF,OAAO,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;QAC5C,CAAC;KACF,CAAC,CAAC;IAEH,qFAAqF;IACrF,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE;QACtB,MAAM,EAAE,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;QACrD,WAAW,CAAC,OAAO;YACjB,OAAO,OAAO,CAAC;QACjB,CAAC;KACF,CAAC,CAAC;IAEH,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC;AAEjC,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IACrB,OAAO,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAQD,SAAS,aAAa,CAAC,KAAe;IACpC,MAAM,QAAQ,GAAc,EAAE,CAAC;IAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;QACjD,IAAI,KAAK,EAAE,CAAC;YACV,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;QACjF,CAAC;IACH,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAe,EAAE,QAAmB,EAAE,UAAkB;IAClF,MAAM,OAAO,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC;IACrC,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,CAAC;IAEhC,0EAA0E;IAC1E,IAAI,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,KAAK,IAAI,CAAC,GAAG,UAAU,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtD,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YACvC,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAC5B,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC5C,CAAC;AAED,MAAM,UAAU,cAAc,CAC5B,QAAgB,EAChB,OAAe,EACf,YAAY,GAAG,CAAC;IAEhB,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;IAEtC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAExE,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IACpC,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;IAEnD,8BAA8B;IAC9B,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,KAAK,CAAC,CAAC;IAE/E,yDAAyD;IACzD,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,IAAI,YAAY,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC;QAClE,MAAM,EAAE,CAAC,EAAE,GAAG,YAAY,CAAC,YAAY,CAAC,CAAC;QACzC,OAAO,EAAE,OAAO,EAAE,kBAAkB,CAAC,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;IAC5E,CAAC;IAED,4EAA4E;IAC5E,MAAM,gBAAgB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IAEzF,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC,IAAI,YAAY,IAAI,gBAAgB,CAAC,MAAM,EAAE,CAAC;QAC7E,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAC/C,CAAC;IAED,MAAM,EAAE,CAAC,EAAE,GAAG,gBAAgB,CAAC,YAAY,CAAC,CAAC;IAC7C,OAAO,EAAE,OAAO,EAAE,kBAAkB,CAAC,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;AAC5E,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,QAAgB;IACpD,MAAM,YAAY,GAAG,yBAAyB,CAAC;IAC/C,MAAM,WAAW,GAAG,8BAA8B,CAAC;IAEnD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAU,CAAC;IACjC,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhC,IAAI,KAA6B,CAAC;IAElC,uBAAuB;IACvB,OAAO,CAAC,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACtD,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACvB,CAAC;IAED,4BAA4B;IAC5B,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACrD,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;AAClE,CAAC"}
|
|
1
|
+
{"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAEvC,SAAS,aAAa;IACpB,MAAM,EAAE,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;IAElF,wCAAwC;IACxC,EAAE,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;IAE/B,iDAAiD;IACjD,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE;QAClB,MAAM,EAAE,OAAO;QACf,WAAW,CAAC,QAAQ,EAAE,IAAI;YACxB,MAAM,EAAE,GAAG,IAAe,CAAC;YAC3B,MAAM,IAAI,GAAc,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YAC9D,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,EAAE,CAAC;YAEjC,MAAM,SAAS,GAAG,CAAC,GAAY,EAAU,EAAE;gBACzC,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;gBACzD,OAAO,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;YACnG,CAAC,CAAC;YAEF,MAAM,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YAC1B,MAAM,WAAW,GAAG,SAAS,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;YAChE,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;YACrE,MAAM,SAAS,GAAG,IAAI,GAAG,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;YAEzE,IAAI,WAAW,EAAE,CAAC;gBAChB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC/B,MAAM,KAAK,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,GAAG,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC;gBAC5E,OAAO,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;YAC5C,CAAC;YAED,MAAM,KAAK,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC;YACjF,OAAO,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;QAC5C,CAAC;KACF,CAAC,CAAC;IAEH,qFAAqF;IACrF,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE;QACtB,MAAM,EAAE,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;QACrD,WAAW,CAAC,OAAO;YACjB,OAAO,OAAO,CAAC;QACjB,CAAC;KACF,CAAC,CAAC;IAEH,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC;AAEjC,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IACrB,OAAO,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAQD,SAAS,aAAa,CAAC,KAAe;IACpC,MAAM,QAAQ,GAAc,EAAE,CAAC;IAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;QACjD,IAAI,KAAK,EAAE,CAAC;YACV,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;QACjF,CAAC;IACH,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAe,EAAE,QAAmB,EAAE,UAAkB;IAClF,MAAM,OAAO,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC;IACrC,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,CAAC;IAEhC,0EAA0E;IAC1E,IAAI,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,KAAK,IAAI,CAAC,GAAG,UAAU,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtD,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YACvC,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAC5B,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC5C,CAAC;AAED,MAAM,UAAU,cAAc,CAC5B,QAAgB,EAChB,OAAe,EACf,YAAY,GAAG,CAAC;IAEhB,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;IAEtC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAExE,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IACpC,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;IAEnD,8BAA8B;IAC9B,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,KAAK,CAAC,CAAC;IAE/E,yDAAyD;IACzD,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,IAAI,YAAY,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC;QAClE,MAAM,EAAE,CAAC,EAAE,GAAG,YAAY,CAAC,YAAY,CAAC,CAAC;QACzC,OAAO,EAAE,OAAO,EAAE,kBAAkB,CAAC,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;IAC5E,CAAC;IAED,4EAA4E;IAC5E,MAAM,gBAAgB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IAEzF,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC,IAAI,YAAY,IAAI,gBAAgB,CAAC,MAAM,EAAE,CAAC;QAC7E,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAC/C,CAAC;IAED,MAAM,EAAE,CAAC,EAAE,GAAG,gBAAgB,CAAC,YAAY,CAAC,CAAC;IAC7C,OAAO,EAAE,OAAO,EAAE,kBAAkB,CAAC,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;AAC5E,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,QAAgB;IACpD,MAAM,YAAY,GAAG,yBAAyB,CAAC;IAC/C,MAAM,WAAW,GAAG,8BAA8B,CAAC;IAEnD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAU,CAAC;IACjC,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhC,IAAI,KAA6B,CAAC;IAElC,uBAAuB;IACvB,OAAO,CAAC,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACtD,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACvB,CAAC;IAED,4BAA4B;IAC5B,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACrD,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;AAClE,CAAC;AAED,MAAM,sBAAsB,GAAG;IAC7B,QAAQ;IACR,MAAM;IACN,MAAM;IACN,OAAO;IACP,QAAQ;IACR,UAAU;IACV,OAAO;IACP,QAAQ;IACR,OAAO;IACP,SAAS;CACV,CAAC;AAEF,yEAAyE;AACzE,0EAA0E;AAC1E,wEAAwE;AACxE,MAAM,UAAU,sBAAsB,CAAC,QAAgB;IACrD,IAAI,CAAC,QAAQ;QAAE,OAAO,QAAQ,CAAC;IAC/B,OAAO,QAAQ,CAAC,OAAO,CAAC,2BAA2B,EAAE,CAAC,KAAK,EAAE,GAAW,EAAE,GAAW,EAAE,EAAE;QACvF,MAAM,UAAU,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;QAC9B,MAAM,QAAQ,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;QAEnC,iDAAiD;QACjD,IAAI,QAAQ,CAAC,UAAU,CAAC,wBAAwB,CAAC;YAAE,OAAO,EAAE,CAAC;QAE7D,oEAAoE;QACpE,IAAI,QAAQ,CAAC,UAAU,CAAC,oBAAoB,CAAC,IAAI,GAAG,CAAC,MAAM,GAAG,GAAG;YAAE,OAAO,EAAE,CAAC;QAE7E,+CAA+C;QAC/C,KAAK,MAAM,MAAM,IAAI,sBAAsB,EAAE,CAAC;YAC5C,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAAE,OAAO,EAAE,CAAC;QAC3C,CAAC;QAED,sCAAsC;QACtC,IAAI,CAAC,UAAU;YAAE,OAAO,EAAE,CAAC;QAE3B,OAAO,KAAK,CAAC;IACf,CAAC,CAAC,CAAC;AACL,CAAC;AAED,8EAA8E;AAC9E,8EAA8E;AAC9E,MAAM,UAAU,mBAAmB,CAAC,QAAgB,EAAE,OAAe;IACnE,IAAI,CAAC,QAAQ,IAAI,CAAC,OAAO;QAAE,OAAO,QAAQ,CAAC;IAE3C,MAAM,OAAO,GAAG,CAAC,IAAY,EAAU,EAAE;QACvC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAC5B,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAC1B,IAAI,gDAAgD,CAAC,IAAI,CAAC,OAAO,CAAC;YAAE,OAAO,IAAI,CAAC;QAChF,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7B,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;gBAC9B,OAAO,GAAG,IAAI,CAAC,QAAQ,GAAG,OAAO,EAAE,CAAC;YACtC,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;QACD,IAAI,CAAC;YACH,OAAO,IAAI,GAAG,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;QACxC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC,CAAC;IAEF,0EAA0E;IAC1E,IAAI,MAAM,GAAG,QAAQ,CAAC,OAAO,CAC3B,8CAA8C,EAC9C,CAAC,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,EAAE,CAC7D,CAAC;IAEF,MAAM,GAAG,MAAM,CAAC,OAAO,CACrB,qDAAqD,EACrD,CAAC,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,EAAE,CACxE,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/extraction/pipeline.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/extraction/pipeline.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,gBAAgB,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAU/D,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AASD,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,SAAS,GAAG,IAAI,CAE5D;AAED,wBAAsB,cAAc,CAClC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,iBAAsB,GAC9B,OAAO,CAAC,gBAAgB,CAAC,CAmE3B"}
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
import { defuddleExtract } from './defuddle.js';
|
|
2
2
|
import { readabilityExtract } from './readability.js';
|
|
3
3
|
import { trafilaturaExtract, isTrafilaturaAvailable } from './trafilatura.js';
|
|
4
|
-
import { htmlToMarkdown, extractSection, extractLinksAndImages } from './markdown.js';
|
|
4
|
+
import { htmlToMarkdown, extractSection, extractLinksAndImages, filterDecorativeImages, resolveRelativeUrls, } from './markdown.js';
|
|
5
|
+
import { extractMetadata } from './extract.js';
|
|
5
6
|
import { githubExtractor } from './site-extractors/github.js';
|
|
6
7
|
import { stackoverflowExtractor } from './site-extractors/stackoverflow.js';
|
|
7
8
|
import { mdnExtractor } from './site-extractors/mdn.js';
|
|
@@ -40,14 +41,14 @@ export async function extractContent(html, url, options = {}) {
|
|
|
40
41
|
images: [],
|
|
41
42
|
extractor: 'turndown',
|
|
42
43
|
};
|
|
43
|
-
return applyPostProcessing(result, options);
|
|
44
|
+
return applyPostProcessing(result, url, html, options);
|
|
44
45
|
}
|
|
45
46
|
const siteExtractor = siteExtractors.find((e) => e.canHandle(url, html));
|
|
46
47
|
if (siteExtractor) {
|
|
47
48
|
const extracted = siteExtractor.extract(html, url);
|
|
48
49
|
if (extracted) {
|
|
49
50
|
result = extracted;
|
|
50
|
-
return applyPostProcessing(result, options);
|
|
51
|
+
return applyPostProcessing(result, url, html, options);
|
|
51
52
|
}
|
|
52
53
|
}
|
|
53
54
|
result = await defuddleExtract(html, url);
|
|
@@ -59,7 +60,7 @@ export async function extractContent(html, url, options = {}) {
|
|
|
59
60
|
result = await trafilaturaExtract(html, url);
|
|
60
61
|
if (result) {
|
|
61
62
|
log.info('Trafilatura extraction succeeded', { url, chars: result.markdown.length });
|
|
62
|
-
return applyPostProcessing(result, options);
|
|
63
|
+
return applyPostProcessing(result, url, html, options);
|
|
63
64
|
}
|
|
64
65
|
}
|
|
65
66
|
}
|
|
@@ -78,18 +79,42 @@ export async function extractContent(html, url, options = {}) {
|
|
|
78
79
|
extractor: 'turndown',
|
|
79
80
|
};
|
|
80
81
|
}
|
|
81
|
-
return applyPostProcessing(result, options);
|
|
82
|
+
return applyPostProcessing(result, url, html, options);
|
|
82
83
|
}
|
|
83
|
-
function
|
|
84
|
+
function mergeMetadata(base, html) {
|
|
85
|
+
try {
|
|
86
|
+
const meta = extractMetadata(html);
|
|
87
|
+
return {
|
|
88
|
+
...meta,
|
|
89
|
+
// Extractor-provided fields win when set (they already inspected the article body).
|
|
90
|
+
description: base.description || meta.description,
|
|
91
|
+
author: base.author || meta.author,
|
|
92
|
+
date: base.date || meta.date,
|
|
93
|
+
language: base.language,
|
|
94
|
+
og_image: base.og_image ?? meta.og_image,
|
|
95
|
+
og_type: base.og_type ?? meta.og_type,
|
|
96
|
+
canonical_url: base.canonical_url ?? meta.canonical_url,
|
|
97
|
+
keywords: base.keywords ?? meta.keywords,
|
|
98
|
+
};
|
|
99
|
+
}
|
|
100
|
+
catch {
|
|
101
|
+
return base;
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
function applyPostProcessing(result, url, html, options) {
|
|
84
105
|
let markdown = result.markdown;
|
|
106
|
+
// Resolve relative links/images before slicing so downstream consumers get absolute URLs.
|
|
107
|
+
markdown = resolveRelativeUrls(markdown, url);
|
|
108
|
+
markdown = filterDecorativeImages(markdown);
|
|
85
109
|
if (options.section) {
|
|
86
110
|
const { content } = extractSection(markdown, options.section, options.sectionIndex ?? 0);
|
|
87
111
|
markdown = content;
|
|
88
112
|
}
|
|
89
113
|
const { links, images } = extractLinksAndImages(markdown);
|
|
114
|
+
const metadata = mergeMetadata(result.metadata, html);
|
|
90
115
|
if (options.maxChars && markdown.length > options.maxChars) {
|
|
91
116
|
markdown = markdown.slice(0, options.maxChars);
|
|
92
117
|
}
|
|
93
|
-
return { ...result, markdown, links, images };
|
|
118
|
+
return { ...result, markdown, links, images, metadata };
|
|
94
119
|
}
|
|
95
120
|
//# sourceMappingURL=pipeline.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/extraction/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,MAAM,kBAAkB,CAAC;AAC9E,OAAO,
|
|
1
|
+
{"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/extraction/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,MAAM,kBAAkB,CAAC;AAC9E,OAAO,EACL,cAAc,EACd,cAAc,EACd,qBAAqB,EACrB,sBAAsB,EACtB,mBAAmB,GACpB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAE/C,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAC9D,OAAO,EAAE,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AAC5E,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,oBAAoB,EAAE,MAAM,mCAAmC,CAAC;AACzE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAEzC,MAAM,GAAG,GAAG,YAAY,CAAC,SAAS,CAAC,CAAC;AAUpC,MAAM,cAAc,GAAgB;IAClC,eAAe;IACf,sBAAsB;IACtB,YAAY;IACZ,oBAAoB;CACrB,CAAC;AAEF,MAAM,UAAU,iBAAiB,CAAC,SAAoB;IACpD,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;AACjC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,IAAY,EACZ,GAAW,EACX,UAA6B,EAAE;IAE/B,IAAI,MAAM,GAA4B,IAAI,CAAC;IAE3C,IAAI,OAAO,CAAC,WAAW,KAAK,iBAAiB,EAAE,CAAC;QAC9C,IAAI,OAAO,GAAG,EAAE,CAAC;QACjB,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;YACtB,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,CAAC,MAAM,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC;gBACrD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;gBACjD,OAAO,GAAG,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;YAC9B,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,GAAG,CAAC,IAAI,CAAC,kBAAkB,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC5D,CAAC;QACH,CAAC;QACD,MAAM,GAAG;YACP,KAAK,EAAE,EAAE;YACT,QAAQ,EAAE,OAAO;YACjB,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,UAAU;SACtB,CAAC;QACF,OAAO,mBAAmB,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;IACzD,CAAC;IAED,MAAM,aAAa,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC;IACzE,IAAI,aAAa,EAAE,CAAC;QAClB,MAAM,SAAS,GAAG,aAAa,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QACnD,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,GAAG,SAAS,CAAC;YACnB,OAAO,mBAAmB,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;QACzD,CAAC;IACH,CAAC;IAED,MAAM,GAAG,MAAM,eAAe,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IAE1C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;QAC3B,IAAI,MAAM,CAAC,WAAW,KAAK,OAAO,EAAE,CAAC;YACnC,MAAM,aAAa,GAAG,MAAM,sBAAsB,EAAE,CAAC;YACrD,IAAI,aAAa,EAAE,CAAC;gBAClB,MAAM,GAAG,MAAM,kBAAkB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;gBAC7C,IAAI,MAAM,EAAE,CAAC;oBACX,GAAG,CAAC,IAAI,CAAC,kCAAkC,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;oBACrF,OAAO,mBAAmB,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;gBACzD,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,GAAG,kBAAkB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,QAAQ,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;QACtC,MAAM,GAAG;YACP,KAAK,EAAE,EAAE;YACT,QAAQ;YACR,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,UAAU;SACtB,CAAC;IACJ,CAAC;IAED,OAAO,mBAAmB,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;AACzD,CAAC;AAED,SAAS,aAAa,CACpB,IAAkC,EAClC,IAAY;IAEZ,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QACnC,OAAO;YACL,GAAG,IAAI;YACP,oFAAoF;YACpF,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,WAAW;YACjD,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM;YAClC,IAAI,EAAE,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI;YAC5B,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ;YACxC,OAAO,EAAE,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;YACrC,aAAa,EAAE,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,aAAa;YACvD,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ;SACzC,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,SAAS,mBAAmB,CAC1B,MAAwB,EACxB,GAAW,EACX,IAAY,EACZ,OAA0B;IAE1B,IAAI,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;IAE/B,0FAA0F;IAC1F,QAAQ,GAAG,mBAAmB,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAC9C,QAAQ,GAAG,sBAAsB,CAAC,QAAQ,CAAC,CAAC;IAE5C,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,EAAE,OAAO,EAAE,GAAG,cAAc,CAAC,QAAQ,EAAE,OAAO,CAAC,OAAO,EAAE,OAAO,CAAC,YAAY,IAAI,CAAC,CAAC,CAAC;QACzF,QAAQ,GAAG,OAAO,CAAC;IACrB,CAAC;IAED,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAC1D,MAAM,QAAQ,GAAG,aAAa,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;IAEtD,IAAI,OAAO,CAAC,QAAQ,IAAI,QAAQ,CAAC,MAAM,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;QAC3D,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IACjD,CAAC;IAED,OAAO,EAAE,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;AAC1D,CAAC"}
|
|
@@ -1,3 +1,3 @@
|
|
|
1
1
|
import type { ExtractionResult } from '../types.js';
|
|
2
|
-
export declare function readabilityExtract(html: string,
|
|
2
|
+
export declare function readabilityExtract(html: string, _url: string): ExtractionResult | null;
|
|
3
3
|
//# sourceMappingURL=readability.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"readability.d.ts","sourceRoot":"","sources":["../../src/extraction/readability.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAIpD,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,EAAE,
|
|
1
|
+
{"version":3,"file":"readability.d.ts","sourceRoot":"","sources":["../../src/extraction/readability.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAIpD,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,gBAAgB,GAAG,IAAI,CA0BtF"}
|
|
@@ -2,7 +2,7 @@ import { Readability } from '@mozilla/readability';
|
|
|
2
2
|
import { parseHTML } from 'linkedom';
|
|
3
3
|
import TurndownService from 'turndown';
|
|
4
4
|
const MIN_CONTENT_THRESHOLD = 100;
|
|
5
|
-
export function readabilityExtract(html,
|
|
5
|
+
export function readabilityExtract(html, _url) {
|
|
6
6
|
try {
|
|
7
7
|
const { document } = parseHTML(html);
|
|
8
8
|
const reader = new Readability(document);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"readability.js","sourceRoot":"","sources":["../../src/extraction/readability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACnD,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAElC,MAAM,UAAU,kBAAkB,CAAC,IAAY,EAAE,
|
|
1
|
+
{"version":3,"file":"readability.js","sourceRoot":"","sources":["../../src/extraction/readability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACnD,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAElC,MAAM,UAAU,kBAAkB,CAAC,IAAY,EAAE,IAAY;IAC3D,IAAI,CAAC;QACH,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QACrC,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,QAAe,CAAC,CAAC;QAChD,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;QAC/B,IAAI,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE9C,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;QACxF,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAEpD,IAAI,QAAQ,CAAC,MAAM,GAAG,qBAAqB;YAAE,OAAO,IAAI,CAAC;QAEzD,OAAO;YACL,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,EAAE;YAC1B,QAAQ;YACR,QAAQ,EAAE;gBACR,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,SAAS;gBACnC,QAAQ,EAAE,OAAO,CAAC,IAAI,IAAI,SAAS;aACpC;YACD,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,aAAa;SACzB,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
|
|
@@ -7,7 +7,7 @@ function isIssueOrPR(url) {
|
|
|
7
7
|
function isBlob(url) {
|
|
8
8
|
return /\/blob\//.test(url);
|
|
9
9
|
}
|
|
10
|
-
function extractIssue(document,
|
|
10
|
+
function extractIssue(document, _url) {
|
|
11
11
|
const titleEl = document.querySelector('.js-issue-title') ?? document.querySelector('.gh-header-title');
|
|
12
12
|
if (!titleEl)
|
|
13
13
|
return null;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"github.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/github.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAExF,SAAS,WAAW,CAAC,GAAW;IAC9B,OAAO,2BAA2B,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/C,CAAC;AAED,SAAS,MAAM,CAAC,GAAW;IACzB,OAAO,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC9B,CAAC;AAED,SAAS,YAAY,CAAC,QAAkB,EAAE,
|
|
1
|
+
{"version":3,"file":"github.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/github.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAExF,SAAS,WAAW,CAAC,GAAW;IAC9B,OAAO,2BAA2B,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/C,CAAC;AAED,SAAS,MAAM,CAAC,GAAW;IACzB,OAAO,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC9B,CAAC;AAED,SAAS,YAAY,CAAC,QAAkB,EAAE,IAAY;IACpD,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,iBAAiB,CAAC,IAAI,QAAQ,CAAC,aAAa,CAAC,kBAAkB,CAAC,CAAC;IACxG,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAE1B,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAEhD,MAAM,QAAQ,GAAG,QAAQ,CAAC,gBAAgB,CAAC,aAAa,CAAC,CAAC;IAC1D,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC;SAChC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;SACzC,MAAM,CAAC,OAAO,CAAC,CAAC;IAEnB,MAAM,aAAa,GAAG,QAAQ,CAAC,gBAAgB,CAAC,uBAAuB,CAAC,CAAC;IACzE,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAE5C,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,QAAQ,CAAC,IAAI,CAAC,eAAe,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACtD,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;QAC5C,MAAM,IAAI,GAAI,IAAgB,CAAC,SAAS,CAAC;QACzC,MAAM,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;QAC1C,IAAI,EAAE,EAAE,CAAC;YACP,QAAQ,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEvC,OAAO;QACL,KAAK;QACL,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,eAAe;KAC3B,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CAAC,QAAkB;IACvC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,QAAQ,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACpD,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,QAAQ,CAAC;IAEzD,MAAM,UAAU,GACd,QAAQ,CAAC,aAAa,CAAC,wBAAwB,CAAC;QAChD,QAAQ,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAE3C,IAAI,CAAC,UAAU;QAAE,OAAO,IAAI,CAAC;IAE7B,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAE,UAAsB,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;IAC7E,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,OAAO;QACL,KAAK;QACL,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,eAAe;KAC3B,CAAC;AACJ,CAAC;AAED,SAAS,WAAW,CAAC,QAAkB;IACrC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,KAAK,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAEjD,MAAM,SAAS,GACb,QAAQ,CAAC,aAAa,CAAC,oBAAoB,CAAC;QAC5C,QAAQ,CAAC,aAAa,CAAC,YAAY,CAAC;QACpC,QAAQ,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAE3C,IAAI,CAAC,SAAS;QAAE,OAAO,IAAI,CAAC;IAE5B,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAE,SAAqB,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;IAC5E,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,OAAO;QACL,KAAK;QACL,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,eAAe;KAC3B,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,MAAM,eAAe,GAAc;IACxC,IAAI,EAAE,QAAQ;IAEd,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,YAAY,IAAI,QAAQ,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QACvE,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,GAAW;QAC/B,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,IAAI,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC;YACrB,OAAO,YAAY,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QACrC,CAAC;QAED,IAAI,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;YAChB,OAAO,WAAW,CAAC,QAAQ,CAAC,CAAC;QAC/B,CAAC;QAED,OAAO,aAAa,CAAC,QAAQ,CAAC,CAAC;IACjC,CAAC;CACF,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"mdn.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/mdn.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAExF,MAAM,eAAe,GAAG;IACtB,KAAK;IACL,UAAU;IACV,QAAQ;IACR,QAAQ;IACR,UAAU;IACV,WAAW;CACZ,CAAC;AAEF,MAAM,CAAC,MAAM,YAAY,GAAc;IACrC,IAAI,EAAE,KAAK;IAEX,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,uBAAuB,CAAC;QAC9C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,
|
|
1
|
+
{"version":3,"file":"mdn.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/mdn.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAExF,MAAM,eAAe,GAAG;IACtB,KAAK;IACL,UAAU;IACV,QAAQ;IACR,QAAQ;IACR,UAAU;IACV,WAAW;CACZ,CAAC;AAEF,MAAM,CAAC,MAAM,YAAY,GAAc;IACrC,IAAI,EAAE,KAAK;IAEX,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,uBAAuB,CAAC;QAC9C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,IAAY;QAChC,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,MAAM,OAAO,GACX,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC;YACnD,QAAQ,CAAC,aAAa,CAAC,kBAAkB,CAAC;YAC1C,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;QAEpC,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE1B,KAAK,MAAM,QAAQ,IAAI,eAAe,EAAE,CAAC;YACvC,KAAK,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC;gBAChE,EAAE,CAAC,UAAU,EAAE,WAAW,CAAC,EAAE,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;QAED,MAAM,OAAO,GACX,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC;YAC3B,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QAElC,MAAM,QAAQ,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QACpD,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;YAClC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE;YAChC,CAAC,CAAC,QAAQ,CAAC;QAEb,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QAExB,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAE,OAAmB,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;QAC1E,IAAI,CAAC,QAAQ;YAAE,OAAO,IAAI,CAAC;QAE3B,OAAO;YACL,KAAK;YACL,QAAQ;YACR,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,eAAe;SAC3B,CAAC;IACJ,CAAC;CACF,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stackoverflow.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/stackoverflow.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAQxF,SAAS,UAAU,CAAC,EAAkB;IACpC,IAAI,CAAC,EAAE;QAAE,OAAO,CAAC,CAAC;IAClB,MAAM,MAAM,GAAG,EAAE,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAClD,MAAM,GAAG,GAAG,MAAM,EAAE,YAAY,CAAC,YAAY,CAAC,IAAI,MAAM,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,GAAG,CAAC;IACrF,OAAO,QAAQ,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;AAChC,CAAC;AAED,SAAS,YAAY,CAAC,QAAkB;IACtC,MAAM,SAAS,GAAG,QAAQ,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAAC;IAChE,MAAM,OAAO,GAAa,EAAE,CAAC;IAE7B,KAAK,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;QACvC,MAAM,QAAQ,GAAG,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QAC1D,MAAM,KAAK,GAAG,UAAU,CAAC,EAAa,CAAC,CAAC;QACxC,MAAM,MAAM,GAAG,EAAE,CAAC,aAAa,CAAC,qCAAqC,CAAC,CAAC;QACvE,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAE,MAAkB,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7D,OAAO,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC;IAC9C,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,aAAa,CACpB,KAAa,EACb,IAAc,EACd,KAAa,EACb,YAAoB,EACpB,OAAiB;IAEjB,MAAM,OAAO,GAAG,SAAS,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,KAAK,EAAE,CAAC;IAC7D,MAAM,UAAU,GAAG,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,IAAI,EAAE,CAAC;IAE1D,MAAM,QAAQ,GAAa;QACzB,KAAK,KAAK,EAAE;QACZ,OAAO;QACP,EAAE;QACF,UAAU;KACX,CAAC;IAEF,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IACnD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACpF,MAAM,OAAO,GAAG,CAAC,GAAG,QAAQ,EAAE,GAAG,MAAM,CAAC,CAAC;IAEzC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ;YAC7B,CAAC,CAAC,8BAA8B,MAAM,CAAC,KAAK,GAAG;YAC/C,CAAC,CAAC,qBAAqB,MAAM,CAAC,KAAK,GAAG,CAAC;QACzC,MAAM,MAAM,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC;QACzD,QAAQ,CAAC,IAAI,CAAC,KAAK,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,CAAC,CAAC;IAChD,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC/B,CAAC;AAED,MAAM,CAAC,MAAM,sBAAsB,GAAc;IAC/C,IAAI,EAAE,eAAe;IAErB,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,mBAAmB;gBACrC,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC;gBACvC,QAAQ,KAAK,mBAAmB;gBAChC,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC;QAC5C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,
|
|
1
|
+
{"version":3,"file":"stackoverflow.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/stackoverflow.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAQxF,SAAS,UAAU,CAAC,EAAkB;IACpC,IAAI,CAAC,EAAE;QAAE,OAAO,CAAC,CAAC;IAClB,MAAM,MAAM,GAAG,EAAE,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAClD,MAAM,GAAG,GAAG,MAAM,EAAE,YAAY,CAAC,YAAY,CAAC,IAAI,MAAM,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,GAAG,CAAC;IACrF,OAAO,QAAQ,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;AAChC,CAAC;AAED,SAAS,YAAY,CAAC,QAAkB;IACtC,MAAM,SAAS,GAAG,QAAQ,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAAC;IAChE,MAAM,OAAO,GAAa,EAAE,CAAC;IAE7B,KAAK,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;QACvC,MAAM,QAAQ,GAAG,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QAC1D,MAAM,KAAK,GAAG,UAAU,CAAC,EAAa,CAAC,CAAC;QACxC,MAAM,MAAM,GAAG,EAAE,CAAC,aAAa,CAAC,qCAAqC,CAAC,CAAC;QACvE,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAE,MAAkB,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7D,OAAO,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC;IAC9C,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,aAAa,CACpB,KAAa,EACb,IAAc,EACd,KAAa,EACb,YAAoB,EACpB,OAAiB;IAEjB,MAAM,OAAO,GAAG,SAAS,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,KAAK,EAAE,CAAC;IAC7D,MAAM,UAAU,GAAG,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,IAAI,EAAE,CAAC;IAE1D,MAAM,QAAQ,GAAa;QACzB,KAAK,KAAK,EAAE;QACZ,OAAO;QACP,EAAE;QACF,UAAU;KACX,CAAC;IAEF,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IACnD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACpF,MAAM,OAAO,GAAG,CAAC,GAAG,QAAQ,EAAE,GAAG,MAAM,CAAC,CAAC;IAEzC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ;YAC7B,CAAC,CAAC,8BAA8B,MAAM,CAAC,KAAK,GAAG;YAC/C,CAAC,CAAC,qBAAqB,MAAM,CAAC,KAAK,GAAG,CAAC;QACzC,MAAM,MAAM,GAAG,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC;QACzD,QAAQ,CAAC,IAAI,CAAC,KAAK,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,CAAC,CAAC;IAChD,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC/B,CAAC;AAED,MAAM,CAAC,MAAM,sBAAsB,GAAc;IAC/C,IAAI,EAAE,eAAe;IAErB,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,mBAAmB;gBACrC,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC;gBACvC,QAAQ,KAAK,mBAAmB;gBAChC,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC;QAC5C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,IAAY;QAChC,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,qBAAqB,CAAC,CAAC;QAC9D,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE1B,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAChD,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QAExB,MAAM,cAAc,GAAG,QAAQ,CAAC,aAAa,CAAC,mEAAmE,CAAC,CAAC;QACnH,IAAI,CAAC,cAAc;YAAE,OAAO,IAAI,CAAC;QAEjC,MAAM,YAAY,GAAI,cAA0B,CAAC,SAAS,CAAC;QAE3D,MAAM,MAAM,GAAG,QAAQ,CAAC,gBAAgB,CAAC,8DAA8D,CAAC,CAAC;QACzG,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAE1F,MAAM,UAAU,GAAG,QAAQ,CAAC,aAAa,CAAC,WAAW,CAAC,CAAC;QACvD,MAAM,KAAK,GAAG,UAAU,CAAC,UAA4B,CAAC,CAAC;QAEvD,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;QAEvC,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;QAE1E,OAAO;YACL,KAAK;YACL,QAAQ;YACR,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,eAAe;SAC3B,CAAC;IACJ,CAAC;CACF,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"structured.d.ts","sourceRoot":"","sources":["../../src/extraction/structured.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAA2C,MAAM,aAAa,CAAC;AAU3F,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,cAAc,CAgB9D;AAuLD,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC"}
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
import { parseHTML } from 'linkedom';
|
|
2
|
+
import { extractTables } from './extract.js';
|
|
3
|
+
import { extractJsonLd } from './jsonld.js';
|
|
4
|
+
const MAX_VALUE_LEN = 400;
|
|
5
|
+
const MAX_ITEMS_PER_TYPE = 200;
|
|
6
|
+
// Entry point. Collects every structured-data pattern we can surface from
|
|
7
|
+
// raw HTML so host LLMs receive a rich, schema-free brief without needing
|
|
8
|
+
// to re-parse the page.
|
|
9
|
+
export function extractStructured(html) {
|
|
10
|
+
const { document: doc } = parseHTML(html);
|
|
11
|
+
const tables = extractTables(html);
|
|
12
|
+
const jsonld = extractJsonLd(html);
|
|
13
|
+
const definitions = extractDefinitions(doc);
|
|
14
|
+
const chart_hints = extractChartHints(doc);
|
|
15
|
+
const key_value_pairs = extractKeyValuePairs(doc);
|
|
16
|
+
return {
|
|
17
|
+
tables,
|
|
18
|
+
definitions,
|
|
19
|
+
jsonld,
|
|
20
|
+
chart_hints,
|
|
21
|
+
key_value_pairs,
|
|
22
|
+
};
|
|
23
|
+
}
|
|
24
|
+
// <dl><dt>Term</dt><dd>Description</dd></dl> is the canonical key-value
|
|
25
|
+
// structure on the web; we also handle multiple <dd> per <dt> by joining.
|
|
26
|
+
function extractDefinitions(doc) {
|
|
27
|
+
const out = [];
|
|
28
|
+
const dlists = doc.querySelectorAll('dl');
|
|
29
|
+
for (const dl of dlists) {
|
|
30
|
+
const children = Array.from(dl.children);
|
|
31
|
+
let pending = null;
|
|
32
|
+
const buffer = [];
|
|
33
|
+
const flush = () => {
|
|
34
|
+
if (pending !== null && buffer.length > 0) {
|
|
35
|
+
out.push({
|
|
36
|
+
term: pending,
|
|
37
|
+
description: truncate(buffer.join(' ').trim()),
|
|
38
|
+
});
|
|
39
|
+
}
|
|
40
|
+
pending = null;
|
|
41
|
+
buffer.length = 0;
|
|
42
|
+
};
|
|
43
|
+
for (const c of children) {
|
|
44
|
+
const tag = c.tagName.toLowerCase();
|
|
45
|
+
if (tag === 'dt') {
|
|
46
|
+
flush();
|
|
47
|
+
pending = truncate((c.textContent ?? '').trim());
|
|
48
|
+
}
|
|
49
|
+
else if (tag === 'dd' && pending !== null) {
|
|
50
|
+
const text = (c.textContent ?? '').trim();
|
|
51
|
+
if (text)
|
|
52
|
+
buffer.push(text);
|
|
53
|
+
}
|
|
54
|
+
if (out.length >= MAX_ITEMS_PER_TYPE)
|
|
55
|
+
break;
|
|
56
|
+
}
|
|
57
|
+
flush();
|
|
58
|
+
if (out.length >= MAX_ITEMS_PER_TYPE)
|
|
59
|
+
break;
|
|
60
|
+
}
|
|
61
|
+
return out;
|
|
62
|
+
}
|
|
63
|
+
// SVG / figure accessibility hints are the cheapest way to surface
|
|
64
|
+
// chart structure when the chart itself is rendered by JS. Host LLMs
|
|
65
|
+
// use these to describe a data viz without needing the underlying data.
|
|
66
|
+
function extractChartHints(doc) {
|
|
67
|
+
const out = [];
|
|
68
|
+
const seen = new Set();
|
|
69
|
+
for (const svg of doc.querySelectorAll('svg')) {
|
|
70
|
+
const title = (svg.querySelector('title')?.textContent ?? '').trim();
|
|
71
|
+
const aria_label = (svg.getAttribute('aria-label') ?? '').trim();
|
|
72
|
+
const role = (svg.getAttribute('role') ?? '').trim();
|
|
73
|
+
let figcaption;
|
|
74
|
+
const figParent = closestFigure(svg);
|
|
75
|
+
if (figParent) {
|
|
76
|
+
const cap = figParent.querySelector('figcaption');
|
|
77
|
+
figcaption = cap?.textContent?.trim() || undefined;
|
|
78
|
+
}
|
|
79
|
+
if (!title && !aria_label && !figcaption)
|
|
80
|
+
continue;
|
|
81
|
+
const hint = {
|
|
82
|
+
...(title ? { title: truncate(title) } : {}),
|
|
83
|
+
...(aria_label ? { aria_label: truncate(aria_label) } : {}),
|
|
84
|
+
...(figcaption ? { figcaption: truncate(figcaption) } : {}),
|
|
85
|
+
type_hint: inferChartType(title, aria_label, role, figcaption),
|
|
86
|
+
};
|
|
87
|
+
const key = `${hint.title ?? ''}|${hint.aria_label ?? ''}|${hint.figcaption ?? ''}`;
|
|
88
|
+
if (seen.has(key))
|
|
89
|
+
continue;
|
|
90
|
+
seen.add(key);
|
|
91
|
+
out.push(hint);
|
|
92
|
+
if (out.length >= MAX_ITEMS_PER_TYPE)
|
|
93
|
+
break;
|
|
94
|
+
}
|
|
95
|
+
// <figure><figcaption> without SVG still surfaces dataviz context for
|
|
96
|
+
// pages that render charts as images or canvas.
|
|
97
|
+
for (const fig of doc.querySelectorAll('figure')) {
|
|
98
|
+
if (fig.querySelector('svg'))
|
|
99
|
+
continue; // already handled above
|
|
100
|
+
const cap = fig.querySelector('figcaption')?.textContent?.trim();
|
|
101
|
+
if (!cap)
|
|
102
|
+
continue;
|
|
103
|
+
const key = `||${cap}`;
|
|
104
|
+
if (seen.has(key))
|
|
105
|
+
continue;
|
|
106
|
+
seen.add(key);
|
|
107
|
+
out.push({
|
|
108
|
+
figcaption: truncate(cap),
|
|
109
|
+
type_hint: inferChartType('', '', '', cap),
|
|
110
|
+
});
|
|
111
|
+
if (out.length >= MAX_ITEMS_PER_TYPE)
|
|
112
|
+
break;
|
|
113
|
+
}
|
|
114
|
+
return out;
|
|
115
|
+
}
|
|
116
|
+
function closestFigure(el) {
|
|
117
|
+
let cur = el.parentElement;
|
|
118
|
+
while (cur) {
|
|
119
|
+
if (cur.tagName.toLowerCase() === 'figure')
|
|
120
|
+
return cur;
|
|
121
|
+
cur = cur.parentElement;
|
|
122
|
+
}
|
|
123
|
+
return null;
|
|
124
|
+
}
|
|
125
|
+
function inferChartType(title, ariaLabel, role, figcaption) {
|
|
126
|
+
const all = [title, ariaLabel, role, figcaption ?? ''].join(' ').toLowerCase();
|
|
127
|
+
if (/\b(chart|bar|line|pie|donut|scatter|area|histogram)\b/.test(all))
|
|
128
|
+
return 'chart';
|
|
129
|
+
if (/\b(diagram|flow|architecture|topology)\b/.test(all))
|
|
130
|
+
return 'diagram';
|
|
131
|
+
if (/\b(graph|network|tree)\b/.test(all))
|
|
132
|
+
return 'graph';
|
|
133
|
+
if (role === 'img')
|
|
134
|
+
return 'chart';
|
|
135
|
+
return undefined;
|
|
136
|
+
}
|
|
137
|
+
// Comparison grids, spec sheets, and product info boxes often use
|
|
138
|
+
// explicit "Label: Value" or [data-label] patterns. We harvest those
|
|
139
|
+
// plus <meta name=...> pairs not already covered by extractMetadata.
|
|
140
|
+
function extractKeyValuePairs(doc) {
|
|
141
|
+
const out = [];
|
|
142
|
+
const seen = new Set();
|
|
143
|
+
// Microdata itemprop pairs
|
|
144
|
+
for (const el of doc.querySelectorAll('[itemprop]')) {
|
|
145
|
+
const key = el.getAttribute('itemprop') ?? '';
|
|
146
|
+
const value = (el.getAttribute('content') ?? el.textContent ?? '').trim();
|
|
147
|
+
if (!key || !value)
|
|
148
|
+
continue;
|
|
149
|
+
pushUnique(out, seen, { key, value: truncate(value), source: 'microdata' });
|
|
150
|
+
if (out.length >= MAX_ITEMS_PER_TYPE)
|
|
151
|
+
return out;
|
|
152
|
+
}
|
|
153
|
+
// data-* attributes where the name looks meaningful (>= 3 chars after data-)
|
|
154
|
+
for (const el of doc.querySelectorAll('[data-label][data-value], [data-key][data-value]')) {
|
|
155
|
+
const key = (el.getAttribute('data-label') ?? el.getAttribute('data-key') ?? '').trim();
|
|
156
|
+
const value = (el.getAttribute('data-value') ?? '').trim();
|
|
157
|
+
if (!key || !value)
|
|
158
|
+
continue;
|
|
159
|
+
pushUnique(out, seen, { key, value: truncate(value), source: 'data-attr' });
|
|
160
|
+
if (out.length >= MAX_ITEMS_PER_TYPE)
|
|
161
|
+
return out;
|
|
162
|
+
}
|
|
163
|
+
// Comparison grid rows: <div class="row"><div class="label">X</div><div class="value">Y</div></div>
|
|
164
|
+
for (const row of doc.querySelectorAll('[class*="row"], [class*="spec"], [class*="field"]')) {
|
|
165
|
+
const label = row.querySelector('[class*="label"], [class*="name"], [class*="key"], dt, th');
|
|
166
|
+
const value = row.querySelector('[class*="value"], [class*="data"], dd, td');
|
|
167
|
+
if (!label || !value)
|
|
168
|
+
continue;
|
|
169
|
+
const k = (label.textContent ?? '').trim();
|
|
170
|
+
const v = (value.textContent ?? '').trim();
|
|
171
|
+
if (!k || !v || k === v)
|
|
172
|
+
continue;
|
|
173
|
+
if (k.length > 100 || v.length === 0)
|
|
174
|
+
continue;
|
|
175
|
+
pushUnique(out, seen, { key: k, value: truncate(v), source: 'comparison-grid' });
|
|
176
|
+
if (out.length >= MAX_ITEMS_PER_TYPE)
|
|
177
|
+
return out;
|
|
178
|
+
}
|
|
179
|
+
// "Key: Value" text patterns within <li>/<p> — cheap heuristic for spec sheets
|
|
180
|
+
for (const el of doc.querySelectorAll('li, p')) {
|
|
181
|
+
const text = (el.textContent ?? '').trim();
|
|
182
|
+
if (!text || text.length > 300)
|
|
183
|
+
continue;
|
|
184
|
+
const m = text.match(/^([A-Z][A-Za-z0-9 _-]{1,40}):\s+(.+)$/);
|
|
185
|
+
if (!m)
|
|
186
|
+
continue;
|
|
187
|
+
pushUnique(out, seen, { key: m[1].trim(), value: truncate(m[2].trim()), source: 'text-pattern' });
|
|
188
|
+
if (out.length >= MAX_ITEMS_PER_TYPE)
|
|
189
|
+
return out;
|
|
190
|
+
}
|
|
191
|
+
return out;
|
|
192
|
+
}
|
|
193
|
+
function pushUnique(list, seen, pair) {
|
|
194
|
+
const key = `${pair.key.toLowerCase()}|${pair.value.toLowerCase()}`;
|
|
195
|
+
if (seen.has(key))
|
|
196
|
+
return;
|
|
197
|
+
seen.add(key);
|
|
198
|
+
list.push(pair);
|
|
199
|
+
}
|
|
200
|
+
function truncate(text) {
|
|
201
|
+
const collapsed = text.replace(/\s+/g, ' ').trim();
|
|
202
|
+
if (collapsed.length <= MAX_VALUE_LEN)
|
|
203
|
+
return collapsed;
|
|
204
|
+
return collapsed.slice(0, MAX_VALUE_LEN - 1) + '…';
|
|
205
|
+
}
|
|
206
|
+
//# sourceMappingURL=structured.js.map
|