@staticn0va/wigolo 0.6.3 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. package/SKILL.md +30 -8
  2. package/dist/cli/index.d.ts +1 -1
  3. package/dist/cli/index.d.ts.map +1 -1
  4. package/dist/cli/index.js +12 -1
  5. package/dist/cli/index.js.map +1 -1
  6. package/dist/cli/init.d.ts +2 -0
  7. package/dist/cli/init.d.ts.map +1 -0
  8. package/dist/cli/init.js +162 -0
  9. package/dist/cli/init.js.map +1 -0
  10. package/dist/cli/setup-mcp.d.ts +2 -0
  11. package/dist/cli/setup-mcp.d.ts.map +1 -0
  12. package/dist/cli/setup-mcp.js +116 -0
  13. package/dist/cli/setup-mcp.js.map +1 -0
  14. package/dist/cli/status.d.ts +2 -0
  15. package/dist/cli/status.d.ts.map +1 -0
  16. package/dist/cli/status.js +32 -0
  17. package/dist/cli/status.js.map +1 -0
  18. package/dist/cli/tui/agents-types.d.ts +28 -0
  19. package/dist/cli/tui/agents-types.d.ts.map +1 -0
  20. package/dist/cli/tui/agents-types.js +2 -0
  21. package/dist/cli/tui/agents-types.js.map +1 -0
  22. package/dist/cli/tui/agents.d.ts +11 -0
  23. package/dist/cli/tui/agents.d.ts.map +1 -0
  24. package/dist/cli/tui/agents.js +101 -0
  25. package/dist/cli/tui/agents.js.map +1 -0
  26. package/dist/cli/tui/banner.d.ts +3 -0
  27. package/dist/cli/tui/banner.d.ts.map +1 -0
  28. package/dist/cli/tui/banner.js +25 -0
  29. package/dist/cli/tui/banner.js.map +1 -0
  30. package/dist/cli/tui/config-writer-cli.d.ts +12 -0
  31. package/dist/cli/tui/config-writer-cli.d.ts.map +1 -0
  32. package/dist/cli/tui/config-writer-cli.js +33 -0
  33. package/dist/cli/tui/config-writer-cli.js.map +1 -0
  34. package/dist/cli/tui/config-writer-json.d.ts +16 -0
  35. package/dist/cli/tui/config-writer-json.d.ts.map +1 -0
  36. package/dist/cli/tui/config-writer-json.js +89 -0
  37. package/dist/cli/tui/config-writer-json.js.map +1 -0
  38. package/dist/cli/tui/config-writer-toml.d.ts +16 -0
  39. package/dist/cli/tui/config-writer-toml.d.ts.map +1 -0
  40. package/dist/cli/tui/config-writer-toml.js +88 -0
  41. package/dist/cli/tui/config-writer-toml.js.map +1 -0
  42. package/dist/cli/tui/config-writer.d.ts +25 -0
  43. package/dist/cli/tui/config-writer.d.ts.map +1 -0
  44. package/dist/cli/tui/config-writer.js +98 -0
  45. package/dist/cli/tui/config-writer.js.map +1 -0
  46. package/dist/cli/tui/detect-helpers.d.ts +6 -0
  47. package/dist/cli/tui/detect-helpers.d.ts.map +1 -0
  48. package/dist/cli/tui/detect-helpers.js +44 -0
  49. package/dist/cli/tui/detect-helpers.js.map +1 -0
  50. package/dist/cli/tui/flags-types.d.ts +19 -0
  51. package/dist/cli/tui/flags-types.d.ts.map +1 -0
  52. package/dist/cli/tui/flags-types.js +19 -0
  53. package/dist/cli/tui/flags-types.js.map +1 -0
  54. package/dist/cli/tui/flags.d.ts +5 -0
  55. package/dist/cli/tui/flags.d.ts.map +1 -0
  56. package/dist/cli/tui/flags.js +124 -0
  57. package/dist/cli/tui/flags.js.map +1 -0
  58. package/dist/cli/tui/format.d.ts +14 -0
  59. package/dist/cli/tui/format.d.ts.map +1 -0
  60. package/dist/cli/tui/format.js +28 -0
  61. package/dist/cli/tui/format.js.map +1 -0
  62. package/dist/cli/tui/reporter-auto.d.ts +7 -0
  63. package/dist/cli/tui/reporter-auto.d.ts.map +1 -0
  64. package/dist/cli/tui/reporter-auto.js +15 -0
  65. package/dist/cli/tui/reporter-auto.js.map +1 -0
  66. package/dist/cli/tui/reporter.d.ts +26 -0
  67. package/dist/cli/tui/reporter.d.ts.map +1 -0
  68. package/dist/cli/tui/reporter.js +31 -0
  69. package/dist/cli/tui/reporter.js.map +1 -0
  70. package/dist/cli/tui/run-command.d.ts +14 -0
  71. package/dist/cli/tui/run-command.d.ts.map +1 -0
  72. package/dist/cli/tui/run-command.js +73 -0
  73. package/dist/cli/tui/run-command.js.map +1 -0
  74. package/dist/cli/tui/select-agents.d.ts +6 -0
  75. package/dist/cli/tui/select-agents.d.ts.map +1 -0
  76. package/dist/cli/tui/select-agents.js +28 -0
  77. package/dist/cli/tui/select-agents.js.map +1 -0
  78. package/dist/cli/tui/status-agents.d.ts +11 -0
  79. package/dist/cli/tui/status-agents.d.ts.map +1 -0
  80. package/dist/cli/tui/status-agents.js +53 -0
  81. package/dist/cli/tui/status-agents.js.map +1 -0
  82. package/dist/cli/tui/status-cache.d.ts +6 -0
  83. package/dist/cli/tui/status-cache.d.ts.map +1 -0
  84. package/dist/cli/tui/status-cache.js +39 -0
  85. package/dist/cli/tui/status-cache.js.map +1 -0
  86. package/dist/cli/tui/status-format.d.ts +15 -0
  87. package/dist/cli/tui/status-format.d.ts.map +1 -0
  88. package/dist/cli/tui/status-format.js +45 -0
  89. package/dist/cli/tui/status-format.js.map +1 -0
  90. package/dist/cli/tui/status-python.d.ts +7 -0
  91. package/dist/cli/tui/status-python.d.ts.map +1 -0
  92. package/dist/cli/tui/status-python.js +24 -0
  93. package/dist/cli/tui/status-python.js.map +1 -0
  94. package/dist/cli/tui/system-check.d.ts +24 -0
  95. package/dist/cli/tui/system-check.d.ts.map +1 -0
  96. package/dist/cli/tui/system-check.js +101 -0
  97. package/dist/cli/tui/system-check.js.map +1 -0
  98. package/dist/cli/tui/tui-reporter.d.ts +19 -0
  99. package/dist/cli/tui/tui-reporter.d.ts.map +1 -0
  100. package/dist/cli/tui/tui-reporter.js +94 -0
  101. package/dist/cli/tui/tui-reporter.js.map +1 -0
  102. package/dist/cli/tui/verify-suggestions.d.ts +5 -0
  103. package/dist/cli/tui/verify-suggestions.d.ts.map +1 -0
  104. package/dist/cli/tui/verify-suggestions.js +25 -0
  105. package/dist/cli/tui/verify-suggestions.js.map +1 -0
  106. package/dist/cli/tui/verify.d.ts +19 -0
  107. package/dist/cli/tui/verify.d.ts.map +1 -0
  108. package/dist/cli/tui/verify.js +138 -0
  109. package/dist/cli/tui/verify.js.map +1 -0
  110. package/dist/cli/tui/version.d.ts +2 -0
  111. package/dist/cli/tui/version.d.ts.map +1 -0
  112. package/dist/cli/tui/version.js +12 -0
  113. package/dist/cli/tui/version.js.map +1 -0
  114. package/dist/cli/warmup.d.ts +2 -1
  115. package/dist/cli/warmup.d.ts.map +1 -1
  116. package/dist/cli/warmup.js +147 -208
  117. package/dist/cli/warmup.js.map +1 -1
  118. package/dist/daemon/http-server.js +1 -1
  119. package/dist/daemon/http-server.js.map +1 -1
  120. package/dist/embedding/embed.d.ts +0 -1
  121. package/dist/embedding/embed.d.ts.map +1 -1
  122. package/dist/embedding/embed.js +0 -3
  123. package/dist/embedding/embed.js.map +1 -1
  124. package/dist/extraction/extract.d.ts.map +1 -1
  125. package/dist/extraction/extract.js +6 -0
  126. package/dist/extraction/extract.js.map +1 -1
  127. package/dist/extraction/markdown.d.ts +2 -0
  128. package/dist/extraction/markdown.d.ts.map +1 -1
  129. package/dist/extraction/markdown.js +70 -0
  130. package/dist/extraction/markdown.js.map +1 -1
  131. package/dist/extraction/pipeline.d.ts.map +1 -1
  132. package/dist/extraction/pipeline.js +32 -7
  133. package/dist/extraction/pipeline.js.map +1 -1
  134. package/dist/extraction/readability.d.ts +1 -1
  135. package/dist/extraction/readability.d.ts.map +1 -1
  136. package/dist/extraction/readability.js +1 -1
  137. package/dist/extraction/readability.js.map +1 -1
  138. package/dist/extraction/site-extractors/github.js +1 -1
  139. package/dist/extraction/site-extractors/github.js.map +1 -1
  140. package/dist/extraction/site-extractors/mdn.js +1 -1
  141. package/dist/extraction/site-extractors/mdn.js.map +1 -1
  142. package/dist/extraction/site-extractors/stackoverflow.js +1 -1
  143. package/dist/extraction/site-extractors/stackoverflow.js.map +1 -1
  144. package/dist/extraction/structured.d.ts +4 -0
  145. package/dist/extraction/structured.d.ts.map +1 -0
  146. package/dist/extraction/structured.js +206 -0
  147. package/dist/extraction/structured.js.map +1 -0
  148. package/dist/fetch/lightpanda.js +1 -1
  149. package/dist/fetch/lightpanda.js.map +1 -1
  150. package/dist/index.js +18 -0
  151. package/dist/index.js.map +1 -1
  152. package/dist/instructions.d.ts +6 -6
  153. package/dist/instructions.d.ts.map +1 -1
  154. package/dist/instructions.js +54 -51
  155. package/dist/instructions.js.map +1 -1
  156. package/dist/research/brief.d.ts +3 -0
  157. package/dist/research/brief.d.ts.map +1 -0
  158. package/dist/research/brief.js +79 -0
  159. package/dist/research/brief.js.map +1 -0
  160. package/dist/research/pipeline.d.ts.map +1 -1
  161. package/dist/research/pipeline.js +8 -0
  162. package/dist/research/pipeline.js.map +1 -1
  163. package/dist/research/synthesize.js +1 -1
  164. package/dist/research/synthesize.js.map +1 -1
  165. package/dist/search/find-similar.d.ts.map +1 -1
  166. package/dist/search/find-similar.js +41 -1
  167. package/dist/search/find-similar.js.map +1 -1
  168. package/dist/search/highlights.d.ts +10 -0
  169. package/dist/search/highlights.d.ts.map +1 -0
  170. package/dist/search/highlights.js +103 -0
  171. package/dist/search/highlights.js.map +1 -0
  172. package/dist/searxng/docker.d.ts.map +1 -1
  173. package/dist/searxng/docker.js +6 -2
  174. package/dist/searxng/docker.js.map +1 -1
  175. package/dist/server.d.ts.map +1 -1
  176. package/dist/server.js +8 -4
  177. package/dist/server.js.map +1 -1
  178. package/dist/tools/agent.d.ts +2 -2
  179. package/dist/tools/agent.d.ts.map +1 -1
  180. package/dist/tools/agent.js +1 -1
  181. package/dist/tools/agent.js.map +1 -1
  182. package/dist/tools/extract.d.ts.map +1 -1
  183. package/dist/tools/extract.js +19 -1
  184. package/dist/tools/extract.js.map +1 -1
  185. package/dist/tools/research.d.ts +1 -1
  186. package/dist/tools/research.d.ts.map +1 -1
  187. package/dist/tools/research.js +1 -1
  188. package/dist/tools/research.js.map +1 -1
  189. package/dist/tools/search.d.ts.map +1 -1
  190. package/dist/tools/search.js +55 -27
  191. package/dist/tools/search.js.map +1 -1
  192. package/dist/types.d.ts +54 -4
  193. package/dist/types.d.ts.map +1 -1
  194. package/package.json +7 -1
@@ -1 +1 @@
1
- {"version":3,"file":"embed.js","sourceRoot":"","sources":["../../src/embedding/embed.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AACtD,OAAO,EAAE,WAAW,EAAsB,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,gBAAgB,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACzF,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,GAAG,GAAG,YAAY,CAAC,WAAW,CAAC,CAAC;AAEtC,MAAM,OAAO,gBAAgB;IACnB,UAAU,CAAsB;IAChC,KAAK,CAAc;IACnB,SAAS,GAAG,KAAK,CAAC;IAClB,WAAW,GAAG,KAAK,CAAC;IAE5B;QACE,IAAI,CAAC,UAAU,GAAG,IAAI,mBAAmB,EAAE,CAAC;QAC5C,IAAI,CAAC,KAAK,GAAG,IAAI,WAAW,EAAE,CAAC;IACjC,CAAC;IAED,KAAK,CAAC,IAAI;QACR,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,gBAAgB,EAAE,CAAC;YAClC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACtB,MAAM,OAAO,GAAG,MAAM;qBACnB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC;qBACtC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;oBACT,GAAG,EAAE,CAAC,CAAC,aAAa;oBACpB,SAAS,EAAE,CAAC,CAAC,SAAS;oBACtB,IAAI,EAAE,CAAC,CAAC,IAAI;iBACb,CAAC,CAAC,CAAC;gBACN,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;gBACnD,GAAG,CAAC,IAAI,CAAC,8BAA8B,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;YAC9D,CAAC;YAED,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;YACtB,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QAC1B,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,KAAK,CAAC,8BAA8B,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAClE,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;QACzB,CAAC;IACH,CAAC;IAED,WAAW;QACT,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED,YAAY,CAAC,KAAc;QACzB,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;IACzB,CAAC;IAED,QAAQ;QACN,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;IAED,KAAK,CAAC,aAAa,CAAC,GAAW,EAAE,QAAgB;QAC/C,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,GAAG,CAAC,KAAK,CAAC,0CAA0C,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;YAC/D,OAAO;QACT,CAAC;QAED,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,UAAU,EAAE,CAAC;YAC/B,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;YAElE,IAAI,CAAC,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;gBACvC,GAAG,CAAC,IAAI,CA
AC,0BAA0B,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;gBACrE,OAAO;YACT,CAAC;YAED,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACjD,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YAC1C,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,IAAI,SAAS,EAAE,CAAC,cAAc,CAAC;YACvE,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,IAAI,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC;YAEjE,IAAI,aAAqB,CAAC;YAC1B,IAAI,CAAC;gBACH,aAAa,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;YACpC,CAAC;YAAC,MAAM,CAAC;gBACP,aAAa,GAAG,GAAG,CAAC;YACtB,CAAC;YAED,oBAAoB,CAAC,aAAa,EAAE,MAAM,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC;YACzD,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;YAEtC,GAAG,CAAC,KAAK,CAAC,qBAAqB,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QACjE,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,IAAI,CAAC,sBAAsB,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAChE,CAAC;IACH,CAAC;IAED,UAAU,CAAC,GAAW,EAAE,QAAgB;QACtC,IAAI,CAAC,IAAI,CAAC,SAAS;YAAE,OAAO;QAE5B,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE;YAC5C,GAAG,CAAC,IAAI,CAAC,wBAAwB,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAClE,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,WAAW,CACf,SAAiB,EACjB,IAAY,EACZ,WAAyB;QAEzB,IAAI,CAAC,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,CAAC;YAC/C,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,UAAU,EAAE,CAAC;YAC/B,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;YAEnE,IAAI,CAAC,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;gBACvC,GAAG,CAAC,IAAI,CAAC,wBAAwB,EAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;gBAC9D,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,MAAM,WAAW,GAAG,IAAI,YAAY,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACtD,OAAO,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,WAAW,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC;QAChE,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,IAAI,CAAC,oBAAoB,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YACvD,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAED,QAAQ;QACN,IAAI,CAAC;YACH,IAAI,CAAC,UAAU,CAAC,QAAQ,EAA
E,CAAC;YAC3B,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;YACnB,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;YACvB,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;YACzB,GAAG,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;QACzC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,KAAK,CAAC,iCAAiC,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACvE,CAAC;IACH,CAAC;CACF;AAED,IAAI,cAAc,GAA4B,IAAI,CAAC;AAEnD,MAAM,UAAU,mBAAmB;IACjC,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,cAAc,GAAG,IAAI,gBAAgB,EAAE,CAAC;IAC1C,CAAC;IACD,OAAO,cAAc,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,qBAAqB;IACnC,IAAI,cAAc,EAAE,CAAC;QACnB,cAAc,CAAC,QAAQ,EAAE,CAAC;QAC1B,cAAc,GAAG,IAAI,CAAC;IACxB,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"embed.js","sourceRoot":"","sources":["../../src/embedding/embed.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AACzC,OAAO,EAAE,mBAAmB,EAAE,MAAM,iBAAiB,CAAC;AACtD,OAAO,EAAE,WAAW,EAAsB,MAAM,mBAAmB,CAAC;AACpE,OAAO,EAAE,oBAAoB,EAAE,gBAAgB,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC;AACzF,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,GAAG,GAAG,YAAY,CAAC,WAAW,CAAC,CAAC;AAEtC,MAAM,OAAO,gBAAgB;IACnB,UAAU,CAAsB;IAChC,KAAK,CAAc;IACnB,SAAS,GAAG,KAAK,CAAC;IAE1B;QACE,IAAI,CAAC,UAAU,GAAG,IAAI,mBAAmB,EAAE,CAAC;QAC5C,IAAI,CAAC,KAAK,GAAG,IAAI,WAAW,EAAE,CAAC;IACjC,CAAC;IAED,KAAK,CAAC,IAAI;QACR,IAAI,CAAC;YACH,MAAM,MAAM,GAAG,gBAAgB,EAAE,CAAC;YAClC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACtB,MAAM,OAAO,GAAG,MAAM;qBACnB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,SAAS,IAAI,CAAC,CAAC,IAAI,GAAG,CAAC,CAAC;qBACtC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;oBACT,GAAG,EAAE,CAAC,CAAC,aAAa;oBACpB,SAAS,EAAE,CAAC,CAAC,SAAS;oBACtB,IAAI,EAAE,CAAC,CAAC,IAAI;iBACb,CAAC,CAAC,CAAC;gBACN,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;gBACnD,GAAG,CAAC,IAAI,CAAC,8BAA8B,EAAE,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC,CAAC;YAC9D,CAAC;YAED,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QAExB,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,KAAK,CAAC,8BAA8B,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAClE,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;QACzB,CAAC;IACH,CAAC;IAED,WAAW;QACT,OAAO,IAAI,CAAC,SAAS,CAAC;IACxB,CAAC;IAED,YAAY,CAAC,KAAc;QACzB,IAAI,CAAC,SAAS,GAAG,KAAK,CAAC;IACzB,CAAC;IAED,QAAQ;QACN,OAAO,IAAI,CAAC,KAAK,CAAC;IACpB,CAAC;IAED,KAAK,CAAC,aAAa,CAAC,GAAW,EAAE,QAAgB;QAC/C,IAAI,CAAC,IAAI,CAAC,SAAS,EAAE,CAAC;YACpB,GAAG,CAAC,KAAK,CAAC,0CAA0C,EAAE,EAAE,GAAG,EAAE,CAAC,CAAC;YAC/D,OAAO;QACT,CAAC;QAED,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,UAAU,EAAE,CAAC;YAC/B,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,SAAS,EAAE,QAAQ,CAAC,CAAC;YAElE,IAAI,CAAC,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;gBACvC,GAAG,CAAC,IAAI,CAAC,0BAA0B,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,QAAQ,CAAC,KAAK,EAAE,CA
AC,CAAC;gBACrE,OAAO;YACT,CAAC;YAED,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACjD,MAAM,MAAM,GAAG,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;YAC1C,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,IAAI,SAAS,EAAE,CAAC,cAAc,CAAC;YACvE,MAAM,IAAI,GAAG,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,IAAI,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC;YAEjE,IAAI,aAAqB,CAAC;YAC1B,IAAI,CAAC;gBACH,aAAa,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;YACpC,CAAC;YAAC,MAAM,CAAC;gBACP,aAAa,GAAG,GAAG,CAAC;YACtB,CAAC;YAED,oBAAoB,CAAC,aAAa,EAAE,MAAM,EAAE,KAAK,EAAE,IAAI,CAAC,CAAC;YACzD,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,aAAa,EAAE,MAAM,CAAC,CAAC;YAEtC,GAAG,CAAC,KAAK,CAAC,qBAAqB,EAAE,EAAE,GAAG,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC,CAAC;QACjE,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,IAAI,CAAC,sBAAsB,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAChE,CAAC;IACH,CAAC;IAED,UAAU,CAAC,GAAW,EAAE,QAAgB;QACtC,IAAI,CAAC,IAAI,CAAC,SAAS;YAAE,OAAO;QAE5B,IAAI,CAAC,aAAa,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE;YAC5C,GAAG,CAAC,IAAI,CAAC,wBAAwB,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QAClE,CAAC,CAAC,CAAC;IACL,CAAC;IAED,KAAK,CAAC,WAAW,CACf,SAAiB,EACjB,IAAY,EACZ,WAAyB;QAEzB,IAAI,CAAC,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,EAAE,CAAC;YAC/C,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,IAAI,CAAC;YACH,MAAM,SAAS,GAAG,UAAU,EAAE,CAAC;YAC/B,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,SAAS,EAAE,SAAS,CAAC,CAAC;YAEnE,IAAI,CAAC,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,KAAK,EAAE,CAAC;gBACvC,GAAG,CAAC,IAAI,CAAC,wBAAwB,EAAE,EAAE,KAAK,EAAE,QAAQ,CAAC,KAAK,EAAE,CAAC,CAAC;gBAC9D,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,MAAM,WAAW,GAAG,IAAI,YAAY,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YACtD,OAAO,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,WAAW,EAAE,IAAI,EAAE,WAAW,CAAC,CAAC;QAChE,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,IAAI,CAAC,oBAAoB,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YACvD,OAAO,EAAE,CAAC;QACZ,CAAC;IACH,CAAC;IAED,QAAQ;QACN,IAAI,CAAC;YACH,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,CAAC;YAC3B,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE,CAAC;YACnB,IAAI,CAA
C,SAAS,GAAG,KAAK,CAAC;YACvB,GAAG,CAAC,IAAI,CAAC,4BAA4B,CAAC,CAAC;QACzC,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,GAAG,CAAC,KAAK,CAAC,iCAAiC,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;QACvE,CAAC;IACH,CAAC;CACF;AAED,IAAI,cAAc,GAA4B,IAAI,CAAC;AAEnD,MAAM,UAAU,mBAAmB;IACjC,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,cAAc,GAAG,IAAI,gBAAgB,EAAE,CAAC;IAC1C,CAAC;IACD,OAAO,cAAc,CAAC;AACxB,CAAC;AAED,MAAM,UAAU,qBAAqB;IACnC,IAAI,cAAc,EAAE,CAAC;QACnB,cAAc,CAAC,QAAQ,EAAE,CAAC;QAC1B,cAAc,GAAG,IAAI,CAAC;IACxB,CAAC;AACH,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"extract.d.ts","sourceRoot":"","sources":["../../src/extraction/extract.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAS3D,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY,CA2B1D;AAED,wBAAgB,eAAe,CAC7B,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,OAAO,GAChB,MAAM,GAAG,MAAM,EAAE,CAUnB;AAED,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE,CA6CvD"}
1
+ {"version":3,"file":"extract.d.ts","sourceRoot":"","sources":["../../src/extraction/extract.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAS3D,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,YAAY,CAiC1D;AAED,wBAAgB,eAAe,CAC7B,IAAI,EAAE,MAAM,EACZ,QAAQ,EAAE,MAAM,EAChB,QAAQ,EAAE,OAAO,GAChB,MAAM,GAAG,MAAM,EAAE,CAUnB;AAED,wBAAgB,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,SAAS,EAAE,CA6CvD"}
@@ -26,6 +26,12 @@ export function extractMetadata(html) {
26
26
  const ogImage = getMetaContent(doc, 'og:image');
27
27
  if (ogImage)
28
28
  result.og_image = ogImage;
29
+ const ogType = getMetaContent(doc, 'og:type');
30
+ if (ogType)
31
+ result.og_type = ogType;
32
+ const canonical = doc.querySelector('link[rel="canonical"]')?.getAttribute('href');
33
+ if (canonical)
34
+ result.canonical_url = canonical;
29
35
  return result;
30
36
  }
31
37
  export function extractSelector(html, selector, multiple) {
@@ -1 +1 @@
1
- {"version":3,"file":"extract.js","sourceRoot":"","sources":["../../src/extraction/extract.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AAGrC,SAAS,cAAc,CAAC,GAAa,EAAE,cAAsB;IAC3D,MAAM,EAAE,GACN,GAAG,CAAC,aAAa,CAAC,cAAc,cAAc,IAAI,CAAC;QACnD,GAAG,CAAC,aAAa,CAAC,kBAAkB,cAAc,IAAI,CAAC,CAAC;IAC1D,OAAO,EAAE,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,SAAS,CAAC;AAClD,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAiB,EAAE,CAAC;IAEhC,MAAM,KAAK,GAAG,GAAG,CAAC,aAAa,CAAC,OAAO,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;IAC9D,IAAI,KAAK;QAAE,MAAM,CAAC,KAAK,GAAG,KAAK,CAAC;IAEhC,MAAM,WAAW,GACf,cAAc,CAAC,GAAG,EAAE,aAAa,CAAC,IAAI,cAAc,CAAC,GAAG,EAAE,gBAAgB,CAAC,CAAC;IAC9E,IAAI,WAAW;QAAE,MAAM,CAAC,WAAW,GAAG,WAAW,CAAC;IAElD,MAAM,MAAM,GAAG,cAAc,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IAC7C,IAAI,MAAM;QAAE,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC;IAEnC,MAAM,IAAI,GACR,cAAc,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,cAAc,CAAC,GAAG,EAAE,wBAAwB,CAAC,CAAC;IAC/E,IAAI,IAAI;QAAE,MAAM,CAAC,IAAI,GAAG,IAAI,CAAC;IAE7B,MAAM,QAAQ,GAAG,cAAc,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;IACjD,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAC7E,CAAC;IAED,MAAM,OAAO,GAAG,cAAc,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;IAChD,IAAI,OAAO;QAAE,MAAM,CAAC,QAAQ,GAAG,OAAO,CAAC;IAEvC,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,IAAY,EACZ,QAAgB,EAChB,QAAiB;IAEjB,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAE1C,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,QAAQ,GAAG,GAAG,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC;QAChD,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;IACzE,CAAC;IAED,MAAM,EAAE,GAAG,GAAG,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;IACvC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;AACjD,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,MAAM,EAAE,QAAQ,EAAE,GAA
G,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAG,GAAG,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;IAC7C,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEnC,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE;QACtC,MAAM,OAAO,GAAG,KAAK,CAAC,aAAa,CAAC,SAAS,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,SAAS,CAAC;QAEjF,MAAM,UAAU,GAAG,KAAK,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC;QACtD,IAAI,OAAiB,CAAC;QACtB,IAAI,QAAmB,CAAC;QAExB,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YAC5E,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC,CAAC;YAC1D,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC1B,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;gBACzD,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YACzD,MAAM,QAAQ,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;YAC5B,MAAM,WAAW,GAAG,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAEhF,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3B,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;gBACjE,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC9B,CAAC;iBAAM,CAAC;gBACN,MAAM,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;gBACxE,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBACtE,QAAQ,GAAG,OAAO,CAAC;YACrB,CAAC;QACH,CAAC;QAED,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;YAChC,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YACrD,MAAM,GAAG,GAA2B,EAAE,CAAC;YACvC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC5B,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,WAAW,IAAI,EAAE,CAA
C,CAAC,IAAI,EAAE,CAAC;YACrD,CAAC,CAAC,CAAC;YACH,OAAO,GAAG,CAAC;QACb,CAAC,CAAC,CAAC;QAEH,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;IACpC,CAAC,CAAC,CAAC;AACL,CAAC"}
1
+ {"version":3,"file":"extract.js","sourceRoot":"","sources":["../../src/extraction/extract.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AAGrC,SAAS,cAAc,CAAC,GAAa,EAAE,cAAsB;IAC3D,MAAM,EAAE,GACN,GAAG,CAAC,aAAa,CAAC,cAAc,cAAc,IAAI,CAAC;QACnD,GAAG,CAAC,aAAa,CAAC,kBAAkB,cAAc,IAAI,CAAC,CAAC;IAC1D,OAAO,EAAE,EAAE,YAAY,CAAC,SAAS,CAAC,IAAI,SAAS,CAAC;AAClD,CAAC;AAED,MAAM,UAAU,eAAe,CAAC,IAAY;IAC1C,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAiB,EAAE,CAAC;IAEhC,MAAM,KAAK,GAAG,GAAG,CAAC,aAAa,CAAC,OAAO,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,CAAC;IAC9D,IAAI,KAAK;QAAE,MAAM,CAAC,KAAK,GAAG,KAAK,CAAC;IAEhC,MAAM,WAAW,GACf,cAAc,CAAC,GAAG,EAAE,aAAa,CAAC,IAAI,cAAc,CAAC,GAAG,EAAE,gBAAgB,CAAC,CAAC;IAC9E,IAAI,WAAW;QAAE,MAAM,CAAC,WAAW,GAAG,WAAW,CAAC;IAElD,MAAM,MAAM,GAAG,cAAc,CAAC,GAAG,EAAE,QAAQ,CAAC,CAAC;IAC7C,IAAI,MAAM;QAAE,MAAM,CAAC,MAAM,GAAG,MAAM,CAAC;IAEnC,MAAM,IAAI,GACR,cAAc,CAAC,GAAG,EAAE,MAAM,CAAC,IAAI,cAAc,CAAC,GAAG,EAAE,wBAAwB,CAAC,CAAC;IAC/E,IAAI,IAAI;QAAE,MAAM,CAAC,IAAI,GAAG,IAAI,CAAC;IAE7B,MAAM,QAAQ,GAAG,cAAc,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;IACjD,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;IAC7E,CAAC;IAED,MAAM,OAAO,GAAG,cAAc,CAAC,GAAG,EAAE,UAAU,CAAC,CAAC;IAChD,IAAI,OAAO;QAAE,MAAM,CAAC,QAAQ,GAAG,OAAO,CAAC;IAEvC,MAAM,MAAM,GAAG,cAAc,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;IAC9C,IAAI,MAAM;QAAE,MAAM,CAAC,OAAO,GAAG,MAAM,CAAC;IAEpC,MAAM,SAAS,GAAG,GAAG,CAAC,aAAa,CAAC,uBAAuB,CAAC,EAAE,YAAY,CAAC,MAAM,CAAC,CAAC;IACnF,IAAI,SAAS;QAAE,MAAM,CAAC,aAAa,GAAG,SAAS,CAAC;IAEhD,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,MAAM,UAAU,eAAe,CAC7B,IAAY,EACZ,QAAgB,EAChB,QAAiB;IAEjB,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAE1C,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,QAAQ,GAAG,GAAG,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC;QAChD,OAAO,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;I
ACzE,CAAC;IAED,MAAM,EAAE,GAAG,GAAG,CAAC,aAAa,CAAC,QAAQ,CAAC,CAAC;IACvC,OAAO,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;AACjD,CAAC;AAED,MAAM,UAAU,aAAa,CAAC,IAAY;IACxC,MAAM,EAAE,QAAQ,EAAE,GAAG,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;IAC1C,MAAM,MAAM,GAAG,GAAG,CAAC,gBAAgB,CAAC,OAAO,CAAC,CAAC;IAC7C,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAEnC,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE;QACtC,MAAM,OAAO,GAAG,KAAK,CAAC,aAAa,CAAC,SAAS,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,SAAS,CAAC;QAEjF,MAAM,UAAU,GAAG,KAAK,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC;QACtD,IAAI,OAAiB,CAAC;QACtB,IAAI,QAAmB,CAAC;QAExB,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC1B,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;YAC5E,QAAQ,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,UAAU,CAAC,CAAC,CAAC;YAC1D,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC1B,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;gBACzD,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC9B,CAAC;QACH,CAAC;aAAM,CAAC;YACN,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YACzD,MAAM,QAAQ,GAAG,OAAO,CAAC,CAAC,CAAC,CAAC;YAC5B,MAAM,WAAW,GAAG,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;YAEhF,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3B,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;gBACjE,QAAQ,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;YAC9B,CAAC;iBAAM,CAAC;gBACN,MAAM,SAAS,GAAG,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;gBACxE,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,SAAS,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;gBACtE,QAAQ,GAAG,OAAO,CAAC;YACrB,CAAC;QACH,CAAC;QAED,MAAM,IAAI,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE;YAChC,MAAM
,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YACrD,MAAM,GAAG,GAA2B,EAAE,CAAC;YACvC,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE;gBAC5B,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,WAAW,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YACrD,CAAC,CAAC,CAAC;YACH,OAAO,GAAG,CAAC;QACb,CAAC,CAAC,CAAC;QAEH,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;IACpC,CAAC,CAAC,CAAC;AACL,CAAC"}
@@ -7,4 +7,6 @@ export declare function extractLinksAndImages(markdown: string): {
7
7
  links: string[];
8
8
  images: string[];
9
9
  };
10
+ export declare function filterDecorativeImages(markdown: string): string;
11
+ export declare function resolveRelativeUrls(markdown: string, baseUrl: string): string;
10
12
  //# sourceMappingURL=markdown.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAkDA,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAGnD;AAmCD,wBAAgB,cAAc,CAC5B,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,SAAI,GACf;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,OAAO,CAAA;CAAE,CA2BvC;AAED,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,MAAM,GAAG;IAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,CAoB7F"}
1
+ {"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAkDA,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAGnD;AAmCD,wBAAgB,cAAc,CAC5B,QAAQ,EAAE,MAAM,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,SAAI,GACf;IAAE,OAAO,EAAE,MAAM,CAAC;IAAC,OAAO,EAAE,OAAO,CAAA;CAAE,CA2BvC;AAED,wBAAgB,qBAAqB,CAAC,QAAQ,EAAE,MAAM,GAAG;IAAE,KAAK,EAAE,MAAM,EAAE,CAAC;IAAC,MAAM,EAAE,MAAM,EAAE,CAAA;CAAE,CAoB7F;AAkBD,wBAAgB,sBAAsB,CAAC,QAAQ,EAAE,MAAM,GAAG,MAAM,CAsB/D;AAID,wBAAgB,mBAAmB,CAAC,QAAQ,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,MAAM,CAkC7E"}
@@ -104,4 +104,74 @@ export function extractLinksAndImages(markdown) {
104
104
  }
105
105
  return { links: Array.from(links), images: Array.from(images) };
106
106
  }
107
+ const DECORATIVE_URL_MARKERS = [
108
+ 'avatar',
109
+ 'icon',
110
+ 'logo',
111
+ 'badge',
112
+ 'shield',
113
+ 'tracking',
114
+ 'pixel',
115
+ 'sprite',
116
+ 'emoji',
117
+ 'favicon',
118
+ ];
119
+ // Drop `![alt](src)` tokens that look decorative. Heuristic only -- keep
120
+ // images that have alt text unless the URL clearly marks them decorative.
121
+ // Tracking pixels (tiny data-URI gifs) and empty-alt icons are removed.
122
+ export function filterDecorativeImages(markdown) {
123
+ if (!markdown)
124
+ return markdown;
125
+ return markdown.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (match, alt, src) => {
126
+ const trimmedAlt = alt.trim();
127
+ const lowerSrc = src.toLowerCase();
128
+ // Tiny animated-GIF tracking pixel / 1x1 beacons
129
+ if (lowerSrc.startsWith('data:image/gif;base64,'))
130
+ return '';
131
+ // Inline SVG icon data URIs (short = tiny, likely decorative glyph)
132
+ if (lowerSrc.startsWith('data:image/svg+xml') && src.length < 200)
133
+ return '';
134
+ // URL marks it as decorative regardless of alt
135
+ for (const marker of DECORATIVE_URL_MARKERS) {
136
+ if (lowerSrc.includes(marker))
137
+ return '';
138
+ }
139
+ // No alt text + no title = decorative
140
+ if (!trimmedAlt)
141
+ return '';
142
+ return match;
143
+ });
144
+ }
145
+ // Resolve relative `[text](path)` and `![alt](path)` targets against baseUrl.
146
+ // Leaves absolute URLs, mailto:, tel:, javascript:, and #fragments untouched.
147
+ export function resolveRelativeUrls(markdown, baseUrl) {
148
+ if (!markdown || !baseUrl)
149
+ return markdown;
150
+ const rewrite = (path) => {
151
+ const trimmed = path.trim();
152
+ if (!trimmed)
153
+ return path;
154
+ if (/^(?:https?:|mailto:|tel:|javascript:|data:|#)/i.test(trimmed))
155
+ return path;
156
+ if (trimmed.startsWith('//')) {
157
+ try {
158
+ const base = new URL(baseUrl);
159
+ return `${base.protocol}${trimmed}`;
160
+ }
161
+ catch {
162
+ return path;
163
+ }
164
+ }
165
+ try {
166
+ return new URL(trimmed, baseUrl).href;
167
+ }
168
+ catch {
169
+ return path;
170
+ }
171
+ };
172
+ // Image links first so the shared link regex does not rewrite them twice.
173
+ let result = markdown.replace(/(!\[[^\]]*\]\()([^)\s]+)(\s*(?:"[^"]*")?\))/g, (_m, open, path, close) => `${open}${rewrite(path)}${close}`);
174
+ result = result.replace(/(^|[^!])(\[[^\]]*\]\()([^)\s]+)(\s*(?:"[^"]*")?\))/g, (_m, pre, open, path, close) => `${pre}${open}${rewrite(path)}${close}`);
175
+ return result;
176
+ }
107
177
  //# sourceMappingURL=markdown.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAEvC,SAAS,aAAa;IACpB,MAAM,EAAE,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;IAElF,wCAAwC;IACxC,EAAE,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;IAE/B,iDAAiD;IACjD,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE;QAClB,MAAM,EAAE,OAAO;QACf,WAAW,CAAC,QAAQ,EAAE,IAAI;YACxB,MAAM,EAAE,GAAG,IAAe,CAAC;YAC3B,MAAM,IAAI,GAAc,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YAC9D,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,EAAE,CAAC;YAEjC,MAAM,SAAS,GAAG,CAAC,GAAY,EAAU,EAAE;gBACzC,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;gBACzD,OAAO,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;YACnG,CAAC,CAAC;YAEF,MAAM,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YAC1B,MAAM,WAAW,GAAG,SAAS,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;YAChE,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;YACrE,MAAM,SAAS,GAAG,IAAI,GAAG,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;YAEzE,IAAI,WAAW,EAAE,CAAC;gBAChB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC/B,MAAM,KAAK,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,GAAG,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC;gBAC5E,OAAO,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;YAC5C,CAAC;YAED,MAAM,KAAK,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC;YACjF,OAAO,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;QAC5C,CAAC;KACF,CAAC,CAAC;IAEH,qFAAqF;IACrF,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE;QACtB,MAAM,EAAE,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;QACrD,WAAW,CAAC,OAAO;YACjB,OAAO,OAAO,CAAC;QACjB,CAAC;KACF,CAAC,CAAC;IAEH,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC
;AAEjC,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IACrB,OAAO,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAQD,SAAS,aAAa,CAAC,KAAe;IACpC,MAAM,QAAQ,GAAc,EAAE,CAAC;IAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;QACjD,IAAI,KAAK,EAAE,CAAC;YACV,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;QACjF,CAAC;IACH,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAe,EAAE,QAAmB,EAAE,UAAkB;IAClF,MAAM,OAAO,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC;IACrC,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,CAAC;IAEhC,0EAA0E;IAC1E,IAAI,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,KAAK,IAAI,CAAC,GAAG,UAAU,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtD,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YACvC,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAC5B,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC5C,CAAC;AAED,MAAM,UAAU,cAAc,CAC5B,QAAgB,EAChB,OAAe,EACf,YAAY,GAAG,CAAC;IAEhB,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;IAEtC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAExE,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IACpC,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;IAEnD,8BAA8B;IAC9B,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,KAAK,CAAC,CAAC;IAE/E,yDAAyD;IACzD,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,IAAI,YAAY,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC;QAClE,MAAM,EAAE,CAAC,EAAE,GAAG,YAAY,CAAC,YAAY,CAAC,CAAC;QACzC,OAAO,EAAE,OAAO,EAAE,kBAAkB,CAAC,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;IAC5E,CAAC;IAED,4EAA4E;IAC5E,MAAM,gBAAgB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,E
AAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IAEzF,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC,IAAI,YAAY,IAAI,gBAAgB,CAAC,MAAM,EAAE,CAAC;QAC7E,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAC/C,CAAC;IAED,MAAM,EAAE,CAAC,EAAE,GAAG,gBAAgB,CAAC,YAAY,CAAC,CAAC;IAC7C,OAAO,EAAE,OAAO,EAAE,kBAAkB,CAAC,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;AAC5E,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,QAAgB;IACpD,MAAM,YAAY,GAAG,yBAAyB,CAAC;IAC/C,MAAM,WAAW,GAAG,8BAA8B,CAAC;IAEnD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAU,CAAC;IACjC,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhC,IAAI,KAA6B,CAAC;IAElC,uBAAuB;IACvB,OAAO,CAAC,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACtD,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACvB,CAAC;IAED,4BAA4B;IAC5B,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACrD,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;AAClE,CAAC"}
1
+ {"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../src/extraction/markdown.ts"],"names":[],"mappings":"AAAA,OAAO,eAAe,MAAM,UAAU,CAAC;AAEvC,SAAS,aAAa;IACpB,MAAM,EAAE,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;IAElF,wCAAwC;IACxC,EAAE,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC,CAAC;IAE/B,iDAAiD;IACjD,EAAE,CAAC,OAAO,CAAC,OAAO,EAAE;QAClB,MAAM,EAAE,OAAO;QACf,WAAW,CAAC,QAAQ,EAAE,IAAI;YACxB,MAAM,EAAE,GAAG,IAAe,CAAC;YAC3B,MAAM,IAAI,GAAc,KAAK,CAAC,IAAI,CAAC,EAAE,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC;YAC9D,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC;gBAAE,OAAO,EAAE,CAAC;YAEjC,MAAM,SAAS,GAAG,CAAC,GAAY,EAAU,EAAE;gBACzC,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;gBACzD,OAAO,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,OAAO,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;YACnG,CAAC,CAAC;YAEF,MAAM,SAAS,GAAG,IAAI,CAAC,CAAC,CAAC,CAAC;YAC1B,MAAM,WAAW,GAAG,SAAS,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC;YAChE,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,CAAC;YACrE,MAAM,SAAS,GAAG,IAAI,GAAG,WAAW,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC;YAEzE,IAAI,WAAW,EAAE,CAAC;gBAChB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;gBAC/B,MAAM,KAAK,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,GAAG,QAAQ,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC;gBAC5E,OAAO,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;YAC5C,CAAC;YAED,MAAM,KAAK,GAAG,CAAC,SAAS,CAAC,SAAS,CAAC,EAAE,SAAS,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC;YACjF,OAAO,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC;QAC5C,CAAC;KACF,CAAC,CAAC;IAEH,qFAAqF;IACrF,EAAE,CAAC,OAAO,CAAC,WAAW,EAAE;QACtB,MAAM,EAAE,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,CAAC;QACrD,WAAW,CAAC,OAAO;YACjB,OAAO,OAAO,CAAC;QACjB,CAAC;KACF,CAAC,CAAC;IAEH,OAAO,EAAE,CAAC;AACZ,CAAC;AAED,MAAM,QAAQ,GAAG,aAAa,EAAE,CAAC
;AAEjC,MAAM,UAAU,cAAc,CAAC,IAAY;IACzC,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IACrB,OAAO,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAQD,SAAS,aAAa,CAAC,KAAe;IACpC,MAAM,QAAQ,GAAc,EAAE,CAAC;IAC/B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtC,MAAM,KAAK,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;QACjD,IAAI,KAAK,EAAE,CAAC;YACV,QAAQ,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,CAAC,CAAC;QACjF,CAAC;IACH,CAAC;IACD,OAAO,QAAQ,CAAC;AAClB,CAAC;AAED,SAAS,kBAAkB,CAAC,KAAe,EAAE,QAAmB,EAAE,UAAkB;IAClF,MAAM,OAAO,GAAG,QAAQ,CAAC,UAAU,CAAC,CAAC;IACrC,MAAM,KAAK,GAAG,OAAO,CAAC,SAAS,CAAC;IAEhC,0EAA0E;IAC1E,IAAI,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC;IACvB,KAAK,IAAI,CAAC,GAAG,UAAU,GAAG,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACtD,IAAI,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,IAAI,OAAO,CAAC,KAAK,EAAE,CAAC;YACvC,GAAG,GAAG,QAAQ,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;YAC5B,MAAM;QACR,CAAC;IACH,CAAC;IAED,OAAO,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC5C,CAAC;AAED,MAAM,UAAU,cAAc,CAC5B,QAAgB,EAChB,OAAe,EACf,YAAY,GAAG,CAAC;IAEhB,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,CAAC,CAAC;IAEtC,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAExE,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IACpC,MAAM,OAAO,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC;IAEnD,8BAA8B;IAC9B,MAAM,YAAY,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,KAAK,KAAK,CAAC,CAAC;IAE/E,yDAAyD;IACzD,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,IAAI,YAAY,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC;QAClE,MAAM,EAAE,CAAC,EAAE,GAAG,YAAY,CAAC,YAAY,CAAC,CAAC;QACzC,OAAO,EAAE,OAAO,EAAE,kBAAkB,CAAC,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;IAC5E,CAAC;IAED,4EAA4E;IAC5E,MAAM,gBAAgB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,E
AAE,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC;IAEzF,IAAI,gBAAgB,CAAC,MAAM,KAAK,CAAC,IAAI,YAAY,IAAI,gBAAgB,CAAC,MAAM,EAAE,CAAC;QAC7E,OAAO,EAAE,OAAO,EAAE,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAC/C,CAAC;IAED,MAAM,EAAE,CAAC,EAAE,GAAG,gBAAgB,CAAC,YAAY,CAAC,CAAC;IAC7C,OAAO,EAAE,OAAO,EAAE,kBAAkB,CAAC,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC;AAC5E,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,QAAgB;IACpD,MAAM,YAAY,GAAG,yBAAyB,CAAC;IAC/C,MAAM,WAAW,GAAG,8BAA8B,CAAC;IAEnD,MAAM,MAAM,GAAG,IAAI,GAAG,EAAU,CAAC;IACjC,MAAM,KAAK,GAAG,IAAI,GAAG,EAAU,CAAC;IAEhC,IAAI,KAA6B,CAAC;IAElC,uBAAuB;IACvB,OAAO,CAAC,KAAK,GAAG,YAAY,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACtD,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACvB,CAAC;IAED,4BAA4B;IAC5B,OAAO,CAAC,KAAK,GAAG,WAAW,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;QACrD,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;IACtB,CAAC;IAED,OAAO,EAAE,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,MAAM,EAAE,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,EAAE,CAAC;AAClE,CAAC;AAED,MAAM,sBAAsB,GAAG;IAC7B,QAAQ;IACR,MAAM;IACN,MAAM;IACN,OAAO;IACP,QAAQ;IACR,UAAU;IACV,OAAO;IACP,QAAQ;IACR,OAAO;IACP,SAAS;CACV,CAAC;AAEF,yEAAyE;AACzE,0EAA0E;AAC1E,wEAAwE;AACxE,MAAM,UAAU,sBAAsB,CAAC,QAAgB;IACrD,IAAI,CAAC,QAAQ;QAAE,OAAO,QAAQ,CAAC;IAC/B,OAAO,QAAQ,CAAC,OAAO,CAAC,2BAA2B,EAAE,CAAC,KAAK,EAAE,GAAW,EAAE,GAAW,EAAE,EAAE;QACvF,MAAM,UAAU,GAAG,GAAG,CAAC,IAAI,EAAE,CAAC;QAC9B,MAAM,QAAQ,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC;QAEnC,iDAAiD;QACjD,IAAI,QAAQ,CAAC,UAAU,CAAC,wBAAwB,CAAC;YAAE,OAAO,EAAE,CAAC;QAE7D,oEAAoE;QACpE,IAAI,QAAQ,CAAC,UAAU,CAAC,oBAAoB,CAAC,IAAI,GAAG,CAAC,MAAM,GAAG,GAAG;YAAE,OAAO,EAAE,CAAC;QAE7E,+CAA+C;QAC/C,KAAK,MAAM,MAAM,IAAI,sBAAsB,EAAE,CAAC;YAC5C,IAAI,QAAQ,CAAC,QAAQ,CAAC,MAAM,CAAC;gBAAE,OAAO,EAAE,CAAC;QAC3C,CAAC;QAED,sCAAsC;QACtC,IAAI,CAAC,UAAU;YAAE,OAAO,EAAE,CAAC;QAE3B,OAAO,KAAK,CAAC;IACf,CAAC,CAAC,CAAC;AACL,CAAC;AAED,8EAA8E;AAC9E,8EAA8E;AAC9E,MAAM,UAAU,mBAAmB,CAAC,QAAgB,EAAE,OAAe;IACnE,IAAI,CAAC,QAAQ,IAAI,CAAC,OAAO;QAAE,OAAO,QA
AQ,CAAC;IAE3C,MAAM,OAAO,GAAG,CAAC,IAAY,EAAU,EAAE;QACvC,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;QAC5B,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAC1B,IAAI,gDAAgD,CAAC,IAAI,CAAC,OAAO,CAAC;YAAE,OAAO,IAAI,CAAC;QAChF,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YAC7B,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC;gBAC9B,OAAO,GAAG,IAAI,CAAC,QAAQ,GAAG,OAAO,EAAE,CAAC;YACtC,CAAC;YAAC,MAAM,CAAC;gBACP,OAAO,IAAI,CAAC;YACd,CAAC;QACH,CAAC;QACD,IAAI,CAAC;YACH,OAAO,IAAI,GAAG,CAAC,OAAO,EAAE,OAAO,CAAC,CAAC,IAAI,CAAC;QACxC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;IACH,CAAC,CAAC;IAEF,0EAA0E;IAC1E,IAAI,MAAM,GAAG,QAAQ,CAAC,OAAO,CAC3B,8CAA8C,EAC9C,CAAC,EAAE,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,EAAE,CAC7D,CAAC;IAEF,MAAM,GAAG,MAAM,CAAC,OAAO,CACrB,qDAAqD,EACrD,CAAC,EAAE,EAAE,GAAG,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,GAAG,IAAI,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,KAAK,EAAE,CACxE,CAAC;IAEF,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/extraction/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,gBAAgB,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAU/D,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AASD,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,SAAS,GAAG,IAAI,CAE5D;AAED,wBAAsB,cAAc,CAClC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,iBAAsB,GAC9B,OAAO,CAAC,gBAAgB,CAAC,CAmE3B"}
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../src/extraction/pipeline.ts"],"names":[],"mappings":"AAWA,OAAO,KAAK,EAAE,gBAAgB,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAU/D,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AASD,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,SAAS,GAAG,IAAI,CAE5D;AAED,wBAAsB,cAAc,CAClC,IAAI,EAAE,MAAM,EACZ,GAAG,EAAE,MAAM,EACX,OAAO,GAAE,iBAAsB,GAC9B,OAAO,CAAC,gBAAgB,CAAC,CAmE3B"}
@@ -1,7 +1,8 @@
1
1
  import { defuddleExtract } from './defuddle.js';
2
2
  import { readabilityExtract } from './readability.js';
3
3
  import { trafilaturaExtract, isTrafilaturaAvailable } from './trafilatura.js';
4
- import { htmlToMarkdown, extractSection, extractLinksAndImages } from './markdown.js';
4
+ import { htmlToMarkdown, extractSection, extractLinksAndImages, filterDecorativeImages, resolveRelativeUrls, } from './markdown.js';
5
+ import { extractMetadata } from './extract.js';
5
6
  import { githubExtractor } from './site-extractors/github.js';
6
7
  import { stackoverflowExtractor } from './site-extractors/stackoverflow.js';
7
8
  import { mdnExtractor } from './site-extractors/mdn.js';
@@ -40,14 +41,14 @@ export async function extractContent(html, url, options = {}) {
40
41
  images: [],
41
42
  extractor: 'turndown',
42
43
  };
43
- return applyPostProcessing(result, options);
44
+ return applyPostProcessing(result, url, html, options);
44
45
  }
45
46
  const siteExtractor = siteExtractors.find((e) => e.canHandle(url, html));
46
47
  if (siteExtractor) {
47
48
  const extracted = siteExtractor.extract(html, url);
48
49
  if (extracted) {
49
50
  result = extracted;
50
- return applyPostProcessing(result, options);
51
+ return applyPostProcessing(result, url, html, options);
51
52
  }
52
53
  }
53
54
  result = await defuddleExtract(html, url);
@@ -59,7 +60,7 @@ export async function extractContent(html, url, options = {}) {
59
60
  result = await trafilaturaExtract(html, url);
60
61
  if (result) {
61
62
  log.info('Trafilatura extraction succeeded', { url, chars: result.markdown.length });
62
- return applyPostProcessing(result, options);
63
+ return applyPostProcessing(result, url, html, options);
63
64
  }
64
65
  }
65
66
  }
@@ -78,18 +79,42 @@ export async function extractContent(html, url, options = {}) {
78
79
  extractor: 'turndown',
79
80
  };
80
81
  }
81
- return applyPostProcessing(result, options);
82
+ return applyPostProcessing(result, url, html, options);
82
83
  }
83
- function applyPostProcessing(result, options) {
84
/**
 * Merge metadata supplied by the content extractor with metadata parsed from
 * the raw HTML via extractMetadata().
 *
 * Fields parsed from the HTML form the baseline; for each field listed below
 * the extractor's value takes precedence when it is set.
 *
 * @param {object} base - Metadata produced by the extractor.
 * @param {string} html - Raw page HTML handed to extractMetadata().
 * @returns {object} The merged metadata, or `base` unchanged if anything in
 *   the merge throws.
 */
function mergeMetadata(base, html) {
    try {
        const meta = extractMetadata(html);
        return {
            ...meta,
            // Extractor-provided fields win when set (they already inspected the article body).
            // Text fields use `||` so an empty string also falls back to the HTML value.
            description: base.description || meta.description,
            author: base.author || meta.author,
            date: base.date || meta.date,
            // Fall back like every other field: assigning base.language directly
            // would overwrite an HTML-derived language with undefined.
            language: base.language ?? meta.language,
            og_image: base.og_image ?? meta.og_image,
            og_type: base.og_type ?? meta.og_type,
            canonical_url: base.canonical_url ?? meta.canonical_url,
            keywords: base.keywords ?? meta.keywords,
        };
    }
    catch {
        // Best effort: a failure while parsing/merging must not lose the extractor's metadata.
        return base;
    }
}
104
/**
 * Final shaping step shared by every extraction path.
 *
 * Makes link/image targets absolute, drops decorative images, optionally
 * narrows the markdown to one section, collects links and images from the
 * resulting text, merges metadata, and finally applies the maxChars cap.
 *
 * @param {object} result - Extraction result; `markdown` and `metadata` are read.
 * @param {string} url - Page URL used as the base for relative targets.
 * @param {string} html - Raw page HTML, passed on to mergeMetadata().
 * @param {object} options - Reads `section`, `sectionIndex` and `maxChars`.
 * @returns {object} Copy of `result` with markdown, links, images and metadata replaced.
 */
function applyPostProcessing(result, url, html, options) {
    // Resolve relative links/images before slicing so downstream consumers get absolute URLs.
    const absolute = resolveRelativeUrls(result.markdown, url);
    const cleaned = filterDecorativeImages(absolute);
    const scoped = options.section
        ? extractSection(cleaned, options.section, options.sectionIndex ?? 0).content
        : cleaned;
    // Links and images are gathered before truncation, from the full (possibly section-scoped) text.
    const { links, images } = extractLinksAndImages(scoped);
    const metadata = mergeMetadata(result.metadata, html);
    const overLimit = options.maxChars && scoped.length > options.maxChars;
    const markdown = overLimit ? scoped.slice(0, options.maxChars) : scoped;
    return { ...result, markdown, links, images, metadata };
}
95
120
  //# sourceMappingURL=pipeline.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/extraction/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,MAAM,kBAAkB,CAAC;AAC9E,OAAO,EAAE,cAAc,EAAE,cAAc,EAAE,qBAAqB,EAAE,MAAM,eAAe,CAAC;AAEtF,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAC9D,OAAO,EAAE,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AAC5E,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,oBAAoB,EAAE,MAAM,mCAAmC,CAAC;AACzE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAEzC,MAAM,GAAG,GAAG,YAAY,CAAC,SAAS,CAAC,CAAC;AAUpC,MAAM,cAAc,GAAgB;IAClC,eAAe;IACf,sBAAsB;IACtB,YAAY;IACZ,oBAAoB;CACrB,CAAC;AAEF,MAAM,UAAU,iBAAiB,CAAC,SAAoB;IACpD,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;AACjC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,IAAY,EACZ,GAAW,EACX,UAA6B,EAAE;IAE/B,IAAI,MAAM,GAA4B,IAAI,CAAC;IAE3C,IAAI,OAAO,CAAC,WAAW,KAAK,iBAAiB,EAAE,CAAC;QAC9C,IAAI,OAAO,GAAG,EAAE,CAAC;QACjB,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;YACtB,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,CAAC,MAAM,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC;gBACrD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;gBACjD,OAAO,GAAG,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;YAC9B,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,GAAG,CAAC,IAAI,CAAC,kBAAkB,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC5D,CAAC;QACH,CAAC;QACD,MAAM,GAAG;YACP,KAAK,EAAE,EAAE;YACT,QAAQ,EAAE,OAAO;YACjB,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,UAAU;SACtB,CAAC;QACF,OAAO,mBAAmB,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAC9C,CAAC;IAED,MAAM,aAAa,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC;IACzE,IAAI,aAAa,EAAE,CAAC;QAClB,MAAM,SAAS,GAAG,aAAa,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QACnD,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,GAAG,SAAS,CAAC;YACnB,OAAO,mBAAmB,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;QAC9C,CAAC;IACH,CAAC;IAED,MAAM,GAAG,MAAM,eAAe,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IAE1C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,MAAM,GAAG,SA
AS,EAAE,CAAC;QAC3B,IAAI,MAAM,CAAC,WAAW,KAAK,OAAO,EAAE,CAAC;YACnC,MAAM,aAAa,GAAG,MAAM,sBAAsB,EAAE,CAAC;YACrD,IAAI,aAAa,EAAE,CAAC;gBAClB,MAAM,GAAG,MAAM,kBAAkB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;gBAC7C,IAAI,MAAM,EAAE,CAAC;oBACX,GAAG,CAAC,IAAI,CAAC,kCAAkC,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;oBACrF,OAAO,mBAAmB,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;gBAC9C,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,GAAG,kBAAkB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,QAAQ,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;QACtC,MAAM,GAAG;YACP,KAAK,EAAE,EAAE;YACT,QAAQ;YACR,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,UAAU;SACtB,CAAC;IACJ,CAAC;IAED,OAAO,mBAAmB,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;AAC9C,CAAC;AAED,SAAS,mBAAmB,CAC1B,MAAwB,EACxB,OAA0B;IAE1B,IAAI,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;IAE/B,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,EAAE,OAAO,EAAE,GAAG,cAAc,CAAC,QAAQ,EAAE,OAAO,CAAC,OAAO,EAAE,OAAO,CAAC,YAAY,IAAI,CAAC,CAAC,CAAC;QACzF,QAAQ,GAAG,OAAO,CAAC;IACrB,CAAC;IAED,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAE1D,IAAI,OAAO,CAAC,QAAQ,IAAI,QAAQ,CAAC,MAAM,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;QAC3D,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IACjD,CAAC;IAED,OAAO,EAAE,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,CAAC;AAChD,CAAC"}
1
+ {"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/extraction/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AAChD,OAAO,EAAE,kBAAkB,EAAE,MAAM,kBAAkB,CAAC;AACtD,OAAO,EAAE,kBAAkB,EAAE,sBAAsB,EAAE,MAAM,kBAAkB,CAAC;AAC9E,OAAO,EACL,cAAc,EACd,cAAc,EACd,qBAAqB,EACrB,sBAAsB,EACtB,mBAAmB,GACpB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,eAAe,EAAE,MAAM,cAAc,CAAC;AAE/C,OAAO,EAAE,eAAe,EAAE,MAAM,6BAA6B,CAAC;AAC9D,OAAO,EAAE,sBAAsB,EAAE,MAAM,oCAAoC,CAAC;AAC5E,OAAO,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AACxD,OAAO,EAAE,oBAAoB,EAAE,MAAM,mCAAmC,CAAC;AACzE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,SAAS,EAAE,MAAM,cAAc,CAAC;AAEzC,MAAM,GAAG,GAAG,YAAY,CAAC,SAAS,CAAC,CAAC;AAUpC,MAAM,cAAc,GAAgB;IAClC,eAAe;IACf,sBAAsB;IACtB,YAAY;IACZ,oBAAoB;CACrB,CAAC;AAEF,MAAM,UAAU,iBAAiB,CAAC,SAAoB;IACpD,cAAc,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;AACjC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAClC,IAAY,EACZ,GAAW,EACX,UAA6B,EAAE;IAE/B,IAAI,MAAM,GAA4B,IAAI,CAAC;IAE3C,IAAI,OAAO,CAAC,WAAW,KAAK,iBAAiB,EAAE,CAAC;QAC9C,IAAI,OAAO,GAAG,EAAE,CAAC;QACjB,IAAI,OAAO,CAAC,SAAS,EAAE,CAAC;YACtB,IAAI,CAAC;gBACH,MAAM,QAAQ,GAAG,CAAC,MAAM,MAAM,CAAC,WAAW,CAAC,CAAC,CAAC,OAAO,CAAC;gBACrD,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC;gBACjD,OAAO,GAAG,MAAM,CAAC,IAAI,IAAI,EAAE,CAAC;YAC9B,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,GAAG,CAAC,IAAI,CAAC,kBAAkB,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YAC5D,CAAC;QACH,CAAC;QACD,MAAM,GAAG;YACP,KAAK,EAAE,EAAE;YACT,QAAQ,EAAE,OAAO;YACjB,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,UAAU;SACtB,CAAC;QACF,OAAO,mBAAmB,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;IACzD,CAAC;IAED,MAAM,aAAa,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,SAAS,CAAC,GAAG,EAAE,IAAI,CAAC,CAAC,CAAC;IACzE,IAAI,aAAa,EAAE,CAAC;QAClB,MAAM,SAAS,GAAG,aAAa,CAAC,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;QACnD,IAAI,SAAS,EAAE,CAAC;YACd,MAAM,GAAG,SAAS,CAAC;YACnB,OAAO,mBAAmB,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;QACzD,CAAC;IACH,CAAC;
IAED,MAAM,GAAG,MAAM,eAAe,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IAE1C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,MAAM,GAAG,SAAS,EAAE,CAAC;QAC3B,IAAI,MAAM,CAAC,WAAW,KAAK,OAAO,EAAE,CAAC;YACnC,MAAM,aAAa,GAAG,MAAM,sBAAsB,EAAE,CAAC;YACrD,IAAI,aAAa,EAAE,CAAC;gBAClB,MAAM,GAAG,MAAM,kBAAkB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;gBAC7C,IAAI,MAAM,EAAE,CAAC;oBACX,GAAG,CAAC,IAAI,CAAC,kCAAkC,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC,CAAC;oBACrF,OAAO,mBAAmB,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;gBACzD,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,GAAG,kBAAkB,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC;IACzC,CAAC;IAED,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,MAAM,QAAQ,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;QACtC,MAAM,GAAG;YACP,KAAK,EAAE,EAAE;YACT,QAAQ;YACR,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,UAAU;SACtB,CAAC;IACJ,CAAC;IAED,OAAO,mBAAmB,CAAC,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,OAAO,CAAC,CAAC;AACzD,CAAC;AAED,SAAS,aAAa,CACpB,IAAkC,EAClC,IAAY;IAEZ,IAAI,CAAC;QACH,MAAM,IAAI,GAAG,eAAe,CAAC,IAAI,CAAC,CAAC;QACnC,OAAO;YACL,GAAG,IAAI;YACP,oFAAoF;YACpF,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,WAAW;YACjD,MAAM,EAAE,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,MAAM;YAClC,IAAI,EAAE,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,IAAI;YAC5B,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ;YACxC,OAAO,EAAE,IAAI,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;YACrC,aAAa,EAAE,IAAI,CAAC,aAAa,IAAI,IAAI,CAAC,aAAa;YACvD,QAAQ,EAAE,IAAI,CAAC,QAAQ,IAAI,IAAI,CAAC,QAAQ;SACzC,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC;AAED,SAAS,mBAAmB,CAC1B,MAAwB,EACxB,GAAW,EACX,IAAY,EACZ,OAA0B;IAE1B,IAAI,QAAQ,GAAG,MAAM,CAAC,QAAQ,CAAC;IAE/B,0FAA0F;IAC1F,QAAQ,GAAG,mBAAmB,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;IAC9C,QAAQ,GAAG,sBAAsB,CAAC,QAAQ,CAAC,CAAC;IAE5C,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;QACpB,MAAM,EAAE,OAAO,EAAE,GAAG,cAAc,CAAC,QAAQ,EAAE,OAAO,CAAC,OAAO,EAAE,OAAO,CAAC,YAAY,IAAI,CAAC,CAAC,CAAC;QACzF,QAAQ,GAAG,OAAO,CAAC;IACrB,CAAC;IAED,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,GAAG,qBAAqB,CAAC,QAAQ,CAAC,CAAC;IAC1D,MAAM,QAA
Q,GAAG,aAAa,CAAC,MAAM,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;IAEtD,IAAI,OAAO,CAAC,QAAQ,IAAI,QAAQ,CAAC,MAAM,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;QAC3D,QAAQ,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,EAAE,OAAO,CAAC,QAAQ,CAAC,CAAC;IACjD,CAAC;IAED,OAAO,EAAE,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAC;AAC1D,CAAC"}
@@ -1,3 +1,3 @@
1
1
  import type { ExtractionResult } from '../types.js';
2
- export declare function readabilityExtract(html: string, url: string): ExtractionResult | null;
2
+ export declare function readabilityExtract(html: string, _url: string): ExtractionResult | null;
3
3
  //# sourceMappingURL=readability.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"readability.d.ts","sourceRoot":"","sources":["../../src/extraction/readability.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAIpD,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,EAAE,GAAG,EAAE,MAAM,GAAG,gBAAgB,GAAG,IAAI,CA0BrF"}
1
+ {"version":3,"file":"readability.d.ts","sourceRoot":"","sources":["../../src/extraction/readability.ts"],"names":[],"mappings":"AAGA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AAIpD,wBAAgB,kBAAkB,CAAC,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,MAAM,GAAG,gBAAgB,GAAG,IAAI,CA0BtF"}
@@ -2,7 +2,7 @@ import { Readability } from '@mozilla/readability';
2
2
  import { parseHTML } from 'linkedom';
3
3
  import TurndownService from 'turndown';
4
4
  const MIN_CONTENT_THRESHOLD = 100;
5
- export function readabilityExtract(html, url) {
5
+ export function readabilityExtract(html, _url) {
6
6
  try {
7
7
  const { document } = parseHTML(html);
8
8
  const reader = new Readability(document);
@@ -1 +1 @@
1
- {"version":3,"file":"readability.js","sourceRoot":"","sources":["../../src/extraction/readability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACnD,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAElC,MAAM,UAAU,kBAAkB,CAAC,IAAY,EAAE,GAAW;IAC1D,IAAI,CAAC;QACH,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QACrC,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,QAAe,CAAC,CAAC;QAChD,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;QAC/B,IAAI,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE9C,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;QACxF,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAEpD,IAAI,QAAQ,CAAC,MAAM,GAAG,qBAAqB;YAAE,OAAO,IAAI,CAAC;QAEzD,OAAO;YACL,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,EAAE;YAC1B,QAAQ;YACR,QAAQ,EAAE;gBACR,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,SAAS;gBACnC,QAAQ,EAAE,OAAO,CAAC,IAAI,IAAI,SAAS;aACpC;YACD,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,aAAa;SACzB,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
1
+ {"version":3,"file":"readability.js","sourceRoot":"","sources":["../../src/extraction/readability.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,WAAW,EAAE,MAAM,sBAAsB,CAAC;AACnD,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,qBAAqB,GAAG,GAAG,CAAC;AAElC,MAAM,UAAU,kBAAkB,CAAC,IAAY,EAAE,IAAY;IAC3D,IAAI,CAAC;QACH,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QACrC,MAAM,MAAM,GAAG,IAAI,WAAW,CAAC,QAAe,CAAC,CAAC;QAChD,MAAM,OAAO,GAAG,MAAM,CAAC,KAAK,EAAE,CAAC;QAC/B,IAAI,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE9C,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;QACxF,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;QAEpD,IAAI,QAAQ,CAAC,MAAM,GAAG,qBAAqB;YAAE,OAAO,IAAI,CAAC;QAEzD,OAAO;YACL,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,EAAE;YAC1B,QAAQ;YACR,QAAQ,EAAE;gBACR,MAAM,EAAE,OAAO,CAAC,MAAM,IAAI,SAAS;gBACnC,QAAQ,EAAE,OAAO,CAAC,IAAI,IAAI,SAAS;aACpC;YACD,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,aAAa;SACzB,CAAC;IACJ,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;AACH,CAAC"}
@@ -7,7 +7,7 @@ function isIssueOrPR(url) {
7
7
  function isBlob(url) {
8
8
  return /\/blob\//.test(url);
9
9
  }
10
- function extractIssue(document, url) {
10
+ function extractIssue(document, _url) {
11
11
  const titleEl = document.querySelector('.js-issue-title') ?? document.querySelector('.gh-header-title');
12
12
  if (!titleEl)
13
13
  return null;
@@ -1 +1 @@
1
- {"version":3,"file":"github.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/github.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAExF,SAAS,WAAW,CAAC,GAAW;IAC9B,OAAO,2BAA2B,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/C,CAAC;AAED,SAAS,MAAM,CAAC,GAAW;IACzB,OAAO,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC9B,CAAC;AAED,SAAS,YAAY,CAAC,QAAkB,EAAE,GAAW;IACnD,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,iBAAiB,CAAC,IAAI,QAAQ,CAAC,aAAa,CAAC,kBAAkB,CAAC,CAAC;IACxG,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAE1B,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAEhD,MAAM,QAAQ,GAAG,QAAQ,CAAC,gBAAgB,CAAC,aAAa,CAAC,CAAC;IAC1D,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC;SAChC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;SACzC,MAAM,CAAC,OAAO,CAAC,CAAC;IAEnB,MAAM,aAAa,GAAG,QAAQ,CAAC,gBAAgB,CAAC,uBAAuB,CAAC,CAAC;IACzE,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAE5C,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,QAAQ,CAAC,IAAI,CAAC,eAAe,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACtD,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;QAC5C,MAAM,IAAI,GAAI,IAAgB,CAAC,SAAS,CAAC;QACzC,MAAM,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;QAC1C,IAAI,EAAE,EAAE,CAAC;YACP,QAAQ,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEvC,OAAO;QACL,KAAK;QACL,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,eAAe;KAC3B,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CAAC,QAAkB;IACvC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,QAAQ,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACpD,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,QAAQ,CAAC;IAEzD,MAA
M,UAAU,GACd,QAAQ,CAAC,aAAa,CAAC,wBAAwB,CAAC;QAChD,QAAQ,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAE3C,IAAI,CAAC,UAAU;QAAE,OAAO,IAAI,CAAC;IAE7B,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAE,UAAsB,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;IAC7E,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,OAAO;QACL,KAAK;QACL,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,eAAe;KAC3B,CAAC;AACJ,CAAC;AAED,SAAS,WAAW,CAAC,QAAkB;IACrC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,KAAK,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAEjD,MAAM,SAAS,GACb,QAAQ,CAAC,aAAa,CAAC,oBAAoB,CAAC;QAC5C,QAAQ,CAAC,aAAa,CAAC,YAAY,CAAC;QACpC,QAAQ,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAE3C,IAAI,CAAC,SAAS;QAAE,OAAO,IAAI,CAAC;IAE5B,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAE,SAAqB,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;IAC5E,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,OAAO;QACL,KAAK;QACL,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,eAAe;KAC3B,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,MAAM,eAAe,GAAc;IACxC,IAAI,EAAE,QAAQ;IAEd,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,YAAY,IAAI,QAAQ,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QACvE,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,GAAW;QAC/B,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,IAAI,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC;YACrB,OAAO,YAAY,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QACrC,CAAC;QAED,IAAI,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;YAChB,OAAO,WAAW,CAAC,QAAQ,CAAC,CAAC;QAC/B,CAAC;QAED,OAAO,aAAa,CAAC,QAAQ,CAAC,CAAC;IACjC,CAAC;CACF,CAAC"}
1
+ {"version":3,"file":"github.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/github.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAExF,SAAS,WAAW,CAAC,GAAW;IAC9B,OAAO,2BAA2B,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC/C,CAAC;AAED,SAAS,MAAM,CAAC,GAAW;IACzB,OAAO,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAC9B,CAAC;AAED,SAAS,YAAY,CAAC,QAAkB,EAAE,IAAY;IACpD,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,iBAAiB,CAAC,IAAI,QAAQ,CAAC,aAAa,CAAC,kBAAkB,CAAC,CAAC;IACxG,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAE1B,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAEhD,MAAM,QAAQ,GAAG,QAAQ,CAAC,gBAAgB,CAAC,aAAa,CAAC,CAAC;IAC1D,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC;SAChC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;SACzC,MAAM,CAAC,OAAO,CAAC,CAAC;IAEnB,MAAM,aAAa,GAAG,QAAQ,CAAC,gBAAgB,CAAC,uBAAuB,CAAC,CAAC;IACzE,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAE5C,MAAM,QAAQ,GAAa,EAAE,CAAC;IAE9B,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACtB,QAAQ,CAAC,IAAI,CAAC,eAAe,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACtD,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE;QAC5C,MAAM,IAAI,GAAI,IAAgB,CAAC,SAAS,CAAC;QACzC,MAAM,EAAE,GAAG,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;QAC1C,IAAI,EAAE,EAAE,CAAC;YACP,QAAQ,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;QAC/C,CAAC;IACH,CAAC,CAAC,CAAC;IAEH,MAAM,QAAQ,GAAG,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEvC,OAAO;QACL,KAAK;QACL,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,eAAe;KAC3B,CAAC;AACJ,CAAC;AAED,SAAS,aAAa,CAAC,QAAkB;IACvC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,QAAQ,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IACpD,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,QAAQ,CAAC;IAEzD,MAA
M,UAAU,GACd,QAAQ,CAAC,aAAa,CAAC,wBAAwB,CAAC;QAChD,QAAQ,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAE3C,IAAI,CAAC,UAAU;QAAE,OAAO,IAAI,CAAC;IAE7B,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAE,UAAsB,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;IAC7E,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,OAAO;QACL,KAAK;QACL,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,eAAe;KAC3B,CAAC;AACJ,CAAC;AAED,SAAS,WAAW,CAAC,QAAkB;IACrC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;IAChD,MAAM,KAAK,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;IAEjD,MAAM,SAAS,GACb,QAAQ,CAAC,aAAa,CAAC,oBAAoB,CAAC;QAC5C,QAAQ,CAAC,aAAa,CAAC,YAAY,CAAC;QACpC,QAAQ,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAE3C,IAAI,CAAC,SAAS;QAAE,OAAO,IAAI,CAAC;IAE5B,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAE,SAAqB,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;IAC5E,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,OAAO;QACL,KAAK;QACL,QAAQ;QACR,QAAQ,EAAE,EAAE;QACZ,KAAK,EAAE,EAAE;QACT,MAAM,EAAE,EAAE;QACV,SAAS,EAAE,eAAe;KAC3B,CAAC;AACJ,CAAC;AAED,MAAM,CAAC,MAAM,eAAe,GAAc;IACxC,IAAI,EAAE,QAAQ;IAEd,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,YAAY,IAAI,QAAQ,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAC;QACvE,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,GAAW;QAC/B,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,IAAI,WAAW,CAAC,GAAG,CAAC,EAAE,CAAC;YACrB,OAAO,YAAY,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAC;QACrC,CAAC;QAED,IAAI,MAAM,CAAC,GAAG,CAAC,EAAE,CAAC;YAChB,OAAO,WAAW,CAAC,QAAQ,CAAC,CAAC;QAC/B,CAAC;QAED,OAAO,aAAa,CAAC,QAAQ,CAAC,CAAC;IACjC,CAAC;CACF,CAAC"}
@@ -20,7 +20,7 @@ export const mdnExtractor = {
20
20
  return false;
21
21
  }
22
22
  },
23
- extract(html, url) {
23
+ extract(html, _url) {
24
24
  if (!html)
25
25
  return null;
26
26
  const { document } = parseHTML(html);
@@ -1 +1 @@
1
- {"version":3,"file":"mdn.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/mdn.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAExF,MAAM,eAAe,GAAG;IACtB,KAAK;IACL,UAAU;IACV,QAAQ;IACR,QAAQ;IACR,UAAU;IACV,WAAW;CACZ,CAAC;AAEF,MAAM,CAAC,MAAM,YAAY,GAAc;IACrC,IAAI,EAAE,KAAK;IAEX,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,uBAAuB,CAAC;QAC9C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,GAAW;QAC/B,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,MAAM,OAAO,GACX,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC;YACnD,QAAQ,CAAC,aAAa,CAAC,kBAAkB,CAAC;YAC1C,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;QAEpC,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE1B,KAAK,MAAM,QAAQ,IAAI,eAAe,EAAE,CAAC;YACvC,KAAK,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC;gBAChE,EAAE,CAAC,UAAU,EAAE,WAAW,CAAC,EAAE,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;QAED,MAAM,OAAO,GACX,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC;YAC3B,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QAElC,MAAM,QAAQ,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QACpD,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;YAClC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE;YAChC,CAAC,CAAC,QAAQ,CAAC;QAEb,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QAExB,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAE,OAAmB,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;QAC1E,IAAI,CAAC,QAAQ;YAAE,OAAO,IAAI,CAAC;QAE3B,OAAO;YACL,KAAK;YACL,QAAQ;YACR,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,eAAe;SAC3B,CAAC;IACJ,CAAC;CACF,CAAC"}
1
+ {"version":3,"file":"mdn.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/mdn.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAExF,MAAM,eAAe,GAAG;IACtB,KAAK;IACL,UAAU;IACV,QAAQ;IACR,QAAQ;IACR,UAAU;IACV,WAAW;CACZ,CAAC;AAEF,MAAM,CAAC,MAAM,YAAY,GAAc;IACrC,IAAI,EAAE,KAAK;IAEX,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,uBAAuB,CAAC;QAC9C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,IAAY;QAChC,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,MAAM,OAAO,GACX,QAAQ,CAAC,aAAa,CAAC,2BAA2B,CAAC;YACnD,QAAQ,CAAC,aAAa,CAAC,kBAAkB,CAAC;YAC1C,QAAQ,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;QAEpC,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE1B,KAAK,MAAM,QAAQ,IAAI,eAAe,EAAE,CAAC;YACvC,KAAK,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,gBAAgB,CAAC,QAAQ,CAAC,CAAC,EAAE,CAAC;gBAChE,EAAE,CAAC,UAAU,EAAE,WAAW,CAAC,EAAE,CAAC,CAAC;YACjC,CAAC;QACH,CAAC;QAED,MAAM,OAAO,GACX,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC;YAC3B,QAAQ,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC;QAElC,MAAM,QAAQ,GAAG,OAAO,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QACpD,MAAM,KAAK,GAAG,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC;YAClC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAE,CAAC,IAAI,EAAE;YAChC,CAAC,CAAC,QAAQ,CAAC;QAEb,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QAExB,MAAM,QAAQ,GAAG,QAAQ,CAAC,QAAQ,CAAE,OAAmB,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;QAC1E,IAAI,CAAC,QAAQ;YAAE,OAAO,IAAI,CAAC;QAE3B,OAAO;YACL,KAAK;YACL,QAAQ;YACR,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,eAAe;SAC3B,CAAC;IACJ,CAAC;CACF,CAAC"}
@@ -55,7 +55,7 @@ export const stackoverflowExtractor = {
55
55
  return false;
56
56
  }
57
57
  },
58
- extract(html, url) {
58
+ extract(html, _url) {
59
59
  if (!html)
60
60
  return null;
61
61
  const { document } = parseHTML(html);
@@ -1 +1 @@
1
- {"version":3,"file":"stackoverflow.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/stackoverflow.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAQxF,SAAS,UAAU,CAAC,EAAkB;IACpC,IAAI,CAAC,EAAE;QAAE,OAAO,CAAC,CAAC;IAClB,MAAM,MAAM,GAAG,EAAE,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAClD,MAAM,GAAG,GAAG,MAAM,EAAE,YAAY,CAAC,YAAY,CAAC,IAAI,MAAM,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,GAAG,CAAC;IACrF,OAAO,QAAQ,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;AAChC,CAAC;AAED,SAAS,YAAY,CAAC,QAAkB;IACtC,MAAM,SAAS,GAAG,QAAQ,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAAC;IAChE,MAAM,OAAO,GAAa,EAAE,CAAC;IAE7B,KAAK,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;QACvC,MAAM,QAAQ,GAAG,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QAC1D,MAAM,KAAK,GAAG,UAAU,CAAC,EAAa,CAAC,CAAC;QACxC,MAAM,MAAM,GAAG,EAAE,CAAC,aAAa,CAAC,qCAAqC,CAAC,CAAC;QACvE,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAE,MAAkB,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7D,OAAO,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC;IAC9C,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,aAAa,CACpB,KAAa,EACb,IAAc,EACd,KAAa,EACb,YAAoB,EACpB,OAAiB;IAEjB,MAAM,OAAO,GAAG,SAAS,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,KAAK,EAAE,CAAC;IAC7D,MAAM,UAAU,GAAG,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,IAAI,EAAE,CAAC;IAE1D,MAAM,QAAQ,GAAa;QACzB,KAAK,KAAK,EAAE;QACZ,OAAO;QACP,EAAE;QACF,UAAU;KACX,CAAC;IAEF,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IACnD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACpF,MAAM,OAAO,GAAG,CAAC,GAAG,QAAQ,EAAE,GAAG,MAAM,CAAC,CAAC;IAEzC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ;YAC7B,CAAC,CAAC,8BAA8B,MAAM,CAAC,KAAK,GAAG;YAC/C,CAAC,CAAC,qBAAqB,MAAM,CAAC,KAAK,GAAG,CAAC;QACzC,MAAM,MAAM,GAAG,QAAQ,CAAC,QAAQ,CAAC
,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC;QACzD,QAAQ,CAAC,IAAI,CAAC,KAAK,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,CAAC,CAAC;IAChD,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC/B,CAAC;AAED,MAAM,CAAC,MAAM,sBAAsB,GAAc;IAC/C,IAAI,EAAE,eAAe;IAErB,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,mBAAmB;gBACrC,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC;gBACvC,QAAQ,KAAK,mBAAmB;gBAChC,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC;QAC5C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,GAAW;QAC/B,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,qBAAqB,CAAC,CAAC;QAC9D,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE1B,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAChD,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QAExB,MAAM,cAAc,GAAG,QAAQ,CAAC,aAAa,CAAC,mEAAmE,CAAC,CAAC;QACnH,IAAI,CAAC,cAAc;YAAE,OAAO,IAAI,CAAC;QAEjC,MAAM,YAAY,GAAI,cAA0B,CAAC,SAAS,CAAC;QAE3D,MAAM,MAAM,GAAG,QAAQ,CAAC,gBAAgB,CAAC,8DAA8D,CAAC,CAAC;QACzG,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAE1F,MAAM,UAAU,GAAG,QAAQ,CAAC,aAAa,CAAC,WAAW,CAAC,CAAC;QACvD,MAAM,KAAK,GAAG,UAAU,CAAC,UAA4B,CAAC,CAAC;QAEvD,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;QAEvC,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;QAE1E,OAAO;YACL,KAAK;YACL,QAAQ;YACR,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,eAAe;SAC3B,CAAC;IACJ,CAAC;CACF,CAAC"}
1
+ {"version":3,"file":"stackoverflow.js","sourceRoot":"","sources":["../../../src/extraction/site-extractors/stackoverflow.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,SAAS,EAAE,MAAM,UAAU,CAAC;AACrC,OAAO,eAAe,MAAM,UAAU,CAAC;AAGvC,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,cAAc,EAAE,QAAQ,EAAE,CAAC,CAAC;AAQxF,SAAS,UAAU,CAAC,EAAkB;IACpC,IAAI,CAAC,EAAE;QAAE,OAAO,CAAC,CAAC;IAClB,MAAM,MAAM,GAAG,EAAE,CAAC,aAAa,CAAC,gBAAgB,CAAC,CAAC;IAClD,MAAM,GAAG,GAAG,MAAM,EAAE,YAAY,CAAC,YAAY,CAAC,IAAI,MAAM,EAAE,WAAW,EAAE,IAAI,EAAE,IAAI,GAAG,CAAC;IACrF,OAAO,QAAQ,CAAC,GAAG,EAAE,EAAE,CAAC,IAAI,CAAC,CAAC;AAChC,CAAC;AAED,SAAS,YAAY,CAAC,QAAkB;IACtC,MAAM,SAAS,GAAG,QAAQ,CAAC,gBAAgB,CAAC,kBAAkB,CAAC,CAAC;IAChE,MAAM,OAAO,GAAa,EAAE,CAAC;IAE7B,KAAK,MAAM,EAAE,IAAI,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC;QACvC,MAAM,QAAQ,GAAG,EAAE,CAAC,SAAS,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAC;QAC1D,MAAM,KAAK,GAAG,UAAU,CAAC,EAAa,CAAC,CAAC;QACxC,MAAM,MAAM,GAAG,EAAE,CAAC,aAAa,CAAC,qCAAqC,CAAC,CAAC;QACvE,MAAM,QAAQ,GAAG,MAAM,CAAC,CAAC,CAAE,MAAkB,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7D,OAAO,CAAC,IAAI,CAAC,EAAE,QAAQ,EAAE,KAAK,EAAE,QAAQ,EAAE,CAAC,CAAC;IAC9C,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,SAAS,aAAa,CACpB,KAAa,EACb,IAAc,EACd,KAAa,EACb,YAAoB,EACpB,OAAiB;IAEjB,MAAM,OAAO,GAAG,SAAS,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,KAAK,EAAE,CAAC;IAC7D,MAAM,UAAU,GAAG,QAAQ,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,IAAI,EAAE,CAAC;IAE1D,MAAM,QAAQ,GAAa;QACzB,KAAK,KAAK,EAAE;QACZ,OAAO;QACP,EAAE;QACF,UAAU;KACX,CAAC;IAEF,MAAM,QAAQ,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC;IACnD,MAAM,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;IACpF,MAAM,OAAO,GAAG,CAAC,GAAG,QAAQ,EAAE,GAAG,MAAM,CAAC,CAAC;IAEzC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;QAC7B,MAAM,OAAO,GAAG,MAAM,CAAC,QAAQ;YAC7B,CAAC,CAAC,8BAA8B,MAAM,CAAC,KAAK,GAAG;YAC/C,CAAC,CAAC,qBAAqB,MAAM,CAAC,KAAK,GAAG,CAAC;QACzC,MAAM,MAAM,GAAG,QAAQ,CAAC,QAAQ,CAAC
,MAAM,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC;QACzD,QAAQ,CAAC,IAAI,CAAC,KAAK,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE,MAAM,CAAC,CAAC;IAChD,CAAC;IAED,OAAO,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC/B,CAAC;AAED,MAAM,CAAC,MAAM,sBAAsB,GAAc;IAC/C,IAAI,EAAE,eAAe;IAErB,SAAS,CAAC,GAAW;QACnB,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;YACvC,OAAO,QAAQ,KAAK,mBAAmB;gBACrC,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC;gBACvC,QAAQ,KAAK,mBAAmB;gBAChC,QAAQ,CAAC,QAAQ,CAAC,oBAAoB,CAAC,CAAC;QAC5C,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,KAAK,CAAC;QACf,CAAC;IACH,CAAC;IAED,OAAO,CAAC,IAAY,EAAE,IAAY;QAChC,IAAI,CAAC,IAAI;YAAE,OAAO,IAAI,CAAC;QAEvB,MAAM,EAAE,QAAQ,EAAE,GAAG,SAAS,CAAC,IAAI,CAAC,CAAC;QAErC,MAAM,OAAO,GAAG,QAAQ,CAAC,aAAa,CAAC,qBAAqB,CAAC,CAAC;QAC9D,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC;QAE1B,MAAM,KAAK,GAAG,OAAO,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;QAChD,IAAI,CAAC,KAAK;YAAE,OAAO,IAAI,CAAC;QAExB,MAAM,cAAc,GAAG,QAAQ,CAAC,aAAa,CAAC,mEAAmE,CAAC,CAAC;QACnH,IAAI,CAAC,cAAc;YAAE,OAAO,IAAI,CAAC;QAEjC,MAAM,YAAY,GAAI,cAA0B,CAAC,SAAS,CAAC;QAE3D,MAAM,MAAM,GAAG,QAAQ,CAAC,gBAAgB,CAAC,8DAA8D,CAAC,CAAC;QACzG,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,EAAE,CAAC,WAAW,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAE1F,MAAM,UAAU,GAAG,QAAQ,CAAC,aAAa,CAAC,WAAW,CAAC,CAAC;QACvD,MAAM,KAAK,GAAG,UAAU,CAAC,UAA4B,CAAC,CAAC;QAEvD,MAAM,OAAO,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;QAEvC,MAAM,QAAQ,GAAG,aAAa,CAAC,KAAK,EAAE,IAAI,EAAE,KAAK,EAAE,YAAY,EAAE,OAAO,CAAC,CAAC;QAE1E,OAAO;YACL,KAAK;YACL,QAAQ;YACR,QAAQ,EAAE,EAAE;YACZ,KAAK,EAAE,EAAE;YACT,MAAM,EAAE,EAAE;YACV,SAAS,EAAE,eAAe;SAC3B,CAAC;IACJ,CAAC;CACF,CAAC"}
@@ -0,0 +1,4 @@
1
+ import type { StructuredData } from '../types.js';
2
+ export declare function extractStructured(html: string): StructuredData;
3
+ export type { TableData } from '../types.js';
4
+ //# sourceMappingURL=structured.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"structured.d.ts","sourceRoot":"","sources":["../../src/extraction/structured.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAA2C,MAAM,aAAa,CAAC;AAU3F,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,GAAG,cAAc,CAgB9D;AAuLD,YAAY,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC"}
@@ -0,0 +1,206 @@
1
+ import { parseHTML } from 'linkedom';
2
+ import { extractTables } from './extract.js';
3
+ import { extractJsonLd } from './jsonld.js';
4
// Longest text value (in UTF-16 code units, after whitespace collapsing) that truncate() returns unshortened.
+ const MAX_VALUE_LEN = 400;
5
// Limit checked by extractDefinitions, extractChartHints and extractKeyValuePairs to stop collecting entries.
+ const MAX_ITEMS_PER_TYPE = 200;
6
/**
 * Module entry point: gathers every structured-data pattern this file knows
 * how to surface from raw HTML, so host LLMs receive a rich, schema-free
 * brief without having to re-parse the page themselves.
 *
 * The markup is parsed once for the DOM-based extractors; the table and
 * JSON-LD extractors receive the raw string.
 *
 * @param {string} html - Raw HTML of the page.
 * @returns {object} `{ tables, definitions, jsonld, chart_hints, key_value_pairs }`.
 */
export function extractStructured(html) {
    const page = parseHTML(html).document;
    // String-based extractors run first, in the same order as before.
    const tables = extractTables(html);
    const jsonld = extractJsonLd(html);
    return {
        tables,
        definitions: extractDefinitions(page),
        jsonld,
        chart_hints: extractChartHints(page),
        key_value_pairs: extractKeyValuePairs(page),
    };
}
24
/**
 * Collect term/description pairs from every <dl> in the document.
 *
 * <dl><dt>Term</dt><dd>Description</dd></dl> is the canonical key-value
 * structure on the web. Several <dd> elements that follow one <dt> are
 * joined with a single space. The HTML standard also allows each dt/dd
 * group to be wrapped in a <div>, so direct <div> children of a <dl> are
 * unwrapped one level before scanning.
 *
 * A <dt> with no non-empty <dd> after it produces no entry, and a <dd>
 * that appears before any <dt> is ignored.
 *
 * @param doc - Parsed document; only `querySelectorAll` is used.
 * @returns {Array<{term: string, description: string}>} At most
 *   MAX_ITEMS_PER_TYPE entries, in document order.
 */
function extractDefinitions(doc) {
    const out = [];
    for (const dl of doc.querySelectorAll('dl')) {
        // Flatten <div>-wrapped groups so their dt/dd are seen like direct children.
        const entries = Array.from(dl.children).flatMap((child) => child.tagName.toLowerCase() === 'div' ? Array.from(child.children) : [child]);
        let pending = null;
        const buffer = [];
        // Emit the term collected so far (if it has any description) and reset.
        const flush = () => {
            if (pending !== null && buffer.length > 0) {
                out.push({
                    term: pending,
                    description: truncate(buffer.join(' ').trim()),
                });
            }
            pending = null;
            buffer.length = 0;
        };
        for (const c of entries) {
            const tag = c.tagName.toLowerCase();
            if (tag === 'dt') {
                // A new term closes the previous one.
                flush();
                pending = truncate((c.textContent ?? '').trim());
            }
            else if (tag === 'dd' && pending !== null) {
                const text = (c.textContent ?? '').trim();
                if (text)
                    buffer.push(text);
            }
            if (out.length >= MAX_ITEMS_PER_TYPE)
                break;
        }
        // The last term of the list has no following <dt> to trigger its flush.
        flush();
        if (out.length >= MAX_ITEMS_PER_TYPE)
            break;
    }
    return out;
}
63
/**
 * Surface accessibility hints that describe charts and diagrams.
 *
 * SVG / figure accessibility text is the cheapest way to expose chart
 * structure when the chart itself is rendered by JS; host LLMs can use it
 * to describe a data viz without needing the underlying data.
 *
 * Two passes are made:
 *   1. every <svg> with a <title>, an `aria-label`, or a <figcaption> on its
 *      nearest enclosing <figure>;
 *   2. every <figure> that has a <figcaption> but no <svg> (charts rendered
 *      as images or canvas).
 *
 * Hints are de-duplicated on their normalised title/aria-label/figcaption
 * and the combined result never exceeds MAX_ITEMS_PER_TYPE entries.
 *
 * @param doc - Parsed document; only `querySelectorAll` is used.
 * @returns {Array<object>} Hints with optional `title`, `aria_label`,
 *   `figcaption`, plus `type_hint` (which may be undefined).
 */
function extractChartHints(doc) {
    const out = [];
    const seen = new Set();
    for (const svg of doc.querySelectorAll('svg')) {
        if (out.length >= MAX_ITEMS_PER_TYPE)
            return out;
        const title = (svg.querySelector('title')?.textContent ?? '').trim();
        const aria_label = (svg.getAttribute('aria-label') ?? '').trim();
        const role = (svg.getAttribute('role') ?? '').trim();
        let figcaption;
        const figParent = closestFigure(svg);
        if (figParent) {
            const cap = figParent.querySelector('figcaption');
            figcaption = cap?.textContent?.trim() || undefined;
        }
        // An SVG with no descriptive text at all carries no usable hint.
        if (!title && !aria_label && !figcaption)
            continue;
        const hint = {
            ...(title ? { title: truncate(title) } : {}),
            ...(aria_label ? { aria_label: truncate(aria_label) } : {}),
            ...(figcaption ? { figcaption: truncate(figcaption) } : {}),
            type_hint: inferChartType(title, aria_label, role, figcaption),
        };
        const key = `${hint.title ?? ''}|${hint.aria_label ?? ''}|${hint.figcaption ?? ''}`;
        if (seen.has(key))
            continue;
        seen.add(key);
        out.push(hint);
    }
    // <figure><figcaption> without SVG still surfaces dataviz context for
    // pages that render charts as images or canvas.
    for (const fig of doc.querySelectorAll('figure')) {
        // Checked before pushing so this pass cannot exceed the cap that the
        // SVG pass may already have reached.
        if (out.length >= MAX_ITEMS_PER_TYPE)
            break;
        if (fig.querySelector('svg'))
            continue; // already handled above
        const cap = fig.querySelector('figcaption')?.textContent?.trim();
        if (!cap)
            continue;
        // De-duplicate on the normalised caption, the same form the SVG pass uses.
        const shown = truncate(cap);
        const key = `||${shown}`;
        if (seen.has(key))
            continue;
        seen.add(key);
        out.push({
            figcaption: shown,
            type_hint: inferChartType('', '', '', cap),
        });
    }
    return out;
}
116
/**
 * Find the nearest <figure> ancestor of an element.
 *
 * The search starts at the element's parent, so `el` itself is never
 * returned. Tag names are compared case-insensitively.
 *
 * @param el - Element whose ancestors are inspected via `parentElement`.
 * @returns The closest enclosing <figure> element, or null when there is none.
 */
function closestFigure(el) {
    for (let node = el.parentElement; node; node = node.parentElement) {
        if (node.tagName.toLowerCase() === 'figure') {
            return node;
        }
    }
    return null;
}
125
/**
 * Guess what kind of visualisation a set of accessibility strings describes.
 *
 * All four inputs are joined and lower-cased, then matched against keyword
 * groups in priority order: chart words, then diagram words, then graph
 * words. If nothing matches, an element with `role="img"` is still treated
 * as a chart.
 *
 * @param {string} title - Text of the SVG <title>, or ''.
 * @param {string} ariaLabel - Value of `aria-label`, or ''.
 * @param {string} role - Value of `role`, or ''.
 * @param {string} [figcaption] - Caption of the enclosing figure, if any.
 * @returns {'chart'|'diagram'|'graph'|undefined} The inferred type, or undefined.
 */
function inferChartType(title, ariaLabel, role, figcaption) {
    const haystack = [title, ariaLabel, role, figcaption ?? ''].join(' ').toLowerCase();
    // Ordered: the first keyword group that matches wins.
    const rules = [
        [/\b(chart|bar|line|pie|donut|scatter|area|histogram)\b/, 'chart'],
        [/\b(diagram|flow|architecture|topology)\b/, 'diagram'],
        [/\b(graph|network|tree)\b/, 'graph'],
    ];
    const matched = rules.find(([pattern]) => pattern.test(haystack));
    if (matched) {
        return matched[1];
    }
    return role === 'img' ? 'chart' : undefined;
}
137
/**
 * Harvest "label -> value" pairs from markup conventions commonly used by
 * comparison grids, spec sheets and product info boxes.
 *
 * Four sources are scanned, in this order:
 *   1. microdata: `[itemprop]` elements (value is the `content` attribute
 *      when present, otherwise the element text);
 *   2. data attributes: elements carrying `data-value` together with
 *      `data-label` or `data-key`;
 *   3. comparison-grid rows: elements whose class contains "row", "spec" or
 *      "field" and that hold both a label-like and a value-like descendant;
 *   4. text pattern: <li>/<p> whose whole text reads "Key: Value".
 *
 * Pairs are de-duplicated through pushUnique and collection stops as soon
 * as MAX_ITEMS_PER_TYPE entries have been gathered.
 *
 * @param doc - Parsed document; only `querySelectorAll` is used.
 * @returns {Array<{key: string, value: string, source: string}>} Pairs in
 *   discovery order; `source` is 'microdata', 'data-attr',
 *   'comparison-grid' or 'text-pattern'.
 */
function extractKeyValuePairs(doc) {
    const out = [];
    const seen = new Set();
    // Microdata itemprop pairs
    for (const el of doc.querySelectorAll('[itemprop]')) {
        const key = el.getAttribute('itemprop') ?? '';
        const value = (el.getAttribute('content') ?? el.textContent ?? '').trim();
        if (!key || !value)
            continue;
        pushUnique(out, seen, { key, value: truncate(value), source: 'microdata' });
        if (out.length >= MAX_ITEMS_PER_TYPE)
            return out;
    }
    // Explicit data-label / data-key + data-value attribute pairs
    for (const el of doc.querySelectorAll('[data-label][data-value], [data-key][data-value]')) {
        // A blank data-label must not hide a usable data-key, so fall back on
        // emptiness rather than on absence.
        const label = (el.getAttribute('data-label') ?? '').trim();
        const key = label || (el.getAttribute('data-key') ?? '').trim();
        const value = (el.getAttribute('data-value') ?? '').trim();
        if (!key || !value)
            continue;
        pushUnique(out, seen, { key, value: truncate(value), source: 'data-attr' });
        if (out.length >= MAX_ITEMS_PER_TYPE)
            return out;
    }
    // Comparison grid rows: <div class="row"><div class="label">X</div><div class="value">Y</div></div>
    for (const row of doc.querySelectorAll('[class*="row"], [class*="spec"], [class*="field"]')) {
        const label = row.querySelector('[class*="label"], [class*="name"], [class*="key"], dt, th');
        const value = row.querySelector('[class*="value"], [class*="data"], dd, td');
        if (!label || !value)
            continue;
        const k = (label.textContent ?? '').trim();
        const v = (value.textContent ?? '').trim();
        // k === v means both selectors resolved to the same text, i.e. no real pair.
        if (!k || !v || k === v)
            continue;
        // Very long "labels" are almost certainly body text, not field names.
        if (k.length > 100)
            continue;
        pushUnique(out, seen, { key: k, value: truncate(v), source: 'comparison-grid' });
        if (out.length >= MAX_ITEMS_PER_TYPE)
            return out;
    }
    // "Key: Value" text patterns within <li>/<p> — cheap heuristic for spec sheets
    for (const el of doc.querySelectorAll('li, p')) {
        const text = (el.textContent ?? '').trim();
        if (!text || text.length > 300)
            continue;
        const m = text.match(/^([A-Z][A-Za-z0-9 _-]{1,40}):\s+(.+)$/);
        if (!m)
            continue;
        pushUnique(out, seen, { key: m[1].trim(), value: truncate(m[2].trim()), source: 'text-pattern' });
        if (out.length >= MAX_ITEMS_PER_TYPE)
            return out;
    }
    return out;
}
193
/**
 * Append a key/value pair to `list` unless an equivalent pair was already
 * recorded.
 *
 * Equivalence is case-insensitive on both key and value; `seen` holds the
 * fingerprints of everything accepted so far and is updated in place.
 *
 * @param {Array<object>} list - Destination array, mutated on acceptance.
 * @param {Set<string>} seen - Fingerprints of previously accepted pairs.
 * @param {{key: string, value: string}} pair - Candidate pair.
 * @returns {void}
 */
function pushUnique(list, seen, pair) {
    const fingerprint = [pair.key, pair.value].map((part) => part.toLowerCase()).join('|');
    if (!seen.has(fingerprint)) {
        seen.add(fingerprint);
        list.push(pair);
    }
}
200
/**
 * Normalise whitespace and cap the length of a text value.
 *
 * Runs of whitespace are collapsed to a single space and the result is
 * trimmed. If it is still longer than MAX_VALUE_LEN UTF-16 code units it is
 * cut and an ellipsis is appended, so the returned string never exceeds
 * MAX_VALUE_LEN. The cut never lands inside a surrogate pair: if the last
 * kept code unit would be a lone high surrogate, it is dropped as well.
 *
 * @param {string} text - Raw text.
 * @returns {string} Collapsed text, shortened with a trailing '…' if needed.
 */
function truncate(text) {
    const collapsed = text.replace(/\s+/g, ' ').trim();
    if (collapsed.length <= MAX_VALUE_LEN)
        return collapsed;
    // Reserve one code unit for the ellipsis.
    let end = MAX_VALUE_LEN - 1;
    const lastUnit = collapsed.charCodeAt(end - 1);
    // A high surrogate (U+D800–U+DBFF) at the cut point has lost its low
    // half; keeping it would emit an invalid, unpaired code unit.
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff)
        end -= 1;
    return `${collapsed.slice(0, end)}…`;
}
206
+ //# sourceMappingURL=structured.js.map