@debriefer/sources 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194) hide show
  1. package/README.md +59 -0
  2. package/dist/__tests__/archives/chronicling-america.test.d.ts +8 -0
  3. package/dist/__tests__/archives/chronicling-america.test.d.ts.map +1 -0
  4. package/dist/__tests__/archives/chronicling-america.test.js +151 -0
  5. package/dist/__tests__/archives/chronicling-america.test.js.map +1 -0
  6. package/dist/__tests__/archives/europeana.test.d.ts +8 -0
  7. package/dist/__tests__/archives/europeana.test.d.ts.map +1 -0
  8. package/dist/__tests__/archives/europeana.test.js +200 -0
  9. package/dist/__tests__/archives/europeana.test.js.map +1 -0
  10. package/dist/__tests__/archives/internet-archive.test.d.ts +8 -0
  11. package/dist/__tests__/archives/internet-archive.test.d.ts.map +1 -0
  12. package/dist/__tests__/archives/internet-archive.test.js +189 -0
  13. package/dist/__tests__/archives/internet-archive.test.js.map +1 -0
  14. package/dist/__tests__/archives/trove.test.d.ts +8 -0
  15. package/dist/__tests__/archives/trove.test.d.ts.map +1 -0
  16. package/dist/__tests__/archives/trove.test.js +202 -0
  17. package/dist/__tests__/archives/trove.test.js.map +1 -0
  18. package/dist/__tests__/books/google-books.test.d.ts +8 -0
  19. package/dist/__tests__/books/google-books.test.d.ts.map +1 -0
  20. package/dist/__tests__/books/google-books.test.js +221 -0
  21. package/dist/__tests__/books/google-books.test.js.map +1 -0
  22. package/dist/__tests__/books/open-library.test.d.ts +8 -0
  23. package/dist/__tests__/books/open-library.test.d.ts.map +1 -0
  24. package/dist/__tests__/books/open-library.test.js +159 -0
  25. package/dist/__tests__/books/open-library.test.js.map +1 -0
  26. package/dist/__tests__/news/guardian.test.d.ts +9 -0
  27. package/dist/__tests__/news/guardian.test.d.ts.map +1 -0
  28. package/dist/__tests__/news/guardian.test.js +224 -0
  29. package/dist/__tests__/news/guardian.test.js.map +1 -0
  30. package/dist/__tests__/news/nytimes.test.d.ts +9 -0
  31. package/dist/__tests__/news/nytimes.test.d.ts.map +1 -0
  32. package/dist/__tests__/news/nytimes.test.js +271 -0
  33. package/dist/__tests__/news/nytimes.test.js.map +1 -0
  34. package/dist/__tests__/news/site-search-source.test.d.ts +9 -0
  35. package/dist/__tests__/news/site-search-source.test.d.ts.map +1 -0
  36. package/dist/__tests__/news/site-search-source.test.js +342 -0
  37. package/dist/__tests__/news/site-search-source.test.js.map +1 -0
  38. package/dist/__tests__/obituary/find-a-grave.test.d.ts +8 -0
  39. package/dist/__tests__/obituary/find-a-grave.test.d.ts.map +1 -0
  40. package/dist/__tests__/obituary/find-a-grave.test.js +238 -0
  41. package/dist/__tests__/obituary/find-a-grave.test.js.map +1 -0
  42. package/dist/__tests__/shared/duckduckgo-search.test.d.ts +9 -0
  43. package/dist/__tests__/shared/duckduckgo-search.test.d.ts.map +1 -0
  44. package/dist/__tests__/shared/duckduckgo-search.test.js +218 -0
  45. package/dist/__tests__/shared/duckduckgo-search.test.js.map +1 -0
  46. package/dist/__tests__/shared/fetch-page.test.d.ts +9 -0
  47. package/dist/__tests__/shared/fetch-page.test.d.ts.map +1 -0
  48. package/dist/__tests__/shared/fetch-page.test.js +281 -0
  49. package/dist/__tests__/shared/fetch-page.test.js.map +1 -0
  50. package/dist/__tests__/shared/html-utils.test.d.ts +2 -0
  51. package/dist/__tests__/shared/html-utils.test.d.ts.map +1 -0
  52. package/dist/__tests__/shared/html-utils.test.js +169 -0
  53. package/dist/__tests__/shared/html-utils.test.js.map +1 -0
  54. package/dist/__tests__/shared/readability-extract.test.d.ts +2 -0
  55. package/dist/__tests__/shared/readability-extract.test.d.ts.map +1 -0
  56. package/dist/__tests__/shared/readability-extract.test.js +107 -0
  57. package/dist/__tests__/shared/readability-extract.test.js.map +1 -0
  58. package/dist/__tests__/shared/sanitize-text.test.d.ts +2 -0
  59. package/dist/__tests__/shared/sanitize-text.test.d.ts.map +1 -0
  60. package/dist/__tests__/shared/sanitize-text.test.js +77 -0
  61. package/dist/__tests__/shared/sanitize-text.test.js.map +1 -0
  62. package/dist/__tests__/shared/search-utils.test.d.ts +2 -0
  63. package/dist/__tests__/shared/search-utils.test.d.ts.map +1 -0
  64. package/dist/__tests__/shared/search-utils.test.js +26 -0
  65. package/dist/__tests__/shared/search-utils.test.js.map +1 -0
  66. package/dist/__tests__/structured/wikidata.test.d.ts +9 -0
  67. package/dist/__tests__/structured/wikidata.test.d.ts.map +1 -0
  68. package/dist/__tests__/structured/wikidata.test.js +509 -0
  69. package/dist/__tests__/structured/wikidata.test.js.map +1 -0
  70. package/dist/__tests__/structured/wikipedia.test.d.ts +9 -0
  71. package/dist/__tests__/structured/wikipedia.test.d.ts.map +1 -0
  72. package/dist/__tests__/structured/wikipedia.test.js +643 -0
  73. package/dist/__tests__/structured/wikipedia.test.js.map +1 -0
  74. package/dist/__tests__/web-search/base.test.d.ts +9 -0
  75. package/dist/__tests__/web-search/base.test.d.ts.map +1 -0
  76. package/dist/__tests__/web-search/base.test.js +622 -0
  77. package/dist/__tests__/web-search/base.test.js.map +1 -0
  78. package/dist/__tests__/web-search/bing.test.d.ts +10 -0
  79. package/dist/__tests__/web-search/bing.test.d.ts.map +1 -0
  80. package/dist/__tests__/web-search/bing.test.js +277 -0
  81. package/dist/__tests__/web-search/bing.test.js.map +1 -0
  82. package/dist/__tests__/web-search/brave.test.d.ts +10 -0
  83. package/dist/__tests__/web-search/brave.test.d.ts.map +1 -0
  84. package/dist/__tests__/web-search/brave.test.js +264 -0
  85. package/dist/__tests__/web-search/brave.test.js.map +1 -0
  86. package/dist/__tests__/web-search/duckduckgo.test.d.ts +10 -0
  87. package/dist/__tests__/web-search/duckduckgo.test.d.ts.map +1 -0
  88. package/dist/__tests__/web-search/duckduckgo.test.js +107 -0
  89. package/dist/__tests__/web-search/duckduckgo.test.js.map +1 -0
  90. package/dist/__tests__/web-search/google.test.d.ts +9 -0
  91. package/dist/__tests__/web-search/google.test.d.ts.map +1 -0
  92. package/dist/__tests__/web-search/google.test.js +189 -0
  93. package/dist/__tests__/web-search/google.test.js.map +1 -0
  94. package/dist/archives/chronicling-america.d.ts +33 -0
  95. package/dist/archives/chronicling-america.d.ts.map +1 -0
  96. package/dist/archives/chronicling-america.js +85 -0
  97. package/dist/archives/chronicling-america.js.map +1 -0
  98. package/dist/archives/europeana.d.ts +37 -0
  99. package/dist/archives/europeana.d.ts.map +1 -0
  100. package/dist/archives/europeana.js +92 -0
  101. package/dist/archives/europeana.js.map +1 -0
  102. package/dist/archives/internet-archive.d.ts +32 -0
  103. package/dist/archives/internet-archive.d.ts.map +1 -0
  104. package/dist/archives/internet-archive.js +90 -0
  105. package/dist/archives/internet-archive.js.map +1 -0
  106. package/dist/archives/trove.d.ts +37 -0
  107. package/dist/archives/trove.d.ts.map +1 -0
  108. package/dist/archives/trove.js +97 -0
  109. package/dist/archives/trove.js.map +1 -0
  110. package/dist/books/google-books.d.ts +48 -0
  111. package/dist/books/google-books.d.ts.map +1 -0
  112. package/dist/books/google-books.js +111 -0
  113. package/dist/books/google-books.js.map +1 -0
  114. package/dist/books/open-library.d.ts +44 -0
  115. package/dist/books/open-library.d.ts.map +1 -0
  116. package/dist/books/open-library.js +103 -0
  117. package/dist/books/open-library.js.map +1 -0
  118. package/dist/index.d.ts +45 -0
  119. package/dist/index.d.ts.map +1 -0
  120. package/dist/index.js +35 -0
  121. package/dist/index.js.map +1 -0
  122. package/dist/news/guardian.d.ts +51 -0
  123. package/dist/news/guardian.d.ts.map +1 -0
  124. package/dist/news/guardian.js +131 -0
  125. package/dist/news/guardian.js.map +1 -0
  126. package/dist/news/nytimes.d.ts +27 -0
  127. package/dist/news/nytimes.d.ts.map +1 -0
  128. package/dist/news/nytimes.js +104 -0
  129. package/dist/news/nytimes.js.map +1 -0
  130. package/dist/news/site-search-source.d.ts +89 -0
  131. package/dist/news/site-search-source.d.ts.map +1 -0
  132. package/dist/news/site-search-source.js +182 -0
  133. package/dist/news/site-search-source.js.map +1 -0
  134. package/dist/news/sources.d.ts +52 -0
  135. package/dist/news/sources.d.ts.map +1 -0
  136. package/dist/news/sources.js +276 -0
  137. package/dist/news/sources.js.map +1 -0
  138. package/dist/obituary/find-a-grave.d.ts +43 -0
  139. package/dist/obituary/find-a-grave.d.ts.map +1 -0
  140. package/dist/obituary/find-a-grave.js +173 -0
  141. package/dist/obituary/find-a-grave.js.map +1 -0
  142. package/dist/shared/duckduckgo-search.d.ts +86 -0
  143. package/dist/shared/duckduckgo-search.d.ts.map +1 -0
  144. package/dist/shared/duckduckgo-search.js +218 -0
  145. package/dist/shared/duckduckgo-search.js.map +1 -0
  146. package/dist/shared/fetch-page.d.ts +50 -0
  147. package/dist/shared/fetch-page.d.ts.map +1 -0
  148. package/dist/shared/fetch-page.js +212 -0
  149. package/dist/shared/fetch-page.js.map +1 -0
  150. package/dist/shared/html-utils.d.ts +99 -0
  151. package/dist/shared/html-utils.d.ts.map +1 -0
  152. package/dist/shared/html-utils.js +246 -0
  153. package/dist/shared/html-utils.js.map +1 -0
  154. package/dist/shared/readability-extract.d.ts +33 -0
  155. package/dist/shared/readability-extract.d.ts.map +1 -0
  156. package/dist/shared/readability-extract.js +45 -0
  157. package/dist/shared/readability-extract.js.map +1 -0
  158. package/dist/shared/sanitize-text.d.ts +24 -0
  159. package/dist/shared/sanitize-text.d.ts.map +1 -0
  160. package/dist/shared/sanitize-text.js +49 -0
  161. package/dist/shared/sanitize-text.js.map +1 -0
  162. package/dist/shared/search-utils.d.ts +18 -0
  163. package/dist/shared/search-utils.d.ts.map +1 -0
  164. package/dist/shared/search-utils.js +20 -0
  165. package/dist/shared/search-utils.js.map +1 -0
  166. package/dist/structured/wikidata.d.ts +128 -0
  167. package/dist/structured/wikidata.d.ts.map +1 -0
  168. package/dist/structured/wikidata.js +361 -0
  169. package/dist/structured/wikidata.js.map +1 -0
  170. package/dist/structured/wikipedia.d.ts +184 -0
  171. package/dist/structured/wikipedia.d.ts.map +1 -0
  172. package/dist/structured/wikipedia.js +275 -0
  173. package/dist/structured/wikipedia.js.map +1 -0
  174. package/dist/web-search/base.d.ts +128 -0
  175. package/dist/web-search/base.d.ts.map +1 -0
  176. package/dist/web-search/base.js +251 -0
  177. package/dist/web-search/base.js.map +1 -0
  178. package/dist/web-search/bing.d.ts +21 -0
  179. package/dist/web-search/bing.d.ts.map +1 -0
  180. package/dist/web-search/bing.js +53 -0
  181. package/dist/web-search/bing.js.map +1 -0
  182. package/dist/web-search/brave.d.ts +21 -0
  183. package/dist/web-search/brave.d.ts.map +1 -0
  184. package/dist/web-search/brave.js +56 -0
  185. package/dist/web-search/brave.js.map +1 -0
  186. package/dist/web-search/duckduckgo.d.ts +15 -0
  187. package/dist/web-search/duckduckgo.d.ts.map +1 -0
  188. package/dist/web-search/duckduckgo.js +21 -0
  189. package/dist/web-search/duckduckgo.js.map +1 -0
  190. package/dist/web-search/google.d.ts +24 -0
  191. package/dist/web-search/google.d.ts.map +1 -0
  192. package/dist/web-search/google.js +48 -0
  193. package/dist/web-search/google.js.map +1 -0
  194. package/package.json +58 -0
@@ -0,0 +1,45 @@
1
+ export { htmlToText, htmlToTextClean, decodeHtmlEntities, looksLikeCode, stripCodeFromText, } from "./shared/html-utils.js";
2
+ export { extractArticleContent } from "./shared/readability-extract.js";
3
+ export type { ArticleExtractionResult } from "./shared/readability-extract.js";
4
+ export { sanitizeSourceText } from "./shared/sanitize-text.js";
5
+ export { splitSearchWords } from "./shared/search-utils.js";
6
+ export { WikidataSource, wikidata, escapeSparql, isValidLabel, getValidLabel, filterValidLabels, } from "./structured/wikidata.js";
7
+ export type { WikidataOptions, SparqlQueryBuilder, SparqlResultParser, SparqlBinding, SparqlResponse, } from "./structured/wikidata.js";
8
+ export { WikipediaSource, wikipedia } from "./structured/wikipedia.js";
9
+ export type { WikipediaOptions, WikipediaSection, SectionFilter, AsyncSectionFilter, } from "./structured/wikipedia.js";
10
+ export { fetchPage } from "./shared/fetch-page.js";
11
+ export type { FetchPageOptions, FetchPageResult } from "./shared/fetch-page.js";
12
+ export { searchDuckDuckGo, isDuckDuckGoCaptcha, extractUrlsFromDuckDuckGoHtml, cleanDuckDuckGoUrl, } from "./shared/duckduckgo-search.js";
13
+ export type { DuckDuckGoSearchOptions, SearchResult } from "./shared/duckduckgo-search.js";
14
+ export { WebSearchBase } from "./web-search/base.js";
15
+ export type { WebSearchOptions, LinkSelectionOptions, WebSearchResult } from "./web-search/base.js";
16
+ export { GoogleSearchSource, googleSearch } from "./web-search/google.js";
17
+ export type { GoogleSearchOptions } from "./web-search/google.js";
18
+ export { BingSearchSource, bingSearch } from "./web-search/bing.js";
19
+ export type { BingSearchOptions } from "./web-search/bing.js";
20
+ export { BraveSearchSource, braveSearch } from "./web-search/brave.js";
21
+ export type { BraveSearchOptions } from "./web-search/brave.js";
22
+ export { DuckDuckGoSearchSource, duckduckgoSearch } from "./web-search/duckduckgo.js";
23
+ export type { DuckDuckGoSourceOptions } from "./web-search/duckduckgo.js";
24
+ export { SiteSearchSource, pickBestUrl } from "./news/site-search-source.js";
25
+ export type { SiteSearchConfig } from "./news/site-search-source.js";
26
+ export { apNews, bbcNews, reuters, npr, independent, telegraph, washingtonPost, laTimes, time, newYorker, pbs, britannica, rollingStone, smithsonian, nationalGeographic, historyCom, tcm, allMusic, people, biographyCom, legacy, } from "./news/sources.js";
27
+ export { GuardianSource, guardian } from "./news/guardian.js";
28
+ export type { GuardianOptions } from "./news/guardian.js";
29
+ export { NYTimesSource, nytimes } from "./news/nytimes.js";
30
+ export type { NYTimesOptions } from "./news/nytimes.js";
31
+ export { GoogleBooksSource, googleBooks } from "./books/google-books.js";
32
+ export type { GoogleBooksOptions } from "./books/google-books.js";
33
+ export { OpenLibrarySource, openLibrary } from "./books/open-library.js";
34
+ export type { OpenLibraryOptions } from "./books/open-library.js";
35
+ export { ChroniclingAmericaSource, chroniclingAmerica } from "./archives/chronicling-america.js";
36
+ export type { ChroniclingAmericaOptions } from "./archives/chronicling-america.js";
37
+ export { TroveSource, trove } from "./archives/trove.js";
38
+ export type { TroveOptions } from "./archives/trove.js";
39
+ export { EuropeanaSource, europeana } from "./archives/europeana.js";
40
+ export type { EuropeanaOptions } from "./archives/europeana.js";
41
+ export { InternetArchiveSource, internetArchive } from "./archives/internet-archive.js";
42
+ export type { InternetArchiveOptions } from "./archives/internet-archive.js";
43
+ export { FindAGraveSource, findAGrave } from "./obituary/find-a-grave.js";
44
+ export type { FindAGraveOptions } from "./obituary/find-a-grave.js";
45
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AACA,OAAO,EACL,UAAU,EACV,eAAe,EACf,kBAAkB,EAClB,aAAa,EACb,iBAAiB,GAClB,MAAM,wBAAwB,CAAA;AAC/B,OAAO,EAAE,qBAAqB,EAAE,MAAM,iCAAiC,CAAA;AACvE,YAAY,EAAE,uBAAuB,EAAE,MAAM,iCAAiC,CAAA;AAC9E,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAA;AAC9D,OAAO,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAA;AAG3D,OAAO,EACL,cAAc,EACd,QAAQ,EACR,YAAY,EACZ,YAAY,EACZ,aAAa,EACb,iBAAiB,GAClB,MAAM,0BAA0B,CAAA;AACjC,YAAY,EACV,eAAe,EACf,kBAAkB,EAClB,kBAAkB,EAClB,aAAa,EACb,cAAc,GACf,MAAM,0BAA0B,CAAA;AAEjC,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,2BAA2B,CAAA;AACtE,YAAY,EACV,gBAAgB,EAChB,gBAAgB,EAChB,aAAa,EACb,kBAAkB,GACnB,MAAM,2BAA2B,CAAA;AAGlC,OAAO,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAA;AAClD,YAAY,EAAE,gBAAgB,EAAE,eAAe,EAAE,MAAM,wBAAwB,CAAA;AAC/E,OAAO,EACL,gBAAgB,EAChB,mBAAmB,EACnB,6BAA6B,EAC7B,kBAAkB,GACnB,MAAM,+BAA+B,CAAA;AACtC,YAAY,EAAE,uBAAuB,EAAE,YAAY,EAAE,MAAM,+BAA+B,CAAA;AAG1F,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAA;AACpD,YAAY,EAAE,gBAAgB,EAAE,oBAAoB,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAA;AAGnG,OAAO,EAAE,kBAAkB,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAA;AACzE,YAAY,EAAE,mBAAmB,EAAE,MAAM,wBAAwB,CAAA;AACjE,OAAO,EAAE,gBAAgB,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAA;AACnE,YAAY,EAAE,iBAAiB,EAAE,MAAM,sBAAsB,CAAA;AAC7D,OAAO,EAAE,iBAAiB,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAA;AACtE,YAAY,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAA;AAC/D,OAAO,EAAE,sBAAsB,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAA;AACrF,YAAY,EAAE,uBAAuB,EAAE,MAAM,4BAA4B,CAAA;AAGzE,OAAO,EAAE,gBAAgB,EAAE,WAAW,EAAE,MAAM,8BAA8B,CAAA;AAC5E,YAAY,EAAE,gBAAgB,EAAE,MAAM,8BAA8B,CAAA;AACpE,OAAO,EACL,MAAM,EACN,OAAO,EACP,OAAO,EACP,GAAG,EACH,WAAW,EACX,SAAS,EACT,cAAc,EACd,OAAO,EACP,IAAI,EACJ,SAAS,EACT,GAAG,EACH,UAAU,EACV,YAAY,EACZ,WAAW,EACX,kBAAkB,EAClB,UAAU,EACV,GAAG,EACH,QAAQ,EACR,MAAM,EACN,YAAY,EACZ,MAAM,GACP,MAAM,mBAAmB,CAAA;AAG1B,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAA;AAC7D,YAAY,EAAE,eAAe,EAAE,MAAM,oBAAoB,CAAA;AACzD,OAAO,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAA;AAC1D,YAAY,EAAE,cAAc,EAAE,MAAM,mBAAmB,CAAA;AAGvD,OAAO,EAAE,iBAA
iB,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAA;AACxE,YAAY,EAAE,kBAAkB,EAAE,MAAM,yBAAyB,CAAA;AACjE,OAAO,EAAE,iBAAiB,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAA;AACxE,YAAY,EAAE,kBAAkB,EAAE,MAAM,yBAAyB,CAAA;AAGjE,OAAO,EAAE,wBAAwB,EAAE,kBAAkB,EAAE,MAAM,mCAAmC,CAAA;AAChG,YAAY,EAAE,yBAAyB,EAAE,MAAM,mCAAmC,CAAA;AAClF,OAAO,EAAE,WAAW,EAAE,KAAK,EAAE,MAAM,qBAAqB,CAAA;AACxD,YAAY,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAA;AACvD,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,yBAAyB,CAAA;AACpE,YAAY,EAAE,gBAAgB,EAAE,MAAM,yBAAyB,CAAA;AAC/D,OAAO,EAAE,qBAAqB,EAAE,eAAe,EAAE,MAAM,gCAAgC,CAAA;AACvF,YAAY,EAAE,sBAAsB,EAAE,MAAM,gCAAgC,CAAA;AAG5E,OAAO,EAAE,gBAAgB,EAAE,UAAU,EAAE,MAAM,4BAA4B,CAAA;AACzE,YAAY,EAAE,iBAAiB,EAAE,MAAM,4BAA4B,CAAA"}
package/dist/index.js ADDED
@@ -0,0 +1,35 @@
1
+ // Shared utilities — consumers may need these for custom sources
2
+ export { htmlToText, htmlToTextClean, decodeHtmlEntities, looksLikeCode, stripCodeFromText, } from "./shared/html-utils.js";
3
+ export { extractArticleContent } from "./shared/readability-extract.js";
4
+ export { sanitizeSourceText } from "./shared/sanitize-text.js";
5
+ export { splitSearchWords } from "./shared/search-utils.js";
6
+ // Structured data sources
7
+ export { WikidataSource, wikidata, escapeSparql, isValidLabel, getValidLabel, filterValidLabels, } from "./structured/wikidata.js";
8
+ export { WikipediaSource, wikipedia } from "./structured/wikipedia.js";
9
+ // Shared utilities — fetch and search
10
+ export { fetchPage } from "./shared/fetch-page.js";
11
+ export { searchDuckDuckGo, isDuckDuckGoCaptcha, extractUrlsFromDuckDuckGoHtml, cleanDuckDuckGoUrl, } from "./shared/duckduckgo-search.js";
12
+ // Web search base (for building custom search sources)
13
+ export { WebSearchBase } from "./web-search/base.js";
14
+ // Web search sources
15
+ export { GoogleSearchSource, googleSearch } from "./web-search/google.js";
16
+ export { BingSearchSource, bingSearch } from "./web-search/bing.js";
17
+ export { BraveSearchSource, braveSearch } from "./web-search/brave.js";
18
+ export { DuckDuckGoSearchSource, duckduckgoSearch } from "./web-search/duckduckgo.js";
19
+ // News sources — site-search based
20
+ export { SiteSearchSource, pickBestUrl } from "./news/site-search-source.js";
21
+ export { apNews, bbcNews, reuters, npr, independent, telegraph, washingtonPost, laTimes, time, newYorker, pbs, britannica, rollingStone, smithsonian, nationalGeographic, historyCom, tcm, allMusic, people, biographyCom, legacy, } from "./news/sources.js";
22
+ // News sources — API based
23
+ export { GuardianSource, guardian } from "./news/guardian.js";
24
+ export { NYTimesSource, nytimes } from "./news/nytimes.js";
25
+ // Book sources
26
+ export { GoogleBooksSource, googleBooks } from "./books/google-books.js";
27
+ export { OpenLibrarySource, openLibrary } from "./books/open-library.js";
28
+ // Archive sources
29
+ export { ChroniclingAmericaSource, chroniclingAmerica } from "./archives/chronicling-america.js";
30
+ export { TroveSource, trove } from "./archives/trove.js";
31
+ export { EuropeanaSource, europeana } from "./archives/europeana.js";
32
+ export { InternetArchiveSource, internetArchive } from "./archives/internet-archive.js";
33
+ // Obituary sources
34
+ export { FindAGraveSource, findAGrave } from "./obituary/find-a-grave.js";
35
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,iEAAiE;AACjE,OAAO,EACL,UAAU,EACV,eAAe,EACf,kBAAkB,EAClB,aAAa,EACb,iBAAiB,GAClB,MAAM,wBAAwB,CAAA;AAC/B,OAAO,EAAE,qBAAqB,EAAE,MAAM,iCAAiC,CAAA;AAEvE,OAAO,EAAE,kBAAkB,EAAE,MAAM,2BAA2B,CAAA;AAC9D,OAAO,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAA;AAE3D,0BAA0B;AAC1B,OAAO,EACL,cAAc,EACd,QAAQ,EACR,YAAY,EACZ,YAAY,EACZ,aAAa,EACb,iBAAiB,GAClB,MAAM,0BAA0B,CAAA;AASjC,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,2BAA2B,CAAA;AAQtE,sCAAsC;AACtC,OAAO,EAAE,SAAS,EAAE,MAAM,wBAAwB,CAAA;AAElD,OAAO,EACL,gBAAgB,EAChB,mBAAmB,EACnB,6BAA6B,EAC7B,kBAAkB,GACnB,MAAM,+BAA+B,CAAA;AAGtC,uDAAuD;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,sBAAsB,CAAA;AAGpD,qBAAqB;AACrB,OAAO,EAAE,kBAAkB,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAA;AAEzE,OAAO,EAAE,gBAAgB,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAA;AAEnE,OAAO,EAAE,iBAAiB,EAAE,WAAW,EAAE,MAAM,uBAAuB,CAAA;AAEtE,OAAO,EAAE,sBAAsB,EAAE,gBAAgB,EAAE,MAAM,4BAA4B,CAAA;AAGrF,mCAAmC;AACnC,OAAO,EAAE,gBAAgB,EAAE,WAAW,EAAE,MAAM,8BAA8B,CAAA;AAE5E,OAAO,EACL,MAAM,EACN,OAAO,EACP,OAAO,EACP,GAAG,EACH,WAAW,EACX,SAAS,EACT,cAAc,EACd,OAAO,EACP,IAAI,EACJ,SAAS,EACT,GAAG,EACH,UAAU,EACV,YAAY,EACZ,WAAW,EACX,kBAAkB,EAClB,UAAU,EACV,GAAG,EACH,QAAQ,EACR,MAAM,EACN,YAAY,EACZ,MAAM,GACP,MAAM,mBAAmB,CAAA;AAE1B,2BAA2B;AAC3B,OAAO,EAAE,cAAc,EAAE,QAAQ,EAAE,MAAM,oBAAoB,CAAA;AAE7D,OAAO,EAAE,aAAa,EAAE,OAAO,EAAE,MAAM,mBAAmB,CAAA;AAG1D,eAAe;AACf,OAAO,EAAE,iBAAiB,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAA;AAExE,OAAO,EAAE,iBAAiB,EAAE,WAAW,EAAE,MAAM,yBAAyB,CAAA;AAGxE,kBAAkB;AAClB,OAAO,EAAE,wBAAwB,EAAE,kBAAkB,EAAE,MAAM,mCAAmC,CAAA;AAEhG,OAAO,EAAE,WAAW,EAAE,KAAK,EAAE,MAAM,qBAAqB,CAAA;AAExD,OAAO,EAAE,eAAe,EAAE,SAAS,EAAE,MAAM,yBAAyB,CAAA;AAEpE,OAAO,EAAE,qBAAqB,EAAE,eAAe,EAAE,MAAM,gCAAgC,CAAA;AAGvF,mBAAmB;AACnB,OAAO,EAAE,gBAAgB,EAAE,UAAU,EAAE,MAAM,4BAA4B,CAAA"}
@@ -0,0 +1,51 @@
1
+ /**
2
+ * The Guardian API source.
3
+ *
4
+ * Queries the Guardian Content API for biographical/profile articles
5
+ * about a research subject. Picks the best article by matching
6
+ * biographical keywords in title, then body text, falling back to
7
+ * the first result.
8
+ *
9
+ * Requires a Guardian API key (https://open-platform.theguardian.com/).
10
+ */
11
+ import { BaseResearchSource, ReliabilityTier, type BaseSourceOptions, type ResearchSubject, type RawFinding } from "@debriefer/core";
12
+ export interface GuardianOptions extends BaseSourceOptions {
13
+ /** Guardian API key. Defaults to process.env.GUARDIAN_API_KEY. */
14
+ apiKey?: string;
15
+ }
16
+ /**
17
+ * Research source backed by the Guardian Content API.
18
+ *
19
+ * Searches for biographical/profile articles about a subject,
20
+ * picks the best match by biographical keyword priority (title first, then body), and returns the
21
+ * sanitized body text.
22
+ */
23
+ export declare class GuardianSource extends BaseResearchSource<ResearchSubject> {
24
+ readonly name = "The Guardian";
25
+ readonly type = "guardian";
26
+ readonly reliabilityTier = ReliabilityTier.TIER_1_NEWS;
27
+ readonly domain = "content.guardianapis.com";
28
+ readonly isFree = true;
29
+ readonly estimatedCostPerQuery = 0;
30
+ private apiKey;
31
+ constructor(options?: GuardianOptions);
32
+ isAvailable(): boolean;
33
+ protected fetchResult(subject: ResearchSubject, signal: AbortSignal): Promise<RawFinding | null>;
34
+ /**
35
+ * Find the best article from Guardian search results.
36
+ *
37
+ * Priority:
38
+ * 1. Article with a bio keyword in the title
39
+ * 2. Article with a bio keyword in the body/standfirst
40
+ * 3. First result as fallback
41
+ */
42
+ private findBestArticle;
43
+ }
44
+ /**
45
+ * Create a Guardian API source instance.
46
+ *
47
+ * @param options - Guardian-specific options (apiKey, rateLimitMs, etc.)
48
+ * @returns A configured GuardianSource
49
+ */
50
+ export declare function guardian(options?: GuardianOptions): GuardianSource;
51
+ //# sourceMappingURL=guardian.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"guardian.d.ts","sourceRoot":"","sources":["../../src/news/guardian.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EACL,kBAAkB,EAClB,eAAe,EACf,KAAK,iBAAiB,EACtB,KAAK,eAAe,EACpB,KAAK,UAAU,EAChB,MAAM,iBAAiB,CAAA;AA0BxB,MAAM,WAAW,eAAgB,SAAQ,iBAAiB;IACxD,kEAAkE;IAClE,MAAM,CAAC,EAAE,MAAM,CAAA;CAChB;AAyBD;;;;;;GAMG;AACH,qBAAa,cAAe,SAAQ,kBAAkB,CAAC,eAAe,CAAC;IACrE,QAAQ,CAAC,IAAI,kBAAiB;IAC9B,QAAQ,CAAC,IAAI,cAAa;IAC1B,QAAQ,CAAC,eAAe,+BAA8B;IACtD,QAAQ,CAAC,MAAM,8BAA6B;IAC5C,QAAQ,CAAC,MAAM,QAAO;IACtB,QAAQ,CAAC,qBAAqB,KAAI;IAElC,OAAO,CAAC,MAAM,CAAoB;gBAEtB,OAAO,GAAE,eAAoB;IAKhC,WAAW,IAAI,OAAO;cAIf,WAAW,CACzB,OAAO,EAAE,eAAe,EACxB,MAAM,EAAE,WAAW,GAClB,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IA4C7B;;;;;;;OAOG;IACH,OAAO,CAAC,eAAe;CAoBxB;AAMD;;;;;GAKG;AACH,wBAAgB,QAAQ,CAAC,OAAO,CAAC,EAAE,eAAe,GAAG,cAAc,CAElE"}
@@ -0,0 +1,131 @@
1
+ /**
2
+ * The Guardian API source.
3
+ *
4
+ * Queries the Guardian Content API for biographical/profile articles
5
+ * about a research subject. Picks the best article by matching
6
+ * biographical keywords in title, then body text, falling back to
7
+ * the first result.
8
+ *
9
+ * Requires a Guardian API key (https://open-platform.theguardian.com/).
10
+ */
11
+ import { BaseResearchSource, ReliabilityTier, } from "@debriefer/core";
12
+ import { sanitizeSourceText } from "../shared/sanitize-text.js";
13
+ // ============================================================================
14
+ // Constants
15
+ // ============================================================================
16
/** Guardian Content API search endpoint that this module queries. */
const GUARDIAN_API_URL = "https://content.guardianapis.com/search";
/**
 * Lowercase phrases that mark an article as biographical.
 *
 * They are matched as plain substrings against lowercased article titles
 * and text, so every entry must itself stay lowercase.
 */
const BIO_KEYWORDS = [
    // Article formats
    "profile", "interview", "biography", "portrait", "memoir",
    // Life-history phrasing
    "early life", "childhood", "life story", "who is", "growing up",
];
29
+ // ============================================================================
30
+ // GuardianSource
31
+ // ============================================================================
32
/** Minimum number of characters an article's text must have to be returned. */
const GUARDIAN_MIN_TEXT_LENGTH = 200;
/**
 * Return the text that would be reported for a Guardian search result.
 *
 * Prefers the full body, then the standfirst, then the trail text; yields an
 * empty string when the result carries none of them.
 */
function guardianArticleText(article) {
    const fields = article?.fields;
    return fields?.bodyText || fields?.standfirst || fields?.trailText || "";
}
/**
 * Research source backed by the Guardian Content API.
 *
 * Builds a phrase query from the subject's name plus biographical terms,
 * requests up to ten results ordered by relevance, ranks them by
 * biographical keyword (title first, then text, then API order), and
 * returns the sanitized text of the best-ranked article that has enough
 * text to be useful.
 */
export class GuardianSource extends BaseResearchSource {
    name = "The Guardian";
    type = "guardian";
    reliabilityTier = ReliabilityTier.TIER_1_NEWS;
    domain = "content.guardianapis.com";
    isFree = true;
    estimatedCostPerQuery = 0;
    /** Guardian API key; undefined when neither the option nor the env var is set. */
    apiKey;
    /**
     * @param options - Source options. `apiKey` falls back to
     *   `process.env.GUARDIAN_API_KEY`; `rateLimitMs` defaults to 200 unless
     *   the caller supplies its own value.
     */
    constructor(options = {}) {
        // Spread after the default so a caller-provided rateLimitMs wins.
        super({ rateLimitMs: 200, ...options });
        this.apiKey = options.apiKey ?? process.env.GUARDIAN_API_KEY;
    }
    /** Reports whether an API key is configured. */
    isAvailable() {
        return Boolean(this.apiKey);
    }
    /**
     * Query the Guardian Content API for a biographical article about `subject`.
     *
     * @param subject - Research subject; only `subject.name` is read here
     * @param signal - Abort signal forwarded to `fetch`
     * @returns A finding built from the best usable article, or `null` when no
     *   API key is configured, the name is blank, the API returns no results,
     *   or no result has at least 200 characters of text after sanitizing
     * @throws Error when the API responds with a non-OK HTTP status; `fetch`
     *   and JSON parsing failures propagate unchanged
     */
    async fetchResult(subject, signal) {
        if (!this.apiKey)
            return null;
        // Embedded double quotes would terminate the phrase query early, and a
        // blank name would match arbitrary biographical articles.
        const subjectName = String(subject.name ?? "").replace(/"/g, "").trim();
        if (!subjectName)
            return null;
        const query = `"${subjectName}" AND (profile OR interview OR "early life" OR childhood OR biography)`;
        const url = new URL(GUARDIAN_API_URL);
        url.searchParams.set("api-key", this.apiKey);
        url.searchParams.set("q", query);
        url.searchParams.set("show-fields", "bodyText,standfirst,trailText");
        url.searchParams.set("page-size", "10");
        url.searchParams.set("order-by", "relevance");
        const response = await fetch(url.toString(), { signal });
        if (!response.ok) {
            throw new Error(`Guardian API error: HTTP ${response.status} ${response.statusText}`);
        }
        const data = (await response.json());
        const results = data.response?.results;
        if (!Array.isArray(results) || results.length === 0)
            return null;
        // Only articles with enough raw text can ever be returned, so rank
        // among those instead of giving up when the top pick is too short.
        let candidates = results.filter((result) => guardianArticleText(result).length >= GUARDIAN_MIN_TEXT_LENGTH);
        while (candidates.length > 0) {
            const article = this.findBestArticle(candidates);
            const sanitized = sanitizeSourceText(guardianArticleText(article));
            if (sanitized.length >= GUARDIAN_MIN_TEXT_LENGTH) {
                return {
                    text: sanitized,
                    // NOTE(review): -1 looks like a sentinel that defers scoring to
                    // the base class - confirm against @debriefer/core.
                    confidence: -1,
                    costUsd: 0,
                    url: article.webUrl,
                    publication: "The Guardian",
                    metadata: {
                        title: article.webTitle,
                    },
                };
            }
            // Sanitizing shrank this article below the threshold; try the next best.
            candidates = candidates.filter((candidate) => candidate !== article);
        }
        return null;
    }
    /**
     * Find the best article from Guardian search results.
     *
     * Priority:
     * 1. First article with a bio keyword in the title
     * 2. First article with a bio keyword in its body, standfirst or trail text
     * 3. First result as fallback
     *
     * @param results - Search results in API order
     * @returns The chosen result, or `undefined` when `results` is empty
     */
    findBestArticle(results) {
        const hasBioKeyword = (text) => {
            const lowered = text.toLowerCase();
            return BIO_KEYWORDS.some((kw) => lowered.includes(kw));
        };
        return (results.find((result) => hasBioKeyword(result.webTitle ?? "")) ??
            results.find((result) => hasBioKeyword(guardianArticleText(result))) ??
            results[0]);
    }
}
119
+ // ============================================================================
120
+ // Factory
121
+ // ============================================================================
122
/**
 * Convenience factory for the Guardian API source.
 *
 * @param options - Guardian-specific options (apiKey, rateLimitMs, etc.),
 *   handed unchanged to the GuardianSource constructor; may be omitted
 * @returns A newly constructed GuardianSource
 */
export function guardian(options) {
    const source = new GuardianSource(options);
    return source;
}
131
+ //# sourceMappingURL=guardian.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"guardian.js","sourceRoot":"","sources":["../../src/news/guardian.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EACL,kBAAkB,EAClB,eAAe,GAIhB,MAAM,iBAAiB,CAAA;AACxB,OAAO,EAAE,kBAAkB,EAAE,MAAM,4BAA4B,CAAA;AAE/D,+EAA+E;AAC/E,YAAY;AACZ,+EAA+E;AAE/E,MAAM,gBAAgB,GAAG,yCAAyC,CAAA;AAElE,MAAM,YAAY,GAAG;IACnB,SAAS;IACT,WAAW;IACX,YAAY;IACZ,WAAW;IACX,WAAW;IACX,YAAY;IACZ,UAAU;IACV,QAAQ;IACR,YAAY;IACZ,QAAQ;CACT,CAAA;AA8BD,+EAA+E;AAC/E,iBAAiB;AACjB,+EAA+E;AAE/E;;;;;;GAMG;AACH,MAAM,OAAO,cAAe,SAAQ,kBAAmC;IAC5D,IAAI,GAAG,cAAc,CAAA;IACrB,IAAI,GAAG,UAAU,CAAA;IACjB,eAAe,GAAG,eAAe,CAAC,WAAW,CAAA;IAC7C,MAAM,GAAG,0BAA0B,CAAA;IACnC,MAAM,GAAG,IAAI,CAAA;IACb,qBAAqB,GAAG,CAAC,CAAA;IAE1B,MAAM,CAAoB;IAElC,YAAY,UAA2B,EAAE;QACvC,KAAK,CAAC,EAAE,WAAW,EAAE,GAAG,EAAE,GAAG,OAAO,EAAE,CAAC,CAAA;QACvC,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAA;IAC9D,CAAC;IAEQ,WAAW;QAClB,OAAO,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IAC7B,CAAC;IAES,KAAK,CAAC,WAAW,CACzB,OAAwB,EACxB,MAAmB;QAEnB,IAAI,CAAC,IAAI,CAAC,MAAM;YAAE,OAAO,IAAI,CAAA;QAE7B,MAAM,KAAK,GAAG,IAAI,OAAO,CAAC,IAAI,wEAAwE,CAAA;QAEtG,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,gBAAgB,CAAC,CAAA;QACrC,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,CAAA;QAC5C,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;QAChC,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,aAAa,EAAE,+BAA+B,CAAC,CAAA;QACpE,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,WAAW,EAAE,IAAI,CAAC,CAAA;QACvC,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,UAAU,EAAE,WAAW,CAAC,CAAA;QAE7C,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC,CAAA;QAExD,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,4BAA4B,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAA;QACvF,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAwB,CAAA;QAE3D,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,OAAO,EAAE,MAAM;YAAE,OAAO,IAAI,CAAA;QAEhD,qDAAqD;QACrD,MAAM,OAAO,GAAG,IAAI,CAAC,eAAe,CAAC,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAA;QAC3D,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAA;QAEzB,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,EAAE,QAA
Q,IAAI,OAAO,CAAC,MAAM,EAAE,UAAU,IAAI,OAAO,CAAC,MAAM,EAAE,SAAS,CAAA;QAChG,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG;YAAE,OAAO,IAAI,CAAA;QAE3C,MAAM,SAAS,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAA;QAC1C,IAAI,SAAS,CAAC,MAAM,GAAG,GAAG;YAAE,OAAO,IAAI,CAAA;QAEvC,OAAO;YACL,IAAI,EAAE,SAAS;YACf,UAAU,EAAE,CAAC,CAAC;YACd,OAAO,EAAE,CAAC;YACV,GAAG,EAAE,OAAO,CAAC,MAAM;YACnB,WAAW,EAAE,cAAc;YAC3B,QAAQ,EAAE;gBACR,KAAK,EAAE,OAAO,CAAC,QAAQ;aACxB;SACF,CAAA;IACH,CAAC;IAED;;;;;;;OAOG;IACK,eAAe,CAAC,OAA0B;QAChD,wBAAwB;QACxB,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,MAAM,KAAK,GAAG,MAAM,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAA;YAC3C,IAAI,YAAY,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;gBAClD,OAAO,MAAM,CAAA;YACf,CAAC;QACH,CAAC;QAED,6BAA6B;QAC7B,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,MAAM,IAAI,GAAG,CAAC,MAAM,CAAC,MAAM,EAAE,QAAQ,IAAI,MAAM,CAAC,MAAM,EAAE,UAAU,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAA;YACvF,IAAI,YAAY,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;gBACjD,OAAO,MAAM,CAAA;YACf,CAAC;QACH,CAAC;QAED,yBAAyB;QACzB,OAAO,OAAO,CAAC,CAAC,CAAC,CAAA;IACnB,CAAC;CACF;AAED,+EAA+E;AAC/E,UAAU;AACV,+EAA+E;AAE/E;;;;;GAKG;AACH,MAAM,UAAU,QAAQ,CAAC,OAAyB;IAChD,OAAO,IAAI,cAAc,CAAC,OAAO,CAAC,CAAA;AACpC,CAAC"}
@@ -0,0 +1,27 @@
1
+ /**
2
+ * New York Times Article Search API source.
3
+ *
4
+ * Uses the NYT Article Search API v2 to find biographical/profile articles.
5
+ * Unlike site-search sources that scrape full article text, the NYT API only
6
+ * returns partial content (lead_paragraph, abstract, snippet), so confidence
7
+ * is fixed at 0.7 rather than delegating to base class keyword scoring.
8
+ */
9
+ import { BaseResearchSource, ReliabilityTier, type BaseSourceOptions, type ResearchSubject, type RawFinding } from "@debriefer/core";
10
+ export interface NYTimesOptions extends BaseSourceOptions { // Options accepted by the NYTimesSource constructor and the nytimes() factory.
11
+ apiKey?: string; // NYT API key; when omitted, the compiled constructor falls back to process.env.NYTIMES_API_KEY.
12
+ }
13
+ export declare class NYTimesSource extends BaseResearchSource<ResearchSubject> { // Research source that queries the NYT Article Search API for a subject.
14
+ readonly name = "The New York Times";
15
+ readonly type = "nytimes";
16
+ readonly reliabilityTier = ReliabilityTier.TIER_1_NEWS;
17
+ readonly domain = "api.nytimes.com";
18
+ readonly isFree = true;
19
+ readonly estimatedCostPerQuery = 0;
20
+ private apiKey; // Taken from options.apiKey, else process.env.NYTIMES_API_KEY (see compiled constructor).
21
+ constructor(options?: NYTimesOptions);
22
+ isAvailable(): boolean; // True only when an API key is configured.
23
+ protected fetchResult(subject: ResearchSubject, signal: AbortSignal): Promise<RawFinding | null>; // Resolves to null when no key is set or no article yields at least 100 characters of text; rejects on a non-OK HTTP response.
24
+ private findBestArticle; // Prefers a doc whose headline, then abstract/snippet, contains a biography keyword; otherwise the first doc.
25
+ }
26
+ export declare function nytimes(options?: NYTimesOptions): NYTimesSource; // Factory: returns a new NYTimesSource constructed with the given options.
27
+ //# sourceMappingURL=nytimes.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"nytimes.d.ts","sourceRoot":"","sources":["../../src/news/nytimes.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EACL,kBAAkB,EAClB,eAAe,EACf,KAAK,iBAAiB,EACtB,KAAK,eAAe,EACpB,KAAK,UAAU,EAChB,MAAM,iBAAiB,CAAA;AAkBxB,MAAM,WAAW,cAAe,SAAQ,iBAAiB;IACvD,MAAM,CAAC,EAAE,MAAM,CAAA;CAChB;AAED,qBAAa,aAAc,SAAQ,kBAAkB,CAAC,eAAe,CAAC;IACpE,QAAQ,CAAC,IAAI,wBAAuB;IACpC,QAAQ,CAAC,IAAI,aAAY;IACzB,QAAQ,CAAC,eAAe,+BAA8B;IACtD,QAAQ,CAAC,MAAM,qBAAoB;IACnC,QAAQ,CAAC,MAAM,QAAO;IACtB,QAAQ,CAAC,qBAAqB,KAAI;IAElC,OAAO,CAAC,MAAM,CAAoB;gBAEtB,OAAO,GAAE,cAAmB;IAK/B,WAAW,IAAI,OAAO;cAIf,WAAW,CACzB,OAAO,EAAE,eAAe,EACxB,MAAM,EAAE,WAAW,GAClB,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;IA4D7B,OAAO,CAAC,eAAe;CA4BxB;AAED,wBAAgB,OAAO,CAAC,OAAO,CAAC,EAAE,cAAc,GAAG,aAAa,CAE/D"}
@@ -0,0 +1,104 @@
1
+ /**
2
+ * New York Times Article Search API source.
3
+ *
4
+ * Uses the NYT Article Search API v2 to find biographical/profile articles.
5
+ * Unlike site-search sources that scrape full article text, the NYT API only
6
+ * returns partial content (lead_paragraph, abstract, snippet), so confidence
7
+ * is fixed at 0.7 rather than delegating to base class keyword scoring.
8
+ */
9
+ import { BaseResearchSource, ReliabilityTier, } from "@debriefer/core";
10
+ import { sanitizeSourceText } from "../shared/sanitize-text.js";
11
+ const NYT_API_URL = "https://api.nytimes.com/svc/search/v2/articlesearch.json"; // Article Search API v2 endpoint queried by fetchResult.
12
+ const BIO_KEYWORDS = [ // Lowercase phrases that mark an article as biographical; findBestArticle tests them with String.includes against lowercased text.
13
+ "profile",
14
+ "interview",
15
+ "early life",
16
+ "childhood",
17
+ "biography",
18
+ "life of",
19
+ "portrait",
20
+ "who is",
21
+ "growing up",
22
+ "personal", // Substring match, so this also hits longer words such as "personality".
23
+ ];
24
/**
 * Research source backed by the New York Times Article Search API.
 *
 * The source is only usable when an API key is available, either through
 * `options.apiKey` or the `NYTIMES_API_KEY` environment variable. The API
 * returns partial article text only, so every finding carries a fixed
 * confidence of 0.7.
 */
export class NYTimesSource extends BaseResearchSource {
    name = "The New York Times";
    type = "nytimes";
    reliabilityTier = ReliabilityTier.TIER_1_NEWS;
    domain = "api.nytimes.com";
    isFree = true;
    estimatedCostPerQuery = 0;
    /** API key sent as the `api-key` query parameter; undefined when not configured. */
    apiKey;

    /**
     * @param options - Source options. `rateLimitMs` defaults to 6000 and can
     *   be overridden by the caller; everything is forwarded to the base class.
     */
    constructor(options = {}) {
        // NOTE(review): NYT's developer FAQ is believed to recommend ~12 s between
        // calls (5 requests/minute) — confirm that a 6000 ms default is still enough.
        super({ rateLimitMs: 6000, ...options });
        this.apiKey = options.apiKey ?? process.env.NYTIMES_API_KEY;
    }

    /**
     * @returns `true` when an API key is configured, `false` otherwise.
     */
    isAvailable() {
        return Boolean(this.apiKey);
    }

    /**
     * Search the NYT API for a biographical article about the subject.
     *
     * @param subject - Subject to research; `subject.name` is searched as a quoted phrase.
     * @param signal - Abort signal forwarded to `fetch`.
     * @returns A finding built from the article's text fields, or `null` when
     *   no API key is set, no document has a URL, or the combined text is
     *   shorter than 100 characters (before or after sanitizing).
     * @throws Error when the API responds with a non-OK HTTP status.
     */
    async fetchResult(subject, signal) {
        if (!this.apiKey)
            return null;

        const query = `"${subject.name}" (biography OR profile OR interview)`;
        const url = new URL(NYT_API_URL);
        url.searchParams.set("api-key", this.apiKey);
        url.searchParams.set("q", query);
        url.searchParams.set("sort", "relevance");
        url.searchParams.set("fq", 'document_type:("article")');

        const response = await fetch(url.toString(), { signal });
        if (!response.ok) {
            throw new Error(`NYT API error: HTTP ${response.status} ${response.statusText}`);
        }

        const data = await response.json();
        // `data?.` guards against a JSON `null` body, which would otherwise throw a TypeError.
        if (!data?.response?.docs?.length)
            return null;

        // Documents without a URL cannot be cited, so drop them before ranking.
        const docsWithUrls = data.response.docs.filter((d) => d.web_url);
        if (docsWithUrls.length === 0)
            return null;

        const doc = this.findBestArticle(docsWithUrls);
        if (!doc)
            return null;

        // Combine the available text fields (the API does not return the full body).
        // Fields are trimmed and de-duplicated: when two fields carry the same string,
        // repeating it would pad the finding and let a short blurb pass the
        // 100-character minimum below.
        const candidates = [doc.lead_paragraph, doc.abstract, doc.snippet]
            .map((field) => (typeof field === "string" ? field.trim() : ""))
            .filter((field) => field.length > 0);
        const parts = [...new Set(candidates)];
        const text = parts.join("\n\n");
        if (text.length < 100)
            return null;

        const sanitized = sanitizeSourceText(text);
        if (sanitized.length < 100)
            return null;

        // Confidence is a fixed 0.7 because only partial content is available.
        return {
            text: sanitized,
            confidence: 0.7,
            costUsd: 0,
            url: doc.web_url,
            publication: "The New York Times",
            metadata: {
                title: doc.headline?.main ?? "",
            },
        };
    }

    /**
     * Choose the document most likely to be biographical.
     *
     * Order of preference: first document whose headline contains a
     * BIO_KEYWORDS entry, then first document whose abstract or snippet does,
     * then simply the first document.
     *
     * @param docs - Candidate documents in API order.
     * @returns The chosen document, or `undefined` when `docs` is empty.
     */
    findBestArticle(docs) {
        const mentionsBio = (text) => BIO_KEYWORDS.some((kw) => text.includes(kw));

        const byHeadline = docs.find((doc) => mentionsBio((doc.headline?.main ?? "").toLowerCase()));
        if (byHeadline)
            return byHeadline;

        const byBlurb = docs.find((doc) => mentionsBio(`${doc.abstract ?? ""} ${doc.snippet ?? ""}`.toLowerCase()));
        if (byBlurb)
            return byBlurb;

        // Fallback: first result.
        return docs[0];
    }
}
101
/**
 * Factory for the New York Times research source.
 *
 * @param options - Forwarded unchanged to the NYTimesSource constructor.
 * @returns A newly constructed NYTimesSource.
 */
export function nytimes(options) {
    const source = new NYTimesSource(options);
    return source;
}
104
+ //# sourceMappingURL=nytimes.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"nytimes.js","sourceRoot":"","sources":["../../src/news/nytimes.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,EACL,kBAAkB,EAClB,eAAe,GAIhB,MAAM,iBAAiB,CAAA;AACxB,OAAO,EAAE,kBAAkB,EAAE,MAAM,4BAA4B,CAAA;AAE/D,MAAM,WAAW,GAAG,0DAA0D,CAAA;AAE9E,MAAM,YAAY,GAAG;IACnB,SAAS;IACT,WAAW;IACX,YAAY;IACZ,WAAW;IACX,WAAW;IACX,SAAS;IACT,UAAU;IACV,QAAQ;IACR,YAAY;IACZ,UAAU;CACX,CAAA;AAMD,MAAM,OAAO,aAAc,SAAQ,kBAAmC;IAC3D,IAAI,GAAG,oBAAoB,CAAA;IAC3B,IAAI,GAAG,SAAS,CAAA;IAChB,eAAe,GAAG,eAAe,CAAC,WAAW,CAAA;IAC7C,MAAM,GAAG,iBAAiB,CAAA;IAC1B,MAAM,GAAG,IAAI,CAAA;IACb,qBAAqB,GAAG,CAAC,CAAA;IAE1B,MAAM,CAAoB;IAElC,YAAY,UAA0B,EAAE;QACtC,KAAK,CAAC,EAAE,WAAW,EAAE,IAAI,EAAE,GAAG,OAAO,EAAE,CAAC,CAAA;QACxC,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,eAAe,CAAA;IAC7D,CAAC;IAEQ,WAAW;QAClB,OAAO,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;IAC7B,CAAC;IAES,KAAK,CAAC,WAAW,CACzB,OAAwB,EACxB,MAAmB;QAEnB,IAAI,CAAC,IAAI,CAAC,MAAM;YAAE,OAAO,IAAI,CAAA;QAE7B,MAAM,KAAK,GAAG,IAAI,OAAO,CAAC,IAAI,uCAAuC,CAAA;QAErE,MAAM,GAAG,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,CAAA;QAChC,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,CAAA;QAC5C,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,GAAG,EAAE,KAAK,CAAC,CAAA;QAChC,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,MAAM,EAAE,WAAW,CAAC,CAAA;QACzC,GAAG,CAAC,YAAY,CAAC,GAAG,CAAC,IAAI,EAAE,2BAA2B,CAAC,CAAA;QAEvD,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,CAAC,CAAA;QAExD,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,uBAAuB,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,EAAE,CAAC,CAAA;QAClF,CAAC;QAED,MAAM,IAAI,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAWlC,CAAA;QAED,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,IAAI,EAAE,MAAM;YAAE,OAAO,IAAI,CAAA;QAE7C,uDAAuD;QACvD,MAAM,YAAY,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,CAAA;QAChE,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC;YAAE,OAAO,IAAI,CAAA;QAE1C,MAAM,GAAG,GAAG,IAAI,CAAC,eAAe,CAAC,YAAY,CAAC,CAAA;QAC9C,IAAI,CAAC,GAAG;YAAE,OAAO,IAAI,CAAA;QAErB,mEAAmE;QACnE,MAAM,KAAK,GAAG,CAAC,GAAG,CAAC,cAAc,EAAE
,GAAG,CAAC,QAAQ,EAAE,GAAG,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAA;QAC7E,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAA;QAC/B,IAAI,IAAI,CAAC,MAAM,GAAG,GAAG;YAAE,OAAO,IAAI,CAAA;QAElC,MAAM,SAAS,GAAG,kBAAkB,CAAC,IAAI,CAAC,CAAA;QAC1C,IAAI,SAAS,CAAC,MAAM,GAAG,GAAG;YAAE,OAAO,IAAI,CAAA;QAEvC,+DAA+D;QAC/D,OAAO;YACL,IAAI,EAAE,SAAS;YACf,UAAU,EAAE,GAAG;YACf,OAAO,EAAE,CAAC;YACV,GAAG,EAAE,GAAG,CAAC,OAAO;YAChB,WAAW,EAAE,oBAAoB;YACjC,QAAQ,EAAE;gBACR,KAAK,EAAE,GAAG,CAAC,QAAQ,EAAE,IAAI,IAAI,EAAE;aAChC;SACF,CAAA;IACH,CAAC;IAEO,eAAe,CACrB,IAME;QAEF,2BAA2B;QAC3B,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,MAAM,KAAK,GAAG,CAAC,GAAG,CAAC,QAAQ,EAAE,IAAI,IAAI,EAAE,CAAC,CAAC,WAAW,EAAE,CAAA;YACtD,IAAI,YAAY,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;gBAClD,OAAO,GAAG,CAAA;YACZ,CAAC;QACH,CAAC;QAED,oCAAoC;QACpC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,MAAM,IAAI,GAAG,GAAG,GAAG,CAAC,QAAQ,IAAI,EAAE,IAAI,GAAG,CAAC,OAAO,IAAI,EAAE,EAAE,CAAC,WAAW,EAAE,CAAA;YACvE,IAAI,YAAY,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;gBACjD,OAAO,GAAG,CAAA;YACZ,CAAC;QACH,CAAC;QAED,yBAAyB;QACzB,OAAO,IAAI,CAAC,CAAC,CAAC,CAAA;IAChB,CAAC;CACF;AAED,MAAM,UAAU,OAAO,CAAC,OAAwB;IAC9C,OAAO,IAAI,aAAa,CAAC,OAAO,CAAC,CAAA;AACnC,CAAC"}
@@ -0,0 +1,89 @@
1
+ /**
2
+ * Configurable site-search source for news and reference sites.
3
+ *
4
+ * Uses DuckDuckGo `site:` search to find articles on a specific domain,
5
+ * picks the best URL based on path preferences, fetches the page,
6
+ * extracts article content, and returns a sanitized finding.
7
+ *
8
+ * This is the reusable base that 19+ news/reference source factories
9
+ * instantiate with different SiteSearchConfig values (AP, BBC, Reuters, etc.).
10
+ */
11
+ import { BaseResearchSource, type BaseSourceOptions, type ResearchSubject, type RawFinding, type ReliabilityTier } from "@debriefer/core";
12
+ import { type SearchResult } from "../shared/duckduckgo-search.js";
13
+ /** Configuration for a site-search source; passed as the first argument to the SiteSearchSource constructor. */
14
+ export interface SiteSearchConfig {
15
+ /** Display name (e.g., "AP News"). */
16
+ name: string;
17
+ /** Source type identifier (e.g., "ap-news"). */
18
+ type: string;
19
+ /** Primary domain to search (e.g., "apnews.com"). */
20
+ domain: string;
21
+ /** Additional domains to search alongside the primary one (e.g., ["bbc.co.uk"]). */
22
+ additionalDomains?: string[];
23
+ /** Reliability tier from the RSP scale. */
24
+ reliabilityTier: ReliabilityTier;
25
+ /** Rate limit delay in milliseconds. Default: 1500. */
26
+ rateLimitMs?: number;
27
+ /** URL paths to boost when selecting results (e.g., ["/article/"]). */
28
+ preferredPaths?: string[];
29
+ /** URL paths to penalize when selecting results (e.g., ["/gallery/"]). */
30
+ avoidPaths?: string[];
31
+ /** Additional query terms appended after the quoted subject name (e.g., "biography OR profile"); omitted from the query if empty. */
32
+ queryTerms?: string;
33
+ /** Minimum extracted text length in characters. Default: 200. */
34
+ minContentLength?: number;
35
+ }
36
+ /**
37
+ * Pick the best URL from search results based on path preferences.
38
+ *
39
+ * Scoring:
40
+ * - +10 for each preferredPaths match found in the URL pathname
41
+ * - -10 for each avoidPaths match found in the URL pathname
42
+ * - Returns highest-scored URL, falling back to first result
43
+ * - Returns null for empty array
44
+ *
45
+ * @param results - Array of search results from DuckDuckGo (DDG)
46
+ * @param options - Optional path preferences: `preferredPaths` to boost and `avoidPaths` to penalize
47
+ * @returns The best URL string, or null if `results` is empty
48
+ */
49
+ export declare function pickBestUrl(results: SearchResult[], options?: {
50
+ preferredPaths?: string[];
51
+ avoidPaths?: string[];
52
+ }): string | null;
53
+ /**
54
+ * A configurable research source that searches a specific news/reference
55
+ * site via DuckDuckGo `site:` queries.
56
+ *
57
+ * Metadata (name, type, reliabilityTier, domain) is derived from the provided
58
+ * SiteSearchConfig; isFree and estimatedCostPerQuery are declared as the constants `true` and `0`.
59
+ */
60
+ export declare class SiteSearchSource extends BaseResearchSource<ResearchSubject> {
61
+ readonly name: string;
62
+ readonly type: string;
63
+ readonly reliabilityTier: ReliabilityTier;
64
+ readonly domain: string;
65
+ readonly isFree = true;
66
+ readonly estimatedCostPerQuery = 0;
67
+ private readonly config;
68
+ constructor(config: SiteSearchConfig, options?: BaseSourceOptions);
69
+ /**
70
+ * Build the search query string for this subject.
71
+ *
72
+ * Format: `"subject name" queryTerms` (queryTerms omitted if empty).
73
+ */
74
+ buildQuery(subject: ResearchSubject): string;
75
+ /**
76
+ * Fetch a finding from this news/reference site.
77
+ *
78
+ * Pipeline:
79
+ * 1. Search DDG with `site:domain` filter
80
+ * 2. Pick best URL based on path preferences
81
+ * 3. Fetch the page
82
+ * 4. Extract article content via Readability
83
+ * 5. Check minimum content length
84
+ * 6. Sanitize text
85
+ * 7. Return RawFinding with metadata (per the signature, the promise may instead resolve to null)
86
+ */
87
+ protected fetchResult(subject: ResearchSubject, signal: AbortSignal): Promise<RawFinding | null>;
88
+ }
89
+ //# sourceMappingURL=site-search-source.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"site-search-source.d.ts","sourceRoot":"","sources":["../../src/news/site-search-source.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,EACL,kBAAkB,EAClB,KAAK,iBAAiB,EACtB,KAAK,eAAe,EACpB,KAAK,UAAU,EACf,KAAK,eAAe,EACrB,MAAM,iBAAiB,CAAA;AACxB,OAAO,EAAoB,KAAK,YAAY,EAAE,MAAM,gCAAgC,CAAA;AASpF,8CAA8C;AAC9C,MAAM,WAAW,gBAAgB;IAC/B,sCAAsC;IACtC,IAAI,EAAE,MAAM,CAAA;IACZ,gDAAgD;IAChD,IAAI,EAAE,MAAM,CAAA;IACZ,qDAAqD;IACrD,MAAM,EAAE,MAAM,CAAA;IACd,0DAA0D;IAC1D,iBAAiB,CAAC,EAAE,MAAM,EAAE,CAAA;IAC5B,2CAA2C;IAC3C,eAAe,EAAE,eAAe,CAAA;IAChC,uDAAuD;IACvD,WAAW,CAAC,EAAE,MAAM,CAAA;IACpB,uEAAuE;IACvE,cAAc,CAAC,EAAE,MAAM,EAAE,CAAA;IACzB,0EAA0E;IAC1E,UAAU,CAAC,EAAE,MAAM,EAAE,CAAA;IACrB,oFAAoF;IACpF,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,iEAAiE;IACjE,gBAAgB,CAAC,EAAE,MAAM,CAAA;CAC1B;AAMD;;;;;;;;;;;;GAYG;AACH,wBAAgB,WAAW,CACzB,OAAO,EAAE,YAAY,EAAE,EACvB,OAAO,CAAC,EAAE;IAAE,cAAc,CAAC,EAAE,MAAM,EAAE,CAAC;IAAC,UAAU,CAAC,EAAE,MAAM,EAAE,CAAA;CAAE,GAC7D,MAAM,GAAG,IAAI,CA8Cf;AAMD;;;;;;GAMG;AACH,qBAAa,gBAAiB,SAAQ,kBAAkB,CAAC,eAAe,CAAC;IACvE,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAA;IACrB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAA;IACrB,QAAQ,CAAC,eAAe,EAAE,eAAe,CAAA;IACzC,QAAQ,CAAC,MAAM,EAAE,MAAM,CAAA;IACvB,QAAQ,CAAC,MAAM,QAAO;IACtB,QAAQ,CAAC,qBAAqB,KAAI;IAElC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAkB;gBAE7B,MAAM,EAAE,gBAAgB,EAAE,OAAO,CAAC,EAAE,iBAAiB;IASjE;;;;OAIG;IACH,UAAU,CAAC,OAAO,EAAE,eAAe,GAAG,MAAM;IAQ5C;;;;;;;;;;;OAWG;cACa,WAAW,CACzB,OAAO,EAAE,eAAe,EACxB,MAAM,EAAE,WAAW,GAClB,OAAO,CAAC,UAAU,GAAG,IAAI,CAAC;CAuE9B"}