@debriefer/sources 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (194)
  1. package/README.md +59 -0
  2. package/dist/__tests__/archives/chronicling-america.test.d.ts +8 -0
  3. package/dist/__tests__/archives/chronicling-america.test.d.ts.map +1 -0
  4. package/dist/__tests__/archives/chronicling-america.test.js +151 -0
  5. package/dist/__tests__/archives/chronicling-america.test.js.map +1 -0
  6. package/dist/__tests__/archives/europeana.test.d.ts +8 -0
  7. package/dist/__tests__/archives/europeana.test.d.ts.map +1 -0
  8. package/dist/__tests__/archives/europeana.test.js +200 -0
  9. package/dist/__tests__/archives/europeana.test.js.map +1 -0
  10. package/dist/__tests__/archives/internet-archive.test.d.ts +8 -0
  11. package/dist/__tests__/archives/internet-archive.test.d.ts.map +1 -0
  12. package/dist/__tests__/archives/internet-archive.test.js +189 -0
  13. package/dist/__tests__/archives/internet-archive.test.js.map +1 -0
  14. package/dist/__tests__/archives/trove.test.d.ts +8 -0
  15. package/dist/__tests__/archives/trove.test.d.ts.map +1 -0
  16. package/dist/__tests__/archives/trove.test.js +202 -0
  17. package/dist/__tests__/archives/trove.test.js.map +1 -0
  18. package/dist/__tests__/books/google-books.test.d.ts +8 -0
  19. package/dist/__tests__/books/google-books.test.d.ts.map +1 -0
  20. package/dist/__tests__/books/google-books.test.js +221 -0
  21. package/dist/__tests__/books/google-books.test.js.map +1 -0
  22. package/dist/__tests__/books/open-library.test.d.ts +8 -0
  23. package/dist/__tests__/books/open-library.test.d.ts.map +1 -0
  24. package/dist/__tests__/books/open-library.test.js +159 -0
  25. package/dist/__tests__/books/open-library.test.js.map +1 -0
  26. package/dist/__tests__/news/guardian.test.d.ts +9 -0
  27. package/dist/__tests__/news/guardian.test.d.ts.map +1 -0
  28. package/dist/__tests__/news/guardian.test.js +224 -0
  29. package/dist/__tests__/news/guardian.test.js.map +1 -0
  30. package/dist/__tests__/news/nytimes.test.d.ts +9 -0
  31. package/dist/__tests__/news/nytimes.test.d.ts.map +1 -0
  32. package/dist/__tests__/news/nytimes.test.js +271 -0
  33. package/dist/__tests__/news/nytimes.test.js.map +1 -0
  34. package/dist/__tests__/news/site-search-source.test.d.ts +9 -0
  35. package/dist/__tests__/news/site-search-source.test.d.ts.map +1 -0
  36. package/dist/__tests__/news/site-search-source.test.js +342 -0
  37. package/dist/__tests__/news/site-search-source.test.js.map +1 -0
  38. package/dist/__tests__/obituary/find-a-grave.test.d.ts +8 -0
  39. package/dist/__tests__/obituary/find-a-grave.test.d.ts.map +1 -0
  40. package/dist/__tests__/obituary/find-a-grave.test.js +238 -0
  41. package/dist/__tests__/obituary/find-a-grave.test.js.map +1 -0
  42. package/dist/__tests__/shared/duckduckgo-search.test.d.ts +9 -0
  43. package/dist/__tests__/shared/duckduckgo-search.test.d.ts.map +1 -0
  44. package/dist/__tests__/shared/duckduckgo-search.test.js +218 -0
  45. package/dist/__tests__/shared/duckduckgo-search.test.js.map +1 -0
  46. package/dist/__tests__/shared/fetch-page.test.d.ts +9 -0
  47. package/dist/__tests__/shared/fetch-page.test.d.ts.map +1 -0
  48. package/dist/__tests__/shared/fetch-page.test.js +281 -0
  49. package/dist/__tests__/shared/fetch-page.test.js.map +1 -0
  50. package/dist/__tests__/shared/html-utils.test.d.ts +2 -0
  51. package/dist/__tests__/shared/html-utils.test.d.ts.map +1 -0
  52. package/dist/__tests__/shared/html-utils.test.js +169 -0
  53. package/dist/__tests__/shared/html-utils.test.js.map +1 -0
  54. package/dist/__tests__/shared/readability-extract.test.d.ts +2 -0
  55. package/dist/__tests__/shared/readability-extract.test.d.ts.map +1 -0
  56. package/dist/__tests__/shared/readability-extract.test.js +107 -0
  57. package/dist/__tests__/shared/readability-extract.test.js.map +1 -0
  58. package/dist/__tests__/shared/sanitize-text.test.d.ts +2 -0
  59. package/dist/__tests__/shared/sanitize-text.test.d.ts.map +1 -0
  60. package/dist/__tests__/shared/sanitize-text.test.js +77 -0
  61. package/dist/__tests__/shared/sanitize-text.test.js.map +1 -0
  62. package/dist/__tests__/shared/search-utils.test.d.ts +2 -0
  63. package/dist/__tests__/shared/search-utils.test.d.ts.map +1 -0
  64. package/dist/__tests__/shared/search-utils.test.js +26 -0
  65. package/dist/__tests__/shared/search-utils.test.js.map +1 -0
  66. package/dist/__tests__/structured/wikidata.test.d.ts +9 -0
  67. package/dist/__tests__/structured/wikidata.test.d.ts.map +1 -0
  68. package/dist/__tests__/structured/wikidata.test.js +509 -0
  69. package/dist/__tests__/structured/wikidata.test.js.map +1 -0
  70. package/dist/__tests__/structured/wikipedia.test.d.ts +9 -0
  71. package/dist/__tests__/structured/wikipedia.test.d.ts.map +1 -0
  72. package/dist/__tests__/structured/wikipedia.test.js +643 -0
  73. package/dist/__tests__/structured/wikipedia.test.js.map +1 -0
  74. package/dist/__tests__/web-search/base.test.d.ts +9 -0
  75. package/dist/__tests__/web-search/base.test.d.ts.map +1 -0
  76. package/dist/__tests__/web-search/base.test.js +622 -0
  77. package/dist/__tests__/web-search/base.test.js.map +1 -0
  78. package/dist/__tests__/web-search/bing.test.d.ts +10 -0
  79. package/dist/__tests__/web-search/bing.test.d.ts.map +1 -0
  80. package/dist/__tests__/web-search/bing.test.js +277 -0
  81. package/dist/__tests__/web-search/bing.test.js.map +1 -0
  82. package/dist/__tests__/web-search/brave.test.d.ts +10 -0
  83. package/dist/__tests__/web-search/brave.test.d.ts.map +1 -0
  84. package/dist/__tests__/web-search/brave.test.js +264 -0
  85. package/dist/__tests__/web-search/brave.test.js.map +1 -0
  86. package/dist/__tests__/web-search/duckduckgo.test.d.ts +10 -0
  87. package/dist/__tests__/web-search/duckduckgo.test.d.ts.map +1 -0
  88. package/dist/__tests__/web-search/duckduckgo.test.js +107 -0
  89. package/dist/__tests__/web-search/duckduckgo.test.js.map +1 -0
  90. package/dist/__tests__/web-search/google.test.d.ts +9 -0
  91. package/dist/__tests__/web-search/google.test.d.ts.map +1 -0
  92. package/dist/__tests__/web-search/google.test.js +189 -0
  93. package/dist/__tests__/web-search/google.test.js.map +1 -0
  94. package/dist/archives/chronicling-america.d.ts +33 -0
  95. package/dist/archives/chronicling-america.d.ts.map +1 -0
  96. package/dist/archives/chronicling-america.js +85 -0
  97. package/dist/archives/chronicling-america.js.map +1 -0
  98. package/dist/archives/europeana.d.ts +37 -0
  99. package/dist/archives/europeana.d.ts.map +1 -0
  100. package/dist/archives/europeana.js +92 -0
  101. package/dist/archives/europeana.js.map +1 -0
  102. package/dist/archives/internet-archive.d.ts +32 -0
  103. package/dist/archives/internet-archive.d.ts.map +1 -0
  104. package/dist/archives/internet-archive.js +90 -0
  105. package/dist/archives/internet-archive.js.map +1 -0
  106. package/dist/archives/trove.d.ts +37 -0
  107. package/dist/archives/trove.d.ts.map +1 -0
  108. package/dist/archives/trove.js +97 -0
  109. package/dist/archives/trove.js.map +1 -0
  110. package/dist/books/google-books.d.ts +48 -0
  111. package/dist/books/google-books.d.ts.map +1 -0
  112. package/dist/books/google-books.js +111 -0
  113. package/dist/books/google-books.js.map +1 -0
  114. package/dist/books/open-library.d.ts +44 -0
  115. package/dist/books/open-library.d.ts.map +1 -0
  116. package/dist/books/open-library.js +103 -0
  117. package/dist/books/open-library.js.map +1 -0
  118. package/dist/index.d.ts +45 -0
  119. package/dist/index.d.ts.map +1 -0
  120. package/dist/index.js +35 -0
  121. package/dist/index.js.map +1 -0
  122. package/dist/news/guardian.d.ts +51 -0
  123. package/dist/news/guardian.d.ts.map +1 -0
  124. package/dist/news/guardian.js +131 -0
  125. package/dist/news/guardian.js.map +1 -0
  126. package/dist/news/nytimes.d.ts +27 -0
  127. package/dist/news/nytimes.d.ts.map +1 -0
  128. package/dist/news/nytimes.js +104 -0
  129. package/dist/news/nytimes.js.map +1 -0
  130. package/dist/news/site-search-source.d.ts +89 -0
  131. package/dist/news/site-search-source.d.ts.map +1 -0
  132. package/dist/news/site-search-source.js +182 -0
  133. package/dist/news/site-search-source.js.map +1 -0
  134. package/dist/news/sources.d.ts +52 -0
  135. package/dist/news/sources.d.ts.map +1 -0
  136. package/dist/news/sources.js +276 -0
  137. package/dist/news/sources.js.map +1 -0
  138. package/dist/obituary/find-a-grave.d.ts +43 -0
  139. package/dist/obituary/find-a-grave.d.ts.map +1 -0
  140. package/dist/obituary/find-a-grave.js +173 -0
  141. package/dist/obituary/find-a-grave.js.map +1 -0
  142. package/dist/shared/duckduckgo-search.d.ts +86 -0
  143. package/dist/shared/duckduckgo-search.d.ts.map +1 -0
  144. package/dist/shared/duckduckgo-search.js +218 -0
  145. package/dist/shared/duckduckgo-search.js.map +1 -0
  146. package/dist/shared/fetch-page.d.ts +50 -0
  147. package/dist/shared/fetch-page.d.ts.map +1 -0
  148. package/dist/shared/fetch-page.js +212 -0
  149. package/dist/shared/fetch-page.js.map +1 -0
  150. package/dist/shared/html-utils.d.ts +99 -0
  151. package/dist/shared/html-utils.d.ts.map +1 -0
  152. package/dist/shared/html-utils.js +246 -0
  153. package/dist/shared/html-utils.js.map +1 -0
  154. package/dist/shared/readability-extract.d.ts +33 -0
  155. package/dist/shared/readability-extract.d.ts.map +1 -0
  156. package/dist/shared/readability-extract.js +45 -0
  157. package/dist/shared/readability-extract.js.map +1 -0
  158. package/dist/shared/sanitize-text.d.ts +24 -0
  159. package/dist/shared/sanitize-text.d.ts.map +1 -0
  160. package/dist/shared/sanitize-text.js +49 -0
  161. package/dist/shared/sanitize-text.js.map +1 -0
  162. package/dist/shared/search-utils.d.ts +18 -0
  163. package/dist/shared/search-utils.d.ts.map +1 -0
  164. package/dist/shared/search-utils.js +20 -0
  165. package/dist/shared/search-utils.js.map +1 -0
  166. package/dist/structured/wikidata.d.ts +128 -0
  167. package/dist/structured/wikidata.d.ts.map +1 -0
  168. package/dist/structured/wikidata.js +361 -0
  169. package/dist/structured/wikidata.js.map +1 -0
  170. package/dist/structured/wikipedia.d.ts +184 -0
  171. package/dist/structured/wikipedia.d.ts.map +1 -0
  172. package/dist/structured/wikipedia.js +275 -0
  173. package/dist/structured/wikipedia.js.map +1 -0
  174. package/dist/web-search/base.d.ts +128 -0
  175. package/dist/web-search/base.d.ts.map +1 -0
  176. package/dist/web-search/base.js +251 -0
  177. package/dist/web-search/base.js.map +1 -0
  178. package/dist/web-search/bing.d.ts +21 -0
  179. package/dist/web-search/bing.d.ts.map +1 -0
  180. package/dist/web-search/bing.js +53 -0
  181. package/dist/web-search/bing.js.map +1 -0
  182. package/dist/web-search/brave.d.ts +21 -0
  183. package/dist/web-search/brave.d.ts.map +1 -0
  184. package/dist/web-search/brave.js +56 -0
  185. package/dist/web-search/brave.js.map +1 -0
  186. package/dist/web-search/duckduckgo.d.ts +15 -0
  187. package/dist/web-search/duckduckgo.d.ts.map +1 -0
  188. package/dist/web-search/duckduckgo.js +21 -0
  189. package/dist/web-search/duckduckgo.js.map +1 -0
  190. package/dist/web-search/google.d.ts +24 -0
  191. package/dist/web-search/google.d.ts.map +1 -0
  192. package/dist/web-search/google.js +48 -0
  193. package/dist/web-search/google.js.map +1 -0
  194. package/package.json +58 -0
@@ -0,0 +1,643 @@
1
+ /**
2
+ * Tests for the Wikipedia source.
3
+ *
4
+ * Mocks `wtf_wikipedia` to avoid real API calls.
5
+ * Tests article fetching, section filtering, disambiguation handling,
6
+ * confidence calculation, and the factory function.
7
+ */
8
+ import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
9
+ import { ReliabilityTier } from "@debriefer/core";
10
+ import { WikipediaSource, wikipedia } from "../../structured/wikipedia.js";
11
+ // ============================================================================
12
+ // Mock wtf_wikipedia
13
+ // ============================================================================
14
// Shared spy standing in for `wtf_wikipedia`'s `fetch`. Each test queues the
// value(s) it should resolve or reject with before calling `lookup`.
const mockFetch = vi.fn();

// Replace the `wtf_wikipedia` module so the tests never make real API calls.
// `fetch` forwards through a wrapper arrow, so `mockFetch` is only read when a
// fetch is actually performed, not when this factory is evaluated.
vi.mock("wtf_wikipedia", () => {
  return {
    default: {
      fetch: (...args) => mockFetch(...args),
      Document: class MockDocument {},
    },
    __esModule: true,
  };
});

// Start every test with a clean spy so queued results and recorded calls from
// one test cannot leak into the next.
beforeEach(() => {
  mockFetch.mockReset();
});

afterEach(() => {
  vi.restoreAllMocks();
});
31
+ // ============================================================================
32
+ // Test Helpers
33
+ // ============================================================================
34
/**
 * Builds the subject fixture passed to `source.lookup` / `source.buildQuery`.
 *
 * @param {object} [overrides] - Properties spread over the defaults; any key
 *   given here wins over the default `id` / `name`.
 * @returns {object} A new object with `id: 1`, `name: "John Wayne"`, plus the
 *   overrides.
 */
function makeSubject(overrides) {
  return {
    id: 1,
    name: "John Wayne",
    // Spreading `undefined` is a no-op, so calling with no argument is fine.
    ...overrides,
  };
}
41
/**
 * Builds a plain section descriptor for use with `makeDocument`.
 *
 * @param {string} title - Section heading; the tests pass `""` for the
 *   article's lead section.
 * @param {string} text - Section body text.
 * @param {number} [depth=0] - Heading nesting level.
 * @returns {{_title: string, _text: string, _depth: number}} The descriptor.
 */
function makeSection(title, text, depth = 0) {
  return {
    _title: title,
    _text: text,
    _depth: depth,
  };
}
48
/**
 * Wraps a section descriptor (as produced by `makeSection`) in an object that
 * exposes its fields through accessor methods.
 *
 * NOTE(review): the method names presumably mirror the section objects that
 * `wtf_wikipedia` returns; confirm against that library's API.
 *
 * @param {{_title: string, _text: string, _depth: number}} s - Descriptor.
 * @returns {{title: Function, text: Function, depth: Function}} Accessors that
 *   read from `s` at call time.
 */
function makeSectionObj(s) {
  return {
    title: () => s._title,
    // Accepts (and ignores) an options argument.
    text: (_opts) => s._text,
    depth: () => s._depth,
  };
}
55
/**
 * Builds a fake article document for `mockFetch` to resolve with.
 *
 * @param {string} title - Value returned by `title()`.
 * @param {Array<{_title: string, _text: string, _depth: number}>} sections -
 *   Section descriptors; each is wrapped with `makeSectionObj` on every call
 *   to `sections()`.
 * @param {boolean} [isDisambiguation=false] - Value returned by
 *   `isDisambiguation()`.
 * @returns {{title: Function, sections: Function, isDisambiguation: Function}}
 *   The fake document.
 */
function makeDocument(title, sections, isDisambiguation = false) {
  return {
    title: () => title,
    sections: () => sections.map(makeSectionObj),
    isDisambiguation: () => isDisambiguation,
  };
}
62
+ // ============================================================================
63
+ // WikipediaSource
64
+ // ============================================================================
65
+ describe("WikipediaSource", () => {
66
+ describe("properties", () => {
67
+ it("has correct name, type, reliability, domain, and cost", () => {
68
+ const source = new WikipediaSource();
69
+ expect(source.name).toBe("Wikipedia");
70
+ expect(source.type).toBe("wikipedia");
71
+ expect(source.reliabilityTier).toBe(ReliabilityTier.SECONDARY_COMPILATION);
72
+ expect(source.domain).toBe("en.wikipedia.org");
73
+ expect(source.isFree).toBe(true);
74
+ expect(source.estimatedCostPerQuery).toBe(0);
75
+ });
76
+ it("always reports as available (no API key needed)", () => {
77
+ const source = new WikipediaSource();
78
+ expect(source.isAvailable()).toBe(true);
79
+ });
80
+ });
81
+ describe("article fetching", () => {
82
+ it("fetches article by subject name with spaces replaced by underscores", async () => {
83
+ const source = new WikipediaSource();
84
+ const subject = makeSubject();
85
+ const doc = makeDocument("John Wayne", [
86
+ makeSection("", "John Wayne was born Marion Robert Morrison. He was a famous American actor known for westerns.", 0),
87
+ makeSection("Early life", "Wayne was born in Winterset, Iowa on May 26, 1907.", 1),
88
+ ]);
89
+ mockFetch.mockResolvedValueOnce(doc);
90
+ const signal = AbortSignal.timeout(5000);
91
+ const result = await source.lookup(subject, signal);
92
+ expect(mockFetch).toHaveBeenCalledWith("John_Wayne");
93
+ expect(result).not.toBeNull();
94
+ expect(result.text).toContain("John Wayne");
95
+ expect(result.publication).toBe("Wikipedia");
96
+ });
97
+ it("returns null when article not found", async () => {
98
+ const source = new WikipediaSource();
99
+ const subject = makeSubject({ name: "Nonexistent Person" });
100
+ mockFetch.mockResolvedValueOnce(null);
101
+ const signal = AbortSignal.timeout(5000);
102
+ const result = await source.lookup(subject, signal);
103
+ expect(result).toBeNull();
104
+ });
105
+ it("returns null when wtf.fetch throws an error", async () => {
106
+ const source = new WikipediaSource();
107
+ const subject = makeSubject();
108
+ mockFetch.mockRejectedValueOnce(new Error("Network error"));
109
+ const signal = AbortSignal.timeout(5000);
110
+ const result = await source.lookup(subject, signal);
111
+ expect(result).toBeNull();
112
+ });
113
+ it("returns null when article has no sections", async () => {
114
+ const source = new WikipediaSource();
115
+ const subject = makeSubject();
116
+ const doc = makeDocument("John Wayne", []);
117
+ mockFetch.mockResolvedValueOnce(doc);
118
+ const signal = AbortSignal.timeout(5000);
119
+ const result = await source.lookup(subject, signal);
120
+ expect(result).toBeNull();
121
+ });
122
+ });
123
+ describe("section filtering", () => {
124
+ it("returns all sections by default", async () => {
125
+ const source = new WikipediaSource();
126
+ const subject = makeSubject();
127
+ const doc = makeDocument("John Wayne", [
128
+ makeSection("", "Introduction text about John Wayne that is long enough to include in the output for the test.", 0),
129
+ makeSection("Early life", "Early life section content that is long enough to include in the output for the test.", 1),
130
+ makeSection("Career", "Career section content that is long enough to include in the output for the test to verify filtering.", 1),
131
+ ]);
132
+ mockFetch.mockResolvedValueOnce(doc);
133
+ const signal = AbortSignal.timeout(5000);
134
+ const result = await source.lookup(subject, signal);
135
+ expect(result).not.toBeNull();
136
+ expect(result.text).toContain("[Introduction]");
137
+ expect(result.text).toContain("[Early life]");
138
+ expect(result.text).toContain("[Career]");
139
+ });
140
+ it("uses custom section filter when provided", async () => {
141
+ const source = new WikipediaSource({
142
+ sectionFilter: (sections) => sections.filter((s) => /death|illness/i.test(s.title)),
143
+ });
144
+ const subject = makeSubject();
145
+ const doc = makeDocument("John Wayne", [
146
+ makeSection("", "Introduction text about John Wayne that is long enough to include in the output for the test.", 0),
147
+ makeSection("Career", "Career section content that is long enough to include in the output for the test to verify filtering.", 1),
148
+ makeSection("Death", "Wayne died of stomach cancer on June 11, 1979, at UCLA Medical Center.", 1),
149
+ ]);
150
+ mockFetch.mockResolvedValueOnce(doc);
151
+ const signal = AbortSignal.timeout(5000);
152
+ const result = await source.lookup(subject, signal);
153
+ expect(result).not.toBeNull();
154
+ // Intro is included by default
155
+ expect(result.text).toContain("[Introduction]");
156
+ // Death matched the filter
157
+ expect(result.text).toContain("[Death]");
158
+ // Career did NOT match the filter
159
+ expect(result.text).not.toContain("[Career]");
160
+ });
161
+ it("skips sections shorter than minimum length", async () => {
162
+ const source = new WikipediaSource();
163
+ const subject = makeSubject();
164
+ const doc = makeDocument("John Wayne", [
165
+ makeSection("", "Introduction text about John Wayne that is long enough to include in the output for the test.", 0),
166
+ makeSection("Short", "Too short", 1),
167
+ makeSection("Long enough", "This section has enough content to be included because it exceeds the minimum character threshold.", 1),
168
+ ]);
169
+ mockFetch.mockResolvedValueOnce(doc);
170
+ const signal = AbortSignal.timeout(5000);
171
+ const result = await source.lookup(subject, signal);
172
+ expect(result).not.toBeNull();
173
+ expect(result.text).not.toContain("[Short]");
174
+ expect(result.text).toContain("[Long enough]");
175
+ });
176
+ it("returns null when all sections are below minimum length", async () => {
177
+ const source = new WikipediaSource({ includeIntro: false });
178
+ const subject = makeSubject();
179
+ const doc = makeDocument("John Wayne", [
180
+ makeSection("", "Stub.", 0),
181
+ makeSection("Section", "Short.", 1),
182
+ ]);
183
+ mockFetch.mockResolvedValueOnce(doc);
184
+ const signal = AbortSignal.timeout(5000);
185
+ const result = await source.lookup(subject, signal);
186
+ expect(result).toBeNull();
187
+ });
188
+ });
189
+ describe("intro section handling", () => {
190
+ it("includes intro by default even when filter selects other sections", async () => {
191
+ const source = new WikipediaSource({
192
+ sectionFilter: (sections) => sections.filter((s) => s.title === "Death"),
193
+ });
194
+ const subject = makeSubject();
195
+ const doc = makeDocument("John Wayne", [
196
+ makeSection("", "Introduction text about John Wayne that is long enough to include in the output for the test.", 0),
197
+ makeSection("Death", "Wayne died of stomach cancer on June 11, 1979, at UCLA Medical Center in Los Angeles.", 1),
198
+ ]);
199
+ mockFetch.mockResolvedValueOnce(doc);
200
+ const signal = AbortSignal.timeout(5000);
201
+ const result = await source.lookup(subject, signal);
202
+ expect(result.text).toContain("[Introduction]");
203
+ expect(result.text).toContain("[Death]");
204
+ });
205
+ it("excludes intro when includeIntro is false", async () => {
206
+ const source = new WikipediaSource({
207
+ includeIntro: false,
208
+ sectionFilter: (sections) => sections.filter((s) => s.title === "Death"),
209
+ });
210
+ const subject = makeSubject();
211
+ const doc = makeDocument("John Wayne", [
212
+ makeSection("", "Introduction text about John Wayne that is long enough to include in the output for the test.", 0),
213
+ makeSection("Death", "Wayne died of stomach cancer on June 11, 1979, at UCLA Medical Center in Los Angeles.", 1),
214
+ ]);
215
+ mockFetch.mockResolvedValueOnce(doc);
216
+ const signal = AbortSignal.timeout(5000);
217
+ const result = await source.lookup(subject, signal);
218
+ expect(result.text).not.toContain("[Introduction]");
219
+ expect(result.text).toContain("[Death]");
220
+ });
221
+ });
222
+ describe("disambiguation handling", () => {
223
+ it("tries alternate titles when disambiguation page is detected", async () => {
224
+ const source = new WikipediaSource({
225
+ disambiguationSuffixes: ["_(actor)", "_(actress)"],
226
+ });
227
+ const subject = makeSubject();
228
+ const disambigDoc = makeDocument("John Wayne", [], true);
229
+ const actorDoc = makeDocument("John Wayne (actor)", [
230
+ makeSection("", "John Wayne was born Marion Robert Morrison, an American actor known for westerns and war films.", 0),
231
+ ]);
232
+ mockFetch
233
+ .mockResolvedValueOnce(disambigDoc) // first attempt: disambiguation
234
+ .mockResolvedValueOnce(actorDoc); // second attempt: _(actor)
235
+ const signal = AbortSignal.timeout(5000);
236
+ const result = await source.lookup(subject, signal);
237
+ expect(mockFetch).toHaveBeenCalledTimes(2);
238
+ expect(mockFetch).toHaveBeenCalledWith("John_Wayne");
239
+ expect(mockFetch).toHaveBeenCalledWith("John_Wayne_(actor)");
240
+ expect(result).not.toBeNull();
241
+ });
242
+ it("returns null when all alternate titles are also disambiguation pages", async () => {
243
+ const source = new WikipediaSource({
244
+ disambiguationSuffixes: ["_(actor)"],
245
+ });
246
+ const subject = makeSubject();
247
+ const disambig1 = makeDocument("John Wayne", [], true);
248
+ const disambig2 = makeDocument("John Wayne (actor)", [], true);
249
+ mockFetch.mockResolvedValueOnce(disambig1).mockResolvedValueOnce(disambig2);
250
+ const signal = AbortSignal.timeout(5000);
251
+ const result = await source.lookup(subject, signal);
252
+ expect(result).toBeNull();
253
+ });
254
+ it("does not try alternates when handleDisambiguation is false", async () => {
255
+ const source = new WikipediaSource({ handleDisambiguation: false });
256
+ const subject = makeSubject();
257
+ const disambigDoc = makeDocument("John Wayne", [], true);
258
+ mockFetch.mockResolvedValueOnce(disambigDoc);
259
+ const signal = AbortSignal.timeout(5000);
260
+ const result = await source.lookup(subject, signal);
261
+ expect(mockFetch).toHaveBeenCalledTimes(1);
262
+ // With disambiguation disabled, the source should try to use the doc
263
+ // but it has no sections, so it returns null
264
+ expect(result).toBeNull();
265
+ });
266
+ it("uses custom disambiguation suffixes", async () => {
267
+ const source = new WikipediaSource({
268
+ disambiguationSuffixes: ["_(filmmaker)", "_(director)"],
269
+ });
270
+ const subject = makeSubject({ name: "James Cameron" });
271
+ const disambigDoc = makeDocument("James Cameron", [], true);
272
+ const notFoundDoc = null;
273
+ const directorDoc = makeDocument("James Cameron (director)", [
274
+ makeSection("", "James Cameron is a Canadian filmmaker known for directing Titanic and Avatar in Hollywood.", 0),
275
+ ]);
276
+ mockFetch
277
+ .mockResolvedValueOnce(disambigDoc)
278
+ .mockResolvedValueOnce(notFoundDoc) // _(filmmaker) not found
279
+ .mockResolvedValueOnce(directorDoc); // _(director) found
280
+ const signal = AbortSignal.timeout(5000);
281
+ const result = await source.lookup(subject, signal);
282
+ expect(mockFetch).toHaveBeenCalledTimes(3);
283
+ expect(mockFetch).toHaveBeenCalledWith("James_Cameron_(filmmaker)");
284
+ expect(mockFetch).toHaveBeenCalledWith("James_Cameron_(director)");
285
+ expect(result).not.toBeNull();
286
+ });
287
+ });
288
+ describe("confidence calculation", () => {
289
+ it("gives higher confidence when subject name appears in text", async () => {
290
+ const source = new WikipediaSource();
291
+ const subject = makeSubject();
292
+ const docWithName = makeDocument("John Wayne", [
293
+ makeSection("", "John Wayne was born in Winterset, Iowa. He became one of the most popular actors in American cinema history.", 0),
294
+ ]);
295
+ mockFetch.mockResolvedValueOnce(docWithName);
296
+ const signal = AbortSignal.timeout(5000);
297
+ const withName = await source.lookup(subject, signal);
298
+ expect(withName.confidence).toBeGreaterThanOrEqual(0.5);
299
+ });
300
+ it("gives higher confidence for longer content", async () => {
301
+ const source = new WikipediaSource();
302
+ const subject = makeSubject();
303
+ const longContent = "John Wayne ".repeat(100) + " was a great actor.";
304
+ const docLong = makeDocument("John Wayne", [makeSection("", longContent, 0)]);
305
+ mockFetch.mockResolvedValueOnce(docLong);
306
+ const signal = AbortSignal.timeout(5000);
307
+ const result = await source.lookup(subject, signal);
308
+ expect(result.confidence).toBeGreaterThanOrEqual(0.6);
309
+ });
310
+ });
311
+ describe("output format", () => {
312
+ it("includes URL with resolved article title", async () => {
313
+ const source = new WikipediaSource();
314
+ const subject = makeSubject();
315
+ const doc = makeDocument("John Wayne", [
316
+ makeSection("", "John Wayne was born Marion Robert Morrison, an American actor known for westerns and war films.", 0),
317
+ ]);
318
+ mockFetch.mockResolvedValueOnce(doc);
319
+ const signal = AbortSignal.timeout(5000);
320
+ const result = await source.lookup(subject, signal);
321
+ expect(result.url).toBe("https://en.wikipedia.org/wiki/John_Wayne");
322
+ });
323
+ it("includes article title in the finding", async () => {
324
+ const source = new WikipediaSource();
325
+ const subject = makeSubject();
326
+ const doc = makeDocument("John Wayne", [
327
+ makeSection("", "John Wayne was born Marion Robert Morrison, an American actor known for westerns and war films.", 0),
328
+ ]);
329
+ mockFetch.mockResolvedValueOnce(doc);
330
+ const signal = AbortSignal.timeout(5000);
331
+ const result = await source.lookup(subject, signal);
332
+ expect(result.articleTitle).toBe("John Wayne");
333
+ });
334
+ it("includes metadata with section info", async () => {
335
+ const source = new WikipediaSource();
336
+ const subject = makeSubject();
337
+ const doc = makeDocument("John Wayne", [
338
+ makeSection("", "Introduction about John Wayne, born Marion Robert Morrison, a famous American actor.", 0),
339
+ makeSection("Early life", "Wayne was born in Winterset, Iowa on May 26, 1907 to a pharmacist father.", 1),
340
+ ]);
341
+ mockFetch.mockResolvedValueOnce(doc);
342
+ const signal = AbortSignal.timeout(5000);
343
+ const result = await source.lookup(subject, signal);
344
+ expect(result.metadata).toBeDefined();
345
+ expect(result.metadata.sectionCount).toBe(2);
346
+ expect(result.metadata.sectionTitles).toContain("Introduction");
347
+ expect(result.metadata.sectionTitles).toContain("Early life");
348
+ });
349
+ it("formats sections with title headers", async () => {
350
+ const source = new WikipediaSource();
351
+ const subject = makeSubject();
352
+ const doc = makeDocument("John Wayne", [
353
+ makeSection("", "Introduction content about John Wayne born Marion Robert Morrison as an actor.", 0),
354
+ makeSection("Death", "Wayne died of stomach cancer on June 11, 1979, at UCLA Medical Center in Los Angeles.", 1),
355
+ ]);
356
+ mockFetch.mockResolvedValueOnce(doc);
357
+ const signal = AbortSignal.timeout(5000);
358
+ const result = await source.lookup(subject, signal);
359
+ expect(result.text).toMatch(/\[Introduction\].*Introduction content/s);
360
+ expect(result.text).toMatch(/\[Death\].*Wayne died/s);
361
+ });
362
+ it("cost is always zero", async () => {
363
+ const source = new WikipediaSource();
364
+ const subject = makeSubject();
365
+ const doc = makeDocument("John Wayne", [
366
+ makeSection("", "Introduction content about John Wayne born Marion Robert Morrison as an actor.", 0),
367
+ ]);
368
+ mockFetch.mockResolvedValueOnce(doc);
369
+ const signal = AbortSignal.timeout(5000);
370
+ const result = await source.lookup(subject, signal);
371
+ expect(result.costUsd).toBe(0);
372
+ });
373
+ });
374
+ describe("buildQuery (for cache key)", () => {
375
+ // Default suffixes _(actor),_(actress) are included in cache keys when
376
+ // handleDisambiguation is enabled (the default). Tests below with
377
+ // handleDisambiguation:false or empty suffixes verify they are omitted.
378
+ const defaultSuffixKey = "suffixes:_(actor),_(actress)";
379
+ it("returns the subject name with default suffix key", () => {
380
+ const source = new WikipediaSource();
381
+ const subject = makeSubject();
382
+ expect(source.buildQuery(subject)).toBe(`John Wayne|${defaultSuffixKey}`);
383
+ });
384
+ it("includes sections marker when custom sectionFilter is provided", () => {
385
+ const source = new WikipediaSource({
386
+ sectionFilter: (sections) => sections.filter((s) => s.title === "Death"),
387
+ });
388
+ const subject = makeSubject();
389
+ expect(source.buildQuery(subject)).toBe(`John Wayne|sections:custom|${defaultSuffixKey}`);
390
+ });
391
+ it("includes no-intro marker when includeIntro is false", () => {
392
+ const source = new WikipediaSource({ includeIntro: false });
393
+ const subject = makeSubject();
394
+ expect(source.buildQuery(subject)).toBe(`John Wayne|no-intro|${defaultSuffixKey}`);
395
+ });
396
+ it("includes both markers when both options are set", () => {
397
+ const source = new WikipediaSource({
398
+ sectionFilter: (sections) => sections.filter((s) => s.title === "Death"),
399
+ includeIntro: false,
400
+ });
401
+ const subject = makeSubject();
402
+ expect(source.buildQuery(subject)).toBe(`John Wayne|sections:custom|no-intro|${defaultSuffixKey}`);
403
+ });
404
+ it("includes async marker when asyncSectionFilter is provided", () => {
405
+ const source = new WikipediaSource({
406
+ asyncSectionFilter: async (sections) => sections,
407
+ });
408
+ const subject = makeSubject();
409
+ expect(source.buildQuery(subject)).toBe(`John Wayne|sections:async|${defaultSuffixKey}`);
410
+ });
411
+ it("prefers async marker over sync marker when both provided", () => {
412
+ const source = new WikipediaSource({
413
+ sectionFilter: (sections) => sections,
414
+ asyncSectionFilter: async (sections) => sections,
415
+ });
416
+ const subject = makeSubject();
417
+ expect(source.buildQuery(subject)).toBe(`John Wayne|sections:async|${defaultSuffixKey}`);
418
+ });
419
+ it("includes validate marker when validatePerson is provided", () => {
420
+ const source = new WikipediaSource({
421
+ validatePerson: () => true,
422
+ });
423
+ const subject = makeSubject();
424
+ expect(source.buildQuery(subject)).toBe(`John Wayne|validate:person|${defaultSuffixKey}`);
425
+ });
426
// Turning disambiguation off adds "disambig:off" to the cache key.
it("includes disambig:off when handleDisambiguation is false", () => {
    const wiki = new WikipediaSource({ handleDisambiguation: false });

    const actualKey = wiki.buildQuery(makeSubject());

    // No suffix key is expected here: with disambiguation off the suffixes are never used.
    expect(actualKey).toBe("John Wayne|disambig:off");
});
434
// An empty suffix list leaves the cache key as the bare subject name.
it("omits suffixes key when disambiguationSuffixes is empty", () => {
    const wiki = new WikipediaSource({ disambiguationSuffixes: [] });

    const actualKey = wiki.buildQuery(makeSubject());

    expect(actualKey).toBe("John Wayne");
});
441
// A caller-supplied suffix list is embedded in the cache key after "suffixes:".
it("includes custom suffixes in cache key", () => {
    const customSuffixes = ["_(filmmaker)"];
    const wiki = new WikipediaSource({ disambiguationSuffixes: customSuffixes });

    const actualKey = wiki.buildQuery(makeSubject());

    expect(actualKey).toBe("John Wayne|suffixes:_(filmmaker)");
});
448
+ });
449
// Tests for the asyncSectionFilter option: it receives the parsed sections plus
// the article text, decides which sections are kept, and wins over sectionFilter.
describe("async section filter", () => {
    // Section bodies reused by both tests below.
    const INTRO_TEXT = "Introduction text about John Wayne that is long enough to include in the output for the test.";
    const DEATH_TEXT = "Wayne died of stomach cancer on June 11, 1979, at UCLA Medical Center in Los Angeles.";

    it("uses asyncSectionFilter when provided", async () => {
        const pickDeath = vi.fn().mockResolvedValue([{ index: 2, title: "Death", depth: 1 }]);
        const wiki = new WikipediaSource({ asyncSectionFilter: pickDeath });
        const person = makeSubject();
        const article = makeDocument("John Wayne", [
            makeSection("", INTRO_TEXT, 0),
            makeSection("Career", "Career section content that is long enough to include in the output for the test to verify filtering.", 1),
            makeSection("Death", DEATH_TEXT, 1),
        ]);
        mockFetch.mockResolvedValueOnce(article);

        const result = await wiki.lookup(person, AbortSignal.timeout(5000));

        expect(pickDeath).toHaveBeenCalledTimes(1);
        const [sectionsArg, articleTextArg] = pickDeath.mock.calls[0];
        // The filter is handed every section of the article...
        expect(sectionsArg).toHaveLength(3);
        // ...and the full article text as its second argument.
        expect(articleTextArg).toContain("John Wayne");

        expect(result).not.toBeNull();
        const { text } = result;
        // The intro is included by default.
        expect(text).toContain("[Introduction]");
        // "Death" was chosen by the async filter.
        expect(text).toContain("[Death]");
        // "Career" was not chosen, so it must be absent.
        expect(text).not.toContain("[Career]");
    });

    it("asyncSectionFilter takes precedence over sync sectionFilter", async () => {
        const syncSpy = vi.fn().mockReturnValue([]);
        const asyncSpy = vi.fn().mockResolvedValue([{ index: 1, title: "Death", depth: 1 }]);
        const wiki = new WikipediaSource({
            sectionFilter: syncSpy,
            asyncSectionFilter: asyncSpy,
        });
        const person = makeSubject();
        const article = makeDocument("John Wayne", [
            makeSection("", INTRO_TEXT, 0),
            makeSection("Death", DEATH_TEXT, 1),
        ]);
        mockFetch.mockResolvedValueOnce(article);

        const result = await wiki.lookup(person, AbortSignal.timeout(5000));

        // Only the async filter may run; the sync one must stay untouched.
        expect(asyncSpy).toHaveBeenCalledTimes(1);
        expect(syncSpy).not.toHaveBeenCalled();
        expect(result).not.toBeNull();
        expect(result.text).toContain("[Death]");
    });
});
497
// Tests for the validatePerson option: a rejected document triggers a retry with
// the configured disambiguation suffixes (unless disambiguation is disabled), and
// the callback may be synchronous or return a promise.
describe("person validation", () => {
    // Intro bodies reused across the tests below.
    const ACTOR_INTRO = "John Wayne (born May 26, 1907) was born Marion Robert Morrison, an American actor.";
    const CITY_INTRO = "John Wayne is a city in Indiana with a population of about 30,000 people.";

    // Builds a document containing a single empty-title section with the given text.
    const introOnlyDoc = (title, introText) => makeDocument(title, [makeSection("", introText, 0)]);

    // Runs a lookup with a fresh 5-second abort signal.
    const runLookup = (wiki, person) => wiki.lookup(person, AbortSignal.timeout(5000));

    it("uses the document when validatePerson returns true", async () => {
        const validator = vi.fn().mockReturnValue(true);
        const wiki = new WikipediaSource({ validatePerson: validator });
        const person = makeSubject();
        mockFetch.mockResolvedValueOnce(introOnlyDoc("John Wayne", ACTOR_INTRO));

        const result = await runLookup(wiki, person);

        expect(validator).toHaveBeenCalledTimes(1);
        // The validator's second argument is the very subject passed to lookup().
        expect(validator.mock.calls[0][1]).toBe(person);
        expect(result).not.toBeNull();
        expect(result.text).toContain("John Wayne");
    });

    it("tries disambiguation suffixes when validatePerson returns false", async () => {
        // First document is rejected, the alternate one is accepted.
        const validator = vi.fn().mockReturnValueOnce(false).mockReturnValueOnce(true);
        const wiki = new WikipediaSource({
            validatePerson: validator,
            disambiguationSuffixes: ["_(actor)"],
        });
        const person = makeSubject();
        const cityDoc = introOnlyDoc("John Wayne", CITY_INTRO);
        const actorDoc = introOnlyDoc("John Wayne (actor)", ACTOR_INTRO);
        mockFetch.mockResolvedValueOnce(cityDoc).mockResolvedValueOnce(actorDoc);

        const result = await runLookup(wiki, person);

        expect(validator).toHaveBeenCalledTimes(2);
        expect(result).not.toBeNull();
        expect(result.text).toContain("Marion Robert Morrison");
    });

    it("returns null when validatePerson fails for all documents", async () => {
        const validator = vi.fn().mockReturnValue(false);
        const wiki = new WikipediaSource({
            validatePerson: validator,
            disambiguationSuffixes: ["_(actor)"],
        });
        const person = makeSubject();
        const firstDoc = introOnlyDoc("John Wayne", CITY_INTRO);
        const secondDoc = introOnlyDoc("John Wayne (actor)", "Different John Wayne actor from an indie film that is not the right person.");
        mockFetch.mockResolvedValueOnce(firstDoc).mockResolvedValueOnce(secondDoc);

        const result = await runLookup(wiki, person);

        expect(validator).toHaveBeenCalledTimes(2);
        expect(result).toBeNull();
    });

    it("supports async validatePerson callbacks", async () => {
        const validator = vi.fn().mockResolvedValue(true);
        const wiki = new WikipediaSource({ validatePerson: validator });
        const person = makeSubject();
        mockFetch.mockResolvedValueOnce(introOnlyDoc("John Wayne", ACTOR_INTRO));

        const result = await runLookup(wiki, person);

        expect(validator).toHaveBeenCalledTimes(1);
        expect(result).not.toBeNull();
    });

    it("tries disambiguation suffixes when async validatePerson returns false", async () => {
        const validator = vi.fn().mockResolvedValueOnce(false).mockResolvedValueOnce(true);
        const wiki = new WikipediaSource({
            validatePerson: validator,
            disambiguationSuffixes: ["_(actor)"],
        });
        const person = makeSubject();
        const cityDoc = introOnlyDoc("John Wayne", CITY_INTRO);
        const actorDoc = introOnlyDoc("John Wayne (actor)", ACTOR_INTRO);
        mockFetch.mockResolvedValueOnce(cityDoc).mockResolvedValueOnce(actorDoc);

        const result = await runLookup(wiki, person);

        expect(validator).toHaveBeenCalledTimes(2);
        expect(result).not.toBeNull();
        expect(result.text).toContain("Marion Robert Morrison");
    });

    it("returns null immediately when validation fails and handleDisambiguation is false", async () => {
        const validator = vi.fn().mockReturnValue(false);
        const wiki = new WikipediaSource({
            validatePerson: validator,
            handleDisambiguation: false,
            disambiguationSuffixes: ["_(actor)"],
        });
        const person = makeSubject();
        mockFetch.mockResolvedValueOnce(introOnlyDoc("John Wayne", CITY_INTRO));

        const result = await runLookup(wiki, person);

        // A single fetch proves no disambiguation suffix was attempted.
        expect(mockFetch).toHaveBeenCalledTimes(1);
        expect(validator).toHaveBeenCalledTimes(1);
        expect(result).toBeNull();
    });
});
608
// Checks how confidence is reported when requiredKeywords are configured.
describe("keyword-based confidence delegation", () => {
    it("returns -1 confidence when requiredKeywords are configured (for base class to handle)", async () => {
        const wiki = new WikipediaSource({ requiredKeywords: ["died", "death"] });
        const person = makeSubject();
        const intro = makeSection("", "John Wayne died on June 11, 1979. His death was caused by stomach cancer at UCLA.", 0);
        mockFetch.mockResolvedValueOnce(makeDocument("John Wayne", [intro]));

        const result = await wiki.lookup(person, AbortSignal.timeout(5000));

        // By the time lookup() returns, the base class should already have swapped
        // the -1 placeholder for a keyword-based confidence value.
        expect(result).not.toBeNull();
        // Hence the observable confidence is a positive, calculated number.
        expect(result.confidence).toBeGreaterThan(0);
    });
});
627
+ });
628
+ // ============================================================================
629
+ // Factory Function
630
+ // ============================================================================
631
// Tests for the `wikipedia()` convenience factory: it must return a configured
// WikipediaSource and forward any options it is given.
describe("wikipedia factory", () => {
    it("creates a WikipediaSource instance", () => {
        const source = wikipedia();
        expect(source).toBeInstanceOf(WikipediaSource);
        expect(source.name).toBe("Wikipedia");
    });

    it("passes options through to the source", () => {
        const filter = (sections) => sections.filter((s) => s.title === "Death");
        const source = wikipedia({ sectionFilter: filter, includeIntro: false });
        expect(source).toBeInstanceOf(WikipediaSource);

        // An instanceof check alone would still pass if the factory dropped its
        // options. Both options leave a marker in the cache key, so assert on
        // those markers to prove the options actually reached the source.
        const query = source.buildQuery(makeSubject());
        expect(query).toContain("sections:custom");
        expect(query).toContain("no-intro");
    });
});
643
+ //# sourceMappingURL=wikipedia.test.js.map