@debriefer/sources 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +59 -0
- package/dist/__tests__/archives/chronicling-america.test.d.ts +8 -0
- package/dist/__tests__/archives/chronicling-america.test.d.ts.map +1 -0
- package/dist/__tests__/archives/chronicling-america.test.js +151 -0
- package/dist/__tests__/archives/chronicling-america.test.js.map +1 -0
- package/dist/__tests__/archives/europeana.test.d.ts +8 -0
- package/dist/__tests__/archives/europeana.test.d.ts.map +1 -0
- package/dist/__tests__/archives/europeana.test.js +200 -0
- package/dist/__tests__/archives/europeana.test.js.map +1 -0
- package/dist/__tests__/archives/internet-archive.test.d.ts +8 -0
- package/dist/__tests__/archives/internet-archive.test.d.ts.map +1 -0
- package/dist/__tests__/archives/internet-archive.test.js +189 -0
- package/dist/__tests__/archives/internet-archive.test.js.map +1 -0
- package/dist/__tests__/archives/trove.test.d.ts +8 -0
- package/dist/__tests__/archives/trove.test.d.ts.map +1 -0
- package/dist/__tests__/archives/trove.test.js +202 -0
- package/dist/__tests__/archives/trove.test.js.map +1 -0
- package/dist/__tests__/books/google-books.test.d.ts +8 -0
- package/dist/__tests__/books/google-books.test.d.ts.map +1 -0
- package/dist/__tests__/books/google-books.test.js +221 -0
- package/dist/__tests__/books/google-books.test.js.map +1 -0
- package/dist/__tests__/books/open-library.test.d.ts +8 -0
- package/dist/__tests__/books/open-library.test.d.ts.map +1 -0
- package/dist/__tests__/books/open-library.test.js +159 -0
- package/dist/__tests__/books/open-library.test.js.map +1 -0
- package/dist/__tests__/news/guardian.test.d.ts +9 -0
- package/dist/__tests__/news/guardian.test.d.ts.map +1 -0
- package/dist/__tests__/news/guardian.test.js +224 -0
- package/dist/__tests__/news/guardian.test.js.map +1 -0
- package/dist/__tests__/news/nytimes.test.d.ts +9 -0
- package/dist/__tests__/news/nytimes.test.d.ts.map +1 -0
- package/dist/__tests__/news/nytimes.test.js +271 -0
- package/dist/__tests__/news/nytimes.test.js.map +1 -0
- package/dist/__tests__/news/site-search-source.test.d.ts +9 -0
- package/dist/__tests__/news/site-search-source.test.d.ts.map +1 -0
- package/dist/__tests__/news/site-search-source.test.js +342 -0
- package/dist/__tests__/news/site-search-source.test.js.map +1 -0
- package/dist/__tests__/obituary/find-a-grave.test.d.ts +8 -0
- package/dist/__tests__/obituary/find-a-grave.test.d.ts.map +1 -0
- package/dist/__tests__/obituary/find-a-grave.test.js +238 -0
- package/dist/__tests__/obituary/find-a-grave.test.js.map +1 -0
- package/dist/__tests__/shared/duckduckgo-search.test.d.ts +9 -0
- package/dist/__tests__/shared/duckduckgo-search.test.d.ts.map +1 -0
- package/dist/__tests__/shared/duckduckgo-search.test.js +218 -0
- package/dist/__tests__/shared/duckduckgo-search.test.js.map +1 -0
- package/dist/__tests__/shared/fetch-page.test.d.ts +9 -0
- package/dist/__tests__/shared/fetch-page.test.d.ts.map +1 -0
- package/dist/__tests__/shared/fetch-page.test.js +281 -0
- package/dist/__tests__/shared/fetch-page.test.js.map +1 -0
- package/dist/__tests__/shared/html-utils.test.d.ts +2 -0
- package/dist/__tests__/shared/html-utils.test.d.ts.map +1 -0
- package/dist/__tests__/shared/html-utils.test.js +169 -0
- package/dist/__tests__/shared/html-utils.test.js.map +1 -0
- package/dist/__tests__/shared/readability-extract.test.d.ts +2 -0
- package/dist/__tests__/shared/readability-extract.test.d.ts.map +1 -0
- package/dist/__tests__/shared/readability-extract.test.js +107 -0
- package/dist/__tests__/shared/readability-extract.test.js.map +1 -0
- package/dist/__tests__/shared/sanitize-text.test.d.ts +2 -0
- package/dist/__tests__/shared/sanitize-text.test.d.ts.map +1 -0
- package/dist/__tests__/shared/sanitize-text.test.js +77 -0
- package/dist/__tests__/shared/sanitize-text.test.js.map +1 -0
- package/dist/__tests__/shared/search-utils.test.d.ts +2 -0
- package/dist/__tests__/shared/search-utils.test.d.ts.map +1 -0
- package/dist/__tests__/shared/search-utils.test.js +26 -0
- package/dist/__tests__/shared/search-utils.test.js.map +1 -0
- package/dist/__tests__/structured/wikidata.test.d.ts +9 -0
- package/dist/__tests__/structured/wikidata.test.d.ts.map +1 -0
- package/dist/__tests__/structured/wikidata.test.js +509 -0
- package/dist/__tests__/structured/wikidata.test.js.map +1 -0
- package/dist/__tests__/structured/wikipedia.test.d.ts +9 -0
- package/dist/__tests__/structured/wikipedia.test.d.ts.map +1 -0
- package/dist/__tests__/structured/wikipedia.test.js +643 -0
- package/dist/__tests__/structured/wikipedia.test.js.map +1 -0
- package/dist/__tests__/web-search/base.test.d.ts +9 -0
- package/dist/__tests__/web-search/base.test.d.ts.map +1 -0
- package/dist/__tests__/web-search/base.test.js +622 -0
- package/dist/__tests__/web-search/base.test.js.map +1 -0
- package/dist/__tests__/web-search/bing.test.d.ts +10 -0
- package/dist/__tests__/web-search/bing.test.d.ts.map +1 -0
- package/dist/__tests__/web-search/bing.test.js +277 -0
- package/dist/__tests__/web-search/bing.test.js.map +1 -0
- package/dist/__tests__/web-search/brave.test.d.ts +10 -0
- package/dist/__tests__/web-search/brave.test.d.ts.map +1 -0
- package/dist/__tests__/web-search/brave.test.js +264 -0
- package/dist/__tests__/web-search/brave.test.js.map +1 -0
- package/dist/__tests__/web-search/duckduckgo.test.d.ts +10 -0
- package/dist/__tests__/web-search/duckduckgo.test.d.ts.map +1 -0
- package/dist/__tests__/web-search/duckduckgo.test.js +107 -0
- package/dist/__tests__/web-search/duckduckgo.test.js.map +1 -0
- package/dist/__tests__/web-search/google.test.d.ts +9 -0
- package/dist/__tests__/web-search/google.test.d.ts.map +1 -0
- package/dist/__tests__/web-search/google.test.js +189 -0
- package/dist/__tests__/web-search/google.test.js.map +1 -0
- package/dist/archives/chronicling-america.d.ts +33 -0
- package/dist/archives/chronicling-america.d.ts.map +1 -0
- package/dist/archives/chronicling-america.js +85 -0
- package/dist/archives/chronicling-america.js.map +1 -0
- package/dist/archives/europeana.d.ts +37 -0
- package/dist/archives/europeana.d.ts.map +1 -0
- package/dist/archives/europeana.js +92 -0
- package/dist/archives/europeana.js.map +1 -0
- package/dist/archives/internet-archive.d.ts +32 -0
- package/dist/archives/internet-archive.d.ts.map +1 -0
- package/dist/archives/internet-archive.js +90 -0
- package/dist/archives/internet-archive.js.map +1 -0
- package/dist/archives/trove.d.ts +37 -0
- package/dist/archives/trove.d.ts.map +1 -0
- package/dist/archives/trove.js +97 -0
- package/dist/archives/trove.js.map +1 -0
- package/dist/books/google-books.d.ts +48 -0
- package/dist/books/google-books.d.ts.map +1 -0
- package/dist/books/google-books.js +111 -0
- package/dist/books/google-books.js.map +1 -0
- package/dist/books/open-library.d.ts +44 -0
- package/dist/books/open-library.d.ts.map +1 -0
- package/dist/books/open-library.js +103 -0
- package/dist/books/open-library.js.map +1 -0
- package/dist/index.d.ts +45 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +35 -0
- package/dist/index.js.map +1 -0
- package/dist/news/guardian.d.ts +51 -0
- package/dist/news/guardian.d.ts.map +1 -0
- package/dist/news/guardian.js +131 -0
- package/dist/news/guardian.js.map +1 -0
- package/dist/news/nytimes.d.ts +27 -0
- package/dist/news/nytimes.d.ts.map +1 -0
- package/dist/news/nytimes.js +104 -0
- package/dist/news/nytimes.js.map +1 -0
- package/dist/news/site-search-source.d.ts +89 -0
- package/dist/news/site-search-source.d.ts.map +1 -0
- package/dist/news/site-search-source.js +182 -0
- package/dist/news/site-search-source.js.map +1 -0
- package/dist/news/sources.d.ts +52 -0
- package/dist/news/sources.d.ts.map +1 -0
- package/dist/news/sources.js +276 -0
- package/dist/news/sources.js.map +1 -0
- package/dist/obituary/find-a-grave.d.ts +43 -0
- package/dist/obituary/find-a-grave.d.ts.map +1 -0
- package/dist/obituary/find-a-grave.js +173 -0
- package/dist/obituary/find-a-grave.js.map +1 -0
- package/dist/shared/duckduckgo-search.d.ts +86 -0
- package/dist/shared/duckduckgo-search.d.ts.map +1 -0
- package/dist/shared/duckduckgo-search.js +218 -0
- package/dist/shared/duckduckgo-search.js.map +1 -0
- package/dist/shared/fetch-page.d.ts +50 -0
- package/dist/shared/fetch-page.d.ts.map +1 -0
- package/dist/shared/fetch-page.js +212 -0
- package/dist/shared/fetch-page.js.map +1 -0
- package/dist/shared/html-utils.d.ts +99 -0
- package/dist/shared/html-utils.d.ts.map +1 -0
- package/dist/shared/html-utils.js +246 -0
- package/dist/shared/html-utils.js.map +1 -0
- package/dist/shared/readability-extract.d.ts +33 -0
- package/dist/shared/readability-extract.d.ts.map +1 -0
- package/dist/shared/readability-extract.js +45 -0
- package/dist/shared/readability-extract.js.map +1 -0
- package/dist/shared/sanitize-text.d.ts +24 -0
- package/dist/shared/sanitize-text.d.ts.map +1 -0
- package/dist/shared/sanitize-text.js +49 -0
- package/dist/shared/sanitize-text.js.map +1 -0
- package/dist/shared/search-utils.d.ts +18 -0
- package/dist/shared/search-utils.d.ts.map +1 -0
- package/dist/shared/search-utils.js +20 -0
- package/dist/shared/search-utils.js.map +1 -0
- package/dist/structured/wikidata.d.ts +128 -0
- package/dist/structured/wikidata.d.ts.map +1 -0
- package/dist/structured/wikidata.js +361 -0
- package/dist/structured/wikidata.js.map +1 -0
- package/dist/structured/wikipedia.d.ts +184 -0
- package/dist/structured/wikipedia.d.ts.map +1 -0
- package/dist/structured/wikipedia.js +275 -0
- package/dist/structured/wikipedia.js.map +1 -0
- package/dist/web-search/base.d.ts +128 -0
- package/dist/web-search/base.d.ts.map +1 -0
- package/dist/web-search/base.js +251 -0
- package/dist/web-search/base.js.map +1 -0
- package/dist/web-search/bing.d.ts +21 -0
- package/dist/web-search/bing.d.ts.map +1 -0
- package/dist/web-search/bing.js +53 -0
- package/dist/web-search/bing.js.map +1 -0
- package/dist/web-search/brave.d.ts +21 -0
- package/dist/web-search/brave.d.ts.map +1 -0
- package/dist/web-search/brave.js +56 -0
- package/dist/web-search/brave.js.map +1 -0
- package/dist/web-search/duckduckgo.d.ts +15 -0
- package/dist/web-search/duckduckgo.d.ts.map +1 -0
- package/dist/web-search/duckduckgo.js +21 -0
- package/dist/web-search/duckduckgo.js.map +1 -0
- package/dist/web-search/google.d.ts +24 -0
- package/dist/web-search/google.d.ts.map +1 -0
- package/dist/web-search/google.js +48 -0
- package/dist/web-search/google.js.map +1 -0
- package/package.json +58 -0
|
@@ -0,0 +1,622 @@
|
|
|
1
|
+
/**
 * Tests for WebSearchBase abstract class.
 *
 * Uses a concrete TestSearchSource subclass with configurable search results.
 * Mocks fetchPage and extractArticleContent to avoid real HTTP calls.
 * Tests the full pipeline: search → score/rank → fetch → extract → combine.
 */
import { describe, it, expect, vi, beforeEach, afterEach } from "vitest";
import { ReliabilityTier } from "@debriefer/core";
import { WebSearchBase } from "../../web-search/base.js";
// ============================================================================
// Mocks
// ============================================================================
// NOTE: Vitest hoists vi.mock() calls to the top of the module, so these
// factories take effect before any import of the mocked modules resolves.
vi.mock("../../shared/fetch-page.js", () => ({
  fetchPage: vi.fn(),
}));
vi.mock("../../shared/readability-extract.js", () => ({
  extractArticleContent: vi.fn(),
}));
// Import mocked modules after vi.mock declarations
import { fetchPage } from "../../shared/fetch-page.js";
import { extractArticleContent } from "../../shared/readability-extract.js";
// Typed handles to the mocked functions so tests can configure implementations
// and inspect call history.
const mockFetchPage = vi.mocked(fetchPage);
const mockExtract = vi.mocked(extractArticleContent);
// Clear both implementation and call history before every test so each test
// starts from a clean mock state.
beforeEach(() => {
  mockFetchPage.mockReset();
  mockExtract.mockReset();
});
// Restore any spies individual tests may have created.
afterEach(() => {
  vi.restoreAllMocks();
});
|
|
32
|
+
// ============================================================================
|
|
33
|
+
// Test Subclass
|
|
34
|
+
// ============================================================================
|
|
35
|
+
/**
 * Concrete WebSearchBase subclass used as the test fixture.
 *
 * Search results are injected through the public `searchResults` field, and
 * the base class's protected members are surfaced via thin public wrappers so
 * tests can drive them directly.
 */
class TestSearchSource extends WebSearchBase {
  // Source identity metadata consumed by the base-class pipeline.
  name = "Test Search";
  type = "test-search";
  reliabilityTier = ReliabilityTier.SEARCH_AGGREGATOR;
  domain = "test.example.com";
  // Cost metadata: this fixture models a free source.
  isFree = true;
  estimatedCostPerQuery = 0;
  // Results returned verbatim by performSearch; assigned per test.
  searchResults = [];
  /** Return the injected results — no network I/O is performed. */
  async performSearch() {
    return this.searchResults;
  }
  /** Expose protected isDomainBlocked for testing. */
  testIsDomainBlocked(url) {
    return this.isDomainBlocked(url);
  }
  /** Expose protected fetchResult for testing. */
  async doFetch(subject, signal) {
    return this.fetchResult(subject, signal);
  }
}
|
|
55
|
+
// ============================================================================
|
|
56
|
+
// Test Helpers
|
|
57
|
+
// ============================================================================
|
|
58
|
+
/**
|
|
59
|
+
* Pad text to exceed the default minContentLength (200 chars).
|
|
60
|
+
* Appends filler to ensure the text is long enough for pipeline filtering.
|
|
61
|
+
*/
|
|
62
|
+
function longText(prefix) {
|
|
63
|
+
const filler = " — this additional text is padding to ensure the extracted content exceeds the default minimum content length threshold of 200 characters that the WebSearchBase pipeline enforces during extraction.";
|
|
64
|
+
return prefix + filler;
|
|
65
|
+
}
|
|
66
|
+
/**
 * Build a minimal research subject, with optional field overrides.
 *
 * @param {object} [overrides] - Fields merged over the defaults (later wins).
 * @returns {object} A subject object with at least `id` and `name`.
 */
function makeSubject(overrides) {
  const defaults = { id: 1, name: "John Wayne" };
  return Object.assign({}, defaults, overrides);
}
|
|
73
|
+
/**
 * Build a search-result stub, with optional field overrides.
 *
 * @param {object} [overrides] - Fields merged over the defaults (later wins).
 * @returns {object} A result with `url`, `title`, and `snippet`.
 */
function makeSearchResult(overrides) {
  const defaults = {
    url: "https://example.com/page",
    title: "Example Page",
    snippet: "A page about the topic",
  };
  return Object.assign({}, defaults, overrides);
}
|
|
81
|
+
/**
 * Set up fetchPage and extractArticleContent to return content for each URL.
 *
 * Pages not present in `pages`, or marked `fetchFailed`/`extractFailed`,
 * produce a failed fetch result or a null extraction respectively.
 *
 * @param {Array<object>} pages - Page fixtures keyed by `url`; optional
 *   fields: `html`, `text`, `title`, `fetchFailed`, `extractFailed`.
 */
function setupPageExtraction(pages) {
  // First match wins, mirroring Array.prototype.find semantics.
  const lookup = (url) => pages.find((page) => page.url === url);
  mockFetchPage.mockImplementation(async ({ url }) => {
    const page = lookup(url);
    if (!page || page.fetchFailed) {
      return { content: "", url, fetchMethod: "none", error: "Fetch failed" };
    }
    const body = page.text ?? "content";
    return {
      content: page.html ?? `<html><body>${body}</body></html>`,
      url: page.url,
      fetchMethod: "direct",
    };
  });
  mockExtract.mockImplementation((_html, url) => {
    const page = lookup(url);
    if (!page || page.extractFailed) {
      return null;
    }
    return {
      text: page.text ?? longText("Default extracted content"),
      title: page.title ?? "Page Title",
      author: null,
      excerpt: null,
      siteName: null,
    };
  });
}
|
|
113
|
+
// ============================================================================
|
|
114
|
+
// Tests
|
|
115
|
+
// ============================================================================
|
|
116
|
+
describe("WebSearchBase", () => {
|
|
117
|
+
// ==========================================================================
|
|
118
|
+
// Full pipeline
|
|
119
|
+
// ==========================================================================
|
|
120
|
+
describe("full pipeline", () => {
|
|
121
|
+
it("search → fetch → extract → combine returns a RawFinding", async () => {
|
|
122
|
+
const source = new TestSearchSource();
|
|
123
|
+
source.searchResults = [
|
|
124
|
+
makeSearchResult({ url: "https://example.com/article", title: "Great Article" }),
|
|
125
|
+
];
|
|
126
|
+
setupPageExtraction([
|
|
127
|
+
{
|
|
128
|
+
url: "https://example.com/article",
|
|
129
|
+
title: "Great Article",
|
|
130
|
+
text: longText("This is the extracted article content about John Wayne"),
|
|
131
|
+
},
|
|
132
|
+
]);
|
|
133
|
+
const subject = makeSubject();
|
|
134
|
+
const signal = AbortSignal.timeout(5000);
|
|
135
|
+
const result = await source.doFetch(subject, signal);
|
|
136
|
+
expect(result).not.toBeNull();
|
|
137
|
+
expect(result.text).toContain("This is the extracted article content");
|
|
138
|
+
expect(result.url).toBe("https://example.com/article");
|
|
139
|
+
expect(result.costUsd).toBe(0);
|
|
140
|
+
});
|
|
141
|
+
});
|
|
142
|
+
// ==========================================================================
|
|
143
|
+
// Empty search results → null
|
|
144
|
+
// ==========================================================================
|
|
145
|
+
describe("empty search results", () => {
|
|
146
|
+
it("returns null when performSearch returns no results", async () => {
|
|
147
|
+
const source = new TestSearchSource();
|
|
148
|
+
source.searchResults = [];
|
|
149
|
+
const result = await source.doFetch(makeSubject(), AbortSignal.timeout(5000));
|
|
150
|
+
expect(result).toBeNull();
|
|
151
|
+
});
|
|
152
|
+
});
|
|
153
|
+
// ==========================================================================
|
|
154
|
+
// All pages fail extraction → null
|
|
155
|
+
// ==========================================================================
|
|
156
|
+
describe("all pages fail extraction", () => {
|
|
157
|
+
it("returns null when all fetched pages fail extraction", async () => {
|
|
158
|
+
const source = new TestSearchSource();
|
|
159
|
+
source.searchResults = [
|
|
160
|
+
makeSearchResult({ url: "https://example.com/page1" }),
|
|
161
|
+
makeSearchResult({ url: "https://example.com/page2" }),
|
|
162
|
+
];
|
|
163
|
+
setupPageExtraction([
|
|
164
|
+
{ url: "https://example.com/page1", extractFailed: true },
|
|
165
|
+
{ url: "https://example.com/page2", extractFailed: true },
|
|
166
|
+
]);
|
|
167
|
+
const result = await source.doFetch(makeSubject(), AbortSignal.timeout(5000));
|
|
168
|
+
expect(result).toBeNull();
|
|
169
|
+
});
|
|
170
|
+
});
|
|
171
|
+
// ==========================================================================
|
|
172
|
+
// All pages below minContentLength → null
|
|
173
|
+
// ==========================================================================
|
|
174
|
+
describe("all pages below minContentLength", () => {
|
|
175
|
+
it("returns null when all extracted content is too short", async () => {
|
|
176
|
+
const source = new TestSearchSource({ minContentLength: 200 });
|
|
177
|
+
source.searchResults = [makeSearchResult({ url: "https://example.com/short" })];
|
|
178
|
+
setupPageExtraction([
|
|
179
|
+
{
|
|
180
|
+
url: "https://example.com/short",
|
|
181
|
+
text: "Short text",
|
|
182
|
+
},
|
|
183
|
+
]);
|
|
184
|
+
const result = await source.doFetch(makeSubject(), AbortSignal.timeout(5000));
|
|
185
|
+
expect(result).toBeNull();
|
|
186
|
+
});
|
|
187
|
+
});
|
|
188
|
+
// ==========================================================================
|
|
189
|
+
// maxLinksToFollow limits fetches
|
|
190
|
+
// ==========================================================================
|
|
191
|
+
describe("maxLinksToFollow", () => {
|
|
192
|
+
it("only fetches up to maxLinksToFollow pages", async () => {
|
|
193
|
+
const source = new TestSearchSource({ maxLinksToFollow: 2 });
|
|
194
|
+
source.searchResults = [
|
|
195
|
+
makeSearchResult({ url: "https://example.com/page1", title: "Page 1" }),
|
|
196
|
+
makeSearchResult({ url: "https://example.com/page2", title: "Page 2" }),
|
|
197
|
+
makeSearchResult({ url: "https://example.com/page3", title: "Page 3" }),
|
|
198
|
+
makeSearchResult({ url: "https://example.com/page4", title: "Page 4" }),
|
|
199
|
+
];
|
|
200
|
+
setupPageExtraction([
|
|
201
|
+
{ url: "https://example.com/page1", text: longText("Content from page one") },
|
|
202
|
+
{ url: "https://example.com/page2", text: longText("Content from page two") },
|
|
203
|
+
{ url: "https://example.com/page3", text: longText("Content from page three") },
|
|
204
|
+
{ url: "https://example.com/page4", text: longText("Content from page four") },
|
|
205
|
+
]);
|
|
206
|
+
const result = await source.doFetch(makeSubject(), AbortSignal.timeout(5000));
|
|
207
|
+
expect(result).not.toBeNull();
|
|
208
|
+
// Only 2 pages should have been fetched
|
|
209
|
+
expect(mockFetchPage).toHaveBeenCalledTimes(2);
|
|
210
|
+
});
|
|
211
|
+
});
|
|
212
|
+
// ==========================================================================
|
|
213
|
+
// domainScores affects link ordering
|
|
214
|
+
// ==========================================================================
|
|
215
|
+
describe("domainScores", () => {
|
|
216
|
+
it("preferred domain is fetched first due to higher score", async () => {
|
|
217
|
+
const source = new TestSearchSource({
|
|
218
|
+
maxLinksToFollow: 1,
|
|
219
|
+
domainScores: { "preferred.com": 100 },
|
|
220
|
+
});
|
|
221
|
+
source.searchResults = [
|
|
222
|
+
makeSearchResult({ url: "https://example.com/page", title: "Normal Page" }),
|
|
223
|
+
makeSearchResult({ url: "https://preferred.com/page", title: "Preferred Page" }),
|
|
224
|
+
];
|
|
225
|
+
setupPageExtraction([
|
|
226
|
+
{ url: "https://example.com/page", text: longText("Normal content") },
|
|
227
|
+
{ url: "https://preferred.com/page", text: longText("Preferred content") },
|
|
228
|
+
]);
|
|
229
|
+
const result = await source.doFetch(makeSubject(), AbortSignal.timeout(5000));
|
|
230
|
+
expect(result).not.toBeNull();
|
|
231
|
+
// With maxLinksToFollow=1, only the preferred page should be fetched
|
|
232
|
+
expect(mockFetchPage).toHaveBeenCalledTimes(1);
|
|
233
|
+
expect(mockFetchPage).toHaveBeenCalledWith(expect.objectContaining({ url: "https://preferred.com/page" }));
|
|
234
|
+
});
|
|
235
|
+
});
|
|
236
|
+
// ==========================================================================
|
|
237
|
+
// boostKeywords affect link ordering
|
|
238
|
+
// ==========================================================================
|
|
239
|
+
describe("boostKeywords", () => {
|
|
240
|
+
it("result with boost keyword in title is ranked higher", async () => {
|
|
241
|
+
const source = new TestSearchSource({
|
|
242
|
+
maxLinksToFollow: 1,
|
|
243
|
+
boostKeywords: [{ keyword: "obituary", boost: 100 }],
|
|
244
|
+
});
|
|
245
|
+
source.searchResults = [
|
|
246
|
+
makeSearchResult({ url: "https://example.com/bio", title: "Biography" }),
|
|
247
|
+
makeSearchResult({ url: "https://example.com/obit", title: "Obituary for John Wayne" }),
|
|
248
|
+
];
|
|
249
|
+
setupPageExtraction([
|
|
250
|
+
{ url: "https://example.com/bio", text: longText("Biography content") },
|
|
251
|
+
{ url: "https://example.com/obit", text: longText("Obituary content") },
|
|
252
|
+
]);
|
|
253
|
+
const result = await source.doFetch(makeSubject(), AbortSignal.timeout(5000));
|
|
254
|
+
expect(result).not.toBeNull();
|
|
255
|
+
expect(mockFetchPage).toHaveBeenCalledTimes(1);
|
|
256
|
+
expect(mockFetchPage).toHaveBeenCalledWith(expect.objectContaining({ url: "https://example.com/obit" }));
|
|
257
|
+
});
|
|
258
|
+
});
|
|
259
|
+
// ==========================================================================
|
|
260
|
+
// penaltyKeywords affect link ordering
|
|
261
|
+
// ==========================================================================
|
|
262
|
+
describe("penaltyKeywords", () => {
|
|
263
|
+
it("result with penalty keyword in snippet is ranked lower", async () => {
|
|
264
|
+
const source = new TestSearchSource({
|
|
265
|
+
maxLinksToFollow: 1,
|
|
266
|
+
penaltyKeywords: [{ keyword: "login", penalty: 100 }],
|
|
267
|
+
});
|
|
268
|
+
source.searchResults = [
|
|
269
|
+
makeSearchResult({
|
|
270
|
+
url: "https://example.com/login-page",
|
|
271
|
+
title: "Page",
|
|
272
|
+
snippet: "Please login to continue",
|
|
273
|
+
}),
|
|
274
|
+
makeSearchResult({
|
|
275
|
+
url: "https://example.com/article",
|
|
276
|
+
title: "Article",
|
|
277
|
+
snippet: "Real article content",
|
|
278
|
+
}),
|
|
279
|
+
];
|
|
280
|
+
setupPageExtraction([
|
|
281
|
+
{ url: "https://example.com/login-page", text: longText("Login page content") },
|
|
282
|
+
{ url: "https://example.com/article", text: longText("Real article content") },
|
|
283
|
+
]);
|
|
284
|
+
const result = await source.doFetch(makeSubject(), AbortSignal.timeout(5000));
|
|
285
|
+
expect(result).not.toBeNull();
|
|
286
|
+
expect(mockFetchPage).toHaveBeenCalledTimes(1);
|
|
287
|
+
expect(mockFetchPage).toHaveBeenCalledWith(expect.objectContaining({ url: "https://example.com/article" }));
|
|
288
|
+
});
|
|
289
|
+
});
|
|
290
|
+
// ==========================================================================
|
|
291
|
+
// blockedDomains filters links
|
|
292
|
+
// ==========================================================================
|
|
293
|
+
describe("blockedDomains", () => {
|
|
294
|
+
it("filters out results from blocked domains", async () => {
|
|
295
|
+
const source = new TestSearchSource({
|
|
296
|
+
blockedDomains: ["blocked.com"],
|
|
297
|
+
});
|
|
298
|
+
source.searchResults = [
|
|
299
|
+
makeSearchResult({ url: "https://blocked.com/page", title: "Blocked" }),
|
|
300
|
+
makeSearchResult({ url: "https://sub.blocked.com/page", title: "Also Blocked" }),
|
|
301
|
+
makeSearchResult({ url: "https://allowed.com/page", title: "Allowed" }),
|
|
302
|
+
];
|
|
303
|
+
setupPageExtraction([
|
|
304
|
+
{ url: "https://blocked.com/page", text: longText("Should not be fetched") },
|
|
305
|
+
{ url: "https://sub.blocked.com/page", text: longText("Should not be fetched either") },
|
|
306
|
+
{ url: "https://allowed.com/page", text: longText("Allowed content") },
|
|
307
|
+
]);
|
|
308
|
+
const result = await source.doFetch(makeSubject(), AbortSignal.timeout(5000));
|
|
309
|
+
expect(result).not.toBeNull();
|
|
310
|
+
// Only the allowed page should have been fetched
|
|
311
|
+
expect(mockFetchPage).toHaveBeenCalledTimes(1);
|
|
312
|
+
expect(mockFetchPage).toHaveBeenCalledWith(expect.objectContaining({ url: "https://allowed.com/page" }));
|
|
313
|
+
});
|
|
314
|
+
it("returns null when all results are from blocked domains", async () => {
|
|
315
|
+
const source = new TestSearchSource({
|
|
316
|
+
blockedDomains: ["blocked.com"],
|
|
317
|
+
});
|
|
318
|
+
source.searchResults = [
|
|
319
|
+
makeSearchResult({ url: "https://blocked.com/page1" }),
|
|
320
|
+
makeSearchResult({ url: "https://www.blocked.com/page2" }),
|
|
321
|
+
];
|
|
322
|
+
const result = await source.doFetch(makeSubject(), AbortSignal.timeout(5000));
|
|
323
|
+
expect(result).toBeNull();
|
|
324
|
+
expect(mockFetchPage).not.toHaveBeenCalled();
|
|
325
|
+
});
|
|
326
|
+
});
|
|
327
|
+
// ==========================================================================
|
|
328
|
+
// Multiple pages combine with attribution
|
|
329
|
+
// ==========================================================================
|
|
330
|
+
describe("multiple pages combine with attribution", () => {
|
|
331
|
+
it("combines text from multiple pages with title attribution and separators", async () => {
|
|
332
|
+
const source = new TestSearchSource({ maxLinksToFollow: 3 });
|
|
333
|
+
source.searchResults = [
|
|
334
|
+
makeSearchResult({ url: "https://example.com/page1", title: "First Article" }),
|
|
335
|
+
makeSearchResult({ url: "https://example.com/page2", title: "Second Article" }),
|
|
336
|
+
];
|
|
337
|
+
setupPageExtraction([
|
|
338
|
+
{
|
|
339
|
+
url: "https://example.com/page1",
|
|
340
|
+
title: "First Article",
|
|
341
|
+
text: longText("Content from the first article"),
|
|
342
|
+
},
|
|
343
|
+
{
|
|
344
|
+
url: "https://example.com/page2",
|
|
345
|
+
title: "Second Article",
|
|
346
|
+
text: longText("Content from the second article"),
|
|
347
|
+
},
|
|
348
|
+
]);
|
|
349
|
+
const result = await source.doFetch(makeSubject(), AbortSignal.timeout(5000));
|
|
350
|
+
expect(result).not.toBeNull();
|
|
351
|
+
// Should have title attributions
|
|
352
|
+
expect(result.text).toContain("First Article");
|
|
353
|
+
expect(result.text).toContain("Second Article");
|
|
354
|
+
// Should have separator between pages
|
|
355
|
+
expect(result.text).toContain("---");
|
|
356
|
+
// Should have content from both pages
|
|
357
|
+
expect(result.text).toContain("Content from the first article");
|
|
358
|
+
expect(result.text).toContain("Content from the second article");
|
|
359
|
+
});
|
|
360
|
+
});
|
|
361
|
+
// ==========================================================================
|
|
362
|
+
// Metadata includes expected fields
|
|
363
|
+
// ==========================================================================
|
|
364
|
+
describe("metadata", () => {
|
|
365
|
+
it("includes searchEngine, linksFollowed, pagesExtracted, and urls", async () => {
|
|
366
|
+
const source = new TestSearchSource();
|
|
367
|
+
source.searchResults = [
|
|
368
|
+
makeSearchResult({ url: "https://example.com/page1" }),
|
|
369
|
+
makeSearchResult({ url: "https://example.com/page2" }),
|
|
370
|
+
];
|
|
371
|
+
setupPageExtraction([
|
|
372
|
+
{
|
|
373
|
+
url: "https://example.com/page1",
|
|
374
|
+
title: "Page 1",
|
|
375
|
+
text: longText("Content from page 1"),
|
|
376
|
+
},
|
|
377
|
+
{
|
|
378
|
+
url: "https://example.com/page2",
|
|
379
|
+
extractFailed: true,
|
|
380
|
+
},
|
|
381
|
+
]);
|
|
382
|
+
const result = await source.doFetch(makeSubject(), AbortSignal.timeout(5000));
|
|
383
|
+
expect(result).not.toBeNull();
|
|
384
|
+
expect(result.metadata).toBeDefined();
|
|
385
|
+
expect(result.metadata.searchEngine).toBe("Test Search");
|
|
386
|
+
expect(result.metadata.linksFollowed).toBe(2);
|
|
387
|
+
expect(result.metadata.pagesExtracted).toBe(1);
|
|
388
|
+
expect(result.metadata.urls).toEqual(["https://example.com/page1"]);
|
|
389
|
+
});
|
|
390
|
+
});
|
|
391
|
+
// ==========================================================================
|
|
392
|
+
// Confidence is -1 (delegation to base class)
|
|
393
|
+
// ==========================================================================
|
|
394
|
+
describe("confidence delegation", () => {
|
|
395
|
+
it("returns confidence of -1 to delegate to base class keyword scoring", async () => {
|
|
396
|
+
const source = new TestSearchSource();
|
|
397
|
+
source.searchResults = [makeSearchResult({ url: "https://example.com/page" })];
|
|
398
|
+
setupPageExtraction([
|
|
399
|
+
{
|
|
400
|
+
url: "https://example.com/page",
|
|
401
|
+
text: longText("Extracted article content"),
|
|
402
|
+
},
|
|
403
|
+
]);
|
|
404
|
+
const result = await source.doFetch(makeSubject(), AbortSignal.timeout(5000));
|
|
405
|
+
expect(result).not.toBeNull();
|
|
406
|
+
expect(result.confidence).toBe(-1);
|
|
407
|
+
});
|
|
408
|
+
});
|
|
409
|
+
// ==========================================================================
// Fetch failures for some pages still returns others
// ==========================================================================
describe("partial fetch failures", () => {
    it("returns content from pages that succeed even when some fail", async () => {
        const src = new TestSearchSource();
        src.searchResults = ["https://example.com/fail", "https://example.com/succeed"].map(
            (url) => makeSearchResult({ url }),
        );
        setupPageExtraction([
            { url: "https://example.com/fail", fetchFailed: true },
            { url: "https://example.com/succeed", text: longText("Successful page content") },
        ]);
        const fetched = await src.doFetch(makeSubject(), AbortSignal.timeout(5000));
        expect(fetched).not.toBeNull();
        // The failed page is dropped; text from the surviving page is returned.
        expect(fetched.text).toContain("Successful page content");
    });
});
|
|
431
|
+
// ==========================================================================
// SSRF prevention
// ==========================================================================
describe("SSRF prevention (isDomainBlocked)", () => {
    const src = new TestSearchSource();
    // Shorthand predicate: true means the URL must never be fetched.
    const blocked = (url) => src.testIsDomainBlocked(url);
    it("allows normal http URLs", () => {
        expect(blocked("https://example.com/page")).toBe(false);
        expect(blocked("http://example.com/page")).toBe(false);
    });
    it("blocks non-http schemes", () => {
        expect(blocked("file:///etc/passwd")).toBe(true);
        expect(blocked("ftp://internal/file")).toBe(true);
        expect(blocked("javascript:alert(1)")).toBe(true);
    });
    it("blocks localhost", () => {
        expect(blocked("http://localhost/admin")).toBe(true);
        expect(blocked("http://localhost:8080/")).toBe(true);
    });
    it("blocks 127.0.0.0/8 loopback range", () => {
        expect(blocked("http://127.0.0.1/")).toBe(true);
        expect(blocked("http://127.1.2.3/")).toBe(true);
        expect(blocked("http://127.255.255.255/")).toBe(true);
    });
    it("blocks IPv6 loopback", () => {
        expect(blocked("http://[::1]/")).toBe(true);
    });
    it("blocks RFC 1918 private IPs", () => {
        expect(blocked("http://10.0.0.1/")).toBe(true);
        expect(blocked("http://10.255.255.255/")).toBe(true);
        expect(blocked("http://192.168.1.1/")).toBe(true);
        expect(blocked("http://172.16.0.1/")).toBe(true);
        expect(blocked("http://172.31.255.255/")).toBe(true);
    });
    it("allows non-private 172.x IPs", () => {
        // Only 172.16.0.0/12 is RFC 1918 space; these fall outside that range.
        expect(blocked("http://172.64.0.1/")).toBe(false); // Cloudflare range
        expect(blocked("http://172.15.0.1/")).toBe(false);
        expect(blocked("http://172.32.0.1/")).toBe(false);
    });
    it("blocks cloud metadata endpoint", () => {
        expect(blocked("http://169.254.169.254/latest/meta-data/")).toBe(true);
        expect(blocked("http://169.254.0.1/")).toBe(true);
    });
    it("blocks 0.0.0.0", () => {
        expect(blocked("http://0.0.0.0/")).toBe(true);
    });
    it("blocks .local domains", () => {
        expect(blocked("http://myserver.local/")).toBe(true);
    });
    it("blocks unparseable URLs", () => {
        expect(blocked("not-a-url")).toBe(true);
    });
    it("does not false-positive on domains starting with fc/fd/fe80", () => {
        // Hostnames that merely share a prefix with IPv6 private-range notation.
        expect(blocked("https://fcnews.com/article")).toBe(false);
        expect(blocked("https://fdic.gov/")).toBe(false);
        expect(blocked("https://fe80news.com/")).toBe(false);
    });
});
|
|
488
|
+
// ==========================================================================
// linkSelector callback
// ==========================================================================
describe("linkSelector", () => {
    it("filters results via linkSelector before fetching", async () => {
        const src = new TestSearchSource({
            linkSelector: async (hits) => hits.filter((hit) => hit.url.includes("chosen")),
        });
        src.searchResults = [
            makeSearchResult({ url: "https://example.com/skipped", title: "Skipped" }),
            makeSearchResult({ url: "https://example.com/chosen", title: "Chosen" }),
        ];
        setupPageExtraction([
            { url: "https://example.com/skipped", text: longText("Should not be fetched") },
            { url: "https://example.com/chosen", text: longText("Chosen content") },
        ]);
        const fetched = await src.doFetch(makeSubject(), AbortSignal.timeout(5000));
        expect(fetched).not.toBeNull();
        // Only the selector-approved link reaches the page fetcher.
        expect(mockFetchPage).toHaveBeenCalledTimes(1);
        expect(mockFetchPage).toHaveBeenCalledWith(expect.objectContaining({ url: "https://example.com/chosen" }));
    });
    it("receives the subject for context-aware selection", async () => {
        const selectorSpy = vi.fn().mockImplementation((hits) => hits);
        const src = new TestSearchSource({ linkSelector: selectorSpy });
        src.searchResults = [makeSearchResult({ url: "https://example.com/page" })];
        setupPageExtraction([{ url: "https://example.com/page", text: longText("Content") }]);
        await src.doFetch(makeSubject({ name: "Test Actor" }), AbortSignal.timeout(5000));
        // The selector gets both the result list and the subject under research.
        expect(selectorSpy).toHaveBeenCalledWith(expect.any(Array), expect.objectContaining({ name: "Test Actor" }));
    });
});
|
|
519
|
+
// ==========================================================================
// maxLinkCost budget
// ==========================================================================
describe("maxLinkCost", () => {
    it("stops following links when cost budget is zero", async () => {
        const src = new TestSearchSource({ maxLinkCost: 0 });
        src.searchResults = [
            makeSearchResult({ url: "https://example.com/page1" }),
            makeSearchResult({ url: "https://example.com/page2" }),
        ];
        setupPageExtraction([
            { url: "https://example.com/page1", text: longText("Content 1") },
            { url: "https://example.com/page2", text: longText("Content 2") },
        ]);
        const fetched = await src.doFetch(makeSubject(), AbortSignal.timeout(5000));
        // A zero budget means no page may be fetched at all.
        expect(fetched).toBeNull();
        expect(mockFetchPage).not.toHaveBeenCalled();
    });
    it("stops after budget is exhausted mid-way through links (custom fetch)", async () => {
        const customFetch = vi.fn().mockResolvedValue(longText("Fetched content"));
        // TestSearchSource reports estimatedCostPerQuery = 0, which would never
        // consume the budget; override it to $0.001 per fetch so a $0.001 budget
        // is fully spent after exactly one page.
        const src = new TestSearchSource({ maxLinkCost: 0.001, fetchPage: customFetch });
        Object.defineProperty(src, "estimatedCostPerQuery", { value: 0.001 });
        src.searchResults = [
            makeSearchResult({ url: "https://example.com/page1" }),
            makeSearchResult({ url: "https://example.com/page2" }),
            makeSearchResult({ url: "https://example.com/page3" }),
        ];
        const fetched = await src.doFetch(makeSubject(), AbortSignal.timeout(5000));
        expect(fetched).not.toBeNull();
        // One fetch ($0.001) empties the budget; the remaining links are skipped.
        expect(customFetch).toHaveBeenCalledTimes(1);
    });
    it("enforces budget in default fetch path", async () => {
        const src = new TestSearchSource({ maxLinkCost: 0.001 });
        Object.defineProperty(src, "estimatedCostPerQuery", { value: 0.001 });
        src.searchResults = [
            makeSearchResult({ url: "https://example.com/page1" }),
            makeSearchResult({ url: "https://example.com/page2" }),
            makeSearchResult({ url: "https://example.com/page3" }),
        ];
        setupPageExtraction([
            { url: "https://example.com/page1", text: longText("Content 1") },
            { url: "https://example.com/page2", text: longText("Content 2") },
            { url: "https://example.com/page3", text: longText("Content 3") },
        ]);
        const fetched = await src.doFetch(makeSubject(), AbortSignal.timeout(5000));
        expect(fetched).not.toBeNull();
        // The $0.001 budget covers exactly one fetch via the default pipeline.
        expect(mockFetchPage).toHaveBeenCalledTimes(1);
    });
});
|
|
580
|
+
// ==========================================================================
// custom fetchPage callback
// ==========================================================================
describe("custom fetchPage", () => {
    it("uses custom fetchPage instead of default pipeline", async () => {
        const customFetch = vi.fn().mockResolvedValue(longText("Custom fetched content"));
        const src = new TestSearchSource({ fetchPage: customFetch });
        src.searchResults = [makeSearchResult({ url: "https://example.com/page", title: "Custom Page" })];
        const fetched = await src.doFetch(makeSubject(), AbortSignal.timeout(5000));
        expect(fetched).not.toBeNull();
        expect(fetched.text).toContain("Custom fetched content");
        // The default extraction pipeline must be bypassed entirely,
        // and the callback receives the URL plus an abort signal.
        expect(mockFetchPage).not.toHaveBeenCalled();
        expect(customFetch).toHaveBeenCalledWith("https://example.com/page", expect.any(AbortSignal));
    });
    it("skips pages where custom fetchPage returns null", async () => {
        const customFetch = vi
            .fn()
            .mockResolvedValueOnce(null)
            .mockResolvedValueOnce(longText("Second page content"));
        const src = new TestSearchSource({ fetchPage: customFetch });
        src.searchResults = [
            makeSearchResult({ url: "https://example.com/fail", title: "Fail" }),
            makeSearchResult({ url: "https://example.com/succeed", title: "Succeed" }),
        ];
        const fetched = await src.doFetch(makeSubject(), AbortSignal.timeout(5000));
        expect(fetched).not.toBeNull();
        // A null from the callback skips that page but keeps iterating.
        expect(fetched.text).toContain("Second page content");
        expect(customFetch).toHaveBeenCalledTimes(2);
    });
    it("respects minContentLength with custom fetchPage", async () => {
        const customFetch = vi.fn().mockResolvedValue("Short");
        const src = new TestSearchSource({ fetchPage: customFetch, minContentLength: 200 });
        src.searchResults = [makeSearchResult({ url: "https://example.com/short" })];
        // "Short" is under the 200-character minimum, so nothing usable remains.
        expect(await src.doFetch(makeSubject(), AbortSignal.timeout(5000))).toBeNull();
    });
});
|
|
621
|
+
});
|
|
622
|
+
//# sourceMappingURL=base.test.js.map
|