nosible 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +438 -0
- package/dist/index.cjs +1841 -0
- package/dist/index.cjs.map +29 -0
- package/dist/index.js +1815 -0
- package/dist/index.js.map +29 -0
- package/package.json +63 -0
- package/src/api/api.test.ts +366 -0
- package/src/api/index.ts +179 -0
- package/src/api/schemas.ts +152 -0
- package/src/client.test.ts +685 -0
- package/src/client.ts +762 -0
- package/src/index.ts +4 -0
- package/src/scrape/types.ts +119 -0
- package/src/scrape/webPageData.test.ts +302 -0
- package/src/scrape/webPageData.ts +103 -0
- package/src/search/analyze.test.ts +396 -0
- package/src/search/analyze.ts +151 -0
- package/src/search/bulkSearch.ts +62 -0
- package/src/search/result.test.ts +423 -0
- package/src/search/result.ts +391 -0
- package/src/search/result.types.ts +32 -0
- package/src/search/resultFactory.ts +21 -0
- package/src/search/resultSet.io.test.ts +320 -0
- package/src/search/resultSet.test.ts +368 -0
- package/src/search/resultSet.ts +387 -0
- package/src/search/resultSet.types.ts +3 -0
- package/src/search/search.test.ts +299 -0
- package/src/search/search.ts +187 -0
- package/src/search/searchSet.io.test.ts +321 -0
- package/src/search/searchSet.ts +122 -0
- package/src/search/sqlFilter.test.ts +129 -0
- package/src/search/sqlFilter.ts +147 -0
- package/src/test-utils/mocks.ts +159 -0
- package/src/topicTrend/topicTrend.ts +53 -0
- package/src/utils/browser.test.ts +209 -0
- package/src/utils/browser.ts +21 -0
- package/src/utils/fernet.ts +47 -0
- package/src/utils/file.test.ts +81 -0
- package/src/utils/file.ts +195 -0
- package/src/utils/index.ts +7 -0
- package/src/utils/llm.test.ts +279 -0
- package/src/utils/llm.ts +244 -0
- package/src/utils/userPlan.test.ts +332 -0
- package/src/utils/userPlan.ts +211 -0
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
import {describe, it, expect, beforeAll} from "bun:test";
|
|
2
|
+
import {ResultSet} from "./resultSet";
|
|
3
|
+
import {importCsv} from "../utils/file";
|
|
4
|
+
import {randomUUID} from "node:crypto";
|
|
5
|
+
|
|
6
|
+
/**
 * Test configuration
 */
const TEST_DIR = `${process.cwd()}/tmp`; // Directory where test files will be created
const CLEANUP = true; // Set to false to keep test files for inspection

/**
 * Deletes a file created by a test, unless CLEANUP is switched off.
 *
 * Cleanup is best-effort and never throws: a file that no longer exists is
 * ignored, and any other failure is reported with a warning so leftover
 * files in the test directory do not go unnoticed.
 *
 * @param filePath - Path of the file to remove.
 */
const cleanupFile = async (filePath: string): Promise<void> => {
  if (!CLEANUP) {
    console.log(`Keeping test file: ${filePath}`);
    return;
  }

  const fs = await import("node:fs/promises");
  try {
    await fs.unlink(filePath);
  } catch (error: unknown) {
    // ENOENT means the file is already gone, which is the desired end state.
    const code = (error as {code?: unknown} | null)?.code;
    if (code !== "ENOENT") {
      console.warn(`Could not remove test file: ${filePath}`, error);
    }
  }
};
|
|
23
|
+
|
|
24
|
+
/**
 * I/O tests for ResultSet with file system operations
 * These tests require:
 * - NOSIBLE_API_KEY environment variable to be set
 * - Real network access to Nosible API
 * - File system access for reading/writing
 *
 * Every test that writes a file removes it in a `finally` block so a failed
 * assertion does not leave artifacts behind in TEST_DIR.
 */
describe("ResultSet - I/O Tests", () => {
  // The client class is only available through the dynamic import in beforeAll.
  let client: any;
  let searchResultSet: ResultSet;

  /** Builds a unique path inside TEST_DIR: `<stem>-<uuid>.<extension>`. */
  const tempPath = (stem: string, extension: string): string =>
    `${TEST_DIR}/${stem}-${randomUUID()}.${extension}`;

  /**
   * Resolves to true when `filePath` can be accessed on disk.
   * Waits 100 ms before probing, as the original tests did after each write.
   */
  const existsOnDisk = async (filePath: string): Promise<boolean> => {
    const fs = await import("node:fs/promises");
    await new Promise((resolve) => setTimeout(resolve, 100));
    try {
      await fs.access(filePath);
      return true;
    } catch {
      return false;
    }
  };

  beforeAll(async () => {
    try {
      // Dynamic import to break circular dependency
      const {NosibleClient} = await import("../client");
      client = new NosibleClient();

      // Create test directory if it doesn't exist. `recursive: true` already
      // tolerates an existing directory, so any error here is a genuine setup
      // failure and is allowed to propagate.
      const fs = await import("node:fs/promises");
      await fs.mkdir(TEST_DIR, {recursive: true});

      // Get a real search result set
      searchResultSet = await client.fastSearch({
        question: "artificial intelligence",
        nResults: 10,
      });
    } catch (error) {
      console.warn("Skipping I/O tests due to setup error");
      throw error;
    }
  });

  describe("File export methods", () => {
    it("should export ResultSet to JSON file", async () => {
      const fileName = tempPath("test-results", "json");
      try {
        await searchResultSet.writeToJson(fileName);

        console.log(
          `Successfully exported ${searchResultSet.results.length} results to JSON`
        );

        // Verify the file was created
        expect(await existsOnDisk(fileName)).toBe(true);
      } finally {
        await cleanupFile(fileName);
      }
    });

    it("should export ResultSet to NDJSON file", async () => {
      const fileName = tempPath("test-results", "ndjson");
      try {
        await searchResultSet.writeToNdjson(fileName);

        console.log(
          `Successfully exported ${searchResultSet.results.length} results to NDJSON`
        );

        // Verify the file was created
        expect(await existsOnDisk(fileName)).toBe(true);
      } finally {
        await cleanupFile(fileName);
      }
    });

    it("should export ResultSet to CSV file", async () => {
      const fileName = tempPath("test-results", "csv");
      try {
        await searchResultSet.writeToCsv(fileName);

        console.log(
          `Successfully exported ${searchResultSet.results.length} results to CSV`
        );

        // Verify the file was created
        expect(await existsOnDisk(fileName)).toBe(true);

        // Read file and verify contents
        const rows = await importCsv({filePath: fileName});
        expect(rows).toBeDefined();
        expect(rows.length).toBeGreaterThan(0);
        // Inspect the first row: it is the only one guaranteed to exist by the
        // length check above (index 1 would throw on a single-row export).
        // NOTE(review): assumes importCsv returns data rows only - confirm.
        expect(rows[0]?.origin_shard).toBeDefined();
      } finally {
        await cleanupFile(fileName);
      }
    });

    it("should export ResultSet to Parquet file", async () => {
      const fileName = tempPath("test-results", "parquet");
      try {
        await searchResultSet.writeToParquet(fileName);

        console.log(
          `Successfully exported ${searchResultSet.results.length} results to Parquet`
        );

        // Verify the file was created
        expect(await existsOnDisk(fileName)).toBe(true);
      } finally {
        await cleanupFile(fileName);
      }
    });

    it("should export ResultSet to IPC file", async () => {
      const fileName = tempPath("test-results", "ipc");
      try {
        await searchResultSet.writeToIpc(fileName);

        console.log(
          `Successfully exported ${searchResultSet.results.length} results to IPC`
        );

        // Verify the file was created
        expect(await existsOnDisk(fileName)).toBe(true);
      } finally {
        await cleanupFile(fileName);
      }
    });
  });

  describe("Data conversion methods", () => {
    it("should convert ResultSet to Polars DataFrame", async () => {
      const df = await searchResultSet.toPolars();

      expect(df).toBeDefined();
      expect(df.height).toBeGreaterThan(0);
      expect(df.width).toBeGreaterThan(0);

      console.log(
        `Successfully converted ${searchResultSet.results.length} results to Polars DataFrame`
      );
      console.log(`DataFrame shape: ${df.height} rows x ${df.width} columns`);
    });
  });

  describe("Round-trip export/import - JSON", () => {
    it("should verify exported JSON can be reimported", async () => {
      const fileName = tempPath("test-roundtrip", "json");
      try {
        await searchResultSet.writeToJson(fileName);

        // Import the JSON file back
        const reimportedResultSet = await ResultSet.fromFilePath(
          fileName,
          client
        );

        expect(reimportedResultSet).toBeDefined();
        expect(reimportedResultSet.results.length).toBe(
          searchResultSet.results.length
        );

        console.log(
          `Successfully round-tripped ${reimportedResultSet.results.length} results through JSON`
        );
      } finally {
        await cleanupFile(fileName);
      }
    });

    it("should produce identical JSON files after save -> load -> save cycle", async () => {
      const fs = await import("node:fs/promises");
      // Both files share one id so a kept pair is easy to match up on disk.
      const uniqueId = randomUUID();
      const file1 = `${TEST_DIR}/test-roundtrip-1-${uniqueId}.json`;
      const file2 = `${TEST_DIR}/test-roundtrip-2-${uniqueId}.json`;

      try {
        // Save original results
        await searchResultSet.writeToJson(file1);

        // Load and verify
        const reloadedResultSet = await ResultSet.fromFilePath(file1, client);
        expect(reloadedResultSet.results.length).toBe(
          searchResultSet.results.length
        );

        // Save again
        await reloadedResultSet.writeToJson(file2);

        // Compare the two JSON files
        const json1 = JSON.parse(await fs.readFile(file1, "utf-8"));
        const json2 = JSON.parse(await fs.readFile(file2, "utf-8"));

        // Verify they have the same structure and content
        expect(json1.length).toBe(json2.length);
        expect(json1).toEqual(json2);

        console.log(
          `✓ Both JSON files are identical (${json1.length} results)`
        );
      } finally {
        // Clean up both files
        await cleanupFile(file1);
        await cleanupFile(file2);
      }
    });
  });

  describe("Round-trip export/import - CSV", () => {
    it("should verify exported CSV can be reimported", async () => {
      const fileName = tempPath("test-roundtrip", "csv");
      try {
        await searchResultSet.writeToCsv(fileName);

        // Import the CSV file back
        const reimportedResultSet = await ResultSet.fromFilePath(
          fileName,
          client
        );

        expect(reimportedResultSet).toBeDefined();
        expect(reimportedResultSet.results.length).toBe(
          searchResultSet.results.length
        );

        console.log(
          `Successfully round-tripped ${reimportedResultSet.results.length} results through CSV`
        );
      } finally {
        await cleanupFile(fileName);
      }
    });

    it("should produce identical CSV files after save -> load -> save cycle", async () => {
      const fs = await import("node:fs/promises");
      // Both files share one id so a kept pair is easy to match up on disk.
      const uniqueId = randomUUID();
      const file1 = `${TEST_DIR}/test-roundtrip-1-${uniqueId}.csv`;
      const file2 = `${TEST_DIR}/test-roundtrip-2-${uniqueId}.csv`;

      try {
        // Save original results
        await searchResultSet.writeToCsv(file1);

        // Load and verify
        const reloadedResultSet = await ResultSet.fromFilePath(file1, client);
        expect(reloadedResultSet.results.length).toBe(
          searchResultSet.results.length
        );

        // Save again
        await reloadedResultSet.writeToCsv(file2);

        // Compare the two CSV files
        const csv1Content = await fs.readFile(file1, "utf-8");
        const csv2Content = await fs.readFile(file2, "utf-8");

        // Verify they have the same content
        expect(csv1Content).toBe(csv2Content);

        console.log(`✓ Both CSV files are identical`);
      } finally {
        // Clean up both files
        await cleanupFile(file1);
        await cleanupFile(file2);
      }
    });
  });
});
|
|
@@ -0,0 +1,368 @@
|
|
|
1
|
+
import {describe, it, expect, beforeEach} from "bun:test";
|
|
2
|
+
import {ResultSet} from "./resultSet";
|
|
3
|
+
import {Result} from "./result";
|
|
4
|
+
import {AnalyzeBy} from "./analyze";
|
|
5
|
+
import type {SearchResponse} from "../api/schemas";
|
|
6
|
+
import {
|
|
7
|
+
mockSemantics,
|
|
8
|
+
mockSearchResponse,
|
|
9
|
+
mockSearchQuery,
|
|
10
|
+
createMockClient,
|
|
11
|
+
} from "../test-utils/mocks";
|
|
12
|
+
|
|
13
|
+
// Fixture: first search response. Starts from the shared mockSearchResponse
// and overrides the identifying and textual fields; its content mentions
// "machine learning".
const mockSearchResponse1: SearchResponse = {
  ...mockSearchResponse,
  url_hash: "abc123",
  url: "https://example.com/article1",
  title: "Test Article 1",
  description: "This is test article 1",
  content: "This is the full content of test article 1 about machine learning.",
  best_chunk: "Most relevant chunk from article 1.",
};

// Fixture: second search response. Besides the textual fields it also
// overrides the dates, the author and two semantics values.
const mockSearchResponse2: SearchResponse = {
  ...mockSearchResponse,
  url_hash: "def456",
  url: "https://example.com/article2",
  published: "2023-02-20",
  visited: "2023-02-21",
  author: "Jane Smith",
  title: "Test Article 2",
  description: "This is test article 2",
  content:
    "This is the full content of test article 2 about artificial intelligence.",
  best_chunk: "Most relevant chunk from article 2.",
  semantics: {
    ...mockSemantics,
    origin_shard: 2,
    similarity: 0.92,
  },
};

// Single mock client shared by every Result built in this file.
const mockClient = createMockClient() as any;

// Result instances under test; assigned before each test by the suite below.
let mockResult1: Result;
let mockResult2: Result;
|
|
48
|
+
|
|
49
|
+
/**
 * Logic tests for ResultSet: construction, joining, data accessors,
 * in-memory search and analysis, all run against the mock fixtures.
 */
describe("ResultSet - Logic Tests", () => {
  /**
   * Wraps a raw search response in a Result bound to the mock client.
   * When `urlHash` is given it overrides the response's url_hash.
   */
  const buildResult = (response: SearchResponse, urlHash?: string): Result =>
    new Result({
      client: mockClient,
      result:
        urlHash === undefined
          ? {...mockSearchQuery, ...response}
          : {...mockSearchQuery, ...response, url_hash: urlHash},
    });

  /** Asserts, position by position, that the set holds these exact instances. */
  const expectSameInstances = (set: ResultSet, expected: Result[]): void => {
    expected.forEach((instance, position) => {
      expect(set.results[position]).toBe(instance);
    });
  };

  beforeEach(() => {
    // Initialize mock results
    mockResult1 = buildResult(mockSearchResponse1);
    mockResult2 = buildResult(mockSearchResponse2);
  });

  describe("constructor", () => {
    it("should create a ResultSet instance with an array of results", () => {
      const populated = new ResultSet([mockResult1, mockResult2]);

      expect(populated).toBeInstanceOf(ResultSet);
      expect(populated.results).toHaveLength(2);
      expectSameInstances(populated, [mockResult1, mockResult2]);
    });

    it("should create a ResultSet instance with an empty array", () => {
      const empty = new ResultSet([]);

      expect(empty).toBeInstanceOf(ResultSet);
      expect(empty.results).toHaveLength(0);
    });
  });

  describe("fromResults", () => {
    it("should create a ResultSet from SearchResponse array", () => {
      const responses = [mockSearchResponse1, mockSearchResponse2];
      const built = ResultSet.fromResults(responses, mockSearchQuery, mockClient);

      expect(built).toBeInstanceOf(ResultSet);
      expect(built.results).toHaveLength(2);
      expect(built.results[0]).toBeInstanceOf(Result);
      expect(built.results[0]?.url_hash).toBe("abc123");
      expect(built.results[1]?.url_hash).toBe("def456");
    });

    it("should create a ResultSet from Result array", () => {
      const instances = [mockResult1, mockResult2];
      const built = ResultSet.fromResults(instances, mockSearchQuery, mockClient);

      expect(built).toBeInstanceOf(ResultSet);
      expect(built.results).toHaveLength(2);
      expectSameInstances(built, [mockResult1, mockResult2]);
    });

    it("should create a ResultSet from mixed array of SearchResponse and Result", () => {
      const mixed = [mockSearchResponse1, mockResult2];
      const built = ResultSet.fromResults(mixed, mockSearchQuery, mockClient);

      expect(built).toBeInstanceOf(ResultSet);
      expect(built.results).toHaveLength(2);
      expect(built.results[0]).toBeInstanceOf(Result);
      expect(built.results[1]).toBe(mockResult2);
    });
  });

  describe("join", () => {
    it("should join multiple ResultSets into one", () => {
      const merged = ResultSet.join([
        new ResultSet([mockResult1]),
        new ResultSet([mockResult2]),
      ]);

      expect(merged).toBeInstanceOf(ResultSet);
      expect(merged.results).toHaveLength(2);
      expectSameInstances(merged, [mockResult1, mockResult2]);
    });

    it("should join three ResultSets", () => {
      const mockResult3 = buildResult(mockSearchResponse1, "ghi789");

      const firstSet = new ResultSet([mockResult1]);
      const secondSet = new ResultSet([mockResult2]);
      const thirdSet = new ResultSet([mockResult3]);

      const merged = ResultSet.join([firstSet, secondSet, thirdSet]);

      expect(merged.results).toHaveLength(3);
      expectSameInstances(merged, [mockResult1, mockResult2, mockResult3]);
    });

    it("should handle empty ResultSets", () => {
      const firstSet = new ResultSet([mockResult1]);
      const hollowSet = new ResultSet([]);
      const lastSet = new ResultSet([mockResult2]);

      const merged = ResultSet.join([firstSet, hollowSet, lastSet]);

      expect(merged.results).toHaveLength(2);
      expectSameInstances(merged, [mockResult1, mockResult2]);
    });

    it("should handle all empty ResultSets", () => {
      const hollowA = new ResultSet([]);
      const hollowB = new ResultSet([]);

      const merged = ResultSet.join([hollowA, hollowB]);

      expect(merged.results).toHaveLength(0);
    });

    it("should handle single ResultSet", () => {
      const onlySet = new ResultSet([mockResult1, mockResult2]);

      const merged = ResultSet.join([onlySet]);

      expect(merged.results).toHaveLength(2);
      expectSameInstances(merged, [mockResult1, mockResult2]);
    });

    it("should handle empty array of ResultSets", () => {
      const merged = ResultSet.join([]);

      expect(merged).toBeInstanceOf(ResultSet);
      expect(merged.results).toHaveLength(0);
    });

    it("should preserve Result objects without creating copies", () => {
      const merged = ResultSet.join([
        new ResultSet([mockResult1]),
        new ResultSet([mockResult2]),
      ]);

      // Should be the same object references, not copies
      expectSameInstances(merged, [mockResult1, mockResult2]);
    });

    it("should handle ResultSets with multiple results each", () => {
      const mockResult3 = buildResult(mockSearchResponse1, "ghi789");
      const mockResult4 = buildResult(mockSearchResponse2, "jkl012");

      const frontPair = new ResultSet([mockResult1, mockResult2]);
      const backPair = new ResultSet([mockResult3, mockResult4]);

      const merged = ResultSet.join([frontPair, backPair]);

      expect(merged.results).toHaveLength(4);
      expectSameInstances(merged, [
        mockResult1,
        mockResult2,
        mockResult3,
        mockResult4,
      ]);
    });
  });

  describe("getResults", () => {
    it("should return an array of result data objects", () => {
      const populated = new ResultSet([mockResult1, mockResult2]);
      const rows = populated.getResults();

      expect(rows).toHaveLength(2);
      expect(rows[0]).toEqual(mockResult1.data());
      expect(rows[1]).toEqual(mockResult2.data());
      expect(rows[0]?.url_hash).toBe("abc123");
      expect(rows[1]?.url_hash).toBe("def456");
    });

    it("should return an empty array for empty ResultSet", () => {
      const rows = new ResultSet([]).getResults();

      expect(rows).toHaveLength(0);
    });
  });

  describe("getFlattenResults", () => {
    it("should return an array of flattened result objects", () => {
      const populated = new ResultSet([mockResult1, mockResult2]);
      const flatRows = populated.getFlattenResults();

      expect(flatRows).toHaveLength(2);
      expect(flatRows[0]).toEqual(mockResult1.flatten());
      expect(flatRows[1]).toEqual(mockResult2.flatten());
    });

    it("should return an empty array for empty ResultSet", () => {
      const flatRows = new ResultSet([]).getFlattenResults();

      expect(flatRows).toHaveLength(0);
    });
  });

  describe("findInResults", () => {
    it("should find results matching the query using lunr", () => {
      const populated = new ResultSet([mockResult1, mockResult2]);
      const hits = populated.findInResults("machine learning");

      expect(hits).toBeInstanceOf(Array);
      // The first fixture's content contains "machine learning"
      expect(hits.length).toBeGreaterThanOrEqual(1);
    });

    it("should return an empty array when no matches are found", () => {
      const populated = new ResultSet([mockResult1, mockResult2]);
      const hits = populated.findInResults("nonexistenttermsearchquery");

      expect(hits).toBeInstanceOf(Array);
      expect(hits).toHaveLength(0);
    });

    it("should return multiple results when query matches multiple documents", () => {
      const populated = new ResultSet([mockResult1, mockResult2]);
      const hits = populated.findInResults("article");

      expect(hits).toBeInstanceOf(Array);
      // Both fixtures contain "article" in their content
      expect(hits.length).toBeGreaterThanOrEqual(2);
    });
  });

  describe("analyze", () => {
    it("should call analyzeResults with correct parameters for categorical data", () => {
      const populated = new ResultSet([mockResult1, mockResult2]);
      const summary = populated.analyze(AnalyzeBy.netloc);

      expect(summary).toBeDefined();
      // Type assertion for categorical result
      const countsByHost = summary as Record<string, number>;
      expect(countsByHost["example.com"]).toBe(2);
    });

    it("should handle different AnalyzeBy types", () => {
      const populated = new ResultSet([mockResult1, mockResult2]);

      // Categorical: both fixtures live on example.com
      const byNetloc = populated.analyze(AnalyzeBy.netloc);
      expect(byNetloc).toBeDefined();
      const countsByHost = byNetloc as Record<string, number>;
      expect(countsByHost["example.com"]).toBe(2);

      const byAuthor = populated.analyze(AnalyzeBy.author);
      expect(byAuthor).toBeDefined();

      // Similarity analysis is expected to expose a "mean" key
      const bySimilarity = populated.analyze(AnalyzeBy.similarity);
      expect(bySimilarity).toBeDefined();
      expect("mean" in bySimilarity).toBe(true);
    });
  });

  describe("integration scenarios", () => {
    it("should handle a complete workflow: create, analyze, and search", () => {
      const built = ResultSet.fromResults(
        [mockSearchResponse1, mockSearchResponse2],
        mockSearchQuery,
        mockClient
      );

      // Analyze
      const summary = built.analyze(AnalyzeBy.netloc);
      expect(summary).toBeDefined();

      // Search within results
      const hits = built.findInResults("article");
      expect(hits.length).toBeGreaterThan(0);
    });

    it("should handle empty result sets gracefully", () => {
      const hollow = new ResultSet([]);

      expect(hollow.getResults()).toHaveLength(0);
      expect(hollow.getFlattenResults()).toHaveLength(0);
      expect(hollow.findInResults("query")).toHaveLength(0);
    });
  });
});
|