nosible 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. package/README.md +438 -0
  2. package/dist/index.cjs +1841 -0
  3. package/dist/index.cjs.map +29 -0
  4. package/dist/index.js +1815 -0
  5. package/dist/index.js.map +29 -0
  6. package/package.json +63 -0
  7. package/src/api/api.test.ts +366 -0
  8. package/src/api/index.ts +179 -0
  9. package/src/api/schemas.ts +152 -0
  10. package/src/client.test.ts +685 -0
  11. package/src/client.ts +762 -0
  12. package/src/index.ts +4 -0
  13. package/src/scrape/types.ts +119 -0
  14. package/src/scrape/webPageData.test.ts +302 -0
  15. package/src/scrape/webPageData.ts +103 -0
  16. package/src/search/analyze.test.ts +396 -0
  17. package/src/search/analyze.ts +151 -0
  18. package/src/search/bulkSearch.ts +62 -0
  19. package/src/search/result.test.ts +423 -0
  20. package/src/search/result.ts +391 -0
  21. package/src/search/result.types.ts +32 -0
  22. package/src/search/resultFactory.ts +21 -0
  23. package/src/search/resultSet.io.test.ts +320 -0
  24. package/src/search/resultSet.test.ts +368 -0
  25. package/src/search/resultSet.ts +387 -0
  26. package/src/search/resultSet.types.ts +3 -0
  27. package/src/search/search.test.ts +299 -0
  28. package/src/search/search.ts +187 -0
  29. package/src/search/searchSet.io.test.ts +321 -0
  30. package/src/search/searchSet.ts +122 -0
  31. package/src/search/sqlFilter.test.ts +129 -0
  32. package/src/search/sqlFilter.ts +147 -0
  33. package/src/test-utils/mocks.ts +159 -0
  34. package/src/topicTrend/topicTrend.ts +53 -0
  35. package/src/utils/browser.test.ts +209 -0
  36. package/src/utils/browser.ts +21 -0
  37. package/src/utils/fernet.ts +47 -0
  38. package/src/utils/file.test.ts +81 -0
  39. package/src/utils/file.ts +195 -0
  40. package/src/utils/index.ts +7 -0
  41. package/src/utils/llm.test.ts +279 -0
  42. package/src/utils/llm.ts +244 -0
  43. package/src/utils/userPlan.test.ts +332 -0
  44. package/src/utils/userPlan.ts +211 -0
@@ -0,0 +1,423 @@
1
+ import {describe, it, expect, beforeEach, beforeAll, vi} from "bun:test";
2
+ import {Result} from "./result";
3
+ import {type FlattenResult} from "./result.types";
4
+ import {NosibleClient} from "../client";
5
+ import {ResultSet} from "./resultSet";
6
+ import {WebPageData} from "../scrape/webPageData";
7
+ import type {SimilarUserSearchParamsType} from "./search";
8
+ import {
9
+ createMockScrapeResponse,
10
+ createMockSnippet,
11
+ } from "../scrape/webPageData.test";
12
+ import {
13
+ mockSemantics,
14
+ mockSearchResponse,
15
+ mockResult,
16
+ mockSearchQuery,
17
+ createMockClient,
18
+ } from "../test-utils/mocks";
19
+
20
// Scrape payload holding a single snippet; the scrapeUrl test wraps it in a WebPageData.
const sampleSnippet = createMockSnippet(
  "snippet1",
  null,
  null,
  "Sample snippet content"
);
const mockScrapeResponse = createMockScrapeResponse({snippet1: sampleSnippet});

// Mocked client shared by every unit-style test in this file.
const mockClient = createMockClient() as unknown as NosibleClient;

// Kept as re-exports for backward compatibility with other test files.
export {
  mockSemantics,
  mockSearchResponse,
  mockResult,
  mockSearchQuery,
  createMockClient,
};
35
+
36
+ describe("Result", () => {
37
// Live client and a Result fetched through it; both are assigned in beforeAll.
let realClient: NosibleClient;
let realResult: Result;
// Result bound to the mocked client; rebuilt in beforeEach.
let mockResultInstance: Result;
41
+
42
/**
 * One-time setup for the integration-style tests: runs a fast search through a
 * real NosibleClient and stores its first hit in `realResult`.
 *
 * Throws when the search returns no usable first result.
 */
beforeAll(async () => {
  realClient = new NosibleClient();

  const searchResultSet = await realClient.fastSearch({
    question: "artificial intelligence machine learning",
    nResults: 10,
  });

  // A single guard covers both an empty list and an undefined first entry.
  const firstResult = searchResultSet.results[0];
  if (!firstResult) {
    throw new Error("No search results found. Cannot proceed with test.");
  }

  realResult = firstResult;
});
64
+
65
// Clear recorded mock calls and rebuild the mocked Result before every test.
beforeEach(() => {
  vi.clearAllMocks();

  const mockedResultArgs = {client: mockClient, result: mockResult};
  mockResultInstance = new Result(mockedResultArgs);
});
72
+
73
// Constructor tests: field values on the mocked Result, and null set-level fields.
describe("constructor", () => {
  it("should create a Result instance with all required properties", () => {
    // Expected values of the scalar fields on the mocked Result.
    const expectedScalars = {
      url_hash: "abc123",
      url: "https://example.com/article",
      netloc: "example.com",
      published: "2023-01-15",
      visited: "2023-01-16",
      language: "en",
      author: "John Doe",
      title: "Test Article Title",
      description: "This is a test article description",
      content:
        "This is the full content of the test article. It contains multiple sentences and provides comprehensive information about the topic being discussed.",
      best_chunk: "This is the most relevant chunk of content from the article.",
    };

    expect(mockResultInstance).toBeInstanceOf(Result);
    for (const [field, expected] of Object.entries(expectedScalars)) {
      expect(mockResultInstance[field as keyof Result]).toBe(expected);
    }
    expect(mockResultInstance.semantics).toEqual(mockSemantics);
    expect(mockResultInstance.client).toBe(mockClient);
  });

  it("should handle null values for optional fields", () => {
    // Fields that are overridden with null below and must read back as null.
    const nullableFields = [
      "brand_safety",
      "continent",
      "region",
      "country",
      "sector",
      "industry_group",
      "industry",
      "sub_industry",
      "iab_tier_1",
      "iab_tier_2",
      "iab_tier_3",
      "iab_tier_4",
    ] as const;

    const resultWithNulls = new Result({
      client: mockClient,
      result: {
        ...mockResult,
        brand_safety: null,
        continent: null,
        region: null,
        country: null,
        sector: null,
        industry_group: null,
        industry: null,
        sub_industry: null,
        iab_tier_1: null,
        iab_tier_2: null,
        iab_tier_3: null,
        iab_tier_4: null,
      },
    });

    for (const field of nullableFields) {
      expect(resultWithNulls[field]).toBeNull();
    }
  });
});
131
+
132
// getSentiment tests: rejection without an llmClient, and a call against the mocked LLM.
describe("getSentiment", () => {
  it("should throw error when client has no llmClient", async () => {
    // An empty object stands in for a client with no llmClient property.
    const bareClient = {} as NosibleClient;
    const resultWithoutLLM = new Result({
      client: bareClient,
      result: mockResult,
    });

    const sentiment = resultWithoutLLM.getSentiment();

    await expect(sentiment).rejects.toThrow(
      "A Nosible client instance must be provided with a OpenRouter API key."
    );
  });

  it("should call getSentiment with correct parameters when llmClient exists", async () => {
    expect(mockResultInstance.getSentiment).toBeDefined();
    expect(typeof mockResultInstance.getSentiment).toBe("function");

    // Canned chat-completion reply for the mocked LLM client.
    const createCompletion = mockClient.llmClient?.chat.completions
      .create as any;
    createCompletion.mockResolvedValue({
      choices: [{message: {content: "0.5"}}],
    });

    // The method must resolve to some defined value.
    const sentiment = mockResultInstance.getSentiment(
      "google/gemini-2.0-flash-001"
    );
    await expect(sentiment).resolves.toBeDefined();
  });
});
160
+
161
/**
 * getSimilar tests: one unit test against the mocked client, followed by
 * checks that call getSimilar on `realResult` (the hit fetched in beforeAll).
 *
 * Every options object is annotated as SimilarUserSearchParamsType so that a
 * misspelled key is rejected by the compiler instead of being passed along
 * unnoticed.
 */
describe("getSimilar", () => {
  it("should call client.fastSearch with correct parameters", async () => {
    const mockResultSet = new ResultSet([]);
    (mockClient.fastSearch as any).mockResolvedValue(mockResultSet);

    const searchParams: SimilarUserSearchParamsType = {
      nResults: 10,
      minSimilarity: 0.7,
    };

    const similarResults = await mockResultInstance.getSimilar(searchParams);

    // The title becomes the question and the result's own hash is excluded.
    expect(mockClient.fastSearch).toHaveBeenCalledWith({
      question: mockResultInstance.title,
      excludeDocs: [mockResultInstance.url_hash],
      ...searchParams,
    });
    expect(similarResults).toBe(mockResultSet);
  });

  it("should not return the same document in the result set", async () => {
    const similarParams: SimilarUserSearchParamsType = {
      nResults: 10,
    };

    const similarResultSet = await realResult.getSimilar(similarParams);
    const flattenedResults = similarResultSet.getFlattenResults();
    const resultHashes = flattenedResults.map((r) => r.url_hash);

    expect(resultHashes).not.toContain(realResult.url_hash);

    if (resultHashes.length > 0) {
      console.log(`Found ${resultHashes.length} similar results`);
    }
  }, 15000);

  it("should use the result title as the search question", async () => {
    const similarParams: SimilarUserSearchParamsType = {
      nResults: 10,
    };

    const similarResultSet = await realResult.getSimilar(similarParams);

    expect(similarResultSet).toBeDefined();
    expect(similarResultSet.results).toBeDefined();

    console.log(
      `Search completed successfully using title: "${realResult.title}"`
    );
  }, 20000);

  it("should exclude the original document when requesting more results", async () => {
    // camelCase key, matching the typed params used in the mocked test above.
    const similarParams: SimilarUserSearchParamsType = {
      nResults: 10,
      minSimilarity: 0.5,
    };

    const similarResultSet = await realResult.getSimilar(similarParams);
    const flattenedResults = similarResultSet.getFlattenResults();
    const resultHashes = flattenedResults.map((r) => r.url_hash);

    expect(resultHashes).not.toContain(realResult.url_hash);

    console.log(`Original document hash: ${realResult.url_hash}`);
    console.log(`Found ${resultHashes.length} similar results`);
  }, 20000);

  it("should handle empty result sets gracefully", async () => {
    const similarParams: SimilarUserSearchParamsType = {
      nResults: 10,
      minSimilarity: 0.99,
      // NOTE(review): assumes the key is `mustInclude`, following the camelCase
      // of nResults/minSimilarity/excludeDocs — confirm against
      // SimilarUserSearchParamsType.
      mustInclude: ["extremelyrareterm12345"],
    };

    const similarResultSet = await realResult.getSimilar(similarParams);
    const flattenedResults = similarResultSet.getFlattenResults();
    const resultHashes = flattenedResults.map((r) => r.url_hash);

    expect(resultHashes).not.toContain(realResult.url_hash);

    console.log(
      `High-threshold search returned ${resultHashes.length} results`
    );
  }, 20000);
});
246
+
247
// scrapeUrl test: the call is forwarded to client.scrapeUrl with the result's URL.
describe("scrapeUrl", () => {
  it("should call client.scrapeUrl with the result URL", async () => {
    // Page object the mocked client hands back.
    const cannedPage = new WebPageData(mockClient, mockScrapeResponse);
    const scrapeUrlMock = mockClient.scrapeUrl as any;
    scrapeUrlMock.mockResolvedValue(cannedPage);

    const scrapedData = await mockResultInstance.scrapeUrl();

    expect(mockClient.scrapeUrl).toHaveBeenCalledWith(mockResultInstance.url);
    expect(scrapedData).toBe(cannedPage);
  });
});
258
+
259
// flatten tests: the full flattened shape, and set-level fields assigned after construction.
describe("flatten", () => {
  it("should return a flattened result object with all properties", () => {
    // The expected object is assembled from three groups to keep it readable.
    const expectedDocumentFields = {
      url_hash: "abc123",
      url: "https://example.com/article",
      netloc: "example.com",
      published: "2023-01-15",
      visited: "2023-01-16",
      language: "en",
      author: "John Doe",
      title: "Test Article Title",
      description: "This is a test article description",
      content:
        "This is the full content of the test article. It contains multiple sentences and provides comprehensive information about the topic being discussed.",
      best_chunk: "This is the most relevant chunk of content from the article.",
    };
    const expectedMatchFields = {
      origin_shard: 1,
      chunks_total: 10,
      chunks_matched: 5,
      chunks_kept: 3,
      similarity: 0.85,
    };
    const expectedSetLevelFields = {
      brand_safety: null,
      continent: null,
      region: null,
      country: null,
      sector: null,
      industry_group: null,
      industry: null,
      sub_industry: null,
      iab_tier_1: null,
      iab_tier_2: null,
      iab_tier_3: null,
      iab_tier_4: null,
    };

    const flattened: FlattenResult = mockResultInstance.flatten();

    expect(flattened).toEqual({
      ...expectedDocumentFields,
      ...expectedMatchFields,
      ...expectedSetLevelFields,
    });
  });

  it("should include set-level fields when they are present", () => {
    // Values assigned onto the instance and then expected back from flatten().
    const setLevelValues = {
      brand_safety: "safe",
      continent: "North America",
      region: "North America",
      country: "United States",
      sector: "Technology",
      industry_group: "Software",
      industry: "Enterprise Software",
      sub_industry: "Business Intelligence",
      iab_tier_1: "Technology",
      iab_tier_2: "Software",
      iab_tier_3: "Business Software",
      iab_tier_4: "Analytics",
    };

    const resultWithSetFields = new Result({
      client: mockClient,
      result: mockResult,
    });
    Object.assign(resultWithSetFields, setLevelValues);

    const flattened = resultWithSetFields.flatten();

    for (const [field, expected] of Object.entries(setLevelValues)) {
      expect(flattened[field as keyof FlattenResult]).toBe(expected);
    }
  });
});
334
+
335
// data tests: the full data() shape including semantics, and set-level fields assigned afterwards.
describe("data", () => {
  it("should return a data object with all properties including semantics", () => {
    const expectedDocumentFields = {
      url_hash: "abc123",
      url: "https://example.com/article",
      netloc: "example.com",
      published: "2023-01-15",
      visited: "2023-01-16",
      language: "en",
      author: "John Doe",
      title: "Test Article Title",
      description: "This is a test article description",
      content:
        "This is the full content of the test article. It contains multiple sentences and provides comprehensive information about the topic being discussed.",
      best_chunk: "This is the most relevant chunk of content from the article.",
    };
    const expectedSetLevelFields = {
      brand_safety: null,
      continent: null,
      region: null,
      country: null,
      sector: null,
      industry_group: null,
      industry: null,
      sub_industry: null,
      iab_tier_1: null,
      iab_tier_2: null,
      iab_tier_3: null,
      iab_tier_4: null,
    };

    const data = mockResultInstance.data();

    expect(data).toEqual({
      ...expectedDocumentFields,
      ...expectedSetLevelFields,
      semantics: mockSemantics,
    });
  });

  it("should include set-level fields when they are present", () => {
    const resultWithData = new Result({
      client: mockClient,
      result: mockResult,
    });

    // Assign two set-level fields after construction, then read them back via data().
    resultWithData.brand_safety = "sensitive";
    resultWithData.continent = "Europe";

    const {brand_safety, continent, semantics} = resultWithData.data();

    expect(brand_safety).toBe("sensitive");
    expect(continent).toBe("Europe");
    expect(semantics).toEqual(mockSemantics);
  });
});
387
+
388
// property access tests: fields are readable directly on a Result instance.
describe("property access", () => {
  it("should provide direct access to all search response properties", () => {
    // Each listed field must be defined on the mocked Result.
    const responseFields = [
      "url_hash",
      "url",
      "netloc",
      "published",
      "visited",
      "language",
      "author",
      "title",
      "description",
      "content",
      "best_chunk",
      "semantics",
    ] as const;

    for (const field of responseFields) {
      expect(mockResultInstance[field]).toBeDefined();
    }
  });

  it("should provide access to set-level properties", () => {
    // Values written onto the instance and expected back unchanged.
    const setLevelValues = {
      brand_safety: "unsafe",
      continent: "Asia",
      region: "East Asia",
      country: "Japan",
    };

    const resultWithSetData = new Result({
      client: mockClient,
      result: mockResult,
    });
    Object.assign(resultWithSetData, setLevelValues);

    for (const [field, expected] of Object.entries(setLevelValues)) {
      expect(resultWithSetData[field as keyof Result]).toBe(expected);
    }
  });
});
423
+ });