illuma-agents 1.0.8 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -5
- package/dist/cjs/common/enum.cjs +1 -2
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/instrumentation.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/types.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs +79 -2
- package/dist/cjs/llm/anthropic/utils/message_inputs.cjs.map +1 -1
- package/dist/cjs/llm/anthropic/utils/tools.cjs.map +1 -1
- package/dist/cjs/llm/bedrock/index.cjs +99 -0
- package/dist/cjs/llm/bedrock/index.cjs.map +1 -0
- package/dist/cjs/llm/fake.cjs.map +1 -1
- package/dist/cjs/llm/providers.cjs +13 -16
- package/dist/cjs/llm/providers.cjs.map +1 -1
- package/dist/cjs/llm/text.cjs.map +1 -1
- package/dist/cjs/messages/core.cjs +14 -14
- package/dist/cjs/messages/core.cjs.map +1 -1
- package/dist/cjs/messages/ids.cjs.map +1 -1
- package/dist/cjs/messages/prune.cjs.map +1 -1
- package/dist/cjs/run.cjs +10 -1
- package/dist/cjs/run.cjs.map +1 -1
- package/dist/cjs/splitStream.cjs.map +1 -1
- package/dist/cjs/stream.cjs +4 -1
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +10 -1
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/handlers.cjs +29 -25
- package/dist/cjs/tools/handlers.cjs.map +1 -1
- package/dist/cjs/tools/search/anthropic.cjs.map +1 -1
- package/dist/cjs/tools/search/content.cjs.map +1 -1
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/highlights.cjs.map +1 -1
- package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
- package/dist/cjs/tools/search/schema.cjs +25 -25
- package/dist/cjs/tools/search/schema.cjs.map +1 -1
- package/dist/cjs/tools/search/search.cjs +6 -1
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/serper-scraper.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +162 -35
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs.map +1 -1
- package/dist/cjs/utils/graph.cjs.map +1 -1
- package/dist/cjs/utils/llm.cjs +0 -1
- package/dist/cjs/utils/llm.cjs.map +1 -1
- package/dist/cjs/utils/misc.cjs.map +1 -1
- package/dist/cjs/utils/run.cjs.map +1 -1
- package/dist/cjs/utils/title.cjs +7 -7
- package/dist/cjs/utils/title.cjs.map +1 -1
- package/dist/esm/common/enum.mjs +1 -2
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/instrumentation.mjs.map +1 -1
- package/dist/esm/llm/anthropic/types.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs +79 -2
- package/dist/esm/llm/anthropic/utils/message_inputs.mjs.map +1 -1
- package/dist/esm/llm/anthropic/utils/tools.mjs.map +1 -1
- package/dist/esm/llm/bedrock/index.mjs +97 -0
- package/dist/esm/llm/bedrock/index.mjs.map +1 -0
- package/dist/esm/llm/fake.mjs.map +1 -1
- package/dist/esm/llm/providers.mjs +2 -5
- package/dist/esm/llm/providers.mjs.map +1 -1
- package/dist/esm/llm/text.mjs.map +1 -1
- package/dist/esm/messages/core.mjs +14 -14
- package/dist/esm/messages/core.mjs.map +1 -1
- package/dist/esm/messages/ids.mjs.map +1 -1
- package/dist/esm/messages/prune.mjs.map +1 -1
- package/dist/esm/run.mjs +10 -1
- package/dist/esm/run.mjs.map +1 -1
- package/dist/esm/splitStream.mjs.map +1 -1
- package/dist/esm/stream.mjs +4 -1
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +10 -1
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/handlers.mjs +30 -26
- package/dist/esm/tools/handlers.mjs.map +1 -1
- package/dist/esm/tools/search/anthropic.mjs.map +1 -1
- package/dist/esm/tools/search/content.mjs.map +1 -1
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/highlights.mjs.map +1 -1
- package/dist/esm/tools/search/rerankers.mjs.map +1 -1
- package/dist/esm/tools/search/schema.mjs +25 -25
- package/dist/esm/tools/search/schema.mjs.map +1 -1
- package/dist/esm/tools/search/search.mjs +6 -1
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/serper-scraper.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +162 -35
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs.map +1 -1
- package/dist/esm/utils/graph.mjs.map +1 -1
- package/dist/esm/utils/llm.mjs +0 -1
- package/dist/esm/utils/llm.mjs.map +1 -1
- package/dist/esm/utils/misc.mjs.map +1 -1
- package/dist/esm/utils/run.mjs.map +1 -1
- package/dist/esm/utils/title.mjs +7 -7
- package/dist/esm/utils/title.mjs.map +1 -1
- package/dist/types/common/enum.d.ts +1 -2
- package/dist/types/llm/bedrock/index.d.ts +36 -0
- package/dist/types/tools/search/types.d.ts +2 -0
- package/dist/types/types/llm.d.ts +3 -8
- package/package.json +15 -11
- package/src/common/enum.ts +1 -2
- package/src/common/index.ts +1 -1
- package/src/instrumentation.ts +22 -22
- package/src/llm/anthropic/llm.spec.ts +1442 -1442
- package/src/llm/anthropic/types.ts +140 -140
- package/src/llm/anthropic/utils/message_inputs.ts +757 -660
- package/src/llm/anthropic/utils/output_parsers.ts +133 -133
- package/src/llm/anthropic/utils/tools.ts +29 -29
- package/src/llm/bedrock/index.ts +128 -0
- package/src/llm/fake.ts +133 -133
- package/src/llm/google/utils/tools.ts +160 -160
- package/src/llm/openai/types.ts +24 -24
- package/src/llm/openai/utils/isReasoningModel.test.ts +90 -90
- package/src/llm/providers.ts +2 -7
- package/src/llm/text.ts +94 -94
- package/src/messages/core.ts +463 -463
- package/src/messages/formatAgentMessages.tools.test.ts +400 -400
- package/src/messages/formatMessage.test.ts +693 -693
- package/src/messages/ids.ts +26 -26
- package/src/messages/prune.ts +567 -567
- package/src/messages/shiftIndexTokenCountMap.test.ts +81 -81
- package/src/mockStream.ts +98 -98
- package/src/prompts/collab.ts +5 -5
- package/src/prompts/index.ts +1 -1
- package/src/prompts/taskmanager.ts +61 -61
- package/src/run.ts +13 -4
- package/src/scripts/ant_web_search_edge_case.ts +162 -0
- package/src/scripts/ant_web_search_error_edge_case.ts +148 -0
- package/src/scripts/args.ts +48 -48
- package/src/scripts/caching.ts +123 -123
- package/src/scripts/code_exec_files.ts +193 -193
- package/src/scripts/empty_input.ts +137 -137
- package/src/scripts/image.ts +178 -178
- package/src/scripts/memory.ts +97 -97
- package/src/scripts/thinking.ts +149 -149
- package/src/specs/anthropic.simple.test.ts +67 -0
- package/src/specs/spec.utils.ts +3 -3
- package/src/specs/token-distribution-edge-case.test.ts +316 -316
- package/src/specs/tool-error.test.ts +193 -193
- package/src/splitStream.test.ts +691 -691
- package/src/splitStream.ts +234 -234
- package/src/stream.test.ts +94 -94
- package/src/stream.ts +4 -1
- package/src/tools/ToolNode.ts +12 -1
- package/src/tools/handlers.ts +32 -28
- package/src/tools/search/anthropic.ts +51 -51
- package/src/tools/search/content.test.ts +173 -173
- package/src/tools/search/content.ts +147 -147
- package/src/tools/search/direct-url.test.ts +530 -0
- package/src/tools/search/firecrawl.ts +210 -210
- package/src/tools/search/format.ts +250 -250
- package/src/tools/search/highlights.ts +320 -320
- package/src/tools/search/index.ts +2 -2
- package/src/tools/search/jina-reranker.test.ts +126 -126
- package/src/tools/search/output.md +2775 -2775
- package/src/tools/search/rerankers.ts +242 -242
- package/src/tools/search/schema.ts +63 -63
- package/src/tools/search/search.ts +766 -759
- package/src/tools/search/serper-scraper.ts +155 -155
- package/src/tools/search/test.html +883 -883
- package/src/tools/search/test.md +642 -642
- package/src/tools/search/test.ts +159 -159
- package/src/tools/search/tool.ts +619 -471
- package/src/tools/search/types.ts +689 -687
- package/src/tools/search/utils.ts +79 -79
- package/src/types/index.ts +6 -6
- package/src/types/llm.ts +2 -8
- package/src/utils/graph.ts +10 -10
- package/src/utils/llm.ts +26 -27
- package/src/utils/llmConfig.ts +5 -3
- package/src/utils/logging.ts +48 -48
- package/src/utils/misc.ts +57 -57
- package/src/utils/run.ts +100 -100
- package/src/utils/title.ts +165 -165
- package/dist/cjs/llm/ollama/index.cjs +0 -70
- package/dist/cjs/llm/ollama/index.cjs.map +0 -1
- package/dist/cjs/llm/ollama/utils.cjs +0 -158
- package/dist/cjs/llm/ollama/utils.cjs.map +0 -1
- package/dist/esm/llm/ollama/index.mjs +0 -68
- package/dist/esm/llm/ollama/index.mjs.map +0 -1
- package/dist/esm/llm/ollama/utils.mjs +0 -155
- package/dist/esm/llm/ollama/utils.mjs.map +0 -1
- package/dist/types/llm/ollama/index.d.ts +0 -8
- package/dist/types/llm/ollama/utils.d.ts +0 -7
- package/src/llm/ollama/index.ts +0 -92
- package/src/llm/ollama/utils.ts +0 -193
- package/src/proto/CollabGraph.ts +0 -269
- package/src/proto/TaskManager.ts +0 -243
- package/src/proto/collab.ts +0 -200
- package/src/proto/collab_design.ts +0 -184
- package/src/proto/collab_design_v2.ts +0 -224
- package/src/proto/collab_design_v3.ts +0 -255
- package/src/proto/collab_design_v4.ts +0 -220
- package/src/proto/collab_design_v5.ts +0 -251
- package/src/proto/collab_graph.ts +0 -181
- package/src/proto/collab_original.ts +0 -123
- package/src/proto/example.ts +0 -93
- package/src/proto/example_new.ts +0 -68
- package/src/proto/example_old.ts +0 -201
- package/src/proto/example_test.ts +0 -152
- package/src/proto/example_test_anthropic.ts +0 -100
- package/src/proto/log_stream.ts +0 -202
- package/src/proto/main_collab_community_event.ts +0 -133
- package/src/proto/main_collab_design_v2.ts +0 -96
- package/src/proto/main_collab_design_v4.ts +0 -100
- package/src/proto/main_collab_design_v5.ts +0 -135
- package/src/proto/main_collab_global_analysis.ts +0 -122
- package/src/proto/main_collab_hackathon_event.ts +0 -153
- package/src/proto/main_collab_space_mission.ts +0 -153
- package/src/proto/main_philosophy.ts +0 -210
- package/src/proto/original_script.ts +0 -126
- package/src/proto/standard.ts +0 -100
- package/src/proto/stream.ts +0 -56
- package/src/proto/tasks.ts +0 -118
- package/src/proto/tools/global_analysis_tools.ts +0 -86
- package/src/proto/tools/space_mission_tools.ts +0 -60
- package/src/proto/vertexai.ts +0 -54
|
@@ -0,0 +1,530 @@
|
|
|
1
|
+
/* eslint-disable @typescript-eslint/no-explicit-any */
|
|
2
|
+
/* eslint-disable no-console */
|
|
3
|
+
/**
|
|
4
|
+
* Tests for direct URL extraction feature in the search tool
|
|
5
|
+
*/
|
|
6
|
+
import { describe, test, expect, jest, beforeEach } from '@jest/globals';
|
|
7
|
+
|
|
8
|
+
// Import the functions we need to test - we'll need to export them first
|
|
9
|
+
// For now, we'll test the logic by recreating the helper functions here
|
|
10
|
+
// In a real scenario, these would be exported from tool.ts
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
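* URL regex pattern to detect direct http(s) URLs in a query. A match runs until whitespace or one of < > " { } | \ ^ ` [ ], so trailing punctuation such as "?", "," or ")" stays part of the matched URL.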
|
|
14
|
+
*/
|
|
15
|
+
const URL_PATTERN = /https?:\/\/[^\s<>"{}|\\^`[\]]+/gi;
|
|
16
|
+
|
|
17
|
+
/**
 * Collects every http(s) URL that appears in a query string.
 *
 * @param query - Raw query text to scan.
 * @returns The matched URLs in order of appearance (duplicates included),
 *          or an empty array when the query contains no URL.
 */
function extractUrlsFromQuery(query: string): string[] {
  // String.prototype.match yields null (not an empty array) when nothing matches.
  return query.match(URL_PATTERN) ?? [];
}
|
|
24
|
+
|
|
25
|
+
/**
 * Decides whether a query is primarily a request about the URL(s) it contains.
 *
 * The query counts as a direct URL request when it contains at least one URL
 * and fewer than 50 characters of text remain once the URLs are removed and
 * the remainder is trimmed.
 *
 * @param query - Raw query text.
 * @returns true when the query is essentially "URL plus a short instruction".
 */
function isDirectUrlRequest(query: string): boolean {
  const foundUrls = extractUrlsFromQuery(query);
  if (foundUrls.length === 0) {
    return false;
  }

  // Remove one occurrence per extracted URL, in extraction order, then
  // normalise what is left before measuring it.
  const leftover = foundUrls
    .reduce((text, found) => text.replace(found, ''), query)
    .trim()
    .toLowerCase();

  return leftover.length < 50;
}
|
|
47
|
+
|
|
48
|
+
describe('Direct URL Extraction - Helper Functions', () => {
|
|
49
|
+
describe('extractUrlsFromQuery', () => {
|
|
50
|
+
test('should extract a single HTTP URL', () => {
|
|
51
|
+
const query = 'http://example.com';
|
|
52
|
+
const urls = extractUrlsFromQuery(query);
|
|
53
|
+
expect(urls).toEqual(['http://example.com']);
|
|
54
|
+
});
|
|
55
|
+
|
|
56
|
+
test('should extract a single HTTPS URL', () => {
|
|
57
|
+
const query = 'https://example.com';
|
|
58
|
+
const urls = extractUrlsFromQuery(query);
|
|
59
|
+
expect(urls).toEqual(['https://example.com']);
|
|
60
|
+
});
|
|
61
|
+
|
|
62
|
+
test('should extract URL with path', () => {
|
|
63
|
+
const query = 'https://example.com/path/to/page';
|
|
64
|
+
const urls = extractUrlsFromQuery(query);
|
|
65
|
+
expect(urls).toEqual(['https://example.com/path/to/page']);
|
|
66
|
+
});
|
|
67
|
+
|
|
68
|
+
test('should extract URL with query parameters', () => {
|
|
69
|
+
const query = 'https://example.com/search?q=test&page=1';
|
|
70
|
+
const urls = extractUrlsFromQuery(query);
|
|
71
|
+
expect(urls).toEqual(['https://example.com/search?q=test&page=1']);
|
|
72
|
+
});
|
|
73
|
+
|
|
74
|
+
test('should extract URL with fragment', () => {
|
|
75
|
+
const query = 'https://example.com/page#section';
|
|
76
|
+
const urls = extractUrlsFromQuery(query);
|
|
77
|
+
expect(urls).toEqual(['https://example.com/page#section']);
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
test('should extract URL from mixed text', () => {
|
|
81
|
+
const query = 'Check out https://example.com for more info';
|
|
82
|
+
const urls = extractUrlsFromQuery(query);
|
|
83
|
+
expect(urls).toEqual(['https://example.com']);
|
|
84
|
+
});
|
|
85
|
+
|
|
86
|
+
test('should extract multiple URLs', () => {
|
|
87
|
+
const query = 'Compare https://site1.com and https://site2.com';
|
|
88
|
+
const urls = extractUrlsFromQuery(query);
|
|
89
|
+
expect(urls).toEqual(['https://site1.com', 'https://site2.com']);
|
|
90
|
+
});
|
|
91
|
+
|
|
92
|
+
test('should return empty array when no URLs present', () => {
|
|
93
|
+
const query = 'Just a regular search query';
|
|
94
|
+
const urls = extractUrlsFromQuery(query);
|
|
95
|
+
expect(urls).toEqual([]);
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
test('should handle URL with port number', () => {
|
|
99
|
+
const query = 'https://localhost:3000/api';
|
|
100
|
+
const urls = extractUrlsFromQuery(query);
|
|
101
|
+
expect(urls).toEqual(['https://localhost:3000/api']);
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
test('should handle URL with special characters in path', () => {
|
|
105
|
+
const query = 'https://example.com/path-with-dashes_and_underscores';
|
|
106
|
+
const urls = extractUrlsFromQuery(query);
|
|
107
|
+
expect(urls).toEqual([
|
|
108
|
+
'https://example.com/path-with-dashes_and_underscores',
|
|
109
|
+
]);
|
|
110
|
+
});
|
|
111
|
+
|
|
112
|
+
test('should handle URL with encoded characters', () => {
|
|
113
|
+
const query = 'https://example.com/search?q=hello%20world';
|
|
114
|
+
const urls = extractUrlsFromQuery(query);
|
|
115
|
+
expect(urls).toEqual(['https://example.com/search?q=hello%20world']);
|
|
116
|
+
});
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
describe('isDirectUrlRequest', () => {
|
|
120
|
+
test('should return true for bare URL', () => {
|
|
121
|
+
expect(isDirectUrlRequest('https://example.com')).toBe(true);
|
|
122
|
+
});
|
|
123
|
+
|
|
124
|
+
test('should return true for URL with short question', () => {
|
|
125
|
+
expect(isDirectUrlRequest('What is https://example.com?')).toBe(true);
|
|
126
|
+
});
|
|
127
|
+
|
|
128
|
+
test('should return true for "summarize this" style requests', () => {
|
|
129
|
+
expect(isDirectUrlRequest('Summarize https://example.com')).toBe(true);
|
|
130
|
+
});
|
|
131
|
+
|
|
132
|
+
test('should return true for "read this page" style requests', () => {
|
|
133
|
+
expect(isDirectUrlRequest('Read https://example.com/article')).toBe(true);
|
|
134
|
+
});
|
|
135
|
+
|
|
136
|
+
test('should return true for URL with brief context', () => {
|
|
137
|
+
expect(isDirectUrlRequest("What's on https://example.com?")).toBe(true);
|
|
138
|
+
});
|
|
139
|
+
|
|
140
|
+
test('should return false for search-like query with URL mention', () => {
|
|
141
|
+
const query =
|
|
142
|
+
'I want to find information about artificial intelligence and machine learning. Can you search for https://example.com and tell me what they say about neural networks?';
|
|
143
|
+
expect(isDirectUrlRequest(query)).toBe(false);
|
|
144
|
+
});
|
|
145
|
+
|
|
146
|
+
test('should return false when no URL present', () => {
|
|
147
|
+
expect(isDirectUrlRequest('Search for AI news')).toBe(false);
|
|
148
|
+
});
|
|
149
|
+
|
|
150
|
+
test('should return true for multiple URLs with short text', () => {
|
|
151
|
+
expect(
|
|
152
|
+
isDirectUrlRequest('Compare https://site1.com and https://site2.com')
|
|
153
|
+
).toBe(true);
|
|
154
|
+
});
|
|
155
|
+
|
|
156
|
+
test('should handle URL at end of sentence', () => {
|
|
157
|
+
expect(isDirectUrlRequest('Check this: https://example.com')).toBe(true);
|
|
158
|
+
});
|
|
159
|
+
|
|
160
|
+
test('should handle URL at beginning', () => {
|
|
161
|
+
expect(isDirectUrlRequest('https://example.com - what is this?')).toBe(
|
|
162
|
+
true
|
|
163
|
+
);
|
|
164
|
+
});
|
|
165
|
+
|
|
166
|
+
test('should return true for 49 chars of non-URL text', () => {
  // The space separating the padding from the URL is trimmed away, so only
  // the 49 padding characters count toward the `< 50` limit.
  const padding = 'a'.repeat(49);
  expect(isDirectUrlRequest(`${padding} https://example.com`)).toBe(true);
});

test('should return false for exactly 50 chars of non-URL text', () => {
  // The limit is exclusive: 50 remaining characters no longer qualifies.
  const padding = 'a'.repeat(50);
  expect(isDirectUrlRequest(`${padding} https://example.com`)).toBe(false);
});

test('should return false for 51+ chars of non-URL text', () => {
  const padding = 'a'.repeat(51);
  expect(isDirectUrlRequest(`${padding} https://example.com`)).toBe(false);
});
|
|
176
|
+
});
|
|
177
|
+
});
|
|
178
|
+
|
|
179
|
+
describe('Direct URL Extraction - Integration', () => {
|
|
180
|
+
// Mock scraper for testing
|
|
181
|
+
const createMockScraper = () => ({
|
|
182
|
+
scrapeUrl: jest.fn<(url: string) => Promise<[string, any]>>(),
|
|
183
|
+
extractContent: jest.fn<(response: any) => [string, any]>(),
|
|
184
|
+
extractMetadata: jest.fn<(response: any) => any>(),
|
|
185
|
+
});
|
|
186
|
+
|
|
187
|
+
const createMockLogger = () => ({
|
|
188
|
+
debug: jest.fn(),
|
|
189
|
+
info: jest.fn(),
|
|
190
|
+
warn: jest.fn(),
|
|
191
|
+
error: jest.fn(),
|
|
192
|
+
});
|
|
193
|
+
|
|
194
|
+
describe('extractDirectUrlContent', () => {
|
|
195
|
+
/**
 * Simplified, test-local version of the direct URL extraction routine.
 *
 * Scrapes each URL sequentially and appends one entry per URL to `organic`:
 * a processed entry (title/snippet taken from metadata, plus content and
 * references) when the scrape reports success with data, otherwise an
 * unprocessed entry whose snippet carries the failure reason.
 *
 * @param urls - URLs to scrape, in the order results should appear.
 * @param scraper - Mock scraper providing scrapeUrl/extractContent/extractMetadata.
 * @param logger - Mock logger receiving debug/warn/error calls.
 * @returns `{ success: true, data }` with the collected entries, or
 *          `{ success: false, error }` if something throws outside the per-URL handling.
 */
async function extractDirectUrlContent({
  urls,
  scraper,
  logger,
}: {
  urls: string[];
  scraper: ReturnType<typeof createMockScraper>;
  logger: ReturnType<typeof createMockLogger>;
}): Promise<any> {
  // Metadata values are untyped; anything that is not a string is ignored.
  const asString = (value: unknown): string | undefined =>
    typeof value === 'string' ? value : undefined;

  try {
    const organic: any[] = [];

    for (const url of urls) {
      try {
        logger.debug(`Direct URL extraction: ${url}`);
        const [, scraped] = await scraper.scrapeUrl(url);

        // Scrape completed but reported failure (or returned no data).
        if (!(scraped.success && scraped.data)) {
          logger.warn(
            `Failed to extract content from ${url}: ${scraped.error}`
          );
          organic.push({
            position: organic.length + 1,
            title: url,
            link: url,
            snippet: scraped.error ?? 'Failed to extract content',
            processed: false,
          });
          continue;
        }

        const [content, references] = scraper.extractContent(scraped);
        const metadata = scraper.extractMetadata(scraped);

        organic.push({
          position: organic.length + 1,
          // Prefer the page title, then the Open Graph title, then the URL itself.
          title: asString(metadata.title) ?? asString(metadata.ogTitle) ?? url,
          link: url,
          snippet:
            asString(metadata.description) ??
            asString(metadata.ogDescription) ??
            '',
          content,
          references,
          processed: true,
        });
      } catch (error) {
        // A thrown error for one URL must not abort the remaining URLs.
        logger.error(`Error extracting URL ${url}:`, error);
        organic.push({
          position: organic.length + 1,
          title: url,
          link: url,
          snippet: error instanceof Error ? error.message : String(error),
          processed: false,
        });
      }
    }

    return {
      success: true,
      data: {
        organic,
        topStories: [],
        images: [],
        videos: [],
        relatedSearches: [],
      },
    };
  } catch (error) {
    logger.error('Error in direct URL extraction:', error);
    return {
      success: false,
      error: error instanceof Error ? error.message : String(error),
    };
  }
}
|
|
278
|
+
|
|
279
|
+
test('should successfully extract content from a single URL', async () => {
|
|
280
|
+
const mockScraper = createMockScraper();
|
|
281
|
+
const mockLogger = createMockLogger();
|
|
282
|
+
|
|
283
|
+
mockScraper.scrapeUrl.mockResolvedValue([
|
|
284
|
+
'https://example.com',
|
|
285
|
+
{
|
|
286
|
+
success: true,
|
|
287
|
+
data: {
|
|
288
|
+
markdown: '# Example Page\n\nThis is the content.',
|
|
289
|
+
html: '<h1>Example Page</h1><p>This is the content.</p>',
|
|
290
|
+
},
|
|
291
|
+
},
|
|
292
|
+
]);
|
|
293
|
+
|
|
294
|
+
mockScraper.extractContent.mockReturnValue([
|
|
295
|
+
'# Example Page\n\nThis is the content.',
|
|
296
|
+
{ links: [], images: [], videos: [] },
|
|
297
|
+
]);
|
|
298
|
+
|
|
299
|
+
mockScraper.extractMetadata.mockReturnValue({
|
|
300
|
+
title: 'Example Page',
|
|
301
|
+
description: 'An example website',
|
|
302
|
+
});
|
|
303
|
+
|
|
304
|
+
const result = await extractDirectUrlContent({
|
|
305
|
+
urls: ['https://example.com'],
|
|
306
|
+
scraper: mockScraper,
|
|
307
|
+
logger: mockLogger,
|
|
308
|
+
});
|
|
309
|
+
|
|
310
|
+
expect(result.success).toBe(true);
|
|
311
|
+
expect(result.data.organic).toHaveLength(1);
|
|
312
|
+
expect(result.data.organic[0]).toMatchObject({
|
|
313
|
+
position: 1,
|
|
314
|
+
title: 'Example Page',
|
|
315
|
+
link: 'https://example.com',
|
|
316
|
+
snippet: 'An example website',
|
|
317
|
+
content: '# Example Page\n\nThis is the content.',
|
|
318
|
+
processed: true,
|
|
319
|
+
});
|
|
320
|
+
});
|
|
321
|
+
|
|
322
|
+
test('should handle multiple URLs', async () => {
|
|
323
|
+
const mockScraper = createMockScraper();
|
|
324
|
+
const mockLogger = createMockLogger();
|
|
325
|
+
|
|
326
|
+
mockScraper.scrapeUrl
|
|
327
|
+
.mockResolvedValueOnce([
|
|
328
|
+
'https://site1.com',
|
|
329
|
+
{ success: true, data: { markdown: 'Content 1' } },
|
|
330
|
+
])
|
|
331
|
+
.mockResolvedValueOnce([
|
|
332
|
+
'https://site2.com',
|
|
333
|
+
{ success: true, data: { markdown: 'Content 2' } },
|
|
334
|
+
]);
|
|
335
|
+
|
|
336
|
+
mockScraper.extractContent
|
|
337
|
+
.mockReturnValueOnce(['Content 1', undefined])
|
|
338
|
+
.mockReturnValueOnce(['Content 2', undefined]);
|
|
339
|
+
|
|
340
|
+
mockScraper.extractMetadata
|
|
341
|
+
.mockReturnValueOnce({ title: 'Site 1' })
|
|
342
|
+
.mockReturnValueOnce({ title: 'Site 2' });
|
|
343
|
+
|
|
344
|
+
const result = await extractDirectUrlContent({
|
|
345
|
+
urls: ['https://site1.com', 'https://site2.com'],
|
|
346
|
+
scraper: mockScraper,
|
|
347
|
+
logger: mockLogger,
|
|
348
|
+
});
|
|
349
|
+
|
|
350
|
+
expect(result.success).toBe(true);
|
|
351
|
+
expect(result.data.organic).toHaveLength(2);
|
|
352
|
+
expect(result.data.organic[0].title).toBe('Site 1');
|
|
353
|
+
expect(result.data.organic[1].title).toBe('Site 2');
|
|
354
|
+
expect(result.data.organic[0].position).toBe(1);
|
|
355
|
+
expect(result.data.organic[1].position).toBe(2);
|
|
356
|
+
});
|
|
357
|
+
|
|
358
|
+
test('should handle scraper failure gracefully', async () => {
|
|
359
|
+
const mockScraper = createMockScraper();
|
|
360
|
+
const mockLogger = createMockLogger();
|
|
361
|
+
|
|
362
|
+
mockScraper.scrapeUrl.mockResolvedValue([
|
|
363
|
+
'https://example.com',
|
|
364
|
+
{
|
|
365
|
+
success: false,
|
|
366
|
+
error: 'Failed to fetch page',
|
|
367
|
+
},
|
|
368
|
+
]);
|
|
369
|
+
|
|
370
|
+
const result = await extractDirectUrlContent({
|
|
371
|
+
urls: ['https://example.com'],
|
|
372
|
+
scraper: mockScraper,
|
|
373
|
+
logger: mockLogger,
|
|
374
|
+
});
|
|
375
|
+
|
|
376
|
+
expect(result.success).toBe(true);
|
|
377
|
+
expect(result.data.organic).toHaveLength(1);
|
|
378
|
+
expect(result.data.organic[0]).toMatchObject({
|
|
379
|
+
title: 'https://example.com',
|
|
380
|
+
link: 'https://example.com',
|
|
381
|
+
snippet: 'Failed to fetch page',
|
|
382
|
+
processed: false,
|
|
383
|
+
});
|
|
384
|
+
expect(mockLogger.warn).toHaveBeenCalled();
|
|
385
|
+
});
|
|
386
|
+
|
|
387
|
+
test('should handle scraper exception', async () => {
|
|
388
|
+
const mockScraper = createMockScraper();
|
|
389
|
+
const mockLogger = createMockLogger();
|
|
390
|
+
|
|
391
|
+
mockScraper.scrapeUrl.mockRejectedValue(new Error('Network error'));
|
|
392
|
+
|
|
393
|
+
const result = await extractDirectUrlContent({
|
|
394
|
+
urls: ['https://example.com'],
|
|
395
|
+
scraper: mockScraper,
|
|
396
|
+
logger: mockLogger,
|
|
397
|
+
});
|
|
398
|
+
|
|
399
|
+
expect(result.success).toBe(true);
|
|
400
|
+
expect(result.data.organic).toHaveLength(1);
|
|
401
|
+
expect(result.data.organic[0]).toMatchObject({
|
|
402
|
+
title: 'https://example.com',
|
|
403
|
+
link: 'https://example.com',
|
|
404
|
+
snippet: 'Network error',
|
|
405
|
+
processed: false,
|
|
406
|
+
});
|
|
407
|
+
expect(mockLogger.error).toHaveBeenCalled();
|
|
408
|
+
});
|
|
409
|
+
|
|
410
|
+
test('should use URL as title when metadata title is missing', async () => {
|
|
411
|
+
const mockScraper = createMockScraper();
|
|
412
|
+
const mockLogger = createMockLogger();
|
|
413
|
+
|
|
414
|
+
mockScraper.scrapeUrl.mockResolvedValue([
|
|
415
|
+
'https://example.com/page',
|
|
416
|
+
{ success: true, data: { markdown: 'Content' } },
|
|
417
|
+
]);
|
|
418
|
+
|
|
419
|
+
mockScraper.extractContent.mockReturnValue(['Content', undefined]);
|
|
420
|
+
mockScraper.extractMetadata.mockReturnValue({}); // No title
|
|
421
|
+
|
|
422
|
+
const result = await extractDirectUrlContent({
|
|
423
|
+
urls: ['https://example.com/page'],
|
|
424
|
+
scraper: mockScraper,
|
|
425
|
+
logger: mockLogger,
|
|
426
|
+
});
|
|
427
|
+
|
|
428
|
+
expect(result.data.organic[0].title).toBe('https://example.com/page');
|
|
429
|
+
});
|
|
430
|
+
|
|
431
|
+
test('should use ogTitle as fallback when title is missing', async () => {
|
|
432
|
+
const mockScraper = createMockScraper();
|
|
433
|
+
const mockLogger = createMockLogger();
|
|
434
|
+
|
|
435
|
+
mockScraper.scrapeUrl.mockResolvedValue([
|
|
436
|
+
'https://example.com',
|
|
437
|
+
{ success: true, data: { markdown: 'Content' } },
|
|
438
|
+
]);
|
|
439
|
+
|
|
440
|
+
mockScraper.extractContent.mockReturnValue(['Content', undefined]);
|
|
441
|
+
mockScraper.extractMetadata.mockReturnValue({
|
|
442
|
+
ogTitle: 'OG Title',
|
|
443
|
+
ogDescription: 'OG Description',
|
|
444
|
+
});
|
|
445
|
+
|
|
446
|
+
const result = await extractDirectUrlContent({
|
|
447
|
+
urls: ['https://example.com'],
|
|
448
|
+
scraper: mockScraper,
|
|
449
|
+
logger: mockLogger,
|
|
450
|
+
});
|
|
451
|
+
|
|
452
|
+
expect(result.data.organic[0].title).toBe('OG Title');
|
|
453
|
+
expect(result.data.organic[0].snippet).toBe('OG Description');
|
|
454
|
+
});
|
|
455
|
+
|
|
456
|
+
test('should handle mixed success and failure', async () => {
|
|
457
|
+
const mockScraper = createMockScraper();
|
|
458
|
+
const mockLogger = createMockLogger();
|
|
459
|
+
|
|
460
|
+
mockScraper.scrapeUrl
|
|
461
|
+
.mockResolvedValueOnce([
|
|
462
|
+
'https://success.com',
|
|
463
|
+
{ success: true, data: { markdown: 'Content' } },
|
|
464
|
+
])
|
|
465
|
+
.mockResolvedValueOnce([
|
|
466
|
+
'https://failure.com',
|
|
467
|
+
{ success: false, error: 'Not found' },
|
|
468
|
+
]);
|
|
469
|
+
|
|
470
|
+
mockScraper.extractContent.mockReturnValue(['Content', undefined]);
|
|
471
|
+
mockScraper.extractMetadata.mockReturnValue({ title: 'Success Site' });
|
|
472
|
+
|
|
473
|
+
const result = await extractDirectUrlContent({
|
|
474
|
+
urls: ['https://success.com', 'https://failure.com'],
|
|
475
|
+
scraper: mockScraper,
|
|
476
|
+
logger: mockLogger,
|
|
477
|
+
});
|
|
478
|
+
|
|
479
|
+
expect(result.success).toBe(true);
|
|
480
|
+
expect(result.data.organic).toHaveLength(2);
|
|
481
|
+
expect(result.data.organic[0].processed).toBe(true);
|
|
482
|
+
expect(result.data.organic[1].processed).toBe(false);
|
|
483
|
+
});
|
|
484
|
+
});
|
|
485
|
+
});
|
|
486
|
+
|
|
487
|
+
describe('URL Pattern Edge Cases', () => {
|
|
488
|
+
test('should not match invalid protocols', () => {
|
|
489
|
+
expect(extractUrlsFromQuery('ftp://example.com')).toEqual([]);
|
|
490
|
+
expect(extractUrlsFromQuery('file://example.com')).toEqual([]);
|
|
491
|
+
expect(extractUrlsFromQuery('mailto:test@example.com')).toEqual([]);
|
|
492
|
+
});
|
|
493
|
+
|
|
494
|
+
test('should handle URLs with international domain names', () => {
|
|
495
|
+
const urls = extractUrlsFromQuery('https://例え.jp/page');
|
|
496
|
+
expect(urls).toEqual(['https://例え.jp/page']);
|
|
497
|
+
});
|
|
498
|
+
|
|
499
|
+
test('should handle very long URLs', () => {
|
|
500
|
+
const longPath = 'a'.repeat(500);
|
|
501
|
+
const longUrl = `https://example.com/${longPath}`;
|
|
502
|
+
const urls = extractUrlsFromQuery(longUrl);
|
|
503
|
+
expect(urls).toEqual([longUrl]);
|
|
504
|
+
});
|
|
505
|
+
|
|
506
|
+
test('should handle URL with authentication', () => {
|
|
507
|
+
const urls = extractUrlsFromQuery('https://user:pass@example.com/page');
|
|
508
|
+
expect(urls).toEqual(['https://user:pass@example.com/page']);
|
|
509
|
+
});
|
|
510
|
+
|
|
511
|
+
test('should handle URL surrounded by parentheses', () => {
|
|
512
|
+
const urls = extractUrlsFromQuery('Check this (https://example.com) out');
|
|
513
|
+
// Note: URL_PATTERN does not exclude ")", so the closing paren is captured as part of the match
|
|
514
|
+
expect(urls.length).toBeGreaterThan(0);
|
|
515
|
+
expect(urls[0]).toContain('https://example.com');
|
|
516
|
+
});
|
|
517
|
+
|
|
518
|
+
test('should handle multiple URLs on same line', () => {
|
|
519
|
+
const query =
|
|
520
|
+
'Visit https://a.com, https://b.com, and https://c.com for info';
|
|
521
|
+
const urls = extractUrlsFromQuery(query);
|
|
522
|
+
expect(urls.length).toBe(3);
|
|
523
|
+
});
|
|
524
|
+
|
|
525
|
+
test('should handle newlines between URLs', () => {
|
|
526
|
+
const query = 'First URL:\nhttps://a.com\nSecond URL:\nhttps://b.com';
|
|
527
|
+
const urls = extractUrlsFromQuery(query);
|
|
528
|
+
expect(urls).toEqual(['https://a.com', 'https://b.com']);
|
|
529
|
+
});
|
|
530
|
+
});
|