@librechat/agents 2.4.30 → 2.4.33
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/common/enum.cjs +1 -0
- package/dist/cjs/common/enum.cjs.map +1 -1
- package/dist/cjs/events.cjs +3 -3
- package/dist/cjs/events.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +2 -1
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/main.cjs +7 -2
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/ids.cjs +23 -0
- package/dist/cjs/messages/ids.cjs.map +1 -0
- package/dist/cjs/splitStream.cjs +2 -1
- package/dist/cjs/splitStream.cjs.map +1 -1
- package/dist/cjs/stream.cjs +87 -154
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +14 -3
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/handlers.cjs +144 -0
- package/dist/cjs/tools/handlers.cjs.map +1 -0
- package/dist/cjs/tools/search/content.cjs +140 -0
- package/dist/cjs/tools/search/content.cjs.map +1 -0
- package/dist/cjs/tools/search/firecrawl.cjs +131 -0
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -0
- package/dist/cjs/tools/search/format.cjs +203 -0
- package/dist/cjs/tools/search/format.cjs.map +1 -0
- package/dist/cjs/tools/search/highlights.cjs +245 -0
- package/dist/cjs/tools/search/highlights.cjs.map +1 -0
- package/dist/cjs/tools/search/rerankers.cjs +194 -0
- package/dist/cjs/tools/search/rerankers.cjs.map +1 -0
- package/dist/cjs/tools/search/schema.cjs +70 -0
- package/dist/cjs/tools/search/schema.cjs.map +1 -0
- package/dist/cjs/tools/search/search.cjs +491 -0
- package/dist/cjs/tools/search/search.cjs.map +1 -0
- package/dist/cjs/tools/search/tool.cjs +292 -0
- package/dist/cjs/tools/search/tool.cjs.map +1 -0
- package/dist/cjs/tools/search/utils.cjs +66 -0
- package/dist/cjs/tools/search/utils.cjs.map +1 -0
- package/dist/esm/common/enum.mjs +1 -0
- package/dist/esm/common/enum.mjs.map +1 -1
- package/dist/esm/events.mjs +1 -1
- package/dist/esm/events.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +2 -1
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/main.mjs +4 -1
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/ids.mjs +21 -0
- package/dist/esm/messages/ids.mjs.map +1 -0
- package/dist/esm/splitStream.mjs +2 -1
- package/dist/esm/splitStream.mjs.map +1 -1
- package/dist/esm/stream.mjs +87 -152
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +14 -3
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/handlers.mjs +141 -0
- package/dist/esm/tools/handlers.mjs.map +1 -0
- package/dist/esm/tools/search/content.mjs +119 -0
- package/dist/esm/tools/search/content.mjs.map +1 -0
- package/dist/esm/tools/search/firecrawl.mjs +128 -0
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -0
- package/dist/esm/tools/search/format.mjs +201 -0
- package/dist/esm/tools/search/format.mjs.map +1 -0
- package/dist/esm/tools/search/highlights.mjs +243 -0
- package/dist/esm/tools/search/highlights.mjs.map +1 -0
- package/dist/esm/tools/search/rerankers.mjs +188 -0
- package/dist/esm/tools/search/rerankers.mjs.map +1 -0
- package/dist/esm/tools/search/schema.mjs +61 -0
- package/dist/esm/tools/search/schema.mjs.map +1 -0
- package/dist/esm/tools/search/search.mjs +488 -0
- package/dist/esm/tools/search/search.mjs.map +1 -0
- package/dist/esm/tools/search/tool.mjs +290 -0
- package/dist/esm/tools/search/tool.mjs.map +1 -0
- package/dist/esm/tools/search/utils.mjs +61 -0
- package/dist/esm/tools/search/utils.mjs.map +1 -0
- package/dist/types/common/enum.d.ts +1 -0
- package/dist/types/graphs/Graph.d.ts +1 -1
- package/dist/types/index.d.ts +2 -0
- package/dist/types/messages/ids.d.ts +3 -0
- package/dist/types/messages/index.d.ts +1 -0
- package/dist/types/scripts/search.d.ts +1 -0
- package/dist/types/stream.d.ts +0 -8
- package/dist/types/tools/ToolNode.d.ts +6 -0
- package/dist/types/tools/example.d.ts +23 -3
- package/dist/types/tools/handlers.d.ts +8 -0
- package/dist/types/tools/search/content.d.ts +4 -0
- package/dist/types/tools/search/firecrawl.d.ts +38 -0
- package/dist/types/tools/search/format.d.ts +5 -0
- package/dist/types/tools/search/highlights.d.ts +13 -0
- package/dist/types/tools/search/index.d.ts +2 -0
- package/dist/types/tools/search/rerankers.d.ts +36 -0
- package/dist/types/tools/search/schema.d.ts +16 -0
- package/dist/types/tools/search/search.d.ts +9 -0
- package/dist/types/tools/search/test.d.ts +1 -0
- package/dist/types/tools/search/tool.d.ts +33 -0
- package/dist/types/tools/search/types.d.ts +540 -0
- package/dist/types/tools/search/utils.d.ts +10 -0
- package/package.json +10 -7
- package/src/common/enum.ts +1 -0
- package/src/events.ts +49 -15
- package/src/graphs/Graph.ts +6 -2
- package/src/index.ts +2 -0
- package/src/messages/ids.ts +26 -0
- package/src/messages/index.ts +1 -0
- package/src/scripts/search.ts +146 -0
- package/src/splitStream.test.ts +132 -71
- package/src/splitStream.ts +2 -1
- package/src/stream.ts +94 -183
- package/src/tools/ToolNode.ts +37 -14
- package/src/tools/handlers.ts +167 -0
- package/src/tools/search/content.test.ts +173 -0
- package/src/tools/search/content.ts +147 -0
- package/src/tools/search/firecrawl.ts +158 -0
- package/src/tools/search/format.ts +252 -0
- package/src/tools/search/highlights.ts +320 -0
- package/src/tools/search/index.ts +2 -0
- package/src/tools/search/output.md +2775 -0
- package/src/tools/search/rerankers.ts +269 -0
- package/src/tools/search/schema.ts +63 -0
- package/src/tools/search/search.ts +680 -0
- package/src/tools/search/test.html +884 -0
- package/src/tools/search/test.md +643 -0
- package/src/tools/search/test.ts +159 -0
- package/src/tools/search/tool.ts +427 -0
- package/src/tools/search/types.ts +621 -0
- package/src/tools/search/utils.ts +79 -0
- package/src/utils/llmConfig.ts +1 -1
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var nanoid = require('nanoid');
|
|
4
|
+
var _enum = require('../common/enum.cjs');
|
|
5
|
+
require('../messages/core.cjs');
|
|
6
|
+
var ids = require('../messages/ids.cjs');
|
|
7
|
+
require('@langchain/core/messages');
|
|
8
|
+
|
|
9
|
+
/* eslint-disable no-console */
|
|
10
|
+
// src/tools/handlers.ts
|
|
11
|
+
/**
 * Forwards streamed tool call chunks to the graph as a TOOL_CALLS run step delta.
 *
 * Before forwarding, the function makes sure there is a run step to attach to:
 * it looks up the step for `stepKey` at index `graph.contentData.length - 1`
 * and, when that lookup throws, dispatches a fresh MESSAGE_CREATION step.
 * When the step found is a MESSAGE_CREATION step, every chunk carries both an
 * `id` and a `name`, and the step is not yet flagged in
 * `graph.messageStepHasToolCalls`, it also dispatches the tool call ids on the
 * message step and opens a new TOOL_CALLS run step for the chunks.
 *
 * Note: chunks are mutated in place — empty-string `id` / `name` become `undefined`.
 *
 * @param {object} params
 * @param {object} params.graph - Graph instance used for step lookup and dispatching.
 * @param {string} params.stepKey - Key identifying the current step sequence.
 * @param {Array<object>} params.toolCallChunks - Tool call chunks to forward.
 * @returns {void}
 */
function handleToolCallChunks({ graph, stepKey, toolCallChunks, }) {
    let lastStepId;
    let lastRunStep;
    try {
        const lastIndex = graph.contentData.length - 1;
        lastStepId = graph.getStepIdByKey(stepKey, lastIndex);
        lastRunStep = graph.getRunStep(lastStepId);
    }
    catch {
        /** Edge Case: no previous step could be resolved, so start a new message creation step */
        const messageId = ids.getMessageId(stepKey, graph, true) ?? '';
        const messageStep = {
            type: _enum.StepTypes.MESSAGE_CREATION,
            message_creation: {
                message_id: messageId,
            },
        };
        lastStepId = graph.dispatchRunStep(stepKey, messageStep);
        lastRunStep = graph.getRunStep(lastStepId);
    }
    let targetStepId = graph.getStepIdByKey(stepKey, lastRunStep?.index);
    /** Edge Case: Tool Call Run Step or `tool_call_ids` never dispatched — only collect after a message step */
    let collectedCalls;
    if (lastStepId && lastRunStep && lastRunStep.type === _enum.StepTypes.MESSAGE_CREATION) {
        collectedCalls = [];
    }
    /** Edge Case: `id` and `name` fields cannot be empty strings */
    for (const chunk of toolCallChunks) {
        if (chunk.name === '') {
            chunk.name = undefined;
        }
        if (chunk.id === '') {
            chunk.id = undefined;
            continue;
        }
        if (collectedCalls == null || chunk.id == null || chunk.name == null) {
            continue;
        }
        collectedCalls.push({
            args: {},
            id: chunk.id,
            name: chunk.name,
            type: _enum.ToolCallTypes.TOOL_CALL,
        });
    }
    const idsAlreadySent = lastRunStep?.type === _enum.StepTypes.MESSAGE_CREATION &&
        graph.messageStepHasToolCalls.has(lastStepId);
    // Only open a TOOL_CALLS step when every chunk yielded a complete (id + name) tool call
    if (!idsAlreadySent && collectedCalls?.length === toolCallChunks.length) {
        const toolCallIds = collectedCalls.map((call) => call.id ?? '');
        graph.dispatchMessageDelta(lastStepId, {
            content: [
                {
                    type: _enum.ContentTypes.TEXT,
                    text: '',
                    tool_call_ids: toolCallIds,
                },
            ],
        });
        graph.messageStepHasToolCalls.set(lastStepId, true);
        targetStepId = graph.dispatchRunStep(stepKey, {
            type: _enum.StepTypes.TOOL_CALLS,
            tool_calls: collectedCalls,
        });
    }
    graph.dispatchRunStepDelta(targetStepId, {
        type: _enum.StepTypes.TOOL_CALLS,
        tool_calls: toolCallChunks,
    });
}
|
|
77
|
+
/**
 * Dispatches run steps for a list of complete tool calls.
 *
 * For each tool call that has not been seen yet (per `graph.toolCallStepIds`):
 *  1. ensures the call has an id (generating `toolu_<nanoid>` when `id` is nullish),
 *  2. attaches the id to a MESSAGE_CREATION step — the step found at index
 *     `graph.contentData.length - 1` when that step is a message creation,
 *     otherwise a newly dispatched message creation step,
 *  3. dispatches a TOOL_CALLS run step containing the tool call.
 *
 * Note: each tool call object is mutated in place (`id` is assigned).
 *
 * @param {Array<object>} [toolCalls] - Tool calls to dispatch; nothing happens when missing or empty.
 * @param {Record<string, unknown>} [metadata] - Run metadata used to derive the step key.
 * @param {object} [graph] - Graph instance used for step lookup and dispatching.
 * @returns {void}
 */
const handleToolCalls = (toolCalls, metadata, graph) => {
    if (!graph || !metadata) {
        // No `event` binding exists in this module; interpolating it here would
        // throw a ReferenceError instead of warning, so the message names the handler.
        console.warn('Graph or metadata not found in handleToolCalls');
        return;
    }
    if (!toolCalls || toolCalls.length === 0) {
        return;
    }
    const stepKey = graph.getStepKey(metadata);
    /** Sends the tool call id on the given message step and flags that step as having tool calls. */
    const attachToolCallId = (messageStepId, toolCallId) => {
        graph.dispatchMessageDelta(messageStepId, {
            content: [
                {
                    type: 'text',
                    text: '',
                    tool_call_ids: [toolCallId],
                },
            ],
        });
        graph.messageStepHasToolCalls.set(messageStepId, true);
    };
    for (const tool_call of toolCalls) {
        const toolCallId = tool_call.id ?? `toolu_${nanoid.nanoid()}`;
        tool_call.id = toolCallId;
        // Skip empty-string ids and tool calls that already have a run step
        if (!toolCallId || graph.toolCallStepIds.has(toolCallId)) {
            continue;
        }
        let prevStepId = '';
        let prevRunStep;
        try {
            prevStepId = graph.getStepIdByKey(stepKey, graph.contentData.length - 1);
            prevRunStep = graph.getRunStep(prevStepId);
        }
        catch {
            // no previous step
        }
        const prevIsMessage = Boolean(prevRunStep) &&
            prevRunStep.type === _enum.StepTypes.MESSAGE_CREATION;
        if (prevIsMessage && prevStepId) {
            /* The previous step exists and is a message creation: reuse it */
            attachToolCallId(prevStepId, toolCallId);
        }
        else if (!prevIsMessage) {
            /* The previous step doesn't exist or is not a message creation: create one */
            const messageId = ids.getMessageId(stepKey, graph, true) ?? '';
            const stepId = graph.dispatchRunStep(stepKey, {
                type: _enum.StepTypes.MESSAGE_CREATION,
                message_creation: {
                    message_id: messageId,
                },
            });
            // Flag the step that actually received the ids (the new one), not `prevStepId`
            attachToolCallId(stepId, toolCallId);
        }
        graph.dispatchRunStep(stepKey, {
            type: _enum.StepTypes.TOOL_CALLS,
            tool_calls: [tool_call],
        });
    }
};
|
|
141
|
+
|
|
142
|
+
exports.handleToolCallChunks = handleToolCallChunks;
|
|
143
|
+
exports.handleToolCalls = handleToolCalls;
|
|
144
|
+
//# sourceMappingURL=handlers.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"handlers.cjs","sources":["../../../src/tools/handlers.ts"],"sourcesContent":["/* eslint-disable no-console */\n// src/tools/handlers.ts\nimport { nanoid } from 'nanoid';\nimport type { ToolCall, ToolCallChunk } from '@langchain/core/messages/tool';\nimport type { Graph } from '@/graphs';\nimport type * as t from '@/types';\nimport { StepTypes, ContentTypes, ToolCallTypes } from '@/common';\nimport { getMessageId } from '@/messages';\n\nexport function handleToolCallChunks({\n graph,\n stepKey,\n toolCallChunks,\n}: {\n graph: Graph;\n stepKey: string;\n toolCallChunks: ToolCallChunk[];\n}): void {\n let prevStepId: string;\n let prevRunStep: t.RunStep | undefined;\n try {\n prevStepId = graph.getStepIdByKey(stepKey, graph.contentData.length - 1);\n prevRunStep = graph.getRunStep(prevStepId);\n } catch {\n /** Edge Case: If no previous step exists, create a new message creation step */\n const message_id = getMessageId(stepKey, graph, true) ?? '';\n prevStepId = graph.dispatchRunStep(stepKey, {\n type: StepTypes.MESSAGE_CREATION,\n message_creation: {\n message_id,\n },\n });\n prevRunStep = graph.getRunStep(prevStepId);\n }\n\n const _stepId = graph.getStepIdByKey(stepKey, prevRunStep?.index);\n\n /** Edge Case: Tool Call Run Step or `tool_call_ids` never dispatched */\n const tool_calls: ToolCall[] | undefined =\n prevStepId && prevRunStep && prevRunStep.type === StepTypes.MESSAGE_CREATION\n ? 
[]\n : undefined;\n\n /** Edge Case: `id` and `name` fields cannot be empty strings */\n for (const toolCallChunk of toolCallChunks) {\n if (toolCallChunk.name === '') {\n toolCallChunk.name = undefined;\n }\n if (toolCallChunk.id === '') {\n toolCallChunk.id = undefined;\n } else if (\n tool_calls != null &&\n toolCallChunk.id != null &&\n toolCallChunk.name != null\n ) {\n tool_calls.push({\n args: {},\n id: toolCallChunk.id,\n name: toolCallChunk.name,\n type: ToolCallTypes.TOOL_CALL,\n });\n }\n }\n\n let stepId: string = _stepId;\n const alreadyDispatched =\n prevRunStep?.type === StepTypes.MESSAGE_CREATION &&\n graph.messageStepHasToolCalls.has(prevStepId);\n if (!alreadyDispatched && tool_calls?.length === toolCallChunks.length) {\n graph.dispatchMessageDelta(prevStepId, {\n content: [\n {\n type: ContentTypes.TEXT,\n text: '',\n tool_call_ids: tool_calls.map((tc) => tc.id ?? ''),\n },\n ],\n });\n graph.messageStepHasToolCalls.set(prevStepId, true);\n stepId = graph.dispatchRunStep(stepKey, {\n type: StepTypes.TOOL_CALLS,\n tool_calls,\n });\n }\n graph.dispatchRunStepDelta(stepId, {\n type: StepTypes.TOOL_CALLS,\n tool_calls: toolCallChunks,\n });\n}\n\nexport const handleToolCalls = (\n toolCalls?: ToolCall[],\n metadata?: Record<string, unknown>,\n graph?: Graph\n): void => {\n if (!graph || !metadata) {\n console.warn(`Graph or metadata not found in ${event} event`);\n return;\n }\n\n if (!toolCalls) {\n return;\n }\n\n if (toolCalls.length === 0) {\n return;\n }\n\n const stepKey = graph.getStepKey(metadata);\n\n for (const tool_call of toolCalls) {\n const toolCallId = tool_call.id ?? 
`toolu_${nanoid()}`;\n tool_call.id = toolCallId;\n if (!toolCallId || graph.toolCallStepIds.has(toolCallId)) {\n continue;\n }\n\n let prevStepId = '';\n let prevRunStep: t.RunStep | undefined;\n try {\n prevStepId = graph.getStepIdByKey(stepKey, graph.contentData.length - 1);\n prevRunStep = graph.getRunStep(prevStepId);\n } catch {\n // no previous step\n }\n\n const dispatchToolCallIds = (lastMessageStepId: string): void => {\n graph.dispatchMessageDelta(lastMessageStepId, {\n content: [\n {\n type: 'text',\n text: '',\n tool_call_ids: [toolCallId],\n },\n ],\n });\n };\n /* If the previous step exists and is a message creation */\n if (\n prevStepId &&\n prevRunStep &&\n prevRunStep.type === StepTypes.MESSAGE_CREATION\n ) {\n dispatchToolCallIds(prevStepId);\n graph.messageStepHasToolCalls.set(prevStepId, true);\n /* If the previous step doesn't exist or is not a message creation */\n } else if (\n !prevRunStep ||\n prevRunStep.type !== StepTypes.MESSAGE_CREATION\n ) {\n const messageId = getMessageId(stepKey, graph, true) ?? 
'';\n const stepId = graph.dispatchRunStep(stepKey, {\n type: StepTypes.MESSAGE_CREATION,\n message_creation: {\n message_id: messageId,\n },\n });\n dispatchToolCallIds(stepId);\n graph.messageStepHasToolCalls.set(prevStepId, true);\n }\n\n graph.dispatchRunStep(stepKey, {\n type: StepTypes.TOOL_CALLS,\n tool_calls: [tool_call],\n });\n }\n};\n"],"names":["getMessageId","StepTypes","ToolCallTypes","ContentTypes","nanoid"],"mappings":";;;;;;;;AAAA;AACA;AAQM,SAAU,oBAAoB,CAAC,EACnC,KAAK,EACL,OAAO,EACP,cAAc,GAKf,EAAA;AACC,IAAA,IAAI,UAAkB;AACtB,IAAA,IAAI,WAAkC;AACtC,IAAA,IAAI;AACF,QAAA,UAAU,GAAG,KAAK,CAAC,cAAc,CAAC,OAAO,EAAE,KAAK,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC;AACxE,QAAA,WAAW,GAAG,KAAK,CAAC,UAAU,CAAC,UAAU,CAAC;;AAC1C,IAAA,MAAM;;AAEN,QAAA,MAAM,UAAU,GAAGA,gBAAY,CAAC,OAAO,EAAE,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE;AAC3D,QAAA,UAAU,GAAG,KAAK,CAAC,eAAe,CAAC,OAAO,EAAE;YAC1C,IAAI,EAAEC,eAAS,CAAC,gBAAgB;AAChC,YAAA,gBAAgB,EAAE;gBAChB,UAAU;AACX,aAAA;AACF,SAAA,CAAC;AACF,QAAA,WAAW,GAAG,KAAK,CAAC,UAAU,CAAC,UAAU,CAAC;;AAG5C,IAAA,MAAM,OAAO,GAAG,KAAK,CAAC,cAAc,CAAC,OAAO,EAAE,WAAW,EAAE,KAAK,CAAC;;AAGjE,IAAA,MAAM,UAAU,GACd,UAAU,IAAI,WAAW,IAAI,WAAW,CAAC,IAAI,KAAKA,eAAS,CAAC;AAC1D,UAAE;UACA,SAAS;;AAGf,IAAA,KAAK,MAAM,aAAa,IAAI,cAAc,EAAE;AAC1C,QAAA,IAAI,aAAa,CAAC,IAAI,KAAK,EAAE,EAAE;AAC7B,YAAA,aAAa,CAAC,IAAI,GAAG,SAAS;;AAEhC,QAAA,IAAI,aAAa,CAAC,EAAE,KAAK,EAAE,EAAE;AAC3B,YAAA,aAAa,CAAC,EAAE,GAAG,SAAS;;aACvB,IACL,UAAU,IAAI,IAAI;YAClB,aAAa,CAAC,EAAE,IAAI,IAAI;AACxB,YAAA,aAAa,CAAC,IAAI,IAAI,IAAI,EAC1B;YACA,UAAU,CAAC,IAAI,CAAC;AACd,gBAAA,IAAI,EAAE,EAAE;gBACR,EAAE,EAAE,aAAa,CAAC,EAAE;gBACpB,IAAI,EAAE,aAAa,CAAC,IAAI;gBACxB,IAAI,EAAEC,mBAAa,CAAC,SAAS;AAC9B,aAAA,CAAC;;;IAIN,IAAI,MAAM,GAAW,OAAO;IAC5B,MAAM,iBAAiB,GACrB,WAAW,EAAE,IAAI,KAAKD,eAAS,CAAC,gBAAgB;AAChD,QAAA,KAAK,CAAC,uBAAuB,CAAC,GAAG,CAAC,UAAU,CAAC;IAC/C,IAAI,CAAC,iBAAiB,IAAI,UAAU,EAAE,MAAM,KAAK,cAAc,CAAC,MAAM,EAAE;AACtE,QAAA,KAAK,CAAC,oBAAoB,CAAC,UAAU,EAAE;AACrC,YAAA,OAAO,EAAE;AACP,gBAAA;oBACE,IAAI,EAAEE,kBAAY,CAAC,IAAI;AACvB,oBAAA,IAAI,EAAE,EAAE
;AACR,oBAAA,aAAa,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC;AACnD,iBAAA;AACF,aAAA;AACF,SAAA,CAAC;QACF,KAAK,CAAC,uBAAuB,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC;AACnD,QAAA,MAAM,GAAG,KAAK,CAAC,eAAe,CAAC,OAAO,EAAE;YACtC,IAAI,EAAEF,eAAS,CAAC,UAAU;YAC1B,UAAU;AACX,SAAA,CAAC;;AAEJ,IAAA,KAAK,CAAC,oBAAoB,CAAC,MAAM,EAAE;QACjC,IAAI,EAAEA,eAAS,CAAC,UAAU;AAC1B,QAAA,UAAU,EAAE,cAAc;AAC3B,KAAA,CAAC;AACJ;AAEa,MAAA,eAAe,GAAG,CAC7B,SAAsB,EACtB,QAAkC,EAClC,KAAa,KACL;AACR,IAAA,IAAI,CAAC,KAAK,IAAI,CAAC,QAAQ,EAAE;AACvB,QAAA,OAAO,CAAC,IAAI,CAAC,kCAAkC,KAAK,CAAA,MAAA,CAAQ,CAAC;QAC7D;;IAGF,IAAI,CAAC,SAAS,EAAE;QACd;;AAGF,IAAA,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE;QAC1B;;IAGF,MAAM,OAAO,GAAG,KAAK,CAAC,UAAU,CAAC,QAAQ,CAAC;AAE1C,IAAA,KAAK,MAAM,SAAS,IAAI,SAAS,EAAE;QACjC,MAAM,UAAU,GAAG,SAAS,CAAC,EAAE,IAAI,CAAS,MAAA,EAAAG,aAAM,EAAE,CAAA,CAAE;AACtD,QAAA,SAAS,CAAC,EAAE,GAAG,UAAU;AACzB,QAAA,IAAI,CAAC,UAAU,IAAI,KAAK,CAAC,eAAe,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE;YACxD;;QAGF,IAAI,UAAU,GAAG,EAAE;AACnB,QAAA,IAAI,WAAkC;AACtC,QAAA,IAAI;AACF,YAAA,UAAU,GAAG,KAAK,CAAC,cAAc,CAAC,OAAO,EAAE,KAAK,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC;AACxE,YAAA,WAAW,GAAG,KAAK,CAAC,UAAU,CAAC,UAAU,CAAC;;AAC1C,QAAA,MAAM;;;AAIR,QAAA,MAAM,mBAAmB,GAAG,CAAC,iBAAyB,KAAU;AAC9D,YAAA,KAAK,CAAC,oBAAoB,CAAC,iBAAiB,EAAE;AAC5C,gBAAA,OAAO,EAAE;AACP,oBAAA;AACE,wBAAA,IAAI,EAAE,MAAM;AACZ,wBAAA,IAAI,EAAE,EAAE;wBACR,aAAa,EAAE,CAAC,UAAU,CAAC;AAC5B,qBAAA;AACF,iBAAA;AACF,aAAA,CAAC;AACJ,SAAC;;AAED,QAAA,IACE,UAAU;YACV,WAAW;AACX,YAAA,WAAW,CAAC,IAAI,KAAKH,eAAS,CAAC,gBAAgB,EAC/C;YACA,mBAAmB,CAAC,UAAU,CAAC;YAC/B,KAAK,CAAC,uBAAuB,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC;;;AAE9C,aAAA,IACL,CAAC,WAAW;AACZ,YAAA,WAAW,CAAC,IAAI,KAAKA,eAAS,CAAC,gBAAgB,EAC/C;AACA,YAAA,MAAM,SAAS,GAAGD,gBAAY,CAAC,OAAO,EAAE,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE;AAC1D,YAAA,MAAM,MAAM,GAAG,KAAK,CAAC,eAAe,CAAC,OAAO,EAAE;gBAC5C,IAAI,EAAEC,eAAS,CAAC,gBAAgB;AAChC,gBAAA,gBAAgB,EAAE;AAChB,oBAAA,UAAU,EAAE,SAAS;AACtB,iBAAA;AACF,aAAA,CAAC;YACF,mBAAmB,CAAC,MAAM,CAAC;YAC3B,KAAK,CAAC,uBAAuB,CAAC,GAAG,CAAC,UAAU
,EAAE,IAAI,CAAC;;AAGrD,QAAA,KAAK,CAAC,eAAe,CAAC,OAAO,EAAE;YAC7B,IAAI,EAAEA,eAAS,CAAC,UAAU;YAC1B,UAAU,EAAE,CAAC,SAAS,CAAC;AACxB,SAAA,CAAC;;AAEN;;;;;"}
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var cheerio = require('cheerio');
|
|
4
|
+
|
|
5
|
+
/**
 * Builds a frozen, prototype-less namespace object for a CommonJS module.
 *
 * Every own enumerable key of `e` except `default` is exposed through a getter
 * (the existing accessor descriptor is reused when the property already has a
 * getter; otherwise a live getter reading `e[key]` is defined). The module
 * itself is exposed as `default`.
 *
 * @param {*} e - The required module value; falsy values yield a namespace with only `default`.
 * @returns {object} Frozen namespace object.
 */
function _interopNamespaceDefault(e) {
    const namespace = Object.create(null);
    const exportNames = e ? Object.keys(e) : [];
    for (const name of exportNames) {
        if (name === 'default') {
            continue;
        }
        const descriptor = Object.getOwnPropertyDescriptor(e, name);
        if (descriptor.get) {
            // Accessor properties are copied as-is so the original getter keeps working
            Object.defineProperty(namespace, name, descriptor);
        }
        else {
            // Data properties become live bindings onto the source module
            Object.defineProperty(namespace, name, {
                enumerable: true,
                get: () => e[name],
            });
        }
    }
    namespace.default = e;
    return Object.freeze(namespace);
}
|
|
21
|
+
|
|
22
|
+
var cheerio__namespace = /*#__PURE__*/_interopNamespaceDefault(cheerio);
|
|
23
|
+
|
|
24
|
+
/**
 * Extracts link/image/video/iframe URLs from `html` and rewrites matching
 * markdown link targets into short typed references.
 *
 * Each distinct URL gets a 1-based index per type in first-seen order, and any
 * markdown target of the form `(url)` or `(url "title")` is rewritten to
 * `(type#index)` / `(type#index "title")`. Longer URLs are replaced first so a
 * URL that is a prefix of another cannot produce a partial match.
 *
 * Iframes whose `src` contains "youtube" or "vimeo" are classified as videos.
 * Other iframes are numbered and rewritten in the markdown, but their
 * references are not part of the returned object.
 *
 * @param {string} html - HTML source to extract media references from.
 * @param {string} markdown - Markdown rendering whose link targets are rewritten.
 * @returns {{ markdown: string, links: object[], images: object[], videos: object[] }}
 *   The rewritten markdown plus the collected references.
 */
function processContent(html, markdown) {
    const $ = cheerio__namespace.load(html, {
        xmlMode: false,
    });
    const linkMap = new Map();
    const imageMap = new Map();
    const videoMap = new Map();
    const iframeMap = new Map();
    /**
     * Walks every element matching `selector`, reads its URL from `urlAttr` and
     * stores a reference in `store` (later duplicates overwrite earlier values
     * but keep the first-seen position). `accept` can veto individual URLs.
     */
    const collect = (selector, urlAttr, store, describe, accept) => {
        $(selector).each((_, el) => {
            const url = $(el).attr(urlAttr);
            if (!url) {
                return;
            }
            if (accept && !accept(url)) {
                return;
            }
            store.set(url, { originalUrl: url, ...describe(el) });
        });
    };
    const titleOnly = (el) => ({ title: $(el).attr('title') });
    // Extract all media references
    collect('a[href]', 'href', linkMap, (el) => ({
        title: $(el).attr('title'),
        text: $(el).text().trim(),
    }));
    collect('img[src]', 'src', imageMap, (el) => ({
        title: $(el).attr('alt') ?? $(el).attr('title'),
    }));
    // Videos: dedicated video elements and video platforms embedded in iframes
    collect('video[src], iframe[src*="youtube"], iframe[src*="vimeo"]', 'src', videoMap, titleOnly);
    // Every other iframe that was not already captured as a video
    collect('iframe', 'src', iframeMap, titleOnly, (src) => !src.includes('youtube') && !src.includes('vimeo'));
    // Pair every URL with its type and 1-based position within that type
    const indexed = (store, type) => Array.from(store.keys(), (url, position) => ({
        url,
        type,
        idx: position + 1,
    }));
    // Longest URLs first to avoid partial matches
    const replacements = [
        ...indexed(imageMap, 'image'),
        ...indexed(videoMap, 'video'),
        ...indexed(iframeMap, 'iframe'),
        ...indexed(linkMap, 'link'),
    ].sort((a, b) => b.url.length - a.url.length);
    // Escape special characters in URLs so they can be embedded in a regex
    const escapeRegex = (string) => string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    // Rewrite each URL wherever it appears as a markdown link target
    const result = replacements.reduce((text, { url, type, idx }) => {
        const regex = new RegExp(`\\(${escapeRegex(url)}(?:\\s+"[^"]*")?\\)`, 'g');
        return text.replace(regex, (match) => {
            // Keep any title attribute that might exist
            const titleMatch = match.match(/\s+"([^"]*)"/);
            const titlePart = titleMatch ? ` "${titleMatch[1]}"` : '';
            return `(${type}#${idx}${titlePart})`;
        });
    }, markdown);
    const links = Array.from(linkMap.values());
    const images = Array.from(imageMap.values());
    const videos = Array.from(videoMap.values());
    // Release the intermediate lookups once the result arrays are materialized
    iframeMap.clear();
    linkMap.clear();
    imageMap.clear();
    videoMap.clear();
    return {
        markdown: result,
        links,
        images,
        videos,
    };
}
|
|
138
|
+
|
|
139
|
+
exports.processContent = processContent;
|
|
140
|
+
//# sourceMappingURL=content.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"content.cjs","sources":["../../../../src/tools/search/content.ts"],"sourcesContent":["import * as cheerio from 'cheerio';\nimport type { References, MediaReference } from './types';\n\nexport function processContent(\n html: string,\n markdown: string\n): {\n markdown: string;\n} & References {\n const linkMap = new Map<string, MediaReference>();\n const imageMap = new Map<string, MediaReference>();\n const videoMap = new Map<string, MediaReference>();\n const iframeMap = new Map<string, MediaReference>();\n\n const $ = cheerio.load(html, {\n xmlMode: false,\n });\n\n // Extract all media references\n $('a[href]').each((_, el) => {\n const href = $(el).attr('href');\n if (href != null && href) {\n linkMap.set(href, {\n originalUrl: href,\n title: $(el).attr('title'),\n text: $(el).text().trim(),\n });\n }\n });\n\n $('img[src]').each((_, el) => {\n const src = $(el).attr('src');\n if (src != null && src) {\n imageMap.set(src, {\n originalUrl: src,\n title: $(el).attr('alt') ?? 
$(el).attr('title'),\n });\n }\n });\n\n // Handle videos (dedicated video elements and video platforms in iframes)\n $('video[src], iframe[src*=\"youtube\"], iframe[src*=\"vimeo\"]').each(\n (_, el) => {\n const src = $(el).attr('src');\n if (src != null && src) {\n videoMap.set(src, {\n originalUrl: src,\n title: $(el).attr('title'),\n });\n }\n }\n );\n\n // Handle all other generic iframes that aren't already captured as videos\n $('iframe').each((_, el) => {\n const src = $(el).attr('src');\n if (\n src != null &&\n src &&\n !src.includes('youtube') &&\n !src.includes('vimeo')\n ) {\n iframeMap.set(src, {\n originalUrl: src,\n title: $(el).attr('title'),\n });\n }\n });\n\n // Create lookup maps with indices\n const linkIndexMap = new Map<string, number>();\n const imageIndexMap = new Map<string, number>();\n const videoIndexMap = new Map<string, number>();\n const iframeIndexMap = new Map<string, number>();\n\n Array.from(linkMap.keys()).forEach((url, i) => linkIndexMap.set(url, i + 1));\n Array.from(imageMap.keys()).forEach((url, i) =>\n imageIndexMap.set(url, i + 1)\n );\n Array.from(videoMap.keys()).forEach((url, i) =>\n videoIndexMap.set(url, i + 1)\n );\n Array.from(iframeMap.keys()).forEach((url, i) =>\n iframeIndexMap.set(url, i + 1)\n );\n\n // Process the markdown\n let result = markdown;\n\n // Replace each URL one by one, starting with the longest URLs first to avoid partial matches\n const allUrls = [\n ...Array.from(imageMap.keys()).map((url) => ({\n url,\n type: 'image',\n idx: imageIndexMap.get(url),\n })),\n ...Array.from(videoMap.keys()).map((url) => ({\n url,\n type: 'video',\n idx: videoIndexMap.get(url),\n })),\n ...Array.from(iframeMap.keys()).map((url) => ({\n url,\n type: 'iframe',\n idx: iframeIndexMap.get(url),\n })),\n ...Array.from(linkMap.keys()).map((url) => ({\n url,\n type: 'link',\n idx: linkIndexMap.get(url),\n })),\n ].sort((a, b) => b.url.length - a.url.length);\n\n // Create a function to escape special characters in URLs 
for regex\n function escapeRegex(string: string): string {\n return string.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&');\n }\n\n // Replace each URL in the markdown\n for (const { url, type, idx } of allUrls) {\n // Create a regex that captures URLs in markdown links\n const regex = new RegExp(`\\\\(${escapeRegex(url)}(?:\\\\s+\"[^\"]*\")?\\\\)`, 'g');\n\n result = result.replace(regex, (match) => {\n // Keep any title attribute that might exist\n const titleMatch = match.match(/\\s+\"([^\"]*)\"/);\n const titlePart = titleMatch ? ` \"${titleMatch[1]}\"` : '';\n\n return `(${type}#${idx}${titlePart})`;\n });\n }\n\n iframeMap.clear();\n const links = Array.from(linkMap.values());\n linkMap.clear();\n const images = Array.from(imageMap.values());\n imageMap.clear();\n const videos = Array.from(videoMap.values());\n videoMap.clear();\n\n return {\n markdown: result,\n links,\n images,\n videos,\n };\n}\n"],"names":["cheerio"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;AAGgB,SAAA,cAAc,CAC5B,IAAY,EACZ,QAAgB,EAAA;AAIhB,IAAA,MAAM,OAAO,GAAG,IAAI,GAAG,EAA0B;AACjD,IAAA,MAAM,QAAQ,GAAG,IAAI,GAAG,EAA0B;AAClD,IAAA,MAAM,QAAQ,GAAG,IAAI,GAAG,EAA0B;AAClD,IAAA,MAAM,SAAS,GAAG,IAAI,GAAG,EAA0B;AAEnD,IAAA,MAAM,CAAC,GAAGA,kBAAO,CAAC,IAAI,CAAC,IAAI,EAAE;AAC3B,QAAA,OAAO,EAAE,KAAK;AACf,KAAA,CAAC;;IAGF,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,KAAI;QAC1B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC;AAC/B,QAAA,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,EAAE;AACxB,YAAA,OAAO,CAAC,GAAG,CAAC,IAAI,EAAE;AAChB,gBAAA,WAAW,EAAE,IAAI;gBACjB,KAAK,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;gBAC1B,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE;AAC1B,aAAA,CAAC;;AAEN,KAAC,CAAC;IAEF,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,KAAI;QAC3B,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC;AAC7B,QAAA,IAAI,GAAG,IAAI,IAAI,IAAI,GAAG,EAAE;AACtB,YAAA,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE;AAChB,gBAAA,WAAW,EAAE,GAAG;AAChB,gBAAA,KAAK,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC
,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;AAChD,aAAA,CAAC;;AAEN,KAAC,CAAC;;IAGF,CAAC,CAAC,0DAA0D,CAAC,CAAC,IAAI,CAChE,CAAC,CAAC,EAAE,EAAE,KAAI;QACR,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC;AAC7B,QAAA,IAAI,GAAG,IAAI,IAAI,IAAI,GAAG,EAAE;AACtB,YAAA,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE;AAChB,gBAAA,WAAW,EAAE,GAAG;gBAChB,KAAK,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;AAC3B,aAAA,CAAC;;AAEN,KAAC,CACF;;IAGD,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,KAAI;QACzB,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC;QAC7B,IACE,GAAG,IAAI,IAAI;YACX,GAAG;AACH,YAAA,CAAC,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC;AACxB,YAAA,CAAC,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,EACtB;AACA,YAAA,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE;AACjB,gBAAA,WAAW,EAAE,GAAG;gBAChB,KAAK,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;AAC3B,aAAA,CAAC;;AAEN,KAAC,CAAC;;AAGF,IAAA,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB;AAC9C,IAAA,MAAM,aAAa,GAAG,IAAI,GAAG,EAAkB;AAC/C,IAAA,MAAM,aAAa,GAAG,IAAI,GAAG,EAAkB;AAC/C,IAAA,MAAM,cAAc,GAAG,IAAI,GAAG,EAAkB;AAEhD,IAAA,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,KAAK,YAAY,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;AAC5E,IAAA,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,KACzC,aAAa,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAC9B;AACD,IAAA,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,KACzC,aAAa,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAC9B;AACD,IAAA,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,KAC1C,cAAc,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAC/B;;IAGD,IAAI,MAAM,GAAG,QAAQ;;AAGrB,IAAA,MAAM,OAAO,GAAG;AACd,QAAA,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM;YAC3C,GAAG;AACH,YAAA,IAAI,EAAE,OAAO;AACb,YAAA,GAAG,EAAE,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC;AAC5B,SAAA,CAAC,CAAC;AACH,QAAA,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM;YAC3C,GAAG;AACH,YAAA,IAAI,EAAE,OA
AO;AACb,YAAA,GAAG,EAAE,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC;AAC5B,SAAA,CAAC,CAAC;AACH,QAAA,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM;YAC5C,GAAG;AACH,YAAA,IAAI,EAAE,QAAQ;AACd,YAAA,GAAG,EAAE,cAAc,CAAC,GAAG,CAAC,GAAG,CAAC;AAC7B,SAAA,CAAC,CAAC;AACH,QAAA,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM;YAC1C,GAAG;AACH,YAAA,IAAI,EAAE,MAAM;AACZ,YAAA,GAAG,EAAE,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC;AAC3B,SAAA,CAAC,CAAC;KACJ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC;;IAG7C,SAAS,WAAW,CAAC,MAAc,EAAA;QACjC,OAAO,MAAM,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC;;;IAItD,KAAK,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,OAAO,EAAE;;AAExC,QAAA,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,CAAM,GAAA,EAAA,WAAW,CAAC,GAAG,CAAC,CAAA,mBAAA,CAAqB,EAAE,GAAG,CAAC;QAE1E,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,KAAK,KAAI;;YAEvC,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,cAAc,CAAC;AAC9C,YAAA,MAAM,SAAS,GAAG,UAAU,GAAG,CAAK,EAAA,EAAA,UAAU,CAAC,CAAC,CAAC,CAAG,CAAA,CAAA,GAAG,EAAE;AAEzD,YAAA,OAAO,IAAI,IAAI,CAAA,CAAA,EAAI,GAAG,CAAG,EAAA,SAAS,GAAG;AACvC,SAAC,CAAC;;IAGJ,SAAS,CAAC,KAAK,EAAE;IACjB,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;IAC1C,OAAO,CAAC,KAAK,EAAE;IACf,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;IAC5C,QAAQ,CAAC,KAAK,EAAE;IAChB,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;IAC5C,QAAQ,CAAC,KAAK,EAAE;IAEhB,OAAO;AACL,QAAA,QAAQ,EAAE,MAAM;QAChB,KAAK;QACL,MAAM;QACN,MAAM;KACP;AACH;;;;"}
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var axios = require('axios');
|
|
4
|
+
var content = require('./content.cjs');
|
|
5
|
+
var utils = require('./utils.cjs');
|
|
6
|
+
|
|
7
|
+
/**
 * Firecrawl scraper implementation.
 * Uses the Firecrawl API to scrape web pages.
 */
class FirecrawlScraper {
    /** API key sent as a Bearer token; empty string when none is configured. */
    apiKey;
    /** Scrape endpoint: the configured base URL (trailing slashes removed) + `/v1/scrape`. */
    apiUrl;
    /** Formats requested when a call does not pass its own `formats`. */
    defaultFormats;
    /** Default timeout used when a call does not pass its own `timeout`. */
    timeout;
    /** Logger used for warnings, debug output and processing errors. */
    logger;
    /**
     * @param config Scraper configuration. Every field is optional:
     *   `apiKey` (falls back to `FIRECRAWL_API_KEY`), `apiUrl` (falls back to
     *   `FIRECRAWL_BASE_URL`, then `https://api.firecrawl.dev`), `formats`,
     *   `timeout` and `logger`.
     */
    constructor(config = {}) {
        this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';
        const baseUrl = config.apiUrl ??
            process.env.FIRECRAWL_BASE_URL ??
            'https://api.firecrawl.dev';
        // Strip trailing slashes so the joined path never contains `//v1`.
        this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/v1/scrape`;
        this.defaultFormats = config.formats ?? ['markdown', 'html'];
        this.timeout = config.timeout ?? 15000;
        this.logger = config.logger || utils.createDefaultLogger();
        if (!this.apiKey) {
            this.logger.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');
        }
        this.logger.debug(`Firecrawl scraper initialized with API URL: ${this.apiUrl}`);
    }
    /**
     * Scrape a single URL.
     *
     * Never rejects: a missing API key or a failed request is reported as a
     * `{ success: false, error }` response instead.
     *
     * @param url URL to scrape
     * @param options Scrape options (`formats`, `includeTags`, `excludeTags`,
     *   `headers`, `waitFor`, `timeout`)
     * @returns A `[url, response]` tuple
     */
    async scrapeUrl(url, options = {}) {
        if (!this.apiKey) {
            return [
                url,
                {
                    success: false,
                    error: 'FIRECRAWL_API_KEY is not set',
                },
            ];
        }
        // Resolve the timeout once so the value sent to Firecrawl and the HTTP
        // client timeout agree; otherwise a per-call timeout larger than the
        // instance default would be cut short by the client.
        const timeout = options.timeout ?? this.timeout;
        try {
            const response = await axios.post(this.apiUrl, {
                url,
                formats: options.formats || this.defaultFormats,
                includeTags: options.includeTags,
                excludeTags: options.excludeTags,
                headers: options.headers,
                waitFor: options.waitFor,
                timeout,
            }, {
                headers: {
                    'Content-Type': 'application/json',
                    Authorization: `Bearer ${this.apiKey}`,
                },
                timeout,
            });
            return [url, response.data];
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            return [
                url,
                {
                    success: false,
                    error: `Firecrawl API request failed: ${errorMessage}`,
                },
            ];
        }
    }
    /**
     * Extract content from scrape response.
     *
     * Preference order: processed markdown (when both markdown and HTML are
     * present), plain markdown, HTML, raw HTML.
     *
     * @param response Scrape response
     * @returns `[content, references]`; content is an empty string when nothing
     *   is available, references are only set when content processing succeeded
     */
    extractContent(response) {
        if (!response.success || !response.data) {
            return ['', undefined];
        }
        const { markdown, html, rawHtml } = response.data;
        if (markdown != null && html != null) {
            try {
                const { markdown: processed, ...rest } = content.processContent(html, markdown);
                return [processed, rest];
            }
            catch (error) {
                // Best effort: fall back to the unprocessed markdown.
                this.logger.error('Error processing content:', error);
                return [markdown, undefined];
            }
        }
        if (markdown != null) {
            return [markdown, undefined];
        }
        // Fall back to HTML content
        if (html != null) {
            return [html, undefined];
        }
        // Fall back to raw HTML content
        if (rawHtml != null) {
            return [rawHtml, undefined];
        }
        return ['', undefined];
    }
    /**
     * Extract metadata from scrape response.
     * @param response Scrape response
     * @returns The response's metadata object, or an empty object when the
     *   response failed or carries no metadata
     */
    extractMetadata(response) {
        if (!response.success || !response.data || !response.data.metadata) {
            return {};
        }
        return response.data.metadata;
    }
}
|
|
120
|
+
/**
 * Factory for {@link FirecrawlScraper}.
 * @param config Scraper configuration, forwarded unchanged to the constructor
 * @returns A new Firecrawl scraper instance
 */
const createFirecrawlScraper = (config = {}) => new FirecrawlScraper(config);
|
|
128
|
+
|
|
129
|
+
exports.FirecrawlScraper = FirecrawlScraper;
|
|
130
|
+
exports.createFirecrawlScraper = createFirecrawlScraper;
|
|
131
|
+
//# sourceMappingURL=firecrawl.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"firecrawl.cjs","sources":["../../../../src/tools/search/firecrawl.ts"],"sourcesContent":["import axios from 'axios';\nimport { processContent } from './content';\nimport type * as t from './types';\nimport { createDefaultLogger } from './utils';\n\n/**\n * Firecrawl scraper implementation\n * Uses the Firecrawl API to scrape web pages\n */\nexport class FirecrawlScraper {\n private apiKey: string;\n private apiUrl: string;\n private defaultFormats: string[];\n private timeout: number;\n private logger: t.Logger;\n\n constructor(config: t.FirecrawlScraperConfig = {}) {\n this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';\n\n const baseUrl =\n config.apiUrl ??\n process.env.FIRECRAWL_BASE_URL ??\n 'https://api.firecrawl.dev';\n this.apiUrl = `${baseUrl.replace(/\\/+$/, '')}/v1/scrape`;\n\n this.defaultFormats = config.formats ?? ['markdown', 'html'];\n this.timeout = config.timeout ?? 15000;\n\n this.logger = config.logger || createDefaultLogger();\n\n if (!this.apiKey) {\n this.logger.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');\n }\n\n this.logger.debug(\n `Firecrawl scraper initialized with API URL: ${this.apiUrl}`\n );\n }\n\n /**\n * Scrape a single URL\n * @param url URL to scrape\n * @param options Scrape options\n * @returns Scrape response\n */\n async scrapeUrl(\n url: string,\n options: t.FirecrawlScrapeOptions = {}\n ): Promise<[string, t.FirecrawlScrapeResponse]> {\n if (!this.apiKey) {\n return [\n url,\n {\n success: false,\n error: 'FIRECRAWL_API_KEY is not set',\n },\n ];\n }\n\n try {\n const response = await axios.post(\n this.apiUrl,\n {\n url,\n formats: options.formats || this.defaultFormats,\n includeTags: options.includeTags,\n excludeTags: options.excludeTags,\n headers: options.headers,\n waitFor: options.waitFor,\n timeout: options.timeout ?? 
this.timeout,\n },\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n timeout: this.timeout,\n }\n );\n\n return [url, response.data];\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return [\n url,\n {\n success: false,\n error: `Firecrawl API request failed: ${errorMessage}`,\n },\n ];\n }\n }\n\n /**\n * Extract content from scrape response\n * @param response Scrape response\n * @returns Extracted content or empty string if not available\n */\n extractContent(\n response: t.FirecrawlScrapeResponse\n ): [string, undefined | t.References] {\n if (!response.success || !response.data) {\n return ['', undefined];\n }\n\n if (response.data.markdown != null && response.data.html != null) {\n try {\n const { markdown, ...rest } = processContent(\n response.data.html,\n response.data.markdown\n );\n return [markdown, rest];\n } catch (error) {\n this.logger.error('Error processing content:', error);\n return [response.data.markdown, undefined];\n }\n } else if (response.data.markdown != null) {\n return [response.data.markdown, undefined];\n }\n\n // Fall back to HTML content\n if (response.data.html != null) {\n return [response.data.html, undefined];\n }\n\n // Fall back to raw HTML content\n if (response.data.rawHtml != null) {\n return [response.data.rawHtml, undefined];\n }\n\n return ['', undefined];\n }\n\n /**\n * Extract metadata from scrape response\n * @param response Scrape response\n * @returns Metadata object\n */\n extractMetadata(response: t.FirecrawlScrapeResponse): t.ScrapeMetadata {\n if (!response.success || !response.data || !response.data.metadata) {\n return {};\n }\n\n return response.data.metadata;\n }\n}\n\n/**\n * Create a Firecrawl scraper instance\n * @param config Scraper configuration\n * @returns Firecrawl scraper instance\n */\nexport const createFirecrawlScraper = (\n config: t.FirecrawlScraperConfig = {}\n): FirecrawlScraper => 
{\n return new FirecrawlScraper(config);\n};\n"],"names":["createDefaultLogger","processContent"],"mappings":";;;;;;AAKA;;;AAGG;MACU,gBAAgB,CAAA;AACnB,IAAA,MAAM;AACN,IAAA,MAAM;AACN,IAAA,cAAc;AACd,IAAA,OAAO;AACP,IAAA,MAAM;AAEd,IAAA,WAAA,CAAY,SAAmC,EAAE,EAAA;AAC/C,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,EAAE;AAElE,QAAA,MAAM,OAAO,GACX,MAAM,CAAC,MAAM;YACb,OAAO,CAAC,GAAG,CAAC,kBAAkB;AAC9B,YAAA,2BAA2B;AAC7B,QAAA,IAAI,CAAC,MAAM,GAAG,CAAA,EAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY;AAExD,QAAA,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC;QAC5D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,KAAK;QAEtC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAIA,yBAAmB,EAAE;AAEpD,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;AAChB,YAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,uDAAuD,CAAC;;QAG3E,IAAI,CAAC,MAAM,CAAC,KAAK,CACf,CAA+C,4CAAA,EAAA,IAAI,CAAC,MAAM,CAAE,CAAA,CAC7D;;AAGH;;;;;AAKG;AACH,IAAA,MAAM,SAAS,CACb,GAAW,EACX,UAAoC,EAAE,EAAA;AAEtC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;YAChB,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;AACd,oBAAA,KAAK,EAAE,8BAA8B;AACtC,iBAAA;aACF;;AAGH,QAAA,IAAI;YACF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,IAAI,CAAC,MAAM,EACX;gBACE,GAAG;AACH,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc;gBAC/C,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,OAAO,EAAE,OAAO,CAAC,OAAO;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO;AACxB,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;aACzC,EACD;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;gBACD,OAAO,EAAE,IAAI,CAAC,OAAO;AACtB,aAAA,CACF;AAED,YAAA,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,CAAC;;QAC3B,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAiC,8BAAA,EAAA,YAAY,CAAE,CAAA;AACvD,iBAAA;aACF;;;AAIL;;;;AAIG;AACH,IAAA,cAAc,CACZ,QAAmC,EAAA;QAEnC,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,EAA
E;AACvC,YAAA,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC;;AAGxB,QAAA,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;AAChE,YAAA,IAAI;gBACF,MAAM,EAAE,QAAQ,EAAE,GAAG,IAAI,EAAE,GAAGC,sBAAc,CAC1C,QAAQ,CAAC,IAAI,CAAC,IAAI,EAClB,QAAQ,CAAC,IAAI,CAAC,QAAQ,CACvB;AACD,gBAAA,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC;;YACvB,OAAO,KAAK,EAAE;gBACd,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,2BAA2B,EAAE,KAAK,CAAC;gBACrD,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;;;aAEvC,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,EAAE;YACzC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;;;QAI5C,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;YAC9B,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,SAAS,CAAC;;;QAIxC,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,EAAE;YACjC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC;;AAG3C,QAAA,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC;;AAGxB;;;;AAIG;AACH,IAAA,eAAe,CAAC,QAAmC,EAAA;AACjD,QAAA,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE;AAClE,YAAA,OAAO,EAAE;;AAGX,QAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ;;AAEhC;AAED;;;;AAIG;MACU,sBAAsB,GAAG,CACpC,MAAmC,GAAA,EAAE,KACjB;AACpB,IAAA,OAAO,IAAI,gBAAgB,CAAC,MAAM,CAAC;AACrC;;;;;"}
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var utils = require('./utils.cjs');
|
|
4
|
+
|
|
5
|
+
/**
 * Lines that open the highlights part of a formatted source: the heading
 * (with a leading newline) followed by an empty line.
 * @returns {string[]} A fresh two-element array on every call
 */
function addHighlightSection() {
    const heading = '\n## Highlights';
    const spacer = '';
    return [heading, spacer];
}
|
|
8
|
+
/**
 * Format a single source (organic result or top story) as text.
 *
 * @param source Source to format; reads `title`, `link` and the optional
 *   `snippet`, `date`, `attribution` and `highlights` fields
 * @param index Position of the source within its result list (used in the anchor)
 * @param turn Turn number embedded in the anchors
 * @param sourceType Label such as 'search' or 'news'; capitalised for the heading
 * @param references Shared accumulator; every non-mailto highlight reference is
 *   appended to it (the array is mutated)
 * @returns The formatted block, lines joined with '\n'
 */
function formatSource(source, index, turn, sourceType, references) {
    /** Array of all lines to include in the output */
    const outputLines = [];
    // Add the title
    const label = sourceType.charAt(0).toUpperCase() + sourceType.slice(1);
    const title = source.title != null && source.title ? `"${source.title}"` : '(no title)';
    outputLines.push(`# ${label} ${index}: ${title}`);
    outputLines.push(`\nAnchor: \\ue202turn${turn}${sourceType}${index}`);
    outputLines.push(`URL: ${source.link}`);
    // Add optional fields
    if ('snippet' in source && source.snippet != null) {
        outputLines.push(`Summary: ${source.snippet}`);
    }
    if (source.date != null) {
        outputLines.push(`Date: ${source.date}`);
    }
    if (source.attribution != null) {
        outputLines.push(`Source: ${source.attribution}`);
    }
    // Filter once: the header and the separators must be based on the
    // highlights that are actually rendered, not on the unfiltered list.
    const highlights = (source.highlights ?? []).filter((h) => h.text.trim().length > 0);
    // Add highlight section or empty line
    if (highlights.length > 0) {
        outputLines.push(...addHighlightSection());
    }
    else {
        outputLines.push('');
    }
    highlights.forEach((h, hIndex) => {
        outputLines.push(`### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]`);
        outputLines.push('');
        outputLines.push('```text');
        outputLines.push(h.text.trim());
        outputLines.push('```');
        outputLines.push('');
        if (h.references != null && h.references.length) {
            const refLines = [];
            for (const ref of h.references) {
                const { originalUrl } = ref.reference;
                if (originalUrl.includes('mailto:')) {
                    continue;
                }
                // Every non-mailto reference is collected, even if it is not listed below.
                references.push({
                    type: ref.type,
                    link: originalUrl,
                    attribution: utils.getDomainName(originalUrl),
                    title: (((ref.reference.title ?? '') || ref.reference.text) ??
                        '').split('\n')[0],
                });
                // Only plain links that do not match the file-extension pattern are listed.
                if (ref.type !== 'link') {
                    continue;
                }
                // NOTE(review): if utils.fileExtRegex carries a g/y flag, .test() is
                // stateful across calls - confirm against its definition.
                if (utils.fileExtRegex.test(originalUrl)) {
                    continue;
                }
                if (refLines.length === 0) {
                    refLines.push('Core References:');
                }
                refLines.push(`- ${ref.type}#${ref.originalIndex + 1}: ${originalUrl}`);
                // The anchor points at the entry just pushed onto `references`.
                refLines.push(`\t- Anchor: \\ue202turn${turn}ref${references.length - 1}`);
            }
            if (refLines.length > 0) {
                outputLines.push(...refLines);
                outputLines.push('');
            }
        }
        // Separator between rendered highlights only (none after the last one).
        if (hIndex < highlights.length - 1) {
            outputLines.push('---');
            outputLines.push('');
        }
    });
    outputLines.push('');
    return outputLines.join('\n');
}
|
|
84
|
+
/**
 * Render search results as a single text document.
 *
 * Sections are emitted in this order when present: web results, news results,
 * knowledge graph, answer box, people-also-ask.
 *
 * Side effect: after a source has been formatted, its `highlights` property is
 * deleted from the entry in `results.organic` / `results.topStories`.
 *
 * @param turn Turn number embedded in headings and anchors
 * @param results Search results; reads `organic`, `topStories`,
 *   `knowledgeGraph`, `answerBox` and `peopleAlsoAsk`
 * @returns `{ output, references }` - the trimmed text and the references
 *   collected while formatting the sources
 */
function formatResultsForLLM(turn, results) {
    /** Array to collect all output lines */
    const outputLines = [];
    const addSection = (title) => {
        outputLines.push('');
        outputLines.push(`=== ${title} ===`);
        outputLines.push('');
    };
    const references = [];
    // Organic (web) results
    const organic = results.organic ?? [];
    if (organic.length > 0) {
        addSection(`Web Results, Turn ${turn}`);
        for (let i = 0; i < organic.length; i++) {
            outputLines.push(formatSource(organic[i], i, turn, 'search', references));
            delete organic[i].highlights;
        }
    }
    // Top stories (news)
    const topStories = results.topStories ?? [];
    if (topStories.length) {
        addSection('News Results');
        for (let i = 0; i < topStories.length; i++) {
            outputLines.push(formatSource(topStories[i], i, turn, 'news', references));
            if (topStories[i]?.highlights) {
                delete topStories[i].highlights;
            }
        }
    }
    // // Images
    // const images = results.images ?? [];
    // if (images.length) {
    //   addSection('Image Results');
    //   const imageLines = images.map((img, i) => [
    //     `Anchor: \ue202turn0image${i}`,
    //     `Title: ${img.title ?? '(no title)'}`,
    //     `Image URL: ${img.imageUrl}`,
    //     ''
    //   ].join('\n'));
    //   outputLines.push(imageLines.join('\n'));
    // }
    // Knowledge Graph
    const kg = results.knowledgeGraph;
    if (kg != null) {
        addSection('Knowledge Graph');
        // Absent fields become '' and are dropped by filter(Boolean).
        const kgLines = [
            `**Title:** ${kg.title ?? '(no title)'}`,
            kg.type != null ? `**Type:** ${kg.type}` : '',
            kg.description != null ? `**Description:** ${kg.description}` : '',
            kg.descriptionSource != null
                ? `**Description Source:** ${kg.descriptionSource}`
                : '',
            kg.descriptionLink != null
                ? `**Description Link:** ${kg.descriptionLink}`
                : '',
            kg.imageUrl != null ? `**Image URL:** ${kg.imageUrl}` : '',
            kg.website != null ? `**Website:** ${kg.website}` : '',
            kg.attributes != null
                ? `**Attributes:**\n\`\`\`json\n${JSON.stringify(kg.attributes, null, 2)}\n\`\`\``
                : '',
        ].filter(Boolean);
        outputLines.push(kgLines.join('\n\n'));
    }
    // Answer Box
    const answerBox = results.answerBox;
    if (answerBox != null) {
        addSection('Answer Box');
        const abLines = [
            answerBox.title != null ? `**Title:** ${answerBox.title}` : '',
            answerBox.snippet != null ? `**Snippet:** ${answerBox.snippet}` : '',
            answerBox.snippetHighlighted != null
                ? `**Snippet Highlighted:** ${answerBox.snippetHighlighted
                    .map((s) => `\`${s}\``)
                    .join(' ')}`
                : '',
            answerBox.link != null ? `**Link:** ${answerBox.link}` : '',
        ].filter(Boolean);
        outputLines.push(abLines.join('\n\n'));
    }
    // People also ask
    const peopleAlsoAsk = results.peopleAlsoAsk ?? [];
    if (peopleAlsoAsk.length) {
        addSection('People Also Ask');
        const paaBlocks = peopleAlsoAsk.map((p, i) => [
            `### Question ${i + 1}:`,
            `"${p.question}"`,
            p.snippet ? `Snippet: ${p.snippet}` : '',
            p.title ? `Title: ${p.title}` : '',
            p.link ? `Link: ${p.link}` : '',
        ].filter(Boolean).join('\n\n'));
        // Separate questions with a blank line; joining with '' glued the last
        // line of one question onto the heading of the next.
        outputLines.push(paaBlocks.join('\n\n'));
    }
    return {
        output: outputLines.join('\n').trim(),
        references,
    };
}
|
|
201
|
+
|
|
202
|
+
exports.formatResultsForLLM = formatResultsForLLM;
|
|
203
|
+
//# sourceMappingURL=format.cjs.map
|