@librechat/agents 2.4.317 → 2.4.319
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/events.cjs +3 -3
- package/dist/cjs/events.cjs.map +1 -1
- package/dist/cjs/main.cjs +5 -2
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/ids.cjs +23 -0
- package/dist/cjs/messages/ids.cjs.map +1 -0
- package/dist/cjs/stream.cjs +8 -155
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/tools/handlers.cjs +144 -0
- package/dist/cjs/tools/handlers.cjs.map +1 -0
- package/dist/cjs/tools/search/content.cjs +140 -0
- package/dist/cjs/tools/search/content.cjs.map +1 -0
- package/dist/cjs/tools/search/firecrawl.cjs +17 -37
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
- package/dist/cjs/tools/search/format.cjs +79 -29
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/highlights.cjs +64 -13
- package/dist/cjs/tools/search/highlights.cjs.map +1 -1
- package/dist/cjs/tools/search/search.cjs +13 -15
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +42 -12
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs +35 -0
- package/dist/cjs/tools/search/utils.cjs.map +1 -0
- package/dist/esm/events.mjs +1 -1
- package/dist/esm/events.mjs.map +1 -1
- package/dist/esm/main.mjs +3 -1
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/ids.mjs +21 -0
- package/dist/esm/messages/ids.mjs.map +1 -0
- package/dist/esm/stream.mjs +7 -152
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/tools/handlers.mjs +141 -0
- package/dist/esm/tools/handlers.mjs.map +1 -0
- package/dist/esm/tools/search/content.mjs +119 -0
- package/dist/esm/tools/search/content.mjs.map +1 -0
- package/dist/esm/tools/search/firecrawl.mjs +18 -37
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
- package/dist/esm/tools/search/format.mjs +79 -29
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/highlights.mjs +64 -13
- package/dist/esm/tools/search/highlights.mjs.map +1 -1
- package/dist/esm/tools/search/search.mjs +12 -14
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +42 -12
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs +32 -0
- package/dist/esm/tools/search/utils.mjs.map +1 -0
- package/dist/types/index.d.ts +1 -0
- package/dist/types/messages/ids.d.ts +3 -0
- package/dist/types/messages/index.d.ts +1 -0
- package/dist/types/stream.d.ts +0 -8
- package/dist/types/tools/handlers.d.ts +8 -0
- package/dist/types/tools/search/content.d.ts +4 -0
- package/dist/types/tools/search/firecrawl.d.ts +6 -86
- package/dist/types/tools/search/format.d.ts +4 -1
- package/dist/types/tools/search/highlights.d.ts +1 -1
- package/dist/types/tools/search/search.d.ts +1 -1
- package/dist/types/tools/search/test.d.ts +1 -0
- package/dist/types/tools/search/tool.d.ts +12 -4
- package/dist/types/tools/search/types.d.ts +388 -53
- package/dist/types/tools/search/utils.d.ts +3 -0
- package/package.json +2 -1
- package/src/events.ts +49 -15
- package/src/index.ts +1 -0
- package/src/messages/ids.ts +26 -0
- package/src/messages/index.ts +1 -0
- package/src/scripts/search.ts +5 -3
- package/src/stream.ts +4 -186
- package/src/tools/handlers.ts +167 -0
- package/src/tools/search/content.test.ts +173 -0
- package/src/tools/search/content.ts +147 -0
- package/src/tools/search/firecrawl.ts +27 -144
- package/src/tools/search/format.ts +89 -31
- package/src/tools/search/highlights.ts +99 -17
- package/src/tools/search/output.md +2775 -0
- package/src/tools/search/search.ts +42 -54
- package/src/tools/search/test.html +884 -0
- package/src/tools/search/test.md +643 -0
- package/src/tools/search/test.ts +159 -0
- package/src/tools/search/tool.ts +52 -15
- package/src/tools/search/types.ts +439 -61
- package/src/tools/search/utils.ts +43 -0
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var nanoid = require('nanoid');
|
|
4
|
+
var _enum = require('../common/enum.cjs');
|
|
5
|
+
require('../messages/core.cjs');
|
|
6
|
+
var ids = require('../messages/ids.cjs');
|
|
7
|
+
require('@langchain/core/messages');
|
|
8
|
+
|
|
9
|
+
/* eslint-disable no-console */
|
|
10
|
+
// src/tools/handlers.ts
|
|
11
|
+
/**
 * Routes a batch of streamed tool-call chunks onto the graph's run steps.
 *
 * The most recent step for `stepKey` is looked up first; when that lookup
 * throws, a MESSAGE_CREATION step is dispatched and used instead. If that
 * step is a message-creation step that has not been flagged in
 * `graph.messageStepHasToolCalls`, and every chunk carries both an `id` and
 * a `name`, the tool call ids are attached to the message step and a
 * TOOL_CALLS run step is dispatched. The raw chunks are always forwarded as
 * a run-step delta at the end.
 *
 * Note: chunks are mutated in place; empty-string `id` / `name` values are
 * replaced with `undefined`.
 *
 * @param {object} params
 * @param {object} params.graph - Graph exposing the step lookup/dispatch methods used below.
 * @param {string} params.stepKey - Key identifying the step sequence on the graph.
 * @param {Array<object>} params.toolCallChunks - Streamed tool-call chunks.
 * @returns {void}
 */
function handleToolCallChunks({ graph, stepKey, toolCallChunks }) {
    let anchorStepId;
    let anchorStep;
    try {
        anchorStepId = graph.getStepIdByKey(stepKey, graph.contentData.length - 1);
        anchorStep = graph.getRunStep(anchorStepId);
    }
    catch {
        /** Edge Case: If no previous step exists, create a new message creation step */
        anchorStepId = graph.dispatchRunStep(stepKey, {
            type: _enum.StepTypes.MESSAGE_CREATION,
            message_creation: {
                message_id: ids.getMessageId(stepKey, graph, true) ?? '',
            },
        });
        anchorStep = graph.getRunStep(anchorStepId);
    }

    // Delta target by default; replaced below when a TOOL_CALLS step is dispatched.
    let targetStepId = graph.getStepIdByKey(stepKey, anchorStep?.index);
    const anchorIsMessage = Boolean(anchorStep) &&
        anchorStep.type === _enum.StepTypes.MESSAGE_CREATION;

    /** Edge Case: Tool Call Run Step or `tool_call_ids` never dispatched */
    const pendingCalls = Boolean(anchorStepId) && anchorIsMessage ? [] : undefined;

    /** Edge Case: `id` and `name` fields cannot be empty strings */
    for (const chunk of toolCallChunks) {
        if (chunk.name === '') {
            chunk.name = undefined;
        }
        if (chunk.id === '') {
            chunk.id = undefined;
            continue;
        }
        if (pendingCalls == null || chunk.id == null || chunk.name == null) {
            continue;
        }
        pendingCalls.push({
            args: {},
            id: chunk.id,
            name: chunk.name,
            type: _enum.ToolCallTypes.TOOL_CALL,
        });
    }

    const wasDispatched = anchorIsMessage && graph.messageStepHasToolCalls.has(anchorStepId);
    // Only open a TOOL_CALLS step when every chunk produced a complete call.
    const everyChunkComplete = pendingCalls?.length === toolCallChunks.length;
    if (!wasDispatched && everyChunkComplete) {
        const toolCallIds = pendingCalls.map((call) => call.id ?? '');
        graph.dispatchMessageDelta(anchorStepId, {
            content: [
                { type: _enum.ContentTypes.TEXT, text: '', tool_call_ids: toolCallIds },
            ],
        });
        graph.messageStepHasToolCalls.set(anchorStepId, true);
        targetStepId = graph.dispatchRunStep(stepKey, {
            type: _enum.StepTypes.TOOL_CALLS,
            tool_calls: pendingCalls,
        });
    }

    graph.dispatchRunStepDelta(targetStepId, {
        type: _enum.StepTypes.TOOL_CALLS,
        tool_calls: toolCallChunks,
    });
}
|
|
77
|
+
/**
 * Dispatches run steps for a list of complete tool calls.
 *
 * For each tool call that is not already present in `graph.toolCallStepIds`:
 *  - a missing `id` is replaced with a generated `toolu_<nanoid>` id
 *    (the tool call object is mutated in place);
 *  - the tool call id is attached to a MESSAGE_CREATION step: the most
 *    recent step when it is one, otherwise a newly dispatched one;
 *  - that message step is flagged in `graph.messageStepHasToolCalls`;
 *  - a TOOL_CALLS run step carrying the tool call is dispatched.
 *
 * @param {Array<object>} [toolCalls] - Tool calls to dispatch; nothing happens when empty or absent.
 * @param {object} [metadata] - Metadata passed to `graph.getStepKey`.
 * @param {object} [graph] - Graph exposing the step lookup/dispatch methods used below.
 * @returns {void}
 */
const handleToolCalls = (toolCalls, metadata, graph) => {
    if (!graph || !metadata) {
        // The previous message interpolated an `event` identifier that is not
        // defined in this function or module, which raised a ReferenceError
        // instead of logging the warning.
        console.warn('Graph or metadata not found in tool calls handler');
        return;
    }
    if (!toolCalls || toolCalls.length === 0) {
        return;
    }
    const stepKey = graph.getStepKey(metadata);
    for (const tool_call of toolCalls) {
        const toolCallId = tool_call.id ?? `toolu_${nanoid.nanoid()}`;
        tool_call.id = toolCallId;
        // Skip empty ids and ids already present in `graph.toolCallStepIds`.
        if (!toolCallId || graph.toolCallStepIds.has(toolCallId)) {
            continue;
        }
        let prevStepId = '';
        let prevRunStep;
        try {
            prevStepId = graph.getStepIdByKey(stepKey, graph.contentData.length - 1);
            prevRunStep = graph.getRunStep(prevStepId);
        }
        catch {
            // Deliberate best-effort: a failed lookup is treated as "no previous step".
        }
        const dispatchToolCallIds = (lastMessageStepId) => {
            graph.dispatchMessageDelta(lastMessageStepId, {
                content: [
                    {
                        type: 'text',
                        text: '',
                        tool_call_ids: [toolCallId],
                    },
                ],
            });
        };
        /* If the previous step exists and is a message creation */
        if (prevStepId &&
            prevRunStep &&
            prevRunStep.type === _enum.StepTypes.MESSAGE_CREATION) {
            dispatchToolCallIds(prevStepId);
            graph.messageStepHasToolCalls.set(prevStepId, true);
            /* If the previous step doesn't exist or is not a message creation */
        }
        else if (!prevRunStep ||
            prevRunStep.type !== _enum.StepTypes.MESSAGE_CREATION) {
            const messageId = ids.getMessageId(stepKey, graph, true) ?? '';
            const stepId = graph.dispatchRunStep(stepKey, {
                type: _enum.StepTypes.MESSAGE_CREATION,
                message_creation: {
                    message_id: messageId,
                },
            });
            dispatchToolCallIds(stepId);
            // Flag the message step that actually received the tool call ids.
            // `prevStepId` here is either '' or a non-message step, so flagging
            // it (as before) never marked the right step.
            graph.messageStepHasToolCalls.set(stepId, true);
        }
        graph.dispatchRunStep(stepKey, {
            type: _enum.StepTypes.TOOL_CALLS,
            tool_calls: [tool_call],
        });
    }
};
|
|
141
|
+
|
|
142
|
+
exports.handleToolCallChunks = handleToolCallChunks;
|
|
143
|
+
exports.handleToolCalls = handleToolCalls;
|
|
144
|
+
//# sourceMappingURL=handlers.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"handlers.cjs","sources":["../../../src/tools/handlers.ts"],"sourcesContent":["/* eslint-disable no-console */\n// src/tools/handlers.ts\nimport { nanoid } from 'nanoid';\nimport type { ToolCall, ToolCallChunk } from '@langchain/core/messages/tool';\nimport type { Graph } from '@/graphs';\nimport type * as t from '@/types';\nimport { StepTypes, ContentTypes, ToolCallTypes } from '@/common';\nimport { getMessageId } from '@/messages';\n\nexport function handleToolCallChunks({\n graph,\n stepKey,\n toolCallChunks,\n}: {\n graph: Graph;\n stepKey: string;\n toolCallChunks: ToolCallChunk[];\n}): void {\n let prevStepId: string;\n let prevRunStep: t.RunStep | undefined;\n try {\n prevStepId = graph.getStepIdByKey(stepKey, graph.contentData.length - 1);\n prevRunStep = graph.getRunStep(prevStepId);\n } catch {\n /** Edge Case: If no previous step exists, create a new message creation step */\n const message_id = getMessageId(stepKey, graph, true) ?? '';\n prevStepId = graph.dispatchRunStep(stepKey, {\n type: StepTypes.MESSAGE_CREATION,\n message_creation: {\n message_id,\n },\n });\n prevRunStep = graph.getRunStep(prevStepId);\n }\n\n const _stepId = graph.getStepIdByKey(stepKey, prevRunStep?.index);\n\n /** Edge Case: Tool Call Run Step or `tool_call_ids` never dispatched */\n const tool_calls: ToolCall[] | undefined =\n prevStepId && prevRunStep && prevRunStep.type === StepTypes.MESSAGE_CREATION\n ? 
[]\n : undefined;\n\n /** Edge Case: `id` and `name` fields cannot be empty strings */\n for (const toolCallChunk of toolCallChunks) {\n if (toolCallChunk.name === '') {\n toolCallChunk.name = undefined;\n }\n if (toolCallChunk.id === '') {\n toolCallChunk.id = undefined;\n } else if (\n tool_calls != null &&\n toolCallChunk.id != null &&\n toolCallChunk.name != null\n ) {\n tool_calls.push({\n args: {},\n id: toolCallChunk.id,\n name: toolCallChunk.name,\n type: ToolCallTypes.TOOL_CALL,\n });\n }\n }\n\n let stepId: string = _stepId;\n const alreadyDispatched =\n prevRunStep?.type === StepTypes.MESSAGE_CREATION &&\n graph.messageStepHasToolCalls.has(prevStepId);\n if (!alreadyDispatched && tool_calls?.length === toolCallChunks.length) {\n graph.dispatchMessageDelta(prevStepId, {\n content: [\n {\n type: ContentTypes.TEXT,\n text: '',\n tool_call_ids: tool_calls.map((tc) => tc.id ?? ''),\n },\n ],\n });\n graph.messageStepHasToolCalls.set(prevStepId, true);\n stepId = graph.dispatchRunStep(stepKey, {\n type: StepTypes.TOOL_CALLS,\n tool_calls,\n });\n }\n graph.dispatchRunStepDelta(stepId, {\n type: StepTypes.TOOL_CALLS,\n tool_calls: toolCallChunks,\n });\n}\n\nexport const handleToolCalls = (\n toolCalls?: ToolCall[],\n metadata?: Record<string, unknown>,\n graph?: Graph\n): void => {\n if (!graph || !metadata) {\n console.warn(`Graph or metadata not found in ${event} event`);\n return;\n }\n\n if (!toolCalls) {\n return;\n }\n\n if (toolCalls.length === 0) {\n return;\n }\n\n const stepKey = graph.getStepKey(metadata);\n\n for (const tool_call of toolCalls) {\n const toolCallId = tool_call.id ?? 
`toolu_${nanoid()}`;\n tool_call.id = toolCallId;\n if (!toolCallId || graph.toolCallStepIds.has(toolCallId)) {\n continue;\n }\n\n let prevStepId = '';\n let prevRunStep: t.RunStep | undefined;\n try {\n prevStepId = graph.getStepIdByKey(stepKey, graph.contentData.length - 1);\n prevRunStep = graph.getRunStep(prevStepId);\n } catch {\n // no previous step\n }\n\n const dispatchToolCallIds = (lastMessageStepId: string): void => {\n graph.dispatchMessageDelta(lastMessageStepId, {\n content: [\n {\n type: 'text',\n text: '',\n tool_call_ids: [toolCallId],\n },\n ],\n });\n };\n /* If the previous step exists and is a message creation */\n if (\n prevStepId &&\n prevRunStep &&\n prevRunStep.type === StepTypes.MESSAGE_CREATION\n ) {\n dispatchToolCallIds(prevStepId);\n graph.messageStepHasToolCalls.set(prevStepId, true);\n /* If the previous step doesn't exist or is not a message creation */\n } else if (\n !prevRunStep ||\n prevRunStep.type !== StepTypes.MESSAGE_CREATION\n ) {\n const messageId = getMessageId(stepKey, graph, true) ?? 
'';\n const stepId = graph.dispatchRunStep(stepKey, {\n type: StepTypes.MESSAGE_CREATION,\n message_creation: {\n message_id: messageId,\n },\n });\n dispatchToolCallIds(stepId);\n graph.messageStepHasToolCalls.set(prevStepId, true);\n }\n\n graph.dispatchRunStep(stepKey, {\n type: StepTypes.TOOL_CALLS,\n tool_calls: [tool_call],\n });\n }\n};\n"],"names":["getMessageId","StepTypes","ToolCallTypes","ContentTypes","nanoid"],"mappings":";;;;;;;;AAAA;AACA;AAQM,SAAU,oBAAoB,CAAC,EACnC,KAAK,EACL,OAAO,EACP,cAAc,GAKf,EAAA;AACC,IAAA,IAAI,UAAkB;AACtB,IAAA,IAAI,WAAkC;AACtC,IAAA,IAAI;AACF,QAAA,UAAU,GAAG,KAAK,CAAC,cAAc,CAAC,OAAO,EAAE,KAAK,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC;AACxE,QAAA,WAAW,GAAG,KAAK,CAAC,UAAU,CAAC,UAAU,CAAC;;AAC1C,IAAA,MAAM;;AAEN,QAAA,MAAM,UAAU,GAAGA,gBAAY,CAAC,OAAO,EAAE,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE;AAC3D,QAAA,UAAU,GAAG,KAAK,CAAC,eAAe,CAAC,OAAO,EAAE;YAC1C,IAAI,EAAEC,eAAS,CAAC,gBAAgB;AAChC,YAAA,gBAAgB,EAAE;gBAChB,UAAU;AACX,aAAA;AACF,SAAA,CAAC;AACF,QAAA,WAAW,GAAG,KAAK,CAAC,UAAU,CAAC,UAAU,CAAC;;AAG5C,IAAA,MAAM,OAAO,GAAG,KAAK,CAAC,cAAc,CAAC,OAAO,EAAE,WAAW,EAAE,KAAK,CAAC;;AAGjE,IAAA,MAAM,UAAU,GACd,UAAU,IAAI,WAAW,IAAI,WAAW,CAAC,IAAI,KAAKA,eAAS,CAAC;AAC1D,UAAE;UACA,SAAS;;AAGf,IAAA,KAAK,MAAM,aAAa,IAAI,cAAc,EAAE;AAC1C,QAAA,IAAI,aAAa,CAAC,IAAI,KAAK,EAAE,EAAE;AAC7B,YAAA,aAAa,CAAC,IAAI,GAAG,SAAS;;AAEhC,QAAA,IAAI,aAAa,CAAC,EAAE,KAAK,EAAE,EAAE;AAC3B,YAAA,aAAa,CAAC,EAAE,GAAG,SAAS;;aACvB,IACL,UAAU,IAAI,IAAI;YAClB,aAAa,CAAC,EAAE,IAAI,IAAI;AACxB,YAAA,aAAa,CAAC,IAAI,IAAI,IAAI,EAC1B;YACA,UAAU,CAAC,IAAI,CAAC;AACd,gBAAA,IAAI,EAAE,EAAE;gBACR,EAAE,EAAE,aAAa,CAAC,EAAE;gBACpB,IAAI,EAAE,aAAa,CAAC,IAAI;gBACxB,IAAI,EAAEC,mBAAa,CAAC,SAAS;AAC9B,aAAA,CAAC;;;IAIN,IAAI,MAAM,GAAW,OAAO;IAC5B,MAAM,iBAAiB,GACrB,WAAW,EAAE,IAAI,KAAKD,eAAS,CAAC,gBAAgB;AAChD,QAAA,KAAK,CAAC,uBAAuB,CAAC,GAAG,CAAC,UAAU,CAAC;IAC/C,IAAI,CAAC,iBAAiB,IAAI,UAAU,EAAE,MAAM,KAAK,cAAc,CAAC,MAAM,EAAE;AACtE,QAAA,KAAK,CAAC,oBAAoB,CAAC,UAAU,EAAE;AACrC,YAAA,OAAO,EAAE;AACP,gBAAA;oBACE,IAAI,EAAEE,kBAAY,CAAC,IAAI;AACvB,oBAAA,IAAI,EAAE,EAAE
;AACR,oBAAA,aAAa,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC;AACnD,iBAAA;AACF,aAAA;AACF,SAAA,CAAC;QACF,KAAK,CAAC,uBAAuB,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC;AACnD,QAAA,MAAM,GAAG,KAAK,CAAC,eAAe,CAAC,OAAO,EAAE;YACtC,IAAI,EAAEF,eAAS,CAAC,UAAU;YAC1B,UAAU;AACX,SAAA,CAAC;;AAEJ,IAAA,KAAK,CAAC,oBAAoB,CAAC,MAAM,EAAE;QACjC,IAAI,EAAEA,eAAS,CAAC,UAAU;AAC1B,QAAA,UAAU,EAAE,cAAc;AAC3B,KAAA,CAAC;AACJ;AAEa,MAAA,eAAe,GAAG,CAC7B,SAAsB,EACtB,QAAkC,EAClC,KAAa,KACL;AACR,IAAA,IAAI,CAAC,KAAK,IAAI,CAAC,QAAQ,EAAE;AACvB,QAAA,OAAO,CAAC,IAAI,CAAC,kCAAkC,KAAK,CAAA,MAAA,CAAQ,CAAC;QAC7D;;IAGF,IAAI,CAAC,SAAS,EAAE;QACd;;AAGF,IAAA,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE;QAC1B;;IAGF,MAAM,OAAO,GAAG,KAAK,CAAC,UAAU,CAAC,QAAQ,CAAC;AAE1C,IAAA,KAAK,MAAM,SAAS,IAAI,SAAS,EAAE;QACjC,MAAM,UAAU,GAAG,SAAS,CAAC,EAAE,IAAI,CAAS,MAAA,EAAAG,aAAM,EAAE,CAAA,CAAE;AACtD,QAAA,SAAS,CAAC,EAAE,GAAG,UAAU;AACzB,QAAA,IAAI,CAAC,UAAU,IAAI,KAAK,CAAC,eAAe,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE;YACxD;;QAGF,IAAI,UAAU,GAAG,EAAE;AACnB,QAAA,IAAI,WAAkC;AACtC,QAAA,IAAI;AACF,YAAA,UAAU,GAAG,KAAK,CAAC,cAAc,CAAC,OAAO,EAAE,KAAK,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC;AACxE,YAAA,WAAW,GAAG,KAAK,CAAC,UAAU,CAAC,UAAU,CAAC;;AAC1C,QAAA,MAAM;;;AAIR,QAAA,MAAM,mBAAmB,GAAG,CAAC,iBAAyB,KAAU;AAC9D,YAAA,KAAK,CAAC,oBAAoB,CAAC,iBAAiB,EAAE;AAC5C,gBAAA,OAAO,EAAE;AACP,oBAAA;AACE,wBAAA,IAAI,EAAE,MAAM;AACZ,wBAAA,IAAI,EAAE,EAAE;wBACR,aAAa,EAAE,CAAC,UAAU,CAAC;AAC5B,qBAAA;AACF,iBAAA;AACF,aAAA,CAAC;AACJ,SAAC;;AAED,QAAA,IACE,UAAU;YACV,WAAW;AACX,YAAA,WAAW,CAAC,IAAI,KAAKH,eAAS,CAAC,gBAAgB,EAC/C;YACA,mBAAmB,CAAC,UAAU,CAAC;YAC/B,KAAK,CAAC,uBAAuB,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC;;;AAE9C,aAAA,IACL,CAAC,WAAW;AACZ,YAAA,WAAW,CAAC,IAAI,KAAKA,eAAS,CAAC,gBAAgB,EAC/C;AACA,YAAA,MAAM,SAAS,GAAGD,gBAAY,CAAC,OAAO,EAAE,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE;AAC1D,YAAA,MAAM,MAAM,GAAG,KAAK,CAAC,eAAe,CAAC,OAAO,EAAE;gBAC5C,IAAI,EAAEC,eAAS,CAAC,gBAAgB;AAChC,gBAAA,gBAAgB,EAAE;AAChB,oBAAA,UAAU,EAAE,SAAS;AACtB,iBAAA;AACF,aAAA,CAAC;YACF,mBAAmB,CAAC,MAAM,CAAC;YAC3B,KAAK,CAAC,uBAAuB,CAAC,GAAG,CAAC,UAAU
,EAAE,IAAI,CAAC;;AAGrD,QAAA,KAAK,CAAC,eAAe,CAAC,OAAO,EAAE;YAC7B,IAAI,EAAEA,eAAS,CAAC,UAAU;YAC1B,UAAU,EAAE,CAAC,SAAS,CAAC;AACxB,SAAA,CAAC;;AAEN;;;;;"}
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var cheerio = require('cheerio');
|
|
4
|
+
|
|
5
|
+
/**
 * Builds a frozen, prototype-less namespace object for a required module.
 *
 * Every own enumerable key of `e` except `default` is exposed through a
 * getter (existing accessor descriptors are reused as-is), so reads reflect
 * later changes on `e`. The module itself is exposed as `default`.
 *
 * @param {object} [e] - The required module; falsy values yield a namespace holding only `default`.
 * @returns {object} Frozen namespace object.
 */
function _interopNamespaceDefault(e) {
    const namespace = Object.create(null);
    const exportedKeys = e ? Object.keys(e) : [];
    for (const key of exportedKeys) {
        if (key === 'default') {
            continue;
        }
        const descriptor = Object.getOwnPropertyDescriptor(e, key);
        const binding = descriptor.get
            ? descriptor
            : { enumerable: true, get: () => e[key] };
        Object.defineProperty(namespace, key, binding);
    }
    namespace.default = e;
    return Object.freeze(namespace);
}
|
|
21
|
+
|
|
22
|
+
var cheerio__namespace = /*#__PURE__*/_interopNamespaceDefault(cheerio);
|
|
23
|
+
|
|
24
|
+
/**
 * Collects link/image/video/iframe references from `html` and rewrites the
 * matching markdown link targets into short `type#index` placeholders.
 *
 * References are keyed by URL per category and numbered from 1 in first-seen
 * order. In the markdown, every `(url)` or `(url "title")` target whose URL
 * was collected becomes `(type#idx)` / `(type#idx "title")`. Iframe targets
 * are rewritten too, but iframe references are not part of the return value.
 *
 * @param {string} html - HTML document to scan for references.
 * @param {string} markdown - Markdown rendering whose link targets are rewritten.
 * @returns {{markdown: string, links: Array<object>, images: Array<object>, videos: Array<object>}}
 */
function processContent(html, markdown) {
    const $ = cheerio__namespace.load(html, {
        xmlMode: false,
    });
    const linkMap = new Map();
    const imageMap = new Map();
    const videoMap = new Map();
    const iframeMap = new Map();

    // Gathers one category: reads `attribute` from every match of `selector`
    // and stores a reference under that URL when it is non-empty and accepted.
    const collect = (selector, attribute, bucket, describe, accept) => {
        $(selector).each((_, el) => {
            const url = $(el).attr(attribute);
            if (!url || (accept && !accept(url))) {
                return;
            }
            bucket.set(url, { originalUrl: url, ...describe($(el)) });
        });
    };
    const titleOnly = (node) => ({ title: node.attr('title') });

    // Extract all media references
    collect('a[href]', 'href', linkMap, (node) => ({
        title: node.attr('title'),
        text: node.text().trim(),
    }));
    collect('img[src]', 'src', imageMap, (node) => ({
        title: node.attr('alt') ?? node.attr('title'),
    }));
    // Videos: dedicated video elements and video platforms in iframes
    collect('video[src], iframe[src*="youtube"], iframe[src*="vimeo"]', 'src', videoMap, titleOnly);
    // Any other iframe that was not already captured as a video
    collect('iframe', 'src', iframeMap, titleOnly, (src) => !src.includes('youtube') && !src.includes('vimeo'));

    // Number each category from 1 in insertion order.
    const numbered = (bucket, type) => Array.from(bucket.keys(), (url, position) => ({
        url,
        type,
        idx: position + 1,
    }));
    // Longest URLs are handled first.
    const replacements = [
        ...numbered(imageMap, 'image'),
        ...numbered(videoMap, 'video'),
        ...numbered(iframeMap, 'iframe'),
        ...numbered(linkMap, 'link'),
    ].sort((a, b) => b.url.length - a.url.length);

    // Escapes regex metacharacters so a URL can be embedded in a pattern.
    const escapeRegex = (string) => string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

    let result = markdown;
    for (const { url, type, idx } of replacements) {
        // Matches the URL as a markdown link target, with an optional quoted title.
        const pattern = new RegExp(`\\(${escapeRegex(url)}(?:\\s+"[^"]*")?\\)`, 'g');
        result = result.replace(pattern, (match) => {
            // Keep any title attribute that might exist
            const titleMatch = match.match(/\s+"([^"]*)"/);
            if (titleMatch) {
                return `(${type}#${idx} "${titleMatch[1]}")`;
            }
            return `(${type}#${idx})`;
        });
    }

    return {
        markdown: result,
        links: Array.from(linkMap.values()),
        images: Array.from(imageMap.values()),
        videos: Array.from(videoMap.values()),
    };
}
|
|
138
|
+
|
|
139
|
+
exports.processContent = processContent;
|
|
140
|
+
//# sourceMappingURL=content.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"content.cjs","sources":["../../../../src/tools/search/content.ts"],"sourcesContent":["import * as cheerio from 'cheerio';\nimport type { References, MediaReference } from './types';\n\nexport function processContent(\n html: string,\n markdown: string\n): {\n markdown: string;\n} & References {\n const linkMap = new Map<string, MediaReference>();\n const imageMap = new Map<string, MediaReference>();\n const videoMap = new Map<string, MediaReference>();\n const iframeMap = new Map<string, MediaReference>();\n\n const $ = cheerio.load(html, {\n xmlMode: false,\n });\n\n // Extract all media references\n $('a[href]').each((_, el) => {\n const href = $(el).attr('href');\n if (href != null && href) {\n linkMap.set(href, {\n originalUrl: href,\n title: $(el).attr('title'),\n text: $(el).text().trim(),\n });\n }\n });\n\n $('img[src]').each((_, el) => {\n const src = $(el).attr('src');\n if (src != null && src) {\n imageMap.set(src, {\n originalUrl: src,\n title: $(el).attr('alt') ?? 
$(el).attr('title'),\n });\n }\n });\n\n // Handle videos (dedicated video elements and video platforms in iframes)\n $('video[src], iframe[src*=\"youtube\"], iframe[src*=\"vimeo\"]').each(\n (_, el) => {\n const src = $(el).attr('src');\n if (src != null && src) {\n videoMap.set(src, {\n originalUrl: src,\n title: $(el).attr('title'),\n });\n }\n }\n );\n\n // Handle all other generic iframes that aren't already captured as videos\n $('iframe').each((_, el) => {\n const src = $(el).attr('src');\n if (\n src != null &&\n src &&\n !src.includes('youtube') &&\n !src.includes('vimeo')\n ) {\n iframeMap.set(src, {\n originalUrl: src,\n title: $(el).attr('title'),\n });\n }\n });\n\n // Create lookup maps with indices\n const linkIndexMap = new Map<string, number>();\n const imageIndexMap = new Map<string, number>();\n const videoIndexMap = new Map<string, number>();\n const iframeIndexMap = new Map<string, number>();\n\n Array.from(linkMap.keys()).forEach((url, i) => linkIndexMap.set(url, i + 1));\n Array.from(imageMap.keys()).forEach((url, i) =>\n imageIndexMap.set(url, i + 1)\n );\n Array.from(videoMap.keys()).forEach((url, i) =>\n videoIndexMap.set(url, i + 1)\n );\n Array.from(iframeMap.keys()).forEach((url, i) =>\n iframeIndexMap.set(url, i + 1)\n );\n\n // Process the markdown\n let result = markdown;\n\n // Replace each URL one by one, starting with the longest URLs first to avoid partial matches\n const allUrls = [\n ...Array.from(imageMap.keys()).map((url) => ({\n url,\n type: 'image',\n idx: imageIndexMap.get(url),\n })),\n ...Array.from(videoMap.keys()).map((url) => ({\n url,\n type: 'video',\n idx: videoIndexMap.get(url),\n })),\n ...Array.from(iframeMap.keys()).map((url) => ({\n url,\n type: 'iframe',\n idx: iframeIndexMap.get(url),\n })),\n ...Array.from(linkMap.keys()).map((url) => ({\n url,\n type: 'link',\n idx: linkIndexMap.get(url),\n })),\n ].sort((a, b) => b.url.length - a.url.length);\n\n // Create a function to escape special characters in URLs 
for regex\n function escapeRegex(string: string): string {\n return string.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&');\n }\n\n // Replace each URL in the markdown\n for (const { url, type, idx } of allUrls) {\n // Create a regex that captures URLs in markdown links\n const regex = new RegExp(`\\\\(${escapeRegex(url)}(?:\\\\s+\"[^\"]*\")?\\\\)`, 'g');\n\n result = result.replace(regex, (match) => {\n // Keep any title attribute that might exist\n const titleMatch = match.match(/\\s+\"([^\"]*)\"/);\n const titlePart = titleMatch ? ` \"${titleMatch[1]}\"` : '';\n\n return `(${type}#${idx}${titlePart})`;\n });\n }\n\n iframeMap.clear();\n const links = Array.from(linkMap.values());\n linkMap.clear();\n const images = Array.from(imageMap.values());\n imageMap.clear();\n const videos = Array.from(videoMap.values());\n videoMap.clear();\n\n return {\n markdown: result,\n links,\n images,\n videos,\n };\n}\n"],"names":["cheerio"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;AAGgB,SAAA,cAAc,CAC5B,IAAY,EACZ,QAAgB,EAAA;AAIhB,IAAA,MAAM,OAAO,GAAG,IAAI,GAAG,EAA0B;AACjD,IAAA,MAAM,QAAQ,GAAG,IAAI,GAAG,EAA0B;AAClD,IAAA,MAAM,QAAQ,GAAG,IAAI,GAAG,EAA0B;AAClD,IAAA,MAAM,SAAS,GAAG,IAAI,GAAG,EAA0B;AAEnD,IAAA,MAAM,CAAC,GAAGA,kBAAO,CAAC,IAAI,CAAC,IAAI,EAAE;AAC3B,QAAA,OAAO,EAAE,KAAK;AACf,KAAA,CAAC;;IAGF,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,KAAI;QAC1B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC;AAC/B,QAAA,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,EAAE;AACxB,YAAA,OAAO,CAAC,GAAG,CAAC,IAAI,EAAE;AAChB,gBAAA,WAAW,EAAE,IAAI;gBACjB,KAAK,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;gBAC1B,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE;AAC1B,aAAA,CAAC;;AAEN,KAAC,CAAC;IAEF,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,KAAI;QAC3B,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC;AAC7B,QAAA,IAAI,GAAG,IAAI,IAAI,IAAI,GAAG,EAAE;AACtB,YAAA,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE;AAChB,gBAAA,WAAW,EAAE,GAAG;AAChB,gBAAA,KAAK,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC
,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;AAChD,aAAA,CAAC;;AAEN,KAAC,CAAC;;IAGF,CAAC,CAAC,0DAA0D,CAAC,CAAC,IAAI,CAChE,CAAC,CAAC,EAAE,EAAE,KAAI;QACR,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC;AAC7B,QAAA,IAAI,GAAG,IAAI,IAAI,IAAI,GAAG,EAAE;AACtB,YAAA,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE;AAChB,gBAAA,WAAW,EAAE,GAAG;gBAChB,KAAK,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;AAC3B,aAAA,CAAC;;AAEN,KAAC,CACF;;IAGD,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,KAAI;QACzB,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC;QAC7B,IACE,GAAG,IAAI,IAAI;YACX,GAAG;AACH,YAAA,CAAC,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC;AACxB,YAAA,CAAC,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,EACtB;AACA,YAAA,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE;AACjB,gBAAA,WAAW,EAAE,GAAG;gBAChB,KAAK,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;AAC3B,aAAA,CAAC;;AAEN,KAAC,CAAC;;AAGF,IAAA,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB;AAC9C,IAAA,MAAM,aAAa,GAAG,IAAI,GAAG,EAAkB;AAC/C,IAAA,MAAM,aAAa,GAAG,IAAI,GAAG,EAAkB;AAC/C,IAAA,MAAM,cAAc,GAAG,IAAI,GAAG,EAAkB;AAEhD,IAAA,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,KAAK,YAAY,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;AAC5E,IAAA,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,KACzC,aAAa,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAC9B;AACD,IAAA,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,KACzC,aAAa,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAC9B;AACD,IAAA,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,KAC1C,cAAc,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAC/B;;IAGD,IAAI,MAAM,GAAG,QAAQ;;AAGrB,IAAA,MAAM,OAAO,GAAG;AACd,QAAA,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM;YAC3C,GAAG;AACH,YAAA,IAAI,EAAE,OAAO;AACb,YAAA,GAAG,EAAE,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC;AAC5B,SAAA,CAAC,CAAC;AACH,QAAA,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM;YAC3C,GAAG;AACH,YAAA,IAAI,EAAE,OA
AO;AACb,YAAA,GAAG,EAAE,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC;AAC5B,SAAA,CAAC,CAAC;AACH,QAAA,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM;YAC5C,GAAG;AACH,YAAA,IAAI,EAAE,QAAQ;AACd,YAAA,GAAG,EAAE,cAAc,CAAC,GAAG,CAAC,GAAG,CAAC;AAC7B,SAAA,CAAC,CAAC;AACH,QAAA,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM;YAC1C,GAAG;AACH,YAAA,IAAI,EAAE,MAAM;AACZ,YAAA,GAAG,EAAE,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC;AAC3B,SAAA,CAAC,CAAC;KACJ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC;;IAG7C,SAAS,WAAW,CAAC,MAAc,EAAA;QACjC,OAAO,MAAM,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC;;;IAItD,KAAK,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,OAAO,EAAE;;AAExC,QAAA,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,CAAM,GAAA,EAAA,WAAW,CAAC,GAAG,CAAC,CAAA,mBAAA,CAAqB,EAAE,GAAG,CAAC;QAE1E,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,KAAK,KAAI;;YAEvC,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,cAAc,CAAC;AAC9C,YAAA,MAAM,SAAS,GAAG,UAAU,GAAG,CAAK,EAAA,EAAA,UAAU,CAAC,CAAC,CAAC,CAAG,CAAA,CAAA,GAAG,EAAE;AAEzD,YAAA,OAAO,IAAI,IAAI,CAAA,CAAA,EAAI,GAAG,CAAG,EAAA,SAAS,GAAG;AACvC,SAAC,CAAC;;IAGJ,SAAS,CAAC,KAAK,EAAE;IACjB,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;IAC1C,OAAO,CAAC,KAAK,EAAE;IACf,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;IAC5C,QAAQ,CAAC,KAAK,EAAE;IAChB,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;IAC5C,QAAQ,CAAC,KAAK,EAAE;IAEhB,OAAO;AACL,QAAA,QAAQ,EAAE,MAAM;QAChB,KAAK;QACL,MAAM;QACN,MAAM;KACP;AACH;;;;"}
|
|
@@ -1,37 +1,9 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
var axios = require('axios');
|
|
4
|
+
var content = require('./content.cjs');
|
|
4
5
|
|
|
5
6
|
/* eslint-disable no-console */
|
|
6
|
-
const getDomainName = (link, metadata) => {
|
|
7
|
-
try {
|
|
8
|
-
const url = metadata?.sourceURL ?? metadata?.url ?? (link || '');
|
|
9
|
-
const domain = new URL(url).hostname.replace(/^www\./, '');
|
|
10
|
-
if (domain) {
|
|
11
|
-
return domain;
|
|
12
|
-
}
|
|
13
|
-
}
|
|
14
|
-
catch (e) {
|
|
15
|
-
// URL parsing failed
|
|
16
|
-
console.error('Error parsing URL:', e);
|
|
17
|
-
}
|
|
18
|
-
return;
|
|
19
|
-
};
|
|
20
|
-
function getAttribution(link, metadata) {
|
|
21
|
-
if (!metadata)
|
|
22
|
-
return getDomainName(link, metadata);
|
|
23
|
-
const possibleAttributions = [
|
|
24
|
-
metadata.ogSiteName,
|
|
25
|
-
metadata['og:site_name'],
|
|
26
|
-
metadata.title?.split('|').pop()?.trim(),
|
|
27
|
-
metadata['twitter:site']?.replace(/^@/, ''),
|
|
28
|
-
];
|
|
29
|
-
const attribution = possibleAttributions.find((attr) => attr != null && typeof attr === 'string' && attr.trim() !== '');
|
|
30
|
-
if (attribution != null) {
|
|
31
|
-
return attribution;
|
|
32
|
-
}
|
|
33
|
-
return getDomainName(link, metadata);
|
|
34
|
-
}
|
|
35
7
|
/**
|
|
36
8
|
* Firecrawl scraper implementation
|
|
37
9
|
* Uses the Firecrawl API to scrape web pages
|
|
@@ -106,21 +78,30 @@ class FirecrawlScraper {
|
|
|
106
78
|
*/
|
|
107
79
|
extractContent(response) {
|
|
108
80
|
if (!response.success || !response.data) {
|
|
109
|
-
return '';
|
|
81
|
+
return ['', undefined];
|
|
82
|
+
}
|
|
83
|
+
if (response.data.markdown != null && response.data.html != null) {
|
|
84
|
+
try {
|
|
85
|
+
const { markdown, ...rest } = content.processContent(response.data.html, response.data.markdown);
|
|
86
|
+
return [markdown, rest];
|
|
87
|
+
}
|
|
88
|
+
catch (error) {
|
|
89
|
+
console.error('Error processing content:', error);
|
|
90
|
+
return [response.data.markdown, undefined];
|
|
91
|
+
}
|
|
110
92
|
}
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
return response.data.markdown;
|
|
93
|
+
else if (response.data.markdown != null) {
|
|
94
|
+
return [response.data.markdown, undefined];
|
|
114
95
|
}
|
|
115
96
|
// Fall back to HTML content
|
|
116
97
|
if (response.data.html != null) {
|
|
117
|
-
return response.data.html;
|
|
98
|
+
return [response.data.html, undefined];
|
|
118
99
|
}
|
|
119
100
|
// Fall back to raw HTML content
|
|
120
101
|
if (response.data.rawHtml != null) {
|
|
121
|
-
return response.data.rawHtml;
|
|
102
|
+
return [response.data.rawHtml, undefined];
|
|
122
103
|
}
|
|
123
|
-
return '';
|
|
104
|
+
return ['', undefined];
|
|
124
105
|
}
|
|
125
106
|
/**
|
|
126
107
|
* Extract metadata from scrape response
|
|
@@ -145,5 +126,4 @@ const createFirecrawlScraper = (config = {}) => {
|
|
|
145
126
|
|
|
146
127
|
exports.FirecrawlScraper = FirecrawlScraper;
|
|
147
128
|
exports.createFirecrawlScraper = createFirecrawlScraper;
|
|
148
|
-
exports.getAttribution = getAttribution;
|
|
149
129
|
//# sourceMappingURL=firecrawl.cjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"firecrawl.cjs","sources":["../../../../src/tools/search/firecrawl.ts"],"sourcesContent":["/* eslint-disable no-console */\nimport axios from 'axios';\n\nexport interface FirecrawlScrapeOptions {\n formats?: string[];\n includeTags?: string[];\n excludeTags?: string[];\n headers?: Record<string, string>;\n waitFor?: number;\n timeout?: number;\n}\n\ninterface ScrapeMetadata {\n // Core source information\n sourceURL?: string;\n url?: string;\n scrapeId?: string;\n statusCode?: number;\n // Basic metadata\n title?: string;\n description?: string;\n language?: string;\n favicon?: string;\n viewport?: string;\n robots?: string;\n 'theme-color'?: string;\n // Open Graph metadata\n 'og:url'?: string;\n 'og:title'?: string;\n 'og:description'?: string;\n 'og:type'?: string;\n 'og:image'?: string;\n 'og:image:width'?: string;\n 'og:image:height'?: string;\n 'og:site_name'?: string;\n ogUrl?: string;\n ogTitle?: string;\n ogDescription?: string;\n ogImage?: string;\n ogSiteName?: string;\n // Article metadata\n 'article:author'?: string;\n 'article:published_time'?: string;\n 'article:modified_time'?: string;\n 'article:section'?: string;\n 'article:tag'?: string;\n 'article:publisher'?: string;\n publishedTime?: string;\n modifiedTime?: string;\n // Twitter metadata\n 'twitter:site'?: string;\n 'twitter:creator'?: string;\n 'twitter:card'?: string;\n 'twitter:image'?: string;\n 'twitter:dnt'?: string;\n 'twitter:app:name:iphone'?: string;\n 'twitter:app:id:iphone'?: string;\n 'twitter:app:url:iphone'?: string;\n 'twitter:app:name:ipad'?: string;\n 'twitter:app:id:ipad'?: string;\n 'twitter:app:url:ipad'?: string;\n 'twitter:app:name:googleplay'?: string;\n 'twitter:app:id:googleplay'?: string;\n 'twitter:app:url:googleplay'?: string;\n // Facebook metadata\n 'fb:app_id'?: string;\n // App links\n 'al:ios:url'?: string;\n 'al:ios:app_name'?: string;\n 'al:ios:app_store_id'?: string;\n // Allow for additional properties that might be present\n [key: 
string]: string | number | boolean | null | undefined;\n}\n\nexport interface FirecrawlScrapeResponse {\n success: boolean;\n data?: {\n markdown?: string;\n html?: string;\n rawHtml?: string;\n screenshot?: string;\n links?: string[];\n metadata?: ScrapeMetadata;\n };\n error?: string;\n}\n\nexport interface FirecrawlScraperConfig {\n apiKey?: string;\n apiUrl?: string;\n formats?: string[];\n timeout?: number;\n}\nconst getDomainName = (\n link: string,\n metadata?: ScrapeMetadata\n): string | undefined => {\n try {\n const url = metadata?.sourceURL ?? metadata?.url ?? (link || '');\n const domain = new URL(url).hostname.replace(/^www\\./, '');\n if (domain) {\n return domain;\n }\n } catch (e) {\n // URL parsing failed\n console.error('Error parsing URL:', e);\n }\n\n return;\n};\n\nexport function getAttribution(\n link: string,\n metadata?: ScrapeMetadata\n): string | undefined {\n if (!metadata) return getDomainName(link, metadata);\n\n const possibleAttributions = [\n metadata.ogSiteName,\n metadata['og:site_name'],\n metadata.title?.split('|').pop()?.trim(),\n metadata['twitter:site']?.replace(/^@/, ''),\n ];\n\n const attribution = possibleAttributions.find(\n (attr) => attr != null && typeof attr === 'string' && attr.trim() !== ''\n );\n if (attribution != null) {\n return attribution;\n }\n\n return getDomainName(link, metadata);\n}\n\n/**\n * Firecrawl scraper implementation\n * Uses the Firecrawl API to scrape web pages\n */\nexport class FirecrawlScraper {\n private apiKey: string;\n private apiUrl: string;\n private defaultFormats: string[];\n private timeout: number;\n\n constructor(config: FirecrawlScraperConfig = {}) {\n this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';\n\n const baseUrl =\n config.apiUrl ??\n process.env.FIRECRAWL_BASE_URL ??\n 'https://api.firecrawl.dev';\n this.apiUrl = `${baseUrl.replace(/\\/+$/, '')}/v1/scrape`;\n\n this.defaultFormats = config.formats ?? 
['markdown', 'html'];\n this.timeout = config.timeout ?? 15000;\n\n if (!this.apiKey) {\n console.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');\n }\n\n console.log(`Firecrawl scraper initialized with API URL: ${this.apiUrl}`);\n }\n\n /**\n * Scrape a single URL\n * @param url URL to scrape\n * @param options Scrape options\n * @returns Scrape response\n */\n async scrapeUrl(\n url: string,\n options: FirecrawlScrapeOptions = {}\n ): Promise<[string, FirecrawlScrapeResponse]> {\n if (!this.apiKey) {\n return [\n url,\n {\n success: false,\n error: 'FIRECRAWL_API_KEY is not set',\n },\n ];\n }\n\n try {\n const response = await axios.post(\n this.apiUrl,\n {\n url,\n formats: options.formats || this.defaultFormats,\n includeTags: options.includeTags,\n excludeTags: options.excludeTags,\n headers: options.headers,\n waitFor: options.waitFor,\n timeout: options.timeout ?? this.timeout,\n },\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n timeout: this.timeout,\n }\n );\n\n return [url, response.data];\n } catch (error) {\n const errorMessage =\n error instanceof Error ? 
error.message : String(error);\n return [\n url,\n {\n success: false,\n error: `Firecrawl API request failed: ${errorMessage}`,\n },\n ];\n }\n }\n\n /**\n * Extract content from scrape response\n * @param response Scrape response\n * @returns Extracted content or empty string if not available\n */\n extractContent(response: FirecrawlScrapeResponse): string {\n if (!response.success || !response.data) {\n return '';\n }\n\n // Prefer markdown content if available\n if (response.data.markdown != null) {\n return response.data.markdown;\n }\n\n // Fall back to HTML content\n if (response.data.html != null) {\n return response.data.html;\n }\n\n // Fall back to raw HTML content\n if (response.data.rawHtml != null) {\n return response.data.rawHtml;\n }\n\n return '';\n }\n\n /**\n * Extract metadata from scrape response\n * @param response Scrape response\n * @returns Metadata object\n */\n extractMetadata(response: FirecrawlScrapeResponse): ScrapeMetadata {\n if (!response.success || !response.data || !response.data.metadata) {\n return {};\n }\n\n return response.data.metadata;\n }\n}\n\n/**\n * Create a Firecrawl scraper instance\n * @param config Scraper configuration\n * @returns Firecrawl scraper instance\n */\nexport const createFirecrawlScraper = (\n config: FirecrawlScraperConfig = {}\n): FirecrawlScraper => {\n return new 
FirecrawlScraper(config);\n};\n"],"names":[],"mappings":";;;;AAAA;AA6FA,MAAM,aAAa,GAAG,CACpB,IAAY,EACZ,QAAyB,KACH;AACtB,IAAA,IAAI;AACF,QAAA,MAAM,GAAG,GAAG,QAAQ,EAAE,SAAS,IAAI,QAAQ,EAAE,GAAG,KAAK,IAAI,IAAI,EAAE,CAAC;AAChE,QAAA,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;QAC1D,IAAI,MAAM,EAAE;AACV,YAAA,OAAO,MAAM;;;IAEf,OAAO,CAAC,EAAE;;AAEV,QAAA,OAAO,CAAC,KAAK,CAAC,oBAAoB,EAAE,CAAC,CAAC;;IAGxC;AACF,CAAC;AAEe,SAAA,cAAc,CAC5B,IAAY,EACZ,QAAyB,EAAA;AAEzB,IAAA,IAAI,CAAC,QAAQ;AAAE,QAAA,OAAO,aAAa,CAAC,IAAI,EAAE,QAAQ,CAAC;AAEnD,IAAA,MAAM,oBAAoB,GAAG;AAC3B,QAAA,QAAQ,CAAC,UAAU;QACnB,QAAQ,CAAC,cAAc,CAAC;AACxB,QAAA,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE;QACxC,QAAQ,CAAC,cAAc,CAAC,EAAE,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;KAC5C;IAED,MAAM,WAAW,GAAG,oBAAoB,CAAC,IAAI,CAC3C,CAAC,IAAI,KAAK,IAAI,IAAI,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CACzE;AACD,IAAA,IAAI,WAAW,IAAI,IAAI,EAAE;AACvB,QAAA,OAAO,WAAW;;AAGpB,IAAA,OAAO,aAAa,CAAC,IAAI,EAAE,QAAQ,CAAC;AACtC;AAEA;;;AAGG;MACU,gBAAgB,CAAA;AACnB,IAAA,MAAM;AACN,IAAA,MAAM;AACN,IAAA,cAAc;AACd,IAAA,OAAO;AAEf,IAAA,WAAA,CAAY,SAAiC,EAAE,EAAA;AAC7C,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,EAAE;AAElE,QAAA,MAAM,OAAO,GACX,MAAM,CAAC,MAAM;YACb,OAAO,CAAC,GAAG,CAAC,kBAAkB;AAC9B,YAAA,2BAA2B;AAC7B,QAAA,IAAI,CAAC,MAAM,GAAG,CAAA,EAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY;AAExD,QAAA,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC;QAC5D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,KAAK;AAEtC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;AAChB,YAAA,OAAO,CAAC,IAAI,CAAC,uDAAuD,CAAC;;QAGvE,OAAO,CAAC,GAAG,CAAC,CAAA,4CAAA,EAA+C,IAAI,CAAC,MAAM,CAAE,CAAA,CAAC;;AAG3E;;;;;AAKG;AACH,IAAA,MAAM,SAAS,CACb,GAAW,EACX,UAAkC,EAAE,EAAA;AAEpC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;YAChB,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;AACd,oBAAA,KAAK,EAAE,8BAA8B;AACtC,iBAAA;aACF;;AAGH,QAAA,IAAI;YACF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,IAAI,CAAC,MAAM,EA
CX;gBACE,GAAG;AACH,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc;gBAC/C,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,OAAO,EAAE,OAAO,CAAC,OAAO;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO;AACxB,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;aACzC,EACD;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;gBACD,OAAO,EAAE,IAAI,CAAC,OAAO;AACtB,aAAA,CACF;AAED,YAAA,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,CAAC;;QAC3B,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAiC,8BAAA,EAAA,YAAY,CAAE,CAAA;AACvD,iBAAA;aACF;;;AAIL;;;;AAIG;AACH,IAAA,cAAc,CAAC,QAAiC,EAAA;QAC9C,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;AACvC,YAAA,OAAO,EAAE;;;QAIX,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,EAAE;AAClC,YAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ;;;QAI/B,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;AAC9B,YAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,IAAI;;;QAI3B,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,EAAE;AACjC,YAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,OAAO;;AAG9B,QAAA,OAAO,EAAE;;AAGX;;;;AAIG;AACH,IAAA,eAAe,CAAC,QAAiC,EAAA;AAC/C,QAAA,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE;AAClE,YAAA,OAAO,EAAE;;AAGX,QAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ;;AAEhC;AAED;;;;AAIG;MACU,sBAAsB,GAAG,CACpC,MAAiC,GAAA,EAAE,KACf;AACpB,IAAA,OAAO,IAAI,gBAAgB,CAAC,MAAM,CAAC;AACrC;;;;;;"}
|
|
1
|
+
{"version":3,"file":"firecrawl.cjs","sources":["../../../../src/tools/search/firecrawl.ts"],"sourcesContent":["/* eslint-disable no-console */\nimport axios from 'axios';\nimport { processContent } from './content';\nimport type * as t from './types';\n\n/**\n * Firecrawl scraper implementation\n * Uses the Firecrawl API to scrape web pages\n */\nexport class FirecrawlScraper {\n private apiKey: string;\n private apiUrl: string;\n private defaultFormats: string[];\n private timeout: number;\n\n constructor(config: t.FirecrawlScraperConfig = {}) {\n this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';\n\n const baseUrl =\n config.apiUrl ??\n process.env.FIRECRAWL_BASE_URL ??\n 'https://api.firecrawl.dev';\n this.apiUrl = `${baseUrl.replace(/\\/+$/, '')}/v1/scrape`;\n\n this.defaultFormats = config.formats ?? ['markdown', 'html'];\n this.timeout = config.timeout ?? 15000;\n\n if (!this.apiKey) {\n console.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');\n }\n\n console.log(`Firecrawl scraper initialized with API URL: ${this.apiUrl}`);\n }\n\n /**\n * Scrape a single URL\n * @param url URL to scrape\n * @param options Scrape options\n * @returns Scrape response\n */\n async scrapeUrl(\n url: string,\n options: t.FirecrawlScrapeOptions = {}\n ): Promise<[string, t.FirecrawlScrapeResponse]> {\n if (!this.apiKey) {\n return [\n url,\n {\n success: false,\n error: 'FIRECRAWL_API_KEY is not set',\n },\n ];\n }\n\n try {\n const response = await axios.post(\n this.apiUrl,\n {\n url,\n formats: options.formats || this.defaultFormats,\n includeTags: options.includeTags,\n excludeTags: options.excludeTags,\n headers: options.headers,\n waitFor: options.waitFor,\n timeout: options.timeout ?? this.timeout,\n },\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n timeout: this.timeout,\n }\n );\n\n return [url, response.data];\n } catch (error) {\n const errorMessage =\n error instanceof Error ? 
error.message : String(error);\n return [\n url,\n {\n success: false,\n error: `Firecrawl API request failed: ${errorMessage}`,\n },\n ];\n }\n }\n\n /**\n * Extract content from scrape response\n * @param response Scrape response\n * @returns Extracted content or empty string if not available\n */\n extractContent(\n response: t.FirecrawlScrapeResponse\n ): [string, undefined | t.References] {\n if (!response.success || !response.data) {\n return ['', undefined];\n }\n\n if (response.data.markdown != null && response.data.html != null) {\n try {\n const { markdown, ...rest } = processContent(\n response.data.html,\n response.data.markdown\n );\n return [markdown, rest];\n } catch (error) {\n console.error('Error processing content:', error);\n return [response.data.markdown, undefined];\n }\n } else if (response.data.markdown != null) {\n return [response.data.markdown, undefined];\n }\n\n // Fall back to HTML content\n if (response.data.html != null) {\n return [response.data.html, undefined];\n }\n\n // Fall back to raw HTML content\n if (response.data.rawHtml != null) {\n return [response.data.rawHtml, undefined];\n }\n\n return ['', undefined];\n }\n\n /**\n * Extract metadata from scrape response\n * @param response Scrape response\n * @returns Metadata object\n */\n extractMetadata(response: t.FirecrawlScrapeResponse): t.ScrapeMetadata {\n if (!response.success || !response.data || !response.data.metadata) {\n return {};\n }\n\n return response.data.metadata;\n }\n}\n\n/**\n * Create a Firecrawl scraper instance\n * @param config Scraper configuration\n * @returns Firecrawl scraper instance\n */\nexport const createFirecrawlScraper = (\n config: t.FirecrawlScraperConfig = {}\n): FirecrawlScraper => {\n return new 
FirecrawlScraper(config);\n};\n"],"names":["processContent"],"mappings":";;;;;AAAA;AAKA;;;AAGG;MACU,gBAAgB,CAAA;AACnB,IAAA,MAAM;AACN,IAAA,MAAM;AACN,IAAA,cAAc;AACd,IAAA,OAAO;AAEf,IAAA,WAAA,CAAY,SAAmC,EAAE,EAAA;AAC/C,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,EAAE;AAElE,QAAA,MAAM,OAAO,GACX,MAAM,CAAC,MAAM;YACb,OAAO,CAAC,GAAG,CAAC,kBAAkB;AAC9B,YAAA,2BAA2B;AAC7B,QAAA,IAAI,CAAC,MAAM,GAAG,CAAA,EAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY;AAExD,QAAA,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC;QAC5D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,KAAK;AAEtC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;AAChB,YAAA,OAAO,CAAC,IAAI,CAAC,uDAAuD,CAAC;;QAGvE,OAAO,CAAC,GAAG,CAAC,CAAA,4CAAA,EAA+C,IAAI,CAAC,MAAM,CAAE,CAAA,CAAC;;AAG3E;;;;;AAKG;AACH,IAAA,MAAM,SAAS,CACb,GAAW,EACX,UAAoC,EAAE,EAAA;AAEtC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;YAChB,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;AACd,oBAAA,KAAK,EAAE,8BAA8B;AACtC,iBAAA;aACF;;AAGH,QAAA,IAAI;YACF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,IAAI,CAAC,MAAM,EACX;gBACE,GAAG;AACH,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc;gBAC/C,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,OAAO,EAAE,OAAO,CAAC,OAAO;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO;AACxB,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;aACzC,EACD;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;gBACD,OAAO,EAAE,IAAI,CAAC,OAAO;AACtB,aAAA,CACF;AAED,YAAA,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,CAAC;;QAC3B,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAiC,8BAAA,EAAA,YAAY,CAAE,CAAA;AACvD,iBAAA;aACF;;;AAIL;;;;AAIG;AACH,IAAA,cAAc,CACZ,QAAmC,EAAA;QAEnC,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;AACvC,YAAA,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC;;AAGxB,QAAA,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IA
AI,IAAI,EAAE;AAChE,YAAA,IAAI;gBACF,MAAM,EAAE,QAAQ,EAAE,GAAG,IAAI,EAAE,GAAGA,sBAAc,CAC1C,QAAQ,CAAC,IAAI,CAAC,IAAI,EAClB,QAAQ,CAAC,IAAI,CAAC,QAAQ,CACvB;AACD,gBAAA,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC;;YACvB,OAAO,KAAK,EAAE;AACd,gBAAA,OAAO,CAAC,KAAK,CAAC,2BAA2B,EAAE,KAAK,CAAC;gBACjD,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;;;aAEvC,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,EAAE;YACzC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;;;QAI5C,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;YAC9B,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,SAAS,CAAC;;;QAIxC,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,EAAE;YACjC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC;;AAG3C,QAAA,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC;;AAGxB;;;;AAIG;AACH,IAAA,eAAe,CAAC,QAAmC,EAAA;AACjD,QAAA,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE;AAClE,YAAA,OAAO,EAAE;;AAGX,QAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ;;AAEhC;AAED;;;;AAIG;MACU,sBAAsB,GAAG,CACpC,MAAmC,GAAA,EAAE,KACjB;AACpB,IAAA,OAAO,IAAI,gBAAgB,CAAC,MAAM,CAAC;AACrC;;;;;"}
|
|
@@ -1,32 +1,59 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
+
var utils = require('./utils.cjs');
|
|
4
|
+
|
|
3
5
|
function formatResultsForLLM(turn, results) {
|
|
4
6
|
let output = '';
|
|
5
7
|
const addSection = (title) => {
|
|
6
8
|
output += `\n=== ${title} ===\n`;
|
|
7
9
|
};
|
|
10
|
+
const references = [];
|
|
8
11
|
// Organic (web) results
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
12
|
+
if (results.organic?.length != null && results.organic.length > 0) {
|
|
13
|
+
addSection(`Web Results, Turn ${turn}`);
|
|
14
|
+
for (let i = 0; i < results.organic.length; i++) {
|
|
15
|
+
const r = results.organic[i];
|
|
13
16
|
output += [
|
|
14
|
-
|
|
15
|
-
`
|
|
17
|
+
`# Source ${i}: "${r.title ?? '(no title)'}"`,
|
|
18
|
+
`Anchor: \\ue202turn${turn}search${i}`,
|
|
16
19
|
`URL: ${r.link}`,
|
|
17
20
|
r.snippet != null ? `Summary: ${r.snippet}` : '',
|
|
18
21
|
r.date != null ? `Date: ${r.date}` : '',
|
|
19
22
|
r.attribution != null ? `Source: ${r.attribution}` : '',
|
|
20
23
|
'',
|
|
21
|
-
'
|
|
22
|
-
|
|
23
|
-
.filter((h) => h.text.trim().length > 0)
|
|
24
|
-
.map((h) => `[Relevance: ${h.score.toFixed(2)}]\n${h.text.trim()}`),
|
|
24
|
+
'\n## Highlights\n\n',
|
|
25
|
+
'',
|
|
25
26
|
'',
|
|
26
27
|
]
|
|
27
28
|
.filter(Boolean)
|
|
28
29
|
.join('\n');
|
|
29
|
-
|
|
30
|
+
(r.highlights ?? [])
|
|
31
|
+
.filter((h) => h.text.trim().length > 0)
|
|
32
|
+
.forEach((h, hIndex) => {
|
|
33
|
+
output += `### Highlight ${hIndex + 1} [Relevance: ${h.score.toFixed(2)}]\n\n`;
|
|
34
|
+
output += '```text\n' + h.text.trim() + '\n```\n\n';
|
|
35
|
+
if (h.references != null && h.references.length) {
|
|
36
|
+
output += 'Core References:\n';
|
|
37
|
+
output += h.references
|
|
38
|
+
.map((ref) => {
|
|
39
|
+
references.push({
|
|
40
|
+
link: ref.reference.originalUrl,
|
|
41
|
+
attribution: utils.getDomainName(ref.reference.originalUrl),
|
|
42
|
+
title: (((ref.reference.title ?? '') || ref.reference.text) ??
|
|
43
|
+
'').split('\n')[0],
|
|
44
|
+
});
|
|
45
|
+
return `- ${ref.type}#${ref.originalIndex + 1}: ${ref.reference.originalUrl}\n\t- Anchor: \\ue202turn${turn}ref${references.length - 1}`;
|
|
46
|
+
})
|
|
47
|
+
.join('\n');
|
|
48
|
+
output += '\n\n';
|
|
49
|
+
}
|
|
50
|
+
if (hIndex < (r.highlights?.length ?? 0) - 1) {
|
|
51
|
+
output += '---\n\n';
|
|
52
|
+
}
|
|
53
|
+
});
|
|
54
|
+
delete results.organic[i].highlights;
|
|
55
|
+
output += '\n';
|
|
56
|
+
}
|
|
30
57
|
}
|
|
31
58
|
// Ignoring these sections for now
|
|
32
59
|
// // Top stories (news)
|
|
@@ -62,54 +89,77 @@ function formatResultsForLLM(turn, results) {
|
|
|
62
89
|
if (results.knowledgeGraph != null) {
|
|
63
90
|
addSection('Knowledge Graph');
|
|
64
91
|
output += [
|
|
65
|
-
|
|
92
|
+
`**Title:** ${results.knowledgeGraph.title ?? '(no title)'}`,
|
|
93
|
+
results.knowledgeGraph.type != null
|
|
94
|
+
? `**Type:** ${results.knowledgeGraph.type}`
|
|
95
|
+
: '',
|
|
66
96
|
results.knowledgeGraph.description != null
|
|
67
|
-
?
|
|
97
|
+
? `**Description:** ${results.knowledgeGraph.description}`
|
|
68
98
|
: '',
|
|
69
|
-
results.knowledgeGraph.
|
|
70
|
-
?
|
|
99
|
+
results.knowledgeGraph.descriptionSource != null
|
|
100
|
+
? `**Description Source:** ${results.knowledgeGraph.descriptionSource}`
|
|
101
|
+
: '',
|
|
102
|
+
results.knowledgeGraph.descriptionLink != null
|
|
103
|
+
? `**Description Link:** ${results.knowledgeGraph.descriptionLink}`
|
|
71
104
|
: '',
|
|
72
105
|
results.knowledgeGraph.imageUrl != null
|
|
73
|
-
?
|
|
106
|
+
? `**Image URL:** ${results.knowledgeGraph.imageUrl}`
|
|
107
|
+
: '',
|
|
108
|
+
results.knowledgeGraph.website != null
|
|
109
|
+
? `**Website:** ${results.knowledgeGraph.website}`
|
|
74
110
|
: '',
|
|
75
111
|
results.knowledgeGraph.attributes != null
|
|
76
|
-
?
|
|
112
|
+
? `**Attributes:**\n\`\`\`json\n${JSON.stringify(results.knowledgeGraph.attributes, null, 2)}\n\`\`\``
|
|
77
113
|
: '',
|
|
78
114
|
'',
|
|
79
115
|
]
|
|
80
116
|
.filter(Boolean)
|
|
81
|
-
.join('\n');
|
|
117
|
+
.join('\n\n');
|
|
82
118
|
}
|
|
83
119
|
// Answer Box
|
|
84
120
|
if (results.answerBox != null) {
|
|
85
121
|
addSection('Answer Box');
|
|
86
122
|
output += [
|
|
87
123
|
results.answerBox.title != null
|
|
88
|
-
?
|
|
89
|
-
: '',
|
|
90
|
-
results.answerBox.answer != null
|
|
91
|
-
? `Answer: ${results.answerBox.answer}`
|
|
124
|
+
? `**Title:** ${results.answerBox.title}`
|
|
92
125
|
: '',
|
|
93
126
|
results.answerBox.snippet != null
|
|
94
|
-
?
|
|
127
|
+
? `**Snippet:** ${results.answerBox.snippet}`
|
|
128
|
+
: '',
|
|
129
|
+
results.answerBox.snippetHighlighted != null
|
|
130
|
+
? `**Snippet Highlighted:** ${results.answerBox.snippetHighlighted
|
|
131
|
+
.map((s) => `\`${s}\``)
|
|
132
|
+
.join(' ')}`
|
|
133
|
+
: '',
|
|
134
|
+
results.answerBox.link != null
|
|
135
|
+
? `**Link:** ${results.answerBox.link}`
|
|
95
136
|
: '',
|
|
96
|
-
results.answerBox.date != null ? `Date: ${results.answerBox.date}` : '',
|
|
97
137
|
'',
|
|
98
138
|
]
|
|
99
139
|
.filter(Boolean)
|
|
100
|
-
.join('\n');
|
|
140
|
+
.join('\n\n');
|
|
101
141
|
}
|
|
102
142
|
// People also ask
|
|
103
143
|
const peopleAlsoAsk = results.peopleAlsoAsk ?? [];
|
|
104
144
|
if (peopleAlsoAsk.length) {
|
|
105
145
|
addSection('People Also Ask');
|
|
106
|
-
peopleAlsoAsk.forEach((p,
|
|
107
|
-
output += [
|
|
146
|
+
peopleAlsoAsk.forEach((p, i) => {
|
|
147
|
+
output += [
|
|
148
|
+
`### Question ${i + 1}:`,
|
|
149
|
+
`"${p.question}"`,
|
|
150
|
+
`${p.snippet != null && p.snippet ? `Snippet: ${p.snippet}}` : ''}`,
|
|
151
|
+
`${p.title != null && p.title ? `Title: ${p.title}` : ''}`,
|
|
152
|
+
`${p.link != null && p.link ? `Link: ${p.link}` : ''}`,
|
|
153
|
+
'',
|
|
154
|
+
]
|
|
108
155
|
.filter(Boolean)
|
|
109
|
-
.join('\n');
|
|
156
|
+
.join('\n\n');
|
|
110
157
|
});
|
|
111
158
|
}
|
|
112
|
-
return
|
|
159
|
+
return {
|
|
160
|
+
output: output.trim(),
|
|
161
|
+
references,
|
|
162
|
+
};
|
|
113
163
|
}
|
|
114
164
|
|
|
115
165
|
exports.formatResultsForLLM = formatResultsForLLM;
|