@librechat/agents 2.4.31 → 2.4.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/events.cjs +3 -3
- package/dist/cjs/events.cjs.map +1 -1
- package/dist/cjs/graphs/Graph.cjs +2 -1
- package/dist/cjs/graphs/Graph.cjs.map +1 -1
- package/dist/cjs/main.cjs +5 -2
- package/dist/cjs/main.cjs.map +1 -1
- package/dist/cjs/messages/ids.cjs +23 -0
- package/dist/cjs/messages/ids.cjs.map +1 -0
- package/dist/cjs/splitStream.cjs +2 -1
- package/dist/cjs/splitStream.cjs.map +1 -1
- package/dist/cjs/stream.cjs +87 -154
- package/dist/cjs/stream.cjs.map +1 -1
- package/dist/cjs/tools/ToolNode.cjs +14 -3
- package/dist/cjs/tools/ToolNode.cjs.map +1 -1
- package/dist/cjs/tools/handlers.cjs +144 -0
- package/dist/cjs/tools/handlers.cjs.map +1 -0
- package/dist/cjs/tools/search/content.cjs +140 -0
- package/dist/cjs/tools/search/content.cjs.map +1 -0
- package/dist/cjs/tools/search/firecrawl.cjs +23 -41
- package/dist/cjs/tools/search/firecrawl.cjs.map +1 -1
- package/dist/cjs/tools/search/format.cjs +161 -74
- package/dist/cjs/tools/search/format.cjs.map +1 -1
- package/dist/cjs/tools/search/highlights.cjs +64 -12
- package/dist/cjs/tools/search/highlights.cjs.map +1 -1
- package/dist/cjs/tools/search/rerankers.cjs +35 -50
- package/dist/cjs/tools/search/rerankers.cjs.map +1 -1
- package/dist/cjs/tools/search/schema.cjs +70 -0
- package/dist/cjs/tools/search/schema.cjs.map +1 -0
- package/dist/cjs/tools/search/search.cjs +153 -69
- package/dist/cjs/tools/search/search.cjs.map +1 -1
- package/dist/cjs/tools/search/tool.cjs +247 -58
- package/dist/cjs/tools/search/tool.cjs.map +1 -1
- package/dist/cjs/tools/search/utils.cjs +66 -0
- package/dist/cjs/tools/search/utils.cjs.map +1 -0
- package/dist/esm/events.mjs +1 -1
- package/dist/esm/events.mjs.map +1 -1
- package/dist/esm/graphs/Graph.mjs +2 -1
- package/dist/esm/graphs/Graph.mjs.map +1 -1
- package/dist/esm/main.mjs +3 -1
- package/dist/esm/main.mjs.map +1 -1
- package/dist/esm/messages/ids.mjs +21 -0
- package/dist/esm/messages/ids.mjs.map +1 -0
- package/dist/esm/splitStream.mjs +2 -1
- package/dist/esm/splitStream.mjs.map +1 -1
- package/dist/esm/stream.mjs +87 -152
- package/dist/esm/stream.mjs.map +1 -1
- package/dist/esm/tools/ToolNode.mjs +14 -3
- package/dist/esm/tools/ToolNode.mjs.map +1 -1
- package/dist/esm/tools/handlers.mjs +141 -0
- package/dist/esm/tools/handlers.mjs.map +1 -0
- package/dist/esm/tools/search/content.mjs +119 -0
- package/dist/esm/tools/search/content.mjs.map +1 -0
- package/dist/esm/tools/search/firecrawl.mjs +24 -41
- package/dist/esm/tools/search/firecrawl.mjs.map +1 -1
- package/dist/esm/tools/search/format.mjs +161 -74
- package/dist/esm/tools/search/format.mjs.map +1 -1
- package/dist/esm/tools/search/highlights.mjs +64 -12
- package/dist/esm/tools/search/highlights.mjs.map +1 -1
- package/dist/esm/tools/search/rerankers.mjs +35 -50
- package/dist/esm/tools/search/rerankers.mjs.map +1 -1
- package/dist/esm/tools/search/schema.mjs +61 -0
- package/dist/esm/tools/search/schema.mjs.map +1 -0
- package/dist/esm/tools/search/search.mjs +153 -69
- package/dist/esm/tools/search/search.mjs.map +1 -1
- package/dist/esm/tools/search/tool.mjs +246 -57
- package/dist/esm/tools/search/tool.mjs.map +1 -1
- package/dist/esm/tools/search/utils.mjs +61 -0
- package/dist/esm/tools/search/utils.mjs.map +1 -0
- package/dist/types/graphs/Graph.d.ts +1 -1
- package/dist/types/index.d.ts +1 -0
- package/dist/types/messages/ids.d.ts +3 -0
- package/dist/types/messages/index.d.ts +1 -0
- package/dist/types/stream.d.ts +0 -8
- package/dist/types/tools/ToolNode.d.ts +6 -0
- package/dist/types/tools/example.d.ts +23 -3
- package/dist/types/tools/handlers.d.ts +8 -0
- package/dist/types/tools/search/content.d.ts +4 -0
- package/dist/types/tools/search/firecrawl.d.ts +7 -86
- package/dist/types/tools/search/format.d.ts +4 -1
- package/dist/types/tools/search/highlights.d.ts +1 -1
- package/dist/types/tools/search/rerankers.d.ts +8 -5
- package/dist/types/tools/search/schema.d.ts +16 -0
- package/dist/types/tools/search/search.d.ts +2 -2
- package/dist/types/tools/search/test.d.ts +1 -0
- package/dist/types/tools/search/tool.d.ts +25 -4
- package/dist/types/tools/search/types.d.ts +443 -53
- package/dist/types/tools/search/utils.d.ts +10 -0
- package/package.json +9 -7
- package/src/events.ts +49 -15
- package/src/graphs/Graph.ts +6 -2
- package/src/index.ts +1 -0
- package/src/messages/ids.ts +26 -0
- package/src/messages/index.ts +1 -0
- package/src/scripts/search.ts +8 -3
- package/src/splitStream.test.ts +132 -71
- package/src/splitStream.ts +2 -1
- package/src/stream.ts +94 -183
- package/src/tools/ToolNode.ts +37 -14
- package/src/tools/handlers.ts +167 -0
- package/src/tools/search/content.test.ts +173 -0
- package/src/tools/search/content.ts +147 -0
- package/src/tools/search/firecrawl.ts +36 -148
- package/src/tools/search/format.ts +205 -74
- package/src/tools/search/highlights.ts +99 -16
- package/src/tools/search/output.md +2775 -0
- package/src/tools/search/rerankers.ts +50 -62
- package/src/tools/search/schema.ts +63 -0
- package/src/tools/search/search.ts +232 -116
- package/src/tools/search/test.html +884 -0
- package/src/tools/search/test.md +643 -0
- package/src/tools/search/test.ts +159 -0
- package/src/tools/search/tool.ts +363 -87
- package/src/tools/search/types.ts +503 -61
- package/src/tools/search/utils.ts +79 -0
- package/src/utils/llmConfig.ts +1 -1
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var nanoid = require('nanoid');
|
|
4
|
+
var _enum = require('../common/enum.cjs');
|
|
5
|
+
require('../messages/core.cjs');
|
|
6
|
+
var ids = require('../messages/ids.cjs');
|
|
7
|
+
require('@langchain/core/messages');
|
|
8
|
+
|
|
9
|
+
/* eslint-disable no-console */
|
|
10
|
+
// src/tools/handlers.ts
|
|
11
|
+
function handleToolCallChunks({ graph, stepKey, toolCallChunks, }) {
|
|
12
|
+
let prevStepId;
|
|
13
|
+
let prevRunStep;
|
|
14
|
+
try {
|
|
15
|
+
prevStepId = graph.getStepIdByKey(stepKey, graph.contentData.length - 1);
|
|
16
|
+
prevRunStep = graph.getRunStep(prevStepId);
|
|
17
|
+
}
|
|
18
|
+
catch {
|
|
19
|
+
/** Edge Case: If no previous step exists, create a new message creation step */
|
|
20
|
+
const message_id = ids.getMessageId(stepKey, graph, true) ?? '';
|
|
21
|
+
prevStepId = graph.dispatchRunStep(stepKey, {
|
|
22
|
+
type: _enum.StepTypes.MESSAGE_CREATION,
|
|
23
|
+
message_creation: {
|
|
24
|
+
message_id,
|
|
25
|
+
},
|
|
26
|
+
});
|
|
27
|
+
prevRunStep = graph.getRunStep(prevStepId);
|
|
28
|
+
}
|
|
29
|
+
const _stepId = graph.getStepIdByKey(stepKey, prevRunStep?.index);
|
|
30
|
+
/** Edge Case: Tool Call Run Step or `tool_call_ids` never dispatched */
|
|
31
|
+
const tool_calls = prevStepId && prevRunStep && prevRunStep.type === _enum.StepTypes.MESSAGE_CREATION
|
|
32
|
+
? []
|
|
33
|
+
: undefined;
|
|
34
|
+
/** Edge Case: `id` and `name` fields cannot be empty strings */
|
|
35
|
+
for (const toolCallChunk of toolCallChunks) {
|
|
36
|
+
if (toolCallChunk.name === '') {
|
|
37
|
+
toolCallChunk.name = undefined;
|
|
38
|
+
}
|
|
39
|
+
if (toolCallChunk.id === '') {
|
|
40
|
+
toolCallChunk.id = undefined;
|
|
41
|
+
}
|
|
42
|
+
else if (tool_calls != null &&
|
|
43
|
+
toolCallChunk.id != null &&
|
|
44
|
+
toolCallChunk.name != null) {
|
|
45
|
+
tool_calls.push({
|
|
46
|
+
args: {},
|
|
47
|
+
id: toolCallChunk.id,
|
|
48
|
+
name: toolCallChunk.name,
|
|
49
|
+
type: _enum.ToolCallTypes.TOOL_CALL,
|
|
50
|
+
});
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
let stepId = _stepId;
|
|
54
|
+
const alreadyDispatched = prevRunStep?.type === _enum.StepTypes.MESSAGE_CREATION &&
|
|
55
|
+
graph.messageStepHasToolCalls.has(prevStepId);
|
|
56
|
+
if (!alreadyDispatched && tool_calls?.length === toolCallChunks.length) {
|
|
57
|
+
graph.dispatchMessageDelta(prevStepId, {
|
|
58
|
+
content: [
|
|
59
|
+
{
|
|
60
|
+
type: _enum.ContentTypes.TEXT,
|
|
61
|
+
text: '',
|
|
62
|
+
tool_call_ids: tool_calls.map((tc) => tc.id ?? ''),
|
|
63
|
+
},
|
|
64
|
+
],
|
|
65
|
+
});
|
|
66
|
+
graph.messageStepHasToolCalls.set(prevStepId, true);
|
|
67
|
+
stepId = graph.dispatchRunStep(stepKey, {
|
|
68
|
+
type: _enum.StepTypes.TOOL_CALLS,
|
|
69
|
+
tool_calls,
|
|
70
|
+
});
|
|
71
|
+
}
|
|
72
|
+
graph.dispatchRunStepDelta(stepId, {
|
|
73
|
+
type: _enum.StepTypes.TOOL_CALLS,
|
|
74
|
+
tool_calls: toolCallChunks,
|
|
75
|
+
});
|
|
76
|
+
}
|
|
77
|
+
const handleToolCalls = (toolCalls, metadata, graph) => {
|
|
78
|
+
if (!graph || !metadata) {
|
|
79
|
+
console.warn(`Graph or metadata not found in ${event} event`);
|
|
80
|
+
return;
|
|
81
|
+
}
|
|
82
|
+
if (!toolCalls) {
|
|
83
|
+
return;
|
|
84
|
+
}
|
|
85
|
+
if (toolCalls.length === 0) {
|
|
86
|
+
return;
|
|
87
|
+
}
|
|
88
|
+
const stepKey = graph.getStepKey(metadata);
|
|
89
|
+
for (const tool_call of toolCalls) {
|
|
90
|
+
const toolCallId = tool_call.id ?? `toolu_${nanoid.nanoid()}`;
|
|
91
|
+
tool_call.id = toolCallId;
|
|
92
|
+
if (!toolCallId || graph.toolCallStepIds.has(toolCallId)) {
|
|
93
|
+
continue;
|
|
94
|
+
}
|
|
95
|
+
let prevStepId = '';
|
|
96
|
+
let prevRunStep;
|
|
97
|
+
try {
|
|
98
|
+
prevStepId = graph.getStepIdByKey(stepKey, graph.contentData.length - 1);
|
|
99
|
+
prevRunStep = graph.getRunStep(prevStepId);
|
|
100
|
+
}
|
|
101
|
+
catch {
|
|
102
|
+
// no previous step
|
|
103
|
+
}
|
|
104
|
+
const dispatchToolCallIds = (lastMessageStepId) => {
|
|
105
|
+
graph.dispatchMessageDelta(lastMessageStepId, {
|
|
106
|
+
content: [
|
|
107
|
+
{
|
|
108
|
+
type: 'text',
|
|
109
|
+
text: '',
|
|
110
|
+
tool_call_ids: [toolCallId],
|
|
111
|
+
},
|
|
112
|
+
],
|
|
113
|
+
});
|
|
114
|
+
};
|
|
115
|
+
/* If the previous step exists and is a message creation */
|
|
116
|
+
if (prevStepId &&
|
|
117
|
+
prevRunStep &&
|
|
118
|
+
prevRunStep.type === _enum.StepTypes.MESSAGE_CREATION) {
|
|
119
|
+
dispatchToolCallIds(prevStepId);
|
|
120
|
+
graph.messageStepHasToolCalls.set(prevStepId, true);
|
|
121
|
+
/* If the previous step doesn't exist or is not a message creation */
|
|
122
|
+
}
|
|
123
|
+
else if (!prevRunStep ||
|
|
124
|
+
prevRunStep.type !== _enum.StepTypes.MESSAGE_CREATION) {
|
|
125
|
+
const messageId = ids.getMessageId(stepKey, graph, true) ?? '';
|
|
126
|
+
const stepId = graph.dispatchRunStep(stepKey, {
|
|
127
|
+
type: _enum.StepTypes.MESSAGE_CREATION,
|
|
128
|
+
message_creation: {
|
|
129
|
+
message_id: messageId,
|
|
130
|
+
},
|
|
131
|
+
});
|
|
132
|
+
dispatchToolCallIds(stepId);
|
|
133
|
+
graph.messageStepHasToolCalls.set(prevStepId, true);
|
|
134
|
+
}
|
|
135
|
+
graph.dispatchRunStep(stepKey, {
|
|
136
|
+
type: _enum.StepTypes.TOOL_CALLS,
|
|
137
|
+
tool_calls: [tool_call],
|
|
138
|
+
});
|
|
139
|
+
}
|
|
140
|
+
};
|
|
141
|
+
|
|
142
|
+
exports.handleToolCallChunks = handleToolCallChunks;
|
|
143
|
+
exports.handleToolCalls = handleToolCalls;
|
|
144
|
+
//# sourceMappingURL=handlers.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"handlers.cjs","sources":["../../../src/tools/handlers.ts"],"sourcesContent":["/* eslint-disable no-console */\n// src/tools/handlers.ts\nimport { nanoid } from 'nanoid';\nimport type { ToolCall, ToolCallChunk } from '@langchain/core/messages/tool';\nimport type { Graph } from '@/graphs';\nimport type * as t from '@/types';\nimport { StepTypes, ContentTypes, ToolCallTypes } from '@/common';\nimport { getMessageId } from '@/messages';\n\nexport function handleToolCallChunks({\n graph,\n stepKey,\n toolCallChunks,\n}: {\n graph: Graph;\n stepKey: string;\n toolCallChunks: ToolCallChunk[];\n}): void {\n let prevStepId: string;\n let prevRunStep: t.RunStep | undefined;\n try {\n prevStepId = graph.getStepIdByKey(stepKey, graph.contentData.length - 1);\n prevRunStep = graph.getRunStep(prevStepId);\n } catch {\n /** Edge Case: If no previous step exists, create a new message creation step */\n const message_id = getMessageId(stepKey, graph, true) ?? '';\n prevStepId = graph.dispatchRunStep(stepKey, {\n type: StepTypes.MESSAGE_CREATION,\n message_creation: {\n message_id,\n },\n });\n prevRunStep = graph.getRunStep(prevStepId);\n }\n\n const _stepId = graph.getStepIdByKey(stepKey, prevRunStep?.index);\n\n /** Edge Case: Tool Call Run Step or `tool_call_ids` never dispatched */\n const tool_calls: ToolCall[] | undefined =\n prevStepId && prevRunStep && prevRunStep.type === StepTypes.MESSAGE_CREATION\n ? []\n : undefined;\n\n /** Edge Case: `id` and `name` fields cannot be empty strings */\n for (const toolCallChunk of toolCallChunks) {\n if (toolCallChunk.name === '') {\n toolCallChunk.name = undefined;\n }\n if (toolCallChunk.id === '') {\n toolCallChunk.id = undefined;\n } else if (\n tool_calls != null &&\n toolCallChunk.id != null &&\n toolCallChunk.name != null\n ) {\n tool_calls.push({\n args: {},\n id: toolCallChunk.id,\n name: toolCallChunk.name,\n type: ToolCallTypes.TOOL_CALL,\n });\n }\n }\n\n let stepId: string = _stepId;\n const alreadyDispatched =\n prevRunStep?.type === StepTypes.MESSAGE_CREATION &&\n graph.messageStepHasToolCalls.has(prevStepId);\n if (!alreadyDispatched && tool_calls?.length === toolCallChunks.length) {\n graph.dispatchMessageDelta(prevStepId, {\n content: [\n {\n type: ContentTypes.TEXT,\n text: '',\n tool_call_ids: tool_calls.map((tc) => tc.id ?? ''),\n },\n ],\n });\n graph.messageStepHasToolCalls.set(prevStepId, true);\n stepId = graph.dispatchRunStep(stepKey, {\n type: StepTypes.TOOL_CALLS,\n tool_calls,\n });\n }\n graph.dispatchRunStepDelta(stepId, {\n type: StepTypes.TOOL_CALLS,\n tool_calls: toolCallChunks,\n });\n}\n\nexport const handleToolCalls = (\n toolCalls?: ToolCall[],\n metadata?: Record<string, unknown>,\n graph?: Graph\n): void => {\n if (!graph || !metadata) {\n console.warn(`Graph or metadata not found in ${event} event`);\n return;\n }\n\n if (!toolCalls) {\n return;\n }\n\n if (toolCalls.length === 0) {\n return;\n }\n\n const stepKey = graph.getStepKey(metadata);\n\n for (const tool_call of toolCalls) {\n const toolCallId = tool_call.id ?? `toolu_${nanoid()}`;\n tool_call.id = toolCallId;\n if (!toolCallId || graph.toolCallStepIds.has(toolCallId)) {\n continue;\n }\n\n let prevStepId = '';\n let prevRunStep: t.RunStep | undefined;\n try {\n prevStepId = graph.getStepIdByKey(stepKey, graph.contentData.length - 1);\n prevRunStep = graph.getRunStep(prevStepId);\n } catch {\n // no previous step\n }\n\n const dispatchToolCallIds = (lastMessageStepId: string): void => {\n graph.dispatchMessageDelta(lastMessageStepId, {\n content: [\n {\n type: 'text',\n text: '',\n tool_call_ids: [toolCallId],\n },\n ],\n });\n };\n /* If the previous step exists and is a message creation */\n if (\n prevStepId &&\n prevRunStep &&\n prevRunStep.type === StepTypes.MESSAGE_CREATION\n ) {\n dispatchToolCallIds(prevStepId);\n graph.messageStepHasToolCalls.set(prevStepId, true);\n /* If the previous step doesn't exist or is not a message creation */\n } else if (\n !prevRunStep ||\n prevRunStep.type !== StepTypes.MESSAGE_CREATION\n ) {\n const messageId = getMessageId(stepKey, graph, true) ?? '';\n const stepId = graph.dispatchRunStep(stepKey, {\n type: StepTypes.MESSAGE_CREATION,\n message_creation: {\n message_id: messageId,\n },\n });\n dispatchToolCallIds(stepId);\n graph.messageStepHasToolCalls.set(prevStepId, true);\n }\n\n graph.dispatchRunStep(stepKey, {\n type: StepTypes.TOOL_CALLS,\n tool_calls: [tool_call],\n });\n }\n};\n"],"names":["getMessageId","StepTypes","ToolCallTypes","ContentTypes","nanoid"],"mappings":";;;;;;;;AAAA;AACA;AAQM,SAAU,oBAAoB,CAAC,EACnC,KAAK,EACL,OAAO,EACP,cAAc,GAKf,EAAA;AACC,IAAA,IAAI,UAAkB;AACtB,IAAA,IAAI,WAAkC;AACtC,IAAA,IAAI;AACF,QAAA,UAAU,GAAG,KAAK,CAAC,cAAc,CAAC,OAAO,EAAE,KAAK,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC;AACxE,QAAA,WAAW,GAAG,KAAK,CAAC,UAAU,CAAC,UAAU,CAAC;;AAC1C,IAAA,MAAM;;AAEN,QAAA,MAAM,UAAU,GAAGA,gBAAY,CAAC,OAAO,EAAE,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE;AAC3D,QAAA,UAAU,GAAG,KAAK,CAAC,eAAe,CAAC,OAAO,EAAE;YAC1C,IAAI,EAAEC,eAAS,CAAC,gBAAgB;AAChC,YAAA,gBAAgB,EAAE;gBAChB,UAAU;AACX,aAAA;AACF,SAAA,CAAC;AACF,QAAA,WAAW,GAAG,KAAK,CAAC,UAAU,CAAC,UAAU,CAAC;;AAG5C,IAAA,MAAM,OAAO,GAAG,KAAK,CAAC,cAAc,CAAC,OAAO,EAAE,WAAW,EAAE,KAAK,CAAC;;AAGjE,IAAA,MAAM,UAAU,GACd,UAAU,IAAI,WAAW,IAAI,WAAW,CAAC,IAAI,KAAKA,eAAS,CAAC;AAC1D,UAAE;UACA,SAAS;;AAGf,IAAA,KAAK,MAAM,aAAa,IAAI,cAAc,EAAE;AAC1C,QAAA,IAAI,aAAa,CAAC,IAAI,KAAK,EAAE,EAAE;AAC7B,YAAA,aAAa,CAAC,IAAI,GAAG,SAAS;;AAEhC,QAAA,IAAI,aAAa,CAAC,EAAE,KAAK,EAAE,EAAE;AAC3B,YAAA,aAAa,CAAC,EAAE,GAAG,SAAS;;aACvB,IACL,UAAU,IAAI,IAAI;YAClB,aAAa,CAAC,EAAE,IAAI,IAAI;AACxB,YAAA,aAAa,CAAC,IAAI,IAAI,IAAI,EAC1B;YACA,UAAU,CAAC,IAAI,CAAC;AACd,gBAAA,IAAI,EAAE,EAAE;gBACR,EAAE,EAAE,aAAa,CAAC,EAAE;gBACpB,IAAI,EAAE,aAAa,CAAC,IAAI;gBACxB,IAAI,EAAEC,mBAAa,CAAC,SAAS;AAC9B,aAAA,CAAC;;;IAIN,IAAI,MAAM,GAAW,OAAO;IAC5B,MAAM,iBAAiB,GACrB,WAAW,EAAE,IAAI,KAAKD,eAAS,CAAC,gBAAgB;AAChD,QAAA,KAAK,CAAC,uBAAuB,CAAC,GAAG,CAAC,UAAU,CAAC;IAC/C,IAAI,CAAC,iBAAiB,IAAI,UAAU,EAAE,MAAM,KAAK,cAAc,CAAC,MAAM,EAAE;AACtE,QAAA,KAAK,CAAC,oBAAoB,CAAC,UAAU,EAAE;AACrC,YAAA,OAAO,EAAE;AACP,gBAAA;oBACE,IAAI,EAAEE,kBAAY,CAAC,IAAI;AACvB,oBAAA,IAAI,EAAE,EAAE;AACR,oBAAA,aAAa,EAAE,UAAU,CAAC,GAAG,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE,IAAI,EAAE,CAAC;AACnD,iBAAA;AACF,aAAA;AACF,SAAA,CAAC;QACF,KAAK,CAAC,uBAAuB,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC;AACnD,QAAA,MAAM,GAAG,KAAK,CAAC,eAAe,CAAC,OAAO,EAAE;YACtC,IAAI,EAAEF,eAAS,CAAC,UAAU;YAC1B,UAAU;AACX,SAAA,CAAC;;AAEJ,IAAA,KAAK,CAAC,oBAAoB,CAAC,MAAM,EAAE;QACjC,IAAI,EAAEA,eAAS,CAAC,UAAU;AAC1B,QAAA,UAAU,EAAE,cAAc;AAC3B,KAAA,CAAC;AACJ;AAEa,MAAA,eAAe,GAAG,CAC7B,SAAsB,EACtB,QAAkC,EAClC,KAAa,KACL;AACR,IAAA,IAAI,CAAC,KAAK,IAAI,CAAC,QAAQ,EAAE;AACvB,QAAA,OAAO,CAAC,IAAI,CAAC,kCAAkC,KAAK,CAAA,MAAA,CAAQ,CAAC;QAC7D;;IAGF,IAAI,CAAC,SAAS,EAAE;QACd;;AAGF,IAAA,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE;QAC1B;;IAGF,MAAM,OAAO,GAAG,KAAK,CAAC,UAAU,CAAC,QAAQ,CAAC;AAE1C,IAAA,KAAK,MAAM,SAAS,IAAI,SAAS,EAAE;QACjC,MAAM,UAAU,GAAG,SAAS,CAAC,EAAE,IAAI,CAAS,MAAA,EAAAG,aAAM,EAAE,CAAA,CAAE;AACtD,QAAA,SAAS,CAAC,EAAE,GAAG,UAAU;AACzB,QAAA,IAAI,CAAC,UAAU,IAAI,KAAK,CAAC,eAAe,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE;YACxD;;QAGF,IAAI,UAAU,GAAG,EAAE;AACnB,QAAA,IAAI,WAAkC;AACtC,QAAA,IAAI;AACF,YAAA,UAAU,GAAG,KAAK,CAAC,cAAc,CAAC,OAAO,EAAE,KAAK,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC;AACxE,YAAA,WAAW,GAAG,KAAK,CAAC,UAAU,CAAC,UAAU,CAAC;;AAC1C,QAAA,MAAM;;;AAIR,QAAA,MAAM,mBAAmB,GAAG,CAAC,iBAAyB,KAAU;AAC9D,YAAA,KAAK,CAAC,oBAAoB,CAAC,iBAAiB,EAAE;AAC5C,gBAAA,OAAO,EAAE;AACP,oBAAA;AACE,wBAAA,IAAI,EAAE,MAAM;AACZ,wBAAA,IAAI,EAAE,EAAE;wBACR,aAAa,EAAE,CAAC,UAAU,CAAC;AAC5B,qBAAA;AACF,iBAAA;AACF,aAAA,CAAC;AACJ,SAAC;;AAED,QAAA,IACE,UAAU;YACV,WAAW;AACX,YAAA,WAAW,CAAC,IAAI,KAAKH,eAAS,CAAC,gBAAgB,EAC/C;YACA,mBAAmB,CAAC,UAAU,CAAC;YAC/B,KAAK,CAAC,uBAAuB,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC;;;AAE9C,aAAA,IACL,CAAC,WAAW;AACZ,YAAA,WAAW,CAAC,IAAI,KAAKA,eAAS,CAAC,gBAAgB,EAC/C;AACA,YAAA,MAAM,SAAS,GAAGD,gBAAY,CAAC,OAAO,EAAE,KAAK,EAAE,IAAI,CAAC,IAAI,EAAE;AAC1D,YAAA,MAAM,MAAM,GAAG,KAAK,CAAC,eAAe,CAAC,OAAO,EAAE;gBAC5C,IAAI,EAAEC,eAAS,CAAC,gBAAgB;AAChC,gBAAA,gBAAgB,EAAE;AAChB,oBAAA,UAAU,EAAE,SAAS;AACtB,iBAAA;AACF,aAAA,CAAC;YACF,mBAAmB,CAAC,MAAM,CAAC;YAC3B,KAAK,CAAC,uBAAuB,CAAC,GAAG,CAAC,UAAU,EAAE,IAAI,CAAC;;AAGrD,QAAA,KAAK,CAAC,eAAe,CAAC,OAAO,EAAE;YAC7B,IAAI,EAAEA,eAAS,CAAC,UAAU;YAC1B,UAAU,EAAE,CAAC,SAAS,CAAC;AACxB,SAAA,CAAC;;AAEN;;;;;"}
|
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var cheerio = require('cheerio');
|
|
4
|
+
|
|
5
|
+
function _interopNamespaceDefault(e) {
|
|
6
|
+
var n = Object.create(null);
|
|
7
|
+
if (e) {
|
|
8
|
+
Object.keys(e).forEach(function (k) {
|
|
9
|
+
if (k !== 'default') {
|
|
10
|
+
var d = Object.getOwnPropertyDescriptor(e, k);
|
|
11
|
+
Object.defineProperty(n, k, d.get ? d : {
|
|
12
|
+
enumerable: true,
|
|
13
|
+
get: function () { return e[k]; }
|
|
14
|
+
});
|
|
15
|
+
}
|
|
16
|
+
});
|
|
17
|
+
}
|
|
18
|
+
n.default = e;
|
|
19
|
+
return Object.freeze(n);
|
|
20
|
+
}
|
|
21
|
+
|
|
22
|
+
var cheerio__namespace = /*#__PURE__*/_interopNamespaceDefault(cheerio);
|
|
23
|
+
|
|
24
|
+
function processContent(html, markdown) {
|
|
25
|
+
const linkMap = new Map();
|
|
26
|
+
const imageMap = new Map();
|
|
27
|
+
const videoMap = new Map();
|
|
28
|
+
const iframeMap = new Map();
|
|
29
|
+
const $ = cheerio__namespace.load(html, {
|
|
30
|
+
xmlMode: false,
|
|
31
|
+
});
|
|
32
|
+
// Extract all media references
|
|
33
|
+
$('a[href]').each((_, el) => {
|
|
34
|
+
const href = $(el).attr('href');
|
|
35
|
+
if (href != null && href) {
|
|
36
|
+
linkMap.set(href, {
|
|
37
|
+
originalUrl: href,
|
|
38
|
+
title: $(el).attr('title'),
|
|
39
|
+
text: $(el).text().trim(),
|
|
40
|
+
});
|
|
41
|
+
}
|
|
42
|
+
});
|
|
43
|
+
$('img[src]').each((_, el) => {
|
|
44
|
+
const src = $(el).attr('src');
|
|
45
|
+
if (src != null && src) {
|
|
46
|
+
imageMap.set(src, {
|
|
47
|
+
originalUrl: src,
|
|
48
|
+
title: $(el).attr('alt') ?? $(el).attr('title'),
|
|
49
|
+
});
|
|
50
|
+
}
|
|
51
|
+
});
|
|
52
|
+
// Handle videos (dedicated video elements and video platforms in iframes)
|
|
53
|
+
$('video[src], iframe[src*="youtube"], iframe[src*="vimeo"]').each((_, el) => {
|
|
54
|
+
const src = $(el).attr('src');
|
|
55
|
+
if (src != null && src) {
|
|
56
|
+
videoMap.set(src, {
|
|
57
|
+
originalUrl: src,
|
|
58
|
+
title: $(el).attr('title'),
|
|
59
|
+
});
|
|
60
|
+
}
|
|
61
|
+
});
|
|
62
|
+
// Handle all other generic iframes that aren't already captured as videos
|
|
63
|
+
$('iframe').each((_, el) => {
|
|
64
|
+
const src = $(el).attr('src');
|
|
65
|
+
if (src != null &&
|
|
66
|
+
src &&
|
|
67
|
+
!src.includes('youtube') &&
|
|
68
|
+
!src.includes('vimeo')) {
|
|
69
|
+
iframeMap.set(src, {
|
|
70
|
+
originalUrl: src,
|
|
71
|
+
title: $(el).attr('title'),
|
|
72
|
+
});
|
|
73
|
+
}
|
|
74
|
+
});
|
|
75
|
+
// Create lookup maps with indices
|
|
76
|
+
const linkIndexMap = new Map();
|
|
77
|
+
const imageIndexMap = new Map();
|
|
78
|
+
const videoIndexMap = new Map();
|
|
79
|
+
const iframeIndexMap = new Map();
|
|
80
|
+
Array.from(linkMap.keys()).forEach((url, i) => linkIndexMap.set(url, i + 1));
|
|
81
|
+
Array.from(imageMap.keys()).forEach((url, i) => imageIndexMap.set(url, i + 1));
|
|
82
|
+
Array.from(videoMap.keys()).forEach((url, i) => videoIndexMap.set(url, i + 1));
|
|
83
|
+
Array.from(iframeMap.keys()).forEach((url, i) => iframeIndexMap.set(url, i + 1));
|
|
84
|
+
// Process the markdown
|
|
85
|
+
let result = markdown;
|
|
86
|
+
// Replace each URL one by one, starting with the longest URLs first to avoid partial matches
|
|
87
|
+
const allUrls = [
|
|
88
|
+
...Array.from(imageMap.keys()).map((url) => ({
|
|
89
|
+
url,
|
|
90
|
+
type: 'image',
|
|
91
|
+
idx: imageIndexMap.get(url),
|
|
92
|
+
})),
|
|
93
|
+
...Array.from(videoMap.keys()).map((url) => ({
|
|
94
|
+
url,
|
|
95
|
+
type: 'video',
|
|
96
|
+
idx: videoIndexMap.get(url),
|
|
97
|
+
})),
|
|
98
|
+
...Array.from(iframeMap.keys()).map((url) => ({
|
|
99
|
+
url,
|
|
100
|
+
type: 'iframe',
|
|
101
|
+
idx: iframeIndexMap.get(url),
|
|
102
|
+
})),
|
|
103
|
+
...Array.from(linkMap.keys()).map((url) => ({
|
|
104
|
+
url,
|
|
105
|
+
type: 'link',
|
|
106
|
+
idx: linkIndexMap.get(url),
|
|
107
|
+
})),
|
|
108
|
+
].sort((a, b) => b.url.length - a.url.length);
|
|
109
|
+
// Create a function to escape special characters in URLs for regex
|
|
110
|
+
function escapeRegex(string) {
|
|
111
|
+
return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
|
|
112
|
+
}
|
|
113
|
+
// Replace each URL in the markdown
|
|
114
|
+
for (const { url, type, idx } of allUrls) {
|
|
115
|
+
// Create a regex that captures URLs in markdown links
|
|
116
|
+
const regex = new RegExp(`\\(${escapeRegex(url)}(?:\\s+"[^"]*")?\\)`, 'g');
|
|
117
|
+
result = result.replace(regex, (match) => {
|
|
118
|
+
// Keep any title attribute that might exist
|
|
119
|
+
const titleMatch = match.match(/\s+"([^"]*)"/);
|
|
120
|
+
const titlePart = titleMatch ? ` "${titleMatch[1]}"` : '';
|
|
121
|
+
return `(${type}#${idx}${titlePart})`;
|
|
122
|
+
});
|
|
123
|
+
}
|
|
124
|
+
iframeMap.clear();
|
|
125
|
+
const links = Array.from(linkMap.values());
|
|
126
|
+
linkMap.clear();
|
|
127
|
+
const images = Array.from(imageMap.values());
|
|
128
|
+
imageMap.clear();
|
|
129
|
+
const videos = Array.from(videoMap.values());
|
|
130
|
+
videoMap.clear();
|
|
131
|
+
return {
|
|
132
|
+
markdown: result,
|
|
133
|
+
links,
|
|
134
|
+
images,
|
|
135
|
+
videos,
|
|
136
|
+
};
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
exports.processContent = processContent;
|
|
140
|
+
//# sourceMappingURL=content.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"content.cjs","sources":["../../../../src/tools/search/content.ts"],"sourcesContent":["import * as cheerio from 'cheerio';\nimport type { References, MediaReference } from './types';\n\nexport function processContent(\n html: string,\n markdown: string\n): {\n markdown: string;\n} & References {\n const linkMap = new Map<string, MediaReference>();\n const imageMap = new Map<string, MediaReference>();\n const videoMap = new Map<string, MediaReference>();\n const iframeMap = new Map<string, MediaReference>();\n\n const $ = cheerio.load(html, {\n xmlMode: false,\n });\n\n // Extract all media references\n $('a[href]').each((_, el) => {\n const href = $(el).attr('href');\n if (href != null && href) {\n linkMap.set(href, {\n originalUrl: href,\n title: $(el).attr('title'),\n text: $(el).text().trim(),\n });\n }\n });\n\n $('img[src]').each((_, el) => {\n const src = $(el).attr('src');\n if (src != null && src) {\n imageMap.set(src, {\n originalUrl: src,\n title: $(el).attr('alt') ?? $(el).attr('title'),\n });\n }\n });\n\n // Handle videos (dedicated video elements and video platforms in iframes)\n $('video[src], iframe[src*=\"youtube\"], iframe[src*=\"vimeo\"]').each(\n (_, el) => {\n const src = $(el).attr('src');\n if (src != null && src) {\n videoMap.set(src, {\n originalUrl: src,\n title: $(el).attr('title'),\n });\n }\n }\n );\n\n // Handle all other generic iframes that aren't already captured as videos\n $('iframe').each((_, el) => {\n const src = $(el).attr('src');\n if (\n src != null &&\n src &&\n !src.includes('youtube') &&\n !src.includes('vimeo')\n ) {\n iframeMap.set(src, {\n originalUrl: src,\n title: $(el).attr('title'),\n });\n }\n });\n\n // Create lookup maps with indices\n const linkIndexMap = new Map<string, number>();\n const imageIndexMap = new Map<string, number>();\n const videoIndexMap = new Map<string, number>();\n const iframeIndexMap = new Map<string, number>();\n\n Array.from(linkMap.keys()).forEach((url, i) => linkIndexMap.set(url, i + 1));\n Array.from(imageMap.keys()).forEach((url, i) =>\n imageIndexMap.set(url, i + 1)\n );\n Array.from(videoMap.keys()).forEach((url, i) =>\n videoIndexMap.set(url, i + 1)\n );\n Array.from(iframeMap.keys()).forEach((url, i) =>\n iframeIndexMap.set(url, i + 1)\n );\n\n // Process the markdown\n let result = markdown;\n\n // Replace each URL one by one, starting with the longest URLs first to avoid partial matches\n const allUrls = [\n ...Array.from(imageMap.keys()).map((url) => ({\n url,\n type: 'image',\n idx: imageIndexMap.get(url),\n })),\n ...Array.from(videoMap.keys()).map((url) => ({\n url,\n type: 'video',\n idx: videoIndexMap.get(url),\n })),\n ...Array.from(iframeMap.keys()).map((url) => ({\n url,\n type: 'iframe',\n idx: iframeIndexMap.get(url),\n })),\n ...Array.from(linkMap.keys()).map((url) => ({\n url,\n type: 'link',\n idx: linkIndexMap.get(url),\n })),\n ].sort((a, b) => b.url.length - a.url.length);\n\n // Create a function to escape special characters in URLs for regex\n function escapeRegex(string: string): string {\n return string.replace(/[.*+?^${}()|[\\]\\\\]/g, '\\\\$&');\n }\n\n // Replace each URL in the markdown\n for (const { url, type, idx } of allUrls) {\n // Create a regex that captures URLs in markdown links\n const regex = new RegExp(`\\\\(${escapeRegex(url)}(?:\\\\s+\"[^\"]*\")?\\\\)`, 'g');\n\n result = result.replace(regex, (match) => {\n // Keep any title attribute that might exist\n const titleMatch = match.match(/\\s+\"([^\"]*)\"/);\n const titlePart = titleMatch ? ` \"${titleMatch[1]}\"` : '';\n\n return `(${type}#${idx}${titlePart})`;\n });\n }\n\n iframeMap.clear();\n const links = Array.from(linkMap.values());\n linkMap.clear();\n const images = Array.from(imageMap.values());\n imageMap.clear();\n const videos = Array.from(videoMap.values());\n videoMap.clear();\n\n return {\n markdown: result,\n links,\n images,\n videos,\n };\n}\n"],"names":["cheerio"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;AAGgB,SAAA,cAAc,CAC5B,IAAY,EACZ,QAAgB,EAAA;AAIhB,IAAA,MAAM,OAAO,GAAG,IAAI,GAAG,EAA0B;AACjD,IAAA,MAAM,QAAQ,GAAG,IAAI,GAAG,EAA0B;AAClD,IAAA,MAAM,QAAQ,GAAG,IAAI,GAAG,EAA0B;AAClD,IAAA,MAAM,SAAS,GAAG,IAAI,GAAG,EAA0B;AAEnD,IAAA,MAAM,CAAC,GAAGA,kBAAO,CAAC,IAAI,CAAC,IAAI,EAAE;AAC3B,QAAA,OAAO,EAAE,KAAK;AACf,KAAA,CAAC;;IAGF,CAAC,CAAC,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,KAAI;QAC1B,MAAM,IAAI,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC;AAC/B,QAAA,IAAI,IAAI,IAAI,IAAI,IAAI,IAAI,EAAE;AACxB,YAAA,OAAO,CAAC,GAAG,CAAC,IAAI,EAAE;AAChB,gBAAA,WAAW,EAAE,IAAI;gBACjB,KAAK,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;gBAC1B,IAAI,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE;AAC1B,aAAA,CAAC;;AAEN,KAAC,CAAC;IAEF,CAAC,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,KAAI;QAC3B,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC;AAC7B,QAAA,IAAI,GAAG,IAAI,IAAI,IAAI,GAAG,EAAE;AACtB,YAAA,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE;AAChB,gBAAA,WAAW,EAAE,GAAG;AAChB,gBAAA,KAAK,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;AAChD,aAAA,CAAC;;AAEN,KAAC,CAAC;;IAGF,CAAC,CAAC,0DAA0D,CAAC,CAAC,IAAI,CAChE,CAAC,CAAC,EAAE,EAAE,KAAI;QACR,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC;AAC7B,QAAA,IAAI,GAAG,IAAI,IAAI,IAAI,GAAG,EAAE;AACtB,YAAA,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE;AAChB,gBAAA,WAAW,EAAE,GAAG;gBAChB,KAAK,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;AAC3B,aAAA,CAAC;;AAEN,KAAC,CACF;;IAGD,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,KAAI;QACzB,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC;QAC7B,IACE,GAAG,IAAI,IAAI;YACX,GAAG;AACH,YAAA,CAAC,GAAG,CAAC,QAAQ,CAAC,SAAS,CAAC;AACxB,YAAA,CAAC,GAAG,CAAC,QAAQ,CAAC,OAAO,CAAC,EACtB;AACA,YAAA,SAAS,CAAC,GAAG,CAAC,GAAG,EAAE;AACjB,gBAAA,WAAW,EAAE,GAAG;gBAChB,KAAK,EAAE,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC;AAC3B,aAAA,CAAC;;AAEN,KAAC,CAAC;;AAGF,IAAA,MAAM,YAAY,GAAG,IAAI,GAAG,EAAkB;AAC9C,IAAA,MAAM,aAAa,GAAG,IAAI,GAAG,EAAkB;AAC/C,IAAA,MAAM,aAAa,GAAG,IAAI,GAAG,EAAkB;AAC/C,IAAA,MAAM,cAAc,GAAG,IAAI,GAAG,EAAkB;AAEhD,IAAA,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,KAAK,YAAY,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC;AAC5E,IAAA,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,KACzC,aAAa,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAC9B;AACD,IAAA,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,KACzC,aAAa,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAC9B;AACD,IAAA,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,EAAE,CAAC,KAC1C,cAAc,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC,GAAG,CAAC,CAAC,CAC/B;;IAGD,IAAI,MAAM,GAAG,QAAQ;;AAGrB,IAAA,MAAM,OAAO,GAAG;AACd,QAAA,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM;YAC3C,GAAG;AACH,YAAA,IAAI,EAAE,OAAO;AACb,YAAA,GAAG,EAAE,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC;AAC5B,SAAA,CAAC,CAAC;AACH,QAAA,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM;YAC3C,GAAG;AACH,YAAA,IAAI,EAAE,OAAO;AACb,YAAA,GAAG,EAAE,aAAa,CAAC,GAAG,CAAC,GAAG,CAAC;AAC5B,SAAA,CAAC,CAAC;AACH,QAAA,GAAG,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM;YAC5C,GAAG;AACH,YAAA,IAAI,EAAE,QAAQ;AACd,YAAA,GAAG,EAAE,cAAc,CAAC,GAAG,CAAC,GAAG,CAAC;AAC7B,SAAA,CAAC,CAAC;AACH,QAAA,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,MAAM;YAC1C,GAAG;AACH,YAAA,IAAI,EAAE,MAAM;AACZ,YAAA,GAAG,EAAE,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC;AAC3B,SAAA,CAAC,CAAC;KACJ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,CAAC,MAAM,CAAC;;IAG7C,SAAS,WAAW,CAAC,MAAc,EAAA;QACjC,OAAO,MAAM,CAAC,OAAO,CAAC,qBAAqB,EAAE,MAAM,CAAC;;;IAItD,KAAK,MAAM,EAAE,GAAG,EAAE,IAAI,EAAE,GAAG,EAAE,IAAI,OAAO,EAAE;;AAExC,QAAA,MAAM,KAAK,GAAG,IAAI,MAAM,CAAC,CAAM,GAAA,EAAA,WAAW,CAAC,GAAG,CAAC,CAAA,mBAAA,CAAqB,EAAE,GAAG,CAAC;QAE1E,MAAM,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC,KAAK,KAAI;;YAEvC,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,cAAc,CAAC;AAC9C,YAAA,MAAM,SAAS,GAAG,UAAU,GAAG,CAAK,EAAA,EAAA,UAAU,CAAC,CAAC,CAAC,CAAG,CAAA,CAAA,GAAG,EAAE;AAEzD,YAAA,OAAO,IAAI,IAAI,CAAA,CAAA,EAAI,GAAG,CAAG,EAAA,SAAS,GAAG;AACvC,SAAC,CAAC;;IAGJ,SAAS,CAAC,KAAK,EAAE;IACjB,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,CAAC;IAC1C,OAAO,CAAC,KAAK,EAAE;IACf,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;IAC5C,QAAQ,CAAC,KAAK,EAAE;IAChB,MAAM,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;IAC5C,QAAQ,CAAC,KAAK,EAAE;IAEhB,OAAO;AACL,QAAA,QAAQ,EAAE,MAAM;QAChB,KAAK;QACL,MAAM;QACN,MAAM;KACP;AACH;;;;"}
|
|
@@ -1,37 +1,9 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
3
|
var axios = require('axios');
|
|
4
|
+
var content = require('./content.cjs');
|
|
5
|
+
var utils = require('./utils.cjs');
|
|
4
6
|
|
|
5
|
-
/* eslint-disable no-console */
|
|
6
|
-
const getDomainName = (link, metadata) => {
|
|
7
|
-
try {
|
|
8
|
-
const url = metadata?.sourceURL ?? metadata?.url ?? (link || '');
|
|
9
|
-
const domain = new URL(url).hostname.replace(/^www\./, '');
|
|
10
|
-
if (domain) {
|
|
11
|
-
return domain;
|
|
12
|
-
}
|
|
13
|
-
}
|
|
14
|
-
catch (e) {
|
|
15
|
-
// URL parsing failed
|
|
16
|
-
console.error('Error parsing URL:', e);
|
|
17
|
-
}
|
|
18
|
-
return;
|
|
19
|
-
};
|
|
20
|
-
function getAttribution(link, metadata) {
|
|
21
|
-
if (!metadata)
|
|
22
|
-
return getDomainName(link, metadata);
|
|
23
|
-
const possibleAttributions = [
|
|
24
|
-
metadata.ogSiteName,
|
|
25
|
-
metadata['og:site_name'],
|
|
26
|
-
metadata.title?.split('|').pop()?.trim(),
|
|
27
|
-
metadata['twitter:site']?.replace(/^@/, ''),
|
|
28
|
-
];
|
|
29
|
-
const attribution = possibleAttributions.find((attr) => attr != null && typeof attr === 'string' && attr.trim() !== '');
|
|
30
|
-
if (attribution != null) {
|
|
31
|
-
return attribution;
|
|
32
|
-
}
|
|
33
|
-
return getDomainName(link, metadata);
|
|
34
|
-
}
|
|
35
7
|
/**
|
|
36
8
|
* Firecrawl scraper implementation
|
|
37
9
|
* Uses the Firecrawl API to scrape web pages
|
|
@@ -41,6 +13,7 @@ class FirecrawlScraper {
|
|
|
41
13
|
apiUrl;
|
|
42
14
|
defaultFormats;
|
|
43
15
|
timeout;
|
|
16
|
+
logger;
|
|
44
17
|
constructor(config = {}) {
|
|
45
18
|
this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';
|
|
46
19
|
const baseUrl = config.apiUrl ??
|
|
@@ -48,11 +21,12 @@ class FirecrawlScraper {
|
|
|
48
21
|
'https://api.firecrawl.dev';
|
|
49
22
|
this.apiUrl = `${baseUrl.replace(/\/+$/, '')}/v1/scrape`;
|
|
50
23
|
this.defaultFormats = config.formats ?? ['markdown', 'html'];
|
|
51
|
-
this.timeout = config.timeout ??
|
|
24
|
+
this.timeout = config.timeout ?? 15000;
|
|
25
|
+
this.logger = config.logger || utils.createDefaultLogger();
|
|
52
26
|
if (!this.apiKey) {
|
|
53
|
-
|
|
27
|
+
this.logger.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');
|
|
54
28
|
}
|
|
55
|
-
|
|
29
|
+
this.logger.debug(`Firecrawl scraper initialized with API URL: ${this.apiUrl}`);
|
|
56
30
|
}
|
|
57
31
|
/**
|
|
58
32
|
* Scrape a single URL
|
|
@@ -106,21 +80,30 @@ class FirecrawlScraper {
|
|
|
106
80
|
*/
|
|
107
81
|
extractContent(response) {
|
|
108
82
|
if (!response.success || !response.data) {
|
|
109
|
-
return '';
|
|
83
|
+
return ['', undefined];
|
|
84
|
+
}
|
|
85
|
+
if (response.data.markdown != null && response.data.html != null) {
|
|
86
|
+
try {
|
|
87
|
+
const { markdown, ...rest } = content.processContent(response.data.html, response.data.markdown);
|
|
88
|
+
return [markdown, rest];
|
|
89
|
+
}
|
|
90
|
+
catch (error) {
|
|
91
|
+
this.logger.error('Error processing content:', error);
|
|
92
|
+
return [response.data.markdown, undefined];
|
|
93
|
+
}
|
|
110
94
|
}
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
return response.data.markdown;
|
|
95
|
+
else if (response.data.markdown != null) {
|
|
96
|
+
return [response.data.markdown, undefined];
|
|
114
97
|
}
|
|
115
98
|
// Fall back to HTML content
|
|
116
99
|
if (response.data.html != null) {
|
|
117
|
-
return response.data.html;
|
|
100
|
+
return [response.data.html, undefined];
|
|
118
101
|
}
|
|
119
102
|
// Fall back to raw HTML content
|
|
120
103
|
if (response.data.rawHtml != null) {
|
|
121
|
-
return response.data.rawHtml;
|
|
104
|
+
return [response.data.rawHtml, undefined];
|
|
122
105
|
}
|
|
123
|
-
return '';
|
|
106
|
+
return ['', undefined];
|
|
124
107
|
}
|
|
125
108
|
/**
|
|
126
109
|
* Extract metadata from scrape response
|
|
@@ -145,5 +128,4 @@ const createFirecrawlScraper = (config = {}) => {
|
|
|
145
128
|
|
|
146
129
|
exports.FirecrawlScraper = FirecrawlScraper;
|
|
147
130
|
exports.createFirecrawlScraper = createFirecrawlScraper;
|
|
148
|
-
exports.getAttribution = getAttribution;
|
|
149
131
|
//# sourceMappingURL=firecrawl.cjs.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"firecrawl.cjs","sources":["../../../../src/tools/search/firecrawl.ts"],"sourcesContent":["/* eslint-disable no-console */\nimport axios from 'axios';\n\nexport interface FirecrawlScrapeOptions {\n formats?: string[];\n includeTags?: string[];\n excludeTags?: string[];\n headers?: Record<string, string>;\n waitFor?: number;\n timeout?: number;\n}\n\ninterface ScrapeMetadata {\n // Core source information\n sourceURL?: string;\n url?: string;\n scrapeId?: string;\n statusCode?: number;\n // Basic metadata\n title?: string;\n description?: string;\n language?: string;\n favicon?: string;\n viewport?: string;\n robots?: string;\n 'theme-color'?: string;\n // Open Graph metadata\n 'og:url'?: string;\n 'og:title'?: string;\n 'og:description'?: string;\n 'og:type'?: string;\n 'og:image'?: string;\n 'og:image:width'?: string;\n 'og:image:height'?: string;\n 'og:site_name'?: string;\n ogUrl?: string;\n ogTitle?: string;\n ogDescription?: string;\n ogImage?: string;\n ogSiteName?: string;\n // Article metadata\n 'article:author'?: string;\n 'article:published_time'?: string;\n 'article:modified_time'?: string;\n 'article:section'?: string;\n 'article:tag'?: string;\n 'article:publisher'?: string;\n publishedTime?: string;\n modifiedTime?: string;\n // Twitter metadata\n 'twitter:site'?: string;\n 'twitter:creator'?: string;\n 'twitter:card'?: string;\n 'twitter:image'?: string;\n 'twitter:dnt'?: string;\n 'twitter:app:name:iphone'?: string;\n 'twitter:app:id:iphone'?: string;\n 'twitter:app:url:iphone'?: string;\n 'twitter:app:name:ipad'?: string;\n 'twitter:app:id:ipad'?: string;\n 'twitter:app:url:ipad'?: string;\n 'twitter:app:name:googleplay'?: string;\n 'twitter:app:id:googleplay'?: string;\n 'twitter:app:url:googleplay'?: string;\n // Facebook metadata\n 'fb:app_id'?: string;\n // App links\n 'al:ios:url'?: string;\n 'al:ios:app_name'?: string;\n 'al:ios:app_store_id'?: string;\n // Allow for additional properties that might be present\n [key: string]: string | number | boolean | null | undefined;\n}\n\nexport interface FirecrawlScrapeResponse {\n success: boolean;\n data?: {\n markdown?: string;\n html?: string;\n rawHtml?: string;\n screenshot?: string;\n links?: string[];\n metadata?: ScrapeMetadata;\n };\n error?: string;\n}\n\nexport interface FirecrawlScraperConfig {\n apiKey?: string;\n apiUrl?: string;\n formats?: string[];\n timeout?: number;\n}\nconst getDomainName = (\n link: string,\n metadata?: ScrapeMetadata\n): string | undefined => {\n try {\n const url = metadata?.sourceURL ?? metadata?.url ?? (link || '');\n const domain = new URL(url).hostname.replace(/^www\\./, '');\n if (domain) {\n return domain;\n }\n } catch (e) {\n // URL parsing failed\n console.error('Error parsing URL:', e);\n }\n\n return;\n};\n\nexport function getAttribution(\n link: string,\n metadata?: ScrapeMetadata\n): string | undefined {\n if (!metadata) return getDomainName(link, metadata);\n\n const possibleAttributions = [\n metadata.ogSiteName,\n metadata['og:site_name'],\n metadata.title?.split('|').pop()?.trim(),\n metadata['twitter:site']?.replace(/^@/, ''),\n ];\n\n const attribution = possibleAttributions.find(\n (attr) => attr != null && typeof attr === 'string' && attr.trim() !== ''\n );\n if (attribution != null) {\n return attribution;\n }\n\n return getDomainName(link, metadata);\n}\n\n/**\n * Firecrawl scraper implementation\n * Uses the Firecrawl API to scrape web pages\n */\nexport class FirecrawlScraper {\n private apiKey: string;\n private apiUrl: string;\n private defaultFormats: string[];\n private timeout: number;\n\n constructor(config: FirecrawlScraperConfig = {}) {\n this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';\n\n const baseUrl =\n config.apiUrl ??\n process.env.FIRECRAWL_BASE_URL ??\n 'https://api.firecrawl.dev';\n this.apiUrl = `${baseUrl.replace(/\\/+$/, '')}/v1/scrape`;\n\n this.defaultFormats = config.formats ?? ['markdown', 'html'];\n this.timeout = config.timeout ?? 30000;\n\n if (!this.apiKey) {\n console.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');\n }\n\n console.log(`Firecrawl scraper initialized with API URL: ${this.apiUrl}`);\n }\n\n /**\n * Scrape a single URL\n * @param url URL to scrape\n * @param options Scrape options\n * @returns Scrape response\n */\n async scrapeUrl(\n url: string,\n options: FirecrawlScrapeOptions = {}\n ): Promise<[string, FirecrawlScrapeResponse]> {\n if (!this.apiKey) {\n return [\n url,\n {\n success: false,\n error: 'FIRECRAWL_API_KEY is not set',\n },\n ];\n }\n\n try {\n const response = await axios.post(\n this.apiUrl,\n {\n url,\n formats: options.formats || this.defaultFormats,\n includeTags: options.includeTags,\n excludeTags: options.excludeTags,\n headers: options.headers,\n waitFor: options.waitFor,\n timeout: options.timeout ?? this.timeout,\n },\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n timeout: this.timeout,\n }\n );\n\n return [url, response.data];\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return [\n url,\n {\n success: false,\n error: `Firecrawl API request failed: ${errorMessage}`,\n },\n ];\n }\n }\n\n /**\n * Extract content from scrape response\n * @param response Scrape response\n * @returns Extracted content or empty string if not available\n */\n extractContent(response: FirecrawlScrapeResponse): string {\n if (!response.success || !response.data) {\n return '';\n }\n\n // Prefer markdown content if available\n if (response.data.markdown != null) {\n return response.data.markdown;\n }\n\n // Fall back to HTML content\n if (response.data.html != null) {\n return response.data.html;\n }\n\n // Fall back to raw HTML content\n if (response.data.rawHtml != null) {\n return response.data.rawHtml;\n }\n\n return '';\n }\n\n /**\n * Extract metadata from scrape response\n * @param response Scrape response\n * @returns Metadata object\n */\n extractMetadata(response: FirecrawlScrapeResponse): ScrapeMetadata {\n if (!response.success || !response.data || !response.data.metadata) {\n return {};\n }\n\n return response.data.metadata;\n }\n}\n\n/**\n * Create a Firecrawl scraper instance\n * @param config Scraper configuration\n * @returns Firecrawl scraper instance\n */\nexport const createFirecrawlScraper = (\n config: FirecrawlScraperConfig = {}\n): FirecrawlScraper => {\n return new FirecrawlScraper(config);\n};\n"],"names":[],"mappings":";;;;AAAA;AA6FA,MAAM,aAAa,GAAG,CACpB,IAAY,EACZ,QAAyB,KACH;AACtB,IAAA,IAAI;AACF,QAAA,MAAM,GAAG,GAAG,QAAQ,EAAE,SAAS,IAAI,QAAQ,EAAE,GAAG,KAAK,IAAI,IAAI,EAAE,CAAC;AAChE,QAAA,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,QAAQ,EAAE,EAAE,CAAC;QAC1D,IAAI,MAAM,EAAE;AACV,YAAA,OAAO,MAAM;;;IAEf,OAAO,CAAC,EAAE;;AAEV,QAAA,OAAO,CAAC,KAAK,CAAC,oBAAoB,EAAE,CAAC,CAAC;;IAGxC;AACF,CAAC;AAEe,SAAA,cAAc,CAC5B,IAAY,EACZ,QAAyB,EAAA;AAEzB,IAAA,IAAI,CAAC,QAAQ;AAAE,QAAA,OAAO,aAAa,CAAC,IAAI,EAAE,QAAQ,CAAC;AAEnD,IAAA,MAAM,oBAAoB,GAAG;AAC3B,QAAA,QAAQ,CAAC,UAAU;QACnB,QAAQ,CAAC,cAAc,CAAC;AACxB,QAAA,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE;QACxC,QAAQ,CAAC,cAAc,CAAC,EAAE,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;KAC5C;IAED,MAAM,WAAW,GAAG,oBAAoB,CAAC,IAAI,CAC3C,CAAC,IAAI,KAAK,IAAI,IAAI,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,CAAC,IAAI,EAAE,KAAK,EAAE,CACzE;AACD,IAAA,IAAI,WAAW,IAAI,IAAI,EAAE;AACvB,QAAA,OAAO,WAAW;;AAGpB,IAAA,OAAO,aAAa,CAAC,IAAI,EAAE,QAAQ,CAAC;AACtC;AAEA;;;AAGG;MACU,gBAAgB,CAAA;AACnB,IAAA,MAAM;AACN,IAAA,MAAM;AACN,IAAA,cAAc;AACd,IAAA,OAAO;AAEf,IAAA,WAAA,CAAY,SAAiC,EAAE,EAAA;AAC7C,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,EAAE;AAElE,QAAA,MAAM,OAAO,GACX,MAAM,CAAC,MAAM;YACb,OAAO,CAAC,GAAG,CAAC,kBAAkB;AAC9B,YAAA,2BAA2B;AAC7B,QAAA,IAAI,CAAC,MAAM,GAAG,CAAA,EAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY;AAExD,QAAA,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC;QAC5D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,KAAK;AAEtC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;AAChB,YAAA,OAAO,CAAC,IAAI,CAAC,uDAAuD,CAAC;;QAGvE,OAAO,CAAC,GAAG,CAAC,CAAA,4CAAA,EAA+C,IAAI,CAAC,MAAM,CAAE,CAAA,CAAC;;AAG3E;;;;;AAKG;AACH,IAAA,MAAM,SAAS,CACb,GAAW,EACX,UAAkC,EAAE,EAAA;AAEpC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;YAChB,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;AACd,oBAAA,KAAK,EAAE,8BAA8B;AACtC,iBAAA;aACF;;AAGH,QAAA,IAAI;YACF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,IAAI,CAAC,MAAM,EACX;gBACE,GAAG;AACH,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc;gBAC/C,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,OAAO,EAAE,OAAO,CAAC,OAAO;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO;AACxB,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;aACzC,EACD;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;gBACD,OAAO,EAAE,IAAI,CAAC,OAAO;AACtB,aAAA,CACF;AAED,YAAA,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,CAAC;;QAC3B,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAiC,8BAAA,EAAA,YAAY,CAAE,CAAA;AACvD,iBAAA;aACF;;;AAIL;;;;AAIG;AACH,IAAA,cAAc,CAAC,QAAiC,EAAA;QAC9C,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;AACvC,YAAA,OAAO,EAAE;;;QAIX,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,EAAE;AAClC,YAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ;;;QAI/B,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;AAC9B,YAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,IAAI;;;QAI3B,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,EAAE;AACjC,YAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,OAAO;;AAG9B,QAAA,OAAO,EAAE;;AAGX;;;;AAIG;AACH,IAAA,eAAe,CAAC,QAAiC,EAAA;AAC/C,QAAA,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE;AAClE,YAAA,OAAO,EAAE;;AAGX,QAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ;;AAEhC;AAED;;;;AAIG;MACU,sBAAsB,GAAG,CACpC,MAAiC,GAAA,EAAE,KACf;AACpB,IAAA,OAAO,IAAI,gBAAgB,CAAC,MAAM,CAAC;AACrC;;;;;;"}
|
|
1
|
+
{"version":3,"file":"firecrawl.cjs","sources":["../../../../src/tools/search/firecrawl.ts"],"sourcesContent":["import axios from 'axios';\nimport { processContent } from './content';\nimport type * as t from './types';\nimport { createDefaultLogger } from './utils';\n\n/**\n * Firecrawl scraper implementation\n * Uses the Firecrawl API to scrape web pages\n */\nexport class FirecrawlScraper {\n private apiKey: string;\n private apiUrl: string;\n private defaultFormats: string[];\n private timeout: number;\n private logger: t.Logger;\n\n constructor(config: t.FirecrawlScraperConfig = {}) {\n this.apiKey = config.apiKey ?? process.env.FIRECRAWL_API_KEY ?? '';\n\n const baseUrl =\n config.apiUrl ??\n process.env.FIRECRAWL_BASE_URL ??\n 'https://api.firecrawl.dev';\n this.apiUrl = `${baseUrl.replace(/\\/+$/, '')}/v1/scrape`;\n\n this.defaultFormats = config.formats ?? ['markdown', 'html'];\n this.timeout = config.timeout ?? 15000;\n\n this.logger = config.logger || createDefaultLogger();\n\n if (!this.apiKey) {\n this.logger.warn('FIRECRAWL_API_KEY is not set. Scraping will not work.');\n }\n\n this.logger.debug(\n `Firecrawl scraper initialized with API URL: ${this.apiUrl}`\n );\n }\n\n /**\n * Scrape a single URL\n * @param url URL to scrape\n * @param options Scrape options\n * @returns Scrape response\n */\n async scrapeUrl(\n url: string,\n options: t.FirecrawlScrapeOptions = {}\n ): Promise<[string, t.FirecrawlScrapeResponse]> {\n if (!this.apiKey) {\n return [\n url,\n {\n success: false,\n error: 'FIRECRAWL_API_KEY is not set',\n },\n ];\n }\n\n try {\n const response = await axios.post(\n this.apiUrl,\n {\n url,\n formats: options.formats || this.defaultFormats,\n includeTags: options.includeTags,\n excludeTags: options.excludeTags,\n headers: options.headers,\n waitFor: options.waitFor,\n timeout: options.timeout ?? this.timeout,\n },\n {\n headers: {\n 'Content-Type': 'application/json',\n Authorization: `Bearer ${this.apiKey}`,\n },\n timeout: this.timeout,\n }\n );\n\n return [url, response.data];\n } catch (error) {\n const errorMessage =\n error instanceof Error ? error.message : String(error);\n return [\n url,\n {\n success: false,\n error: `Firecrawl API request failed: ${errorMessage}`,\n },\n ];\n }\n }\n\n /**\n * Extract content from scrape response\n * @param response Scrape response\n * @returns Extracted content or empty string if not available\n */\n extractContent(\n response: t.FirecrawlScrapeResponse\n ): [string, undefined | t.References] {\n if (!response.success || !response.data) {\n return ['', undefined];\n }\n\n if (response.data.markdown != null && response.data.html != null) {\n try {\n const { markdown, ...rest } = processContent(\n response.data.html,\n response.data.markdown\n );\n return [markdown, rest];\n } catch (error) {\n this.logger.error('Error processing content:', error);\n return [response.data.markdown, undefined];\n }\n } else if (response.data.markdown != null) {\n return [response.data.markdown, undefined];\n }\n\n // Fall back to HTML content\n if (response.data.html != null) {\n return [response.data.html, undefined];\n }\n\n // Fall back to raw HTML content\n if (response.data.rawHtml != null) {\n return [response.data.rawHtml, undefined];\n }\n\n return ['', undefined];\n }\n\n /**\n * Extract metadata from scrape response\n * @param response Scrape response\n * @returns Metadata object\n */\n extractMetadata(response: t.FirecrawlScrapeResponse): t.ScrapeMetadata {\n if (!response.success || !response.data || !response.data.metadata) {\n return {};\n }\n\n return response.data.metadata;\n }\n}\n\n/**\n * Create a Firecrawl scraper instance\n * @param config Scraper configuration\n * @returns Firecrawl scraper instance\n */\nexport const createFirecrawlScraper = (\n config: t.FirecrawlScraperConfig = {}\n): FirecrawlScraper => {\n return new FirecrawlScraper(config);\n};\n"],"names":["createDefaultLogger","processContent"],"mappings":";;;;;;AAKA;;;AAGG;MACU,gBAAgB,CAAA;AACnB,IAAA,MAAM;AACN,IAAA,MAAM;AACN,IAAA,cAAc;AACd,IAAA,OAAO;AACP,IAAA,MAAM;AAEd,IAAA,WAAA,CAAY,SAAmC,EAAE,EAAA;AAC/C,QAAA,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAI,OAAO,CAAC,GAAG,CAAC,iBAAiB,IAAI,EAAE;AAElE,QAAA,MAAM,OAAO,GACX,MAAM,CAAC,MAAM;YACb,OAAO,CAAC,GAAG,CAAC,kBAAkB;AAC9B,YAAA,2BAA2B;AAC7B,QAAA,IAAI,CAAC,MAAM,GAAG,CAAA,EAAG,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,YAAY;AAExD,QAAA,IAAI,CAAC,cAAc,GAAG,MAAM,CAAC,OAAO,IAAI,CAAC,UAAU,EAAE,MAAM,CAAC;QAC5D,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,IAAI,KAAK;QAEtC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,IAAIA,yBAAmB,EAAE;AAEpD,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;AAChB,YAAA,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,uDAAuD,CAAC;;QAG3E,IAAI,CAAC,MAAM,CAAC,KAAK,CACf,CAA+C,4CAAA,EAAA,IAAI,CAAC,MAAM,CAAE,CAAA,CAC7D;;AAGH;;;;;AAKG;AACH,IAAA,MAAM,SAAS,CACb,GAAW,EACX,UAAoC,EAAE,EAAA;AAEtC,QAAA,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE;YAChB,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;AACd,oBAAA,KAAK,EAAE,8BAA8B;AACtC,iBAAA;aACF;;AAGH,QAAA,IAAI;YACF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,IAAI,CAC/B,IAAI,CAAC,MAAM,EACX;gBACE,GAAG;AACH,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,cAAc;gBAC/C,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,WAAW,EAAE,OAAO,CAAC,WAAW;gBAChC,OAAO,EAAE,OAAO,CAAC,OAAO;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO;AACxB,gBAAA,OAAO,EAAE,OAAO,CAAC,OAAO,IAAI,IAAI,CAAC,OAAO;aACzC,EACD;AACE,gBAAA,OAAO,EAAE;AACP,oBAAA,cAAc,EAAE,kBAAkB;AAClC,oBAAA,aAAa,EAAE,CAAA,OAAA,EAAU,IAAI,CAAC,MAAM,CAAE,CAAA;AACvC,iBAAA;gBACD,OAAO,EAAE,IAAI,CAAC,OAAO;AACtB,aAAA,CACF;AAED,YAAA,OAAO,CAAC,GAAG,EAAE,QAAQ,CAAC,IAAI,CAAC;;QAC3B,OAAO,KAAK,EAAE;AACd,YAAA,MAAM,YAAY,GAChB,KAAK,YAAY,KAAK,GAAG,KAAK,CAAC,OAAO,GAAG,MAAM,CAAC,KAAK,CAAC;YACxD,OAAO;gBACL,GAAG;AACH,gBAAA;AACE,oBAAA,OAAO,EAAE,KAAK;oBACd,KAAK,EAAE,CAAiC,8BAAA,EAAA,YAAY,CAAE,CAAA;AACvD,iBAAA;aACF;;;AAIL;;;;AAIG;AACH,IAAA,cAAc,CACZ,QAAmC,EAAA;QAEnC,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE;AACvC,YAAA,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC;;AAGxB,QAAA,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;AAChE,YAAA,IAAI;gBACF,MAAM,EAAE,QAAQ,EAAE,GAAG,IAAI,EAAE,GAAGC,sBAAc,CAC1C,QAAQ,CAAC,IAAI,CAAC,IAAI,EAClB,QAAQ,CAAC,IAAI,CAAC,QAAQ,CACvB;AACD,gBAAA,OAAO,CAAC,QAAQ,EAAE,IAAI,CAAC;;YACvB,OAAO,KAAK,EAAE;gBACd,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,2BAA2B,EAAE,KAAK,CAAC;gBACrD,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;;;aAEvC,IAAI,QAAQ,CAAC,IAAI,CAAC,QAAQ,IAAI,IAAI,EAAE;YACzC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,SAAS,CAAC;;;QAI5C,IAAI,QAAQ,CAAC,IAAI,CAAC,IAAI,IAAI,IAAI,EAAE;YAC9B,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,IAAI,EAAE,SAAS,CAAC;;;QAIxC,IAAI,QAAQ,CAAC,IAAI,CAAC,OAAO,IAAI,IAAI,EAAE;YACjC,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,EAAE,SAAS,CAAC;;AAG3C,QAAA,OAAO,CAAC,EAAE,EAAE,SAAS,CAAC;;AAGxB;;;;AAIG;AACH,IAAA,eAAe,CAAC,QAAmC,EAAA;AACjD,QAAA,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE;AAClE,YAAA,OAAO,EAAE;;AAGX,QAAA,OAAO,QAAQ,CAAC,IAAI,CAAC,QAAQ;;AAEhC;AAED;;;;AAIG;MACU,sBAAsB,GAAG,CACpC,MAAmC,GAAA,EAAE,KACjB;AACpB,IAAA,OAAO,IAAI,gBAAgB,CAAC,MAAM,CAAC;AACrC;;;;;"}
|