@credal/actions 0.2.117 → 0.2.118
This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
|
@@ -3208,6 +3208,29 @@ export const firecrawlScrapeUrlDefinition = {
|
|
|
3208
3208
|
description: "Optional wait time in milliseconds before scraping the page",
|
|
3209
3209
|
minimum: 0,
|
|
3210
3210
|
},
|
|
3211
|
+
onlyMainContent: {
|
|
3212
|
+
type: "boolean",
|
|
3213
|
+
description: "Extract only the main content of the page, excluding headers, footers, and navigation",
|
|
3214
|
+
},
|
|
3215
|
+
formats: {
|
|
3216
|
+
type: "array",
|
|
3217
|
+
description: "Array of formats to return",
|
|
3218
|
+
items: {
|
|
3219
|
+
type: "string",
|
|
3220
|
+
enum: [
|
|
3221
|
+
"content",
|
|
3222
|
+
"json",
|
|
3223
|
+
"html",
|
|
3224
|
+
"screenshot",
|
|
3225
|
+
"markdown",
|
|
3226
|
+
"rawHtml",
|
|
3227
|
+
"links",
|
|
3228
|
+
"screenshot@fullPage",
|
|
3229
|
+
"extract",
|
|
3230
|
+
"changeTracking",
|
|
3231
|
+
],
|
|
3232
|
+
},
|
|
3233
|
+
},
|
|
3211
3234
|
},
|
|
3212
3235
|
},
|
|
3213
3236
|
output: {
|
|
@@ -3142,12 +3142,18 @@ export type firecrawlDeepResearchFunction = ActionFunction<firecrawlDeepResearch
|
|
|
3142
3142
|
export declare const firecrawlScrapeUrlParamsSchema: z.ZodObject<{
|
|
3143
3143
|
url: z.ZodString;
|
|
3144
3144
|
waitMs: z.ZodOptional<z.ZodNumber>;
|
|
3145
|
+
onlyMainContent: z.ZodOptional<z.ZodBoolean>;
|
|
3146
|
+
formats: z.ZodOptional<z.ZodArray<z.ZodEnum<["content", "json", "html", "screenshot", "markdown", "rawHtml", "links", "screenshot@fullPage", "extract", "changeTracking"]>, "many">>;
|
|
3145
3147
|
}, "strip", z.ZodTypeAny, {
|
|
3146
3148
|
url: string;
|
|
3147
3149
|
waitMs?: number | undefined;
|
|
3150
|
+
onlyMainContent?: boolean | undefined;
|
|
3151
|
+
formats?: ("content" | "json" | "html" | "screenshot" | "markdown" | "rawHtml" | "links" | "screenshot@fullPage" | "extract" | "changeTracking")[] | undefined;
|
|
3148
3152
|
}, {
|
|
3149
3153
|
url: string;
|
|
3150
3154
|
waitMs?: number | undefined;
|
|
3155
|
+
onlyMainContent?: boolean | undefined;
|
|
3156
|
+
formats?: ("content" | "json" | "html" | "screenshot" | "markdown" | "rawHtml" | "links" | "screenshot@fullPage" | "extract" | "changeTracking")[] | undefined;
|
|
3151
3157
|
}>;
|
|
3152
3158
|
export type firecrawlScrapeUrlParamsType = z.infer<typeof firecrawlScrapeUrlParamsSchema>;
|
|
3153
3159
|
export declare const firecrawlScrapeUrlOutputSchema: z.ZodObject<{
|
|
@@ -979,6 +979,25 @@ export const firecrawlDeepResearchOutputSchema = z.object({
|
|
|
979
979
|
export const firecrawlScrapeUrlParamsSchema = z.object({
|
|
980
980
|
url: z.string().describe("The URL to scrape"),
|
|
981
981
|
waitMs: z.number().gte(0).describe("Optional wait time in milliseconds before scraping the page").optional(),
|
|
982
|
+
onlyMainContent: z
|
|
983
|
+
.boolean()
|
|
984
|
+
.describe("Extract only the main content of the page, excluding headers, footers, and navigation")
|
|
985
|
+
.optional(),
|
|
986
|
+
formats: z
|
|
987
|
+
.array(z.enum([
|
|
988
|
+
"content",
|
|
989
|
+
"json",
|
|
990
|
+
"html",
|
|
991
|
+
"screenshot",
|
|
992
|
+
"markdown",
|
|
993
|
+
"rawHtml",
|
|
994
|
+
"links",
|
|
995
|
+
"screenshot@fullPage",
|
|
996
|
+
"extract",
|
|
997
|
+
"changeTracking",
|
|
998
|
+
]))
|
|
999
|
+
.describe("Array of formats to return")
|
|
1000
|
+
.optional(),
|
|
982
1001
|
});
|
|
983
1002
|
export const firecrawlScrapeUrlOutputSchema = z.object({ content: z.string().describe("The content of the URL") });
|
|
984
1003
|
export const firecrawlSearchAndScrapeParamsSchema = z.object({
|
|
@@ -13,11 +13,70 @@ const scrapeUrl = (_a) => __awaiter(void 0, [_a], void 0, function* ({ params, a
|
|
|
13
13
|
const firecrawl = new FirecrawlApp({
|
|
14
14
|
apiKey: authParams.apiKey,
|
|
15
15
|
});
|
|
16
|
-
const result = yield firecrawl.scrapeUrl(params.url, Object.assign({}, (params.waitMs !== undefined && {
|
|
16
|
+
const result = yield firecrawl.scrapeUrl(params.url, Object.assign(Object.assign(Object.assign({}, (params.waitMs !== undefined && {
|
|
17
17
|
actions: [{ type: "wait", milliseconds: params.waitMs }],
|
|
18
|
+
})), (params.onlyMainContent !== undefined && {
|
|
19
|
+
onlyMainContent: params.onlyMainContent,
|
|
20
|
+
})), (params.formats !== undefined &&
|
|
21
|
+
params.formats.length > 0 && {
|
|
22
|
+
formats: params.formats,
|
|
18
23
|
})));
|
|
24
|
+
console.log("Result is: ", result);
|
|
25
|
+
if (!result.success) {
|
|
26
|
+
return firecrawlScrapeUrlOutputSchema.parse({
|
|
27
|
+
content: "",
|
|
28
|
+
});
|
|
29
|
+
}
|
|
30
|
+
// Extract content based on requested formats
|
|
31
|
+
let content = "";
|
|
32
|
+
if (params.formats && params.formats.length > 0) {
|
|
33
|
+
const contentParts = [];
|
|
34
|
+
for (const format of params.formats) {
|
|
35
|
+
let formatContent = undefined;
|
|
36
|
+
// Handle different format mappings
|
|
37
|
+
switch (format) {
|
|
38
|
+
case "rawHtml":
|
|
39
|
+
formatContent = result.rawHtml;
|
|
40
|
+
break;
|
|
41
|
+
case "markdown":
|
|
42
|
+
formatContent = result.markdown;
|
|
43
|
+
break;
|
|
44
|
+
case "html":
|
|
45
|
+
formatContent = result.html;
|
|
46
|
+
break;
|
|
47
|
+
case "links":
|
|
48
|
+
formatContent = Array.isArray(result.links)
|
|
49
|
+
? result.links.map(link => (typeof link === "string" ? link : JSON.stringify(link))).join("\n")
|
|
50
|
+
: JSON.stringify(result.links);
|
|
51
|
+
break;
|
|
52
|
+
case "json":
|
|
53
|
+
formatContent = result.json ? JSON.stringify(result.json, null, 2) : undefined;
|
|
54
|
+
break;
|
|
55
|
+
case "extract":
|
|
56
|
+
formatContent = result.extract ? JSON.stringify(result.extract, null, 2) : undefined;
|
|
57
|
+
break;
|
|
58
|
+
case "screenshot":
|
|
59
|
+
formatContent = result.screenshot;
|
|
60
|
+
break;
|
|
61
|
+
case "changeTracking":
|
|
62
|
+
formatContent = result.changeTracking ? JSON.stringify(result.changeTracking, null, 2) : undefined;
|
|
63
|
+
break;
|
|
64
|
+
default:
|
|
65
|
+
formatContent = result[format];
|
|
66
|
+
}
|
|
67
|
+
if (formatContent) {
|
|
68
|
+
const formatHeader = `=== ${format.toUpperCase()} ===`;
|
|
69
|
+
contentParts.push(`${formatHeader}\n${formatContent}`);
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
content = contentParts.join("\n\n");
|
|
73
|
+
}
|
|
74
|
+
else {
|
|
75
|
+
// Default to markdown if no formats specified
|
|
76
|
+
content = result.markdown || "";
|
|
77
|
+
}
|
|
19
78
|
return firecrawlScrapeUrlOutputSchema.parse({
|
|
20
|
-
content
|
|
79
|
+
content,
|
|
21
80
|
});
|
|
22
81
|
});
|
|
23
82
|
export default scrapeUrl;
|