@mendable/firecrawl 1.22.0 → 1.23.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +14130 -46
- package/dist/index.d.cts +28 -2
- package/dist/index.d.ts +28 -2
- package/dist/index.js +14141 -26
- package/package.json +3 -3
- package/src/__tests__/v1/snips/change-tracking.test.ts +105 -0
- package/src/index.ts +28 -2
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mendable/firecrawl",
|
|
3
|
-
"version": "1.22.0",
|
|
3
|
+
"version": "1.23.2",
|
|
4
4
|
"description": "JavaScript SDK for Firecrawl API",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"types": "dist/index.d.ts",
|
|
@@ -25,7 +25,6 @@
|
|
|
25
25
|
"author": "Mendable.ai",
|
|
26
26
|
"license": "MIT",
|
|
27
27
|
"dependencies": {
|
|
28
|
-
"axios": "^1.6.8",
|
|
29
28
|
"typescript-event-target": "^1.1.1",
|
|
30
29
|
"zod": "^3.23.8",
|
|
31
30
|
"zod-to-json-schema": "^3.23.0"
|
|
@@ -38,10 +37,11 @@
|
|
|
38
37
|
"@jest/globals": "^29.7.0",
|
|
39
38
|
"@types/axios": "^0.14.0",
|
|
40
39
|
"@types/dotenv": "^8.2.0",
|
|
41
|
-
"@types/jest": "^29.5.
|
|
40
|
+
"@types/jest": "^29.5.14",
|
|
42
41
|
"@types/mocha": "^10.0.6",
|
|
43
42
|
"@types/node": "^20.12.12",
|
|
44
43
|
"@types/uuid": "^9.0.8",
|
|
44
|
+
"axios": "^1.6.8",
|
|
45
45
|
"dotenv": "^16.4.5",
|
|
46
46
|
"jest": "^29.7.0",
|
|
47
47
|
"ts-jest": "^29.2.2",
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import FirecrawlApp from '../../../../src/index';
|
|
3
|
+
|
|
4
|
+
jest.mock('axios');
|
|
5
|
+
const mockedAxios = axios as jest.Mocked<typeof axios>;
|
|
6
|
+
|
|
7
|
+
describe('Change Tracking Tests', () => {
|
|
8
|
+
beforeEach(() => {
|
|
9
|
+
jest.resetAllMocks();
|
|
10
|
+
});
|
|
11
|
+
|
|
12
|
+
it('should support basic change tracking format', async () => {
|
|
13
|
+
mockedAxios.post.mockResolvedValueOnce({
|
|
14
|
+
status: 200,
|
|
15
|
+
data: {
|
|
16
|
+
success: true,
|
|
17
|
+
data: {
|
|
18
|
+
markdown: 'Test markdown content',
|
|
19
|
+
changeTracking: {
|
|
20
|
+
previousScrapeAt: '2023-01-01T00:00:00Z',
|
|
21
|
+
changeStatus: 'changed',
|
|
22
|
+
visibility: 'visible'
|
|
23
|
+
}
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
});
|
|
27
|
+
|
|
28
|
+
const app = new FirecrawlApp({ apiKey: process.env.TEST_API_KEY || 'dummy-api-key-for-testing' });
|
|
29
|
+
const result = await app.scrapeUrl('https://example.com', {
|
|
30
|
+
formats: ['markdown', 'changeTracking']
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
expect(mockedAxios.post).toHaveBeenCalledTimes(1);
|
|
34
|
+
expect(mockedAxios.post.mock.calls[0][1].formats).toContain('changeTracking');
|
|
35
|
+
|
|
36
|
+
expect(result).toHaveProperty('changeTracking');
|
|
37
|
+
expect(result.changeTracking?.previousScrapeAt).toBe('2023-01-01T00:00:00Z');
|
|
38
|
+
expect(result.changeTracking?.changeStatus).toBe('changed');
|
|
39
|
+
expect(result.changeTracking?.visibility).toBe('visible');
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
it('should support change tracking options with git-diff and json modes', async () => {
|
|
43
|
+
mockedAxios.post.mockResolvedValueOnce({
|
|
44
|
+
status: 200,
|
|
45
|
+
data: {
|
|
46
|
+
success: true,
|
|
47
|
+
data: {
|
|
48
|
+
markdown: 'Test markdown content',
|
|
49
|
+
changeTracking: {
|
|
50
|
+
previousScrapeAt: '2023-01-01T00:00:00Z',
|
|
51
|
+
changeStatus: 'changed',
|
|
52
|
+
visibility: 'visible',
|
|
53
|
+
diff: {
|
|
54
|
+
text: '@@ -1,1 +1,1 @@\n-old content\n+new content',
|
|
55
|
+
json: {
|
|
56
|
+
files: [{
|
|
57
|
+
from: null,
|
|
58
|
+
to: null,
|
|
59
|
+
chunks: [{
|
|
60
|
+
content: '@@ -1,1 +1,1 @@',
|
|
61
|
+
changes: [{
|
|
62
|
+
type: 'del',
|
|
63
|
+
content: '-old content',
|
|
64
|
+
del: true,
|
|
65
|
+
ln: 1
|
|
66
|
+
}, {
|
|
67
|
+
type: 'add',
|
|
68
|
+
content: '+new content',
|
|
69
|
+
add: true,
|
|
70
|
+
ln: 1
|
|
71
|
+
}]
|
|
72
|
+
}]
|
|
73
|
+
}]
|
|
74
|
+
}
|
|
75
|
+
},
|
|
76
|
+
json: {
|
|
77
|
+
title: {
|
|
78
|
+
previous: 'Old Title',
|
|
79
|
+
current: 'New Title'
|
|
80
|
+
}
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
});
|
|
86
|
+
|
|
87
|
+
const app = new FirecrawlApp({ apiKey: process.env.TEST_API_KEY || 'dummy-api-key-for-testing' });
|
|
88
|
+
const result = await app.scrapeUrl('https://example.com', {
|
|
89
|
+
formats: ['markdown', 'changeTracking'],
|
|
90
|
+
changeTrackingOptions: {
|
|
91
|
+
modes: ['git-diff', 'json'],
|
|
92
|
+
schema: { type: 'object', properties: { title: { type: 'string' } } }
|
|
93
|
+
}
|
|
94
|
+
});
|
|
95
|
+
|
|
96
|
+
expect(mockedAxios.post).toHaveBeenCalledTimes(1);
|
|
97
|
+
expect(mockedAxios.post.mock.calls[0][1].formats).toContain('changeTracking');
|
|
98
|
+
expect(mockedAxios.post.mock.calls[0][1].changeTrackingOptions.modes).toEqual(['git-diff', 'json']);
|
|
99
|
+
|
|
100
|
+
expect(result).toHaveProperty('changeTracking');
|
|
101
|
+
expect(result.changeTracking?.diff?.text).toBe('@@ -1,1 +1,1 @@\n-old content\n+new content');
|
|
102
|
+
expect(result.changeTracking?.json?.title.previous).toBe('Old Title');
|
|
103
|
+
expect(result.changeTracking?.json?.title.current).toBe('New Title');
|
|
104
|
+
});
|
|
105
|
+
});
|
package/src/index.ts
CHANGED
|
@@ -68,10 +68,31 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
|
|
|
68
68
|
screenshot?: string;
|
|
69
69
|
metadata?: FirecrawlDocumentMetadata;
|
|
70
70
|
actions: ActionsSchema;
|
|
71
|
-
|
|
71
|
+
changeTracking?: {
|
|
72
72
|
previousScrapeAt: string | null;
|
|
73
73
|
changeStatus: "new" | "same" | "changed" | "removed";
|
|
74
74
|
visibility: "visible" | "hidden";
|
|
75
|
+
diff?: {
|
|
76
|
+
text: string;
|
|
77
|
+
json: {
|
|
78
|
+
files: Array<{
|
|
79
|
+
from: string | null;
|
|
80
|
+
to: string | null;
|
|
81
|
+
chunks: Array<{
|
|
82
|
+
content: string;
|
|
83
|
+
changes: Array<{
|
|
84
|
+
type: string;
|
|
85
|
+
normal?: boolean;
|
|
86
|
+
ln?: number;
|
|
87
|
+
ln1?: number;
|
|
88
|
+
ln2?: number;
|
|
89
|
+
content: string;
|
|
90
|
+
}>;
|
|
91
|
+
}>;
|
|
92
|
+
}>;
|
|
93
|
+
};
|
|
94
|
+
};
|
|
95
|
+
json?: any;
|
|
75
96
|
};
|
|
76
97
|
// v1 search only
|
|
77
98
|
title?: string;
|
|
@@ -83,7 +104,7 @@ export interface FirecrawlDocument<T = any, ActionsSchema extends (ActionsResult
|
|
|
83
104
|
* Defines the options and configurations available for scraping web content.
|
|
84
105
|
*/
|
|
85
106
|
export interface CrawlScrapeOptions {
|
|
86
|
-
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json" | "
|
|
107
|
+
formats?: ("markdown" | "html" | "rawHtml" | "content" | "links" | "screenshot" | "screenshot@fullPage" | "extract" | "json" | "changeTracking")[];
|
|
87
108
|
headers?: Record<string, string>;
|
|
88
109
|
includeTags?: string[];
|
|
89
110
|
excludeTags?: string[];
|
|
@@ -140,6 +161,11 @@ export interface ScrapeParams<LLMSchema extends zt.ZodSchema = any, ActionsSchem
|
|
|
140
161
|
schema?: LLMSchema;
|
|
141
162
|
systemPrompt?: string;
|
|
142
163
|
}
|
|
164
|
+
changeTrackingOptions?: {
|
|
165
|
+
prompt?: string;
|
|
166
|
+
schema?: any;
|
|
167
|
+
modes?: ("json" | "git-diff")[];
|
|
168
|
+
}
|
|
143
169
|
actions?: ActionsSchema;
|
|
144
170
|
}
|
|
145
171
|
|