fetchfox-sdk 1.0.17 → 1.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -4
- package/src/api.js +1 -0
- package/src/configure.js +7 -13
- package/src/crawl.js +1 -1
- package/src/detach.js +38 -10
- package/src/extract.js +1 -1
- package/src/scrape.js +1 -1
- package/tests/fetchfox.test.js +30 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "fetchfox-sdk",
|
|
3
|
-
"version": "1.0.17",
|
|
3
|
+
"version": "1.0.19",
|
|
4
4
|
"description": "AI scraper",
|
|
5
5
|
"main": "src/index.js",
|
|
6
6
|
"type": "module",
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
},
|
|
12
12
|
"repository": {
|
|
13
13
|
"type": "git",
|
|
14
|
-
"url": "git+https://github.com/fetchfox/fetchfox.git"
|
|
14
|
+
"url": "git+https://github.com/fetchfox/fetchfox-sdk.git"
|
|
15
15
|
},
|
|
16
16
|
"keywords": [
|
|
17
17
|
"ai",
|
|
@@ -20,9 +20,9 @@
|
|
|
20
20
|
"author": "marcell@fetchfoxai.com",
|
|
21
21
|
"license": "ISC",
|
|
22
22
|
"bugs": {
|
|
23
|
-
"url": "https://github.com/fetchfox/fetchfox/issues"
|
|
23
|
+
"url": "https://github.com/fetchfox/fetchfox-sdk/issues"
|
|
24
24
|
},
|
|
25
|
-
"homepage": "https://
|
|
25
|
+
"homepage": "https://fetchfox.ai",
|
|
26
26
|
"devDependencies": {
|
|
27
27
|
"@eslint/js": "^9.31.0",
|
|
28
28
|
"eslint-plugin-promise": "^7.2.1",
|
package/src/api.js
CHANGED
package/src/configure.js
CHANGED
|
@@ -1,6 +1,4 @@
|
|
|
1
|
-
const config = {
|
|
2
|
-
host: 'https://api.fetchfox.ai',
|
|
3
|
-
};
|
|
1
|
+
const config = {};
|
|
4
2
|
|
|
5
3
|
const isNode =
|
|
6
4
|
typeof process !== 'undefined' &&
|
|
@@ -22,16 +20,12 @@ export const apiKey = (options) =>
|
|
|
22
20
|
options?.apiKey || config.apiKey || safeEnv('FETCHFOX_API_KEY');
|
|
23
21
|
|
|
24
22
|
export const host = (options) =>
|
|
25
|
-
options?.host || config.host || safeEnv('FETCHFOX_HOST');
|
|
23
|
+
options?.host ||
|
|
24
|
+
config.host ||
|
|
25
|
+
safeEnv('FETCHFOX_HOST') ||
|
|
26
|
+
'https://api.fetchfox.ai';
|
|
26
27
|
|
|
27
28
|
export const appHost = (options) =>
|
|
28
|
-
(options?.host || config.host || safeEnv('FETCHFOX_HOST')).replace(
|
|
29
|
-
'api.fetchfox.ai',
|
|
30
|
-
'app.fetchfox.ai'
|
|
31
|
-
);
|
|
29
|
+
host(options).replace('api.fetchfox.ai', 'app.fetchfox.ai');
|
|
32
30
|
|
|
33
|
-
export const ws = (options) =>
|
|
34
|
-
(options?.host || config.host || safeEnv('FETCHFOX_HOST')).replace(
|
|
35
|
-
'http',
|
|
36
|
-
'ws'
|
|
37
|
-
);
|
|
31
|
+
export const ws = (options) => host(options).replace('http', 'ws');
|
package/src/crawl.js
CHANGED
package/src/detach.js
CHANGED
|
@@ -11,17 +11,21 @@ export function getSocket() {}
|
|
|
11
11
|
export const Job = class {
|
|
12
12
|
#callbacks;
|
|
13
13
|
#socket;
|
|
14
|
+
#seen;
|
|
14
15
|
|
|
15
|
-
constructor(id) {
|
|
16
|
+
constructor(id, options) {
|
|
16
17
|
this.id = id;
|
|
17
18
|
this.#callbacks = {
|
|
19
|
+
item: [],
|
|
18
20
|
completed: [],
|
|
19
21
|
error: [],
|
|
20
22
|
finished: [],
|
|
21
23
|
progress: [],
|
|
22
24
|
};
|
|
23
25
|
|
|
24
|
-
this.#
|
|
26
|
+
this.#seen = {};
|
|
27
|
+
|
|
28
|
+
this.#socket = new io(ws(options));
|
|
25
29
|
this.#socket.on('progress', (data) => {
|
|
26
30
|
this.handleProgress(data);
|
|
27
31
|
});
|
|
@@ -50,6 +54,15 @@ export const Job = class {
|
|
|
50
54
|
]) {
|
|
51
55
|
s[key] = data[key] || this[key];
|
|
52
56
|
}
|
|
57
|
+
|
|
58
|
+
if (s.progress?.children?.jobs) {
|
|
59
|
+
// const late = this.progress.children.jobs.filter(it => it.late);
|
|
60
|
+
// console.log('late jobs:', late);
|
|
61
|
+
s.progress.children.jobs = s.progress.children.jobs.filter(
|
|
62
|
+
(it) => !it.late
|
|
63
|
+
);
|
|
64
|
+
}
|
|
65
|
+
|
|
53
66
|
return s;
|
|
54
67
|
}
|
|
55
68
|
|
|
@@ -63,6 +76,8 @@ export const Job = class {
|
|
|
63
76
|
}
|
|
64
77
|
|
|
65
78
|
handleProgress(data) {
|
|
79
|
+
console.log('handleProgress', data);
|
|
80
|
+
|
|
66
81
|
const last = JSON.stringify(this);
|
|
67
82
|
|
|
68
83
|
const s = this.#select(data);
|
|
@@ -72,21 +87,34 @@ export const Job = class {
|
|
|
72
87
|
|
|
73
88
|
const didUpdate = JSON.stringify(this) != last;
|
|
74
89
|
if (didUpdate) {
|
|
75
|
-
this.trigger('progress');
|
|
90
|
+
this.trigger('progress', this);
|
|
91
|
+
|
|
92
|
+
for (const item of this.results?.items || []) {
|
|
93
|
+
const ser = JSON.stringify(item);
|
|
94
|
+
if (this.#seen[ser]) {
|
|
95
|
+
continue;
|
|
96
|
+
}
|
|
97
|
+
this.#seen[ser] = true;
|
|
98
|
+
this.trigger('item', item);
|
|
99
|
+
}
|
|
76
100
|
|
|
77
101
|
if (this.state == 'completed') {
|
|
78
102
|
this._completed = true;
|
|
79
|
-
this.trigger('completed');
|
|
103
|
+
this.trigger('completed', this);
|
|
80
104
|
}
|
|
81
105
|
if (this.state == 'error') {
|
|
82
106
|
this._error = true;
|
|
83
|
-
this.trigger('error');
|
|
107
|
+
this.trigger('error', this);
|
|
84
108
|
}
|
|
85
109
|
|
|
86
110
|
if (['completed', 'error'].includes(this.state)) {
|
|
87
|
-
this.
|
|
88
|
-
|
|
89
|
-
|
|
111
|
+
if (this.progress?.children?.jobs) {
|
|
112
|
+
this.progress.children.jobs = this.progress.children.jobs.filter(
|
|
113
|
+
(it) => it.state != 'active'
|
|
114
|
+
);
|
|
115
|
+
}
|
|
116
|
+
this.trigger('finished', this);
|
|
117
|
+
this.#socket.disconnect();
|
|
90
118
|
}
|
|
91
119
|
}
|
|
92
120
|
}
|
|
@@ -97,10 +125,10 @@ export const Job = class {
|
|
|
97
125
|
}
|
|
98
126
|
}
|
|
99
127
|
|
|
100
|
-
trigger(event) {
|
|
128
|
+
trigger(event, data) {
|
|
101
129
|
this.checkEvent(event);
|
|
102
130
|
for (const cb of this.#callbacks[event]) {
|
|
103
|
-
cb(
|
|
131
|
+
cb(data);
|
|
104
132
|
}
|
|
105
133
|
}
|
|
106
134
|
|
package/src/extract.js
CHANGED
package/src/scrape.js
CHANGED
package/tests/fetchfox.test.js
CHANGED
|
@@ -32,6 +32,36 @@ test('use fetchfox object for detach @fetchfox @sanity', async () => {
|
|
|
32
32
|
expect(count).toBeGreaterThan(0);
|
|
33
33
|
}, 30_000);
|
|
34
34
|
|
|
35
|
+
test('use fetchfox object for extract detach @fetchfox @sanity', async () => {
|
|
36
|
+
const fox = new FetchFox({
|
|
37
|
+
apiKey: process.env.FETCHFOX_API_KEY,
|
|
38
|
+
});
|
|
39
|
+
const job = await fox.extract.detach({
|
|
40
|
+
urls: [
|
|
41
|
+
'https://pokemondb.net/pokedex/bulbasaur',
|
|
42
|
+
'https://pokemondb.net/pokedex/charmander',
|
|
43
|
+
],
|
|
44
|
+
template: 'name and number',
|
|
45
|
+
});
|
|
46
|
+
|
|
47
|
+
let count = 0;
|
|
48
|
+
|
|
49
|
+
job.on('progress', (data) => {
|
|
50
|
+
count++;
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
let itemCount = 0;
|
|
54
|
+
job.on('item', (item) => {
|
|
55
|
+
itemCount++;
|
|
56
|
+
console.log('got item:', item);
|
|
57
|
+
});
|
|
58
|
+
|
|
59
|
+
await job.finished();
|
|
60
|
+
|
|
61
|
+
expect(count).toBeGreaterThan(0);
|
|
62
|
+
expect(itemCount).toBe(2);
|
|
63
|
+
}, 30_000);
|
|
64
|
+
|
|
35
65
|
test('invalid key fails @fetchfox @sanity', async () => {
|
|
36
66
|
const fox = new FetchFox({
|
|
37
67
|
apiKey: 'invalid',
|