fetchfox-sdk 1.0.17 → 1.0.19

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "fetchfox-sdk",
3
- "version": "1.0.17",
3
+ "version": "1.0.19",
4
4
  "description": "AI scraper",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
@@ -11,7 +11,7 @@
11
11
  },
12
12
  "repository": {
13
13
  "type": "git",
14
- "url": "git+https://github.com/fetchfox/fetchfox.git"
14
+ "url": "git+https://github.com/fetchfox/fetchfox-sdk.git"
15
15
  },
16
16
  "keywords": [
17
17
  "ai",
@@ -20,9 +20,9 @@
20
20
  "author": "marcell@fetchfoxai.com",
21
21
  "license": "ISC",
22
22
  "bugs": {
23
- "url": "https://github.com/fetchfox/fetchfox/issues"
23
+ "url": "https://github.com/fetchfox/fetchfox-sdk/issues"
24
24
  },
25
- "homepage": "https://github.com/fetchfox/fetchfox#readme",
25
+ "homepage": "https://fetchfox.ai",
26
26
  "devDependencies": {
27
27
  "@eslint/js": "^9.31.0",
28
28
  "eslint-plugin-promise": "^7.2.1",
package/src/api.js CHANGED
@@ -29,6 +29,7 @@ export const call = async (method, path, params) => {
29
29
  };
30
30
 
31
31
  let url = endpoint(path, params);
32
+ console.log('Url', url);
32
33
  if (method == 'GET') {
33
34
  url += '?' + new URLSearchParams(params).toString();
34
35
  } else {
package/src/configure.js CHANGED
@@ -1,6 +1,4 @@
1
- const config = {
2
- host: 'https://api.fetchfox.ai',
3
- };
1
+ const config = {};
4
2
 
5
3
  const isNode =
6
4
  typeof process !== 'undefined' &&
@@ -22,16 +20,12 @@ export const apiKey = (options) =>
22
20
  options?.apiKey || config.apiKey || safeEnv('FETCHFOX_API_KEY');
23
21
 
24
22
  export const host = (options) =>
25
- options?.host || config.host || safeEnv('FETCHFOX_HOST');
23
+ options?.host ||
24
+ config.host ||
25
+ safeEnv('FETCHFOX_HOST') ||
26
+ 'https://api.fetchfox.ai';
26
27
 
27
28
  export const appHost = (options) =>
28
- (options?.host || config.host || safeEnv('FETCHFOX_HOST')).replace(
29
- 'api.fetchfox.ai',
30
- 'app.fetchfox.ai'
31
- );
29
+ host(options).replace('api.fetchfox.ai', 'app.fetchfox.ai');
32
30
 
33
- export const ws = (options) =>
34
- (options?.host || config.host || safeEnv('FETCHFOX_HOST')).replace(
35
- 'http',
36
- 'ws'
37
- );
31
+ export const ws = (options) => host(options).replace('http', 'ws');
package/src/crawl.js CHANGED
@@ -7,5 +7,5 @@ export async function crawl(args) {
7
7
 
8
8
  crawl.detach = async (args) => {
9
9
  const data = await call('POST', '/api/crawl', { ...args, detach: true });
10
- return new Job(data.jobId);
10
+ return new Job(data.jobId, args);
11
11
  };
package/src/detach.js CHANGED
@@ -11,17 +11,21 @@ export function getSocket() {}
11
11
  export const Job = class {
12
12
  #callbacks;
13
13
  #socket;
14
+ #seen;
14
15
 
15
- constructor(id) {
16
+ constructor(id, options) {
16
17
  this.id = id;
17
18
  this.#callbacks = {
19
+ item: [],
18
20
  completed: [],
19
21
  error: [],
20
22
  finished: [],
21
23
  progress: [],
22
24
  };
23
25
 
24
- this.#socket = new io(ws());
26
+ this.#seen = {};
27
+
28
+ this.#socket = new io(ws(options));
25
29
  this.#socket.on('progress', (data) => {
26
30
  this.handleProgress(data);
27
31
  });
@@ -50,6 +54,15 @@ export const Job = class {
50
54
  ]) {
51
55
  s[key] = data[key] || this[key];
52
56
  }
57
+
58
+ if (s.progress?.children?.jobs) {
59
+ // const late = this.progress.children.jobs.filter(it => it.late);
60
+ // console.log('late jobs:', late);
61
+ s.progress.children.jobs = s.progress.children.jobs.filter(
62
+ (it) => !it.late
63
+ );
64
+ }
65
+
53
66
  return s;
54
67
  }
55
68
 
@@ -63,6 +76,8 @@ export const Job = class {
63
76
  }
64
77
 
65
78
  handleProgress(data) {
79
+ console.log('handleProgress', data);
80
+
66
81
  const last = JSON.stringify(this);
67
82
 
68
83
  const s = this.#select(data);
@@ -72,21 +87,34 @@ export const Job = class {
72
87
 
73
88
  const didUpdate = JSON.stringify(this) != last;
74
89
  if (didUpdate) {
75
- this.trigger('progress');
90
+ this.trigger('progress', this);
91
+
92
+ for (const item of this.results?.items || []) {
93
+ const ser = JSON.stringify(item);
94
+ if (this.#seen[ser]) {
95
+ continue;
96
+ }
97
+ this.#seen[ser] = true;
98
+ this.trigger('item', item);
99
+ }
76
100
 
77
101
  if (this.state == 'completed') {
78
102
  this._completed = true;
79
- this.trigger('completed');
103
+ this.trigger('completed', this);
80
104
  }
81
105
  if (this.state == 'error') {
82
106
  this._error = true;
83
- this.trigger('error');
107
+ this.trigger('error', this);
84
108
  }
85
109
 
86
110
  if (['completed', 'error'].includes(this.state)) {
87
- this.trigger('finished');
88
- // Just in case there are some straggler events, wait a few seconds
89
- setTimeout(() => this.#socket.disconnect(), 5000);
111
+ if (this.progress?.children?.jobs) {
112
+ this.progress.children.jobs = this.progress.children.jobs.filter(
113
+ (it) => it.state != 'active'
114
+ );
115
+ }
116
+ this.trigger('finished', this);
117
+ this.#socket.disconnect();
90
118
  }
91
119
  }
92
120
  }
@@ -97,10 +125,10 @@ export const Job = class {
97
125
  }
98
126
  }
99
127
 
100
- trigger(event) {
128
+ trigger(event, data) {
101
129
  this.checkEvent(event);
102
130
  for (const cb of this.#callbacks[event]) {
103
- cb({ ...this });
131
+ cb(data);
104
132
  }
105
133
  }
106
134
 
package/src/extract.js CHANGED
@@ -7,5 +7,5 @@ export async function extract(args) {
7
7
 
8
8
  extract.detach = async (args) => {
9
9
  const data = await call('POST', '/api/extract', { ...args, detach: true });
10
- return new Job(data.jobId);
10
+ return new Job(data.jobId, args);
11
11
  };
package/src/scrape.js CHANGED
@@ -7,5 +7,5 @@ export async function scrape(args) {
7
7
 
8
8
  scrape.detach = async (args) => {
9
9
  const data = await call('POST', '/api/scrape', { ...args, detach: true });
10
- return new Job(data.jobId);
10
+ return new Job(data.jobId, args);
11
11
  };
@@ -32,6 +32,36 @@ test('use fetchfox object for detach @fetchfox @sanity', async () => {
32
32
  expect(count).toBeGreaterThan(0);
33
33
  }, 30_000);
34
34
 
35
+ test('use fetchfox object for extract detach @fetchfox @sanity', async () => {
36
+ const fox = new FetchFox({
37
+ apiKey: process.env.FETCHFOX_API_KEY,
38
+ });
39
+ const job = await fox.extract.detach({
40
+ urls: [
41
+ 'https://pokemondb.net/pokedex/bulbasaur',
42
+ 'https://pokemondb.net/pokedex/charmander',
43
+ ],
44
+ template: 'name and number',
45
+ });
46
+
47
+ let count = 0;
48
+
49
+ job.on('progress', (data) => {
50
+ count++;
51
+ });
52
+
53
+ let itemCount = 0;
54
+ job.on('item', (item) => {
55
+ itemCount++;
56
+ console.log('got item:', item);
57
+ });
58
+
59
+ await job.finished();
60
+
61
+ expect(count).toBeGreaterThan(0);
62
+ expect(itemCount).toBe(2);
63
+ }, 30_000);
64
+
35
65
  test('invalid key fails @fetchfox @sanity', async () => {
36
66
  const fox = new FetchFox({
37
67
  apiKey: 'invalid',