fetchfox-sdk 1.0.18 → 1.0.20

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "fetchfox-sdk",
3
- "version": "1.0.18",
3
+ "version": "1.0.20",
4
4
  "description": "AI scraper",
5
5
  "main": "src/index.js",
6
6
  "type": "module",
package/src/api.js CHANGED
@@ -29,6 +29,7 @@ export const call = async (method, path, params) => {
29
29
  };
30
30
 
31
31
  let url = endpoint(path, params);
32
+ console.log('Url', url);
32
33
  if (method == 'GET') {
33
34
  url += '?' + new URLSearchParams(params).toString();
34
35
  } else {
package/src/configure.js CHANGED
@@ -1,6 +1,4 @@
1
- const config = {
2
- host: 'https://api.fetchfox.ai',
3
- };
1
+ const config = {};
4
2
 
5
3
  const isNode =
6
4
  typeof process !== 'undefined' &&
@@ -22,16 +20,12 @@ export const apiKey = (options) =>
22
20
  options?.apiKey || config.apiKey || safeEnv('FETCHFOX_API_KEY');
23
21
 
24
22
  export const host = (options) =>
25
- options?.host || config.host || safeEnv('FETCHFOX_HOST');
23
+ options?.host ||
24
+ config.host ||
25
+ safeEnv('FETCHFOX_HOST') ||
26
+ 'https://api.fetchfox.ai';
26
27
 
27
28
  export const appHost = (options) =>
28
- (options?.host || config.host || safeEnv('FETCHFOX_HOST')).replace(
29
- 'api.fetchfox.ai',
30
- 'app.fetchfox.ai'
31
- );
29
+ host(options).replace('api.fetchfox.ai', 'app.fetchfox.ai');
32
30
 
33
- export const ws = (options) =>
34
- (options?.host || config.host || safeEnv('FETCHFOX_HOST')).replace(
35
- 'http',
36
- 'ws'
37
- );
31
+ export const ws = (options) => host(options).replace('http', 'ws');
package/src/detach.js CHANGED
@@ -11,16 +11,20 @@ export function getSocket() {}
11
11
  export const Job = class {
12
12
  #callbacks;
13
13
  #socket;
14
+ #seen;
14
15
 
15
16
  constructor(id, options) {
16
17
  this.id = id;
17
18
  this.#callbacks = {
19
+ item: [],
18
20
  completed: [],
19
21
  error: [],
20
22
  finished: [],
21
23
  progress: [],
22
24
  };
23
25
 
26
+ this.#seen = {};
27
+
24
28
  this.#socket = new io(ws(options));
25
29
  this.#socket.on('progress', (data) => {
26
30
  this.handleProgress(data);
@@ -50,6 +54,15 @@ export const Job = class {
50
54
  ]) {
51
55
  s[key] = data[key] || this[key];
52
56
  }
57
+
58
+ if (s.progress?.children?.jobs) {
59
+ // const late = this.progress.children.jobs.filter(it => it.late);
60
+ // console.log('late jobs:', late);
61
+ s.progress.children.jobs = s.progress.children.jobs.filter(
62
+ (it) => !it.late
63
+ );
64
+ }
65
+
53
66
  return s;
54
67
  }
55
68
 
@@ -63,6 +76,8 @@ export const Job = class {
63
76
  }
64
77
 
65
78
  handleProgress(data) {
79
+ console.log('handleProgress', data);
80
+
66
81
  const last = JSON.stringify(this);
67
82
 
68
83
  const s = this.#select(data);
@@ -72,21 +87,34 @@ export const Job = class {
72
87
 
73
88
  const didUpdate = JSON.stringify(this) != last;
74
89
  if (didUpdate) {
75
- this.trigger('progress');
90
+ this.trigger('progress', this);
91
+
92
+ for (const item of this.results?.items || []) {
93
+ const ser = JSON.stringify(item);
94
+ if (this.#seen[ser]) {
95
+ continue;
96
+ }
97
+ this.#seen[ser] = true;
98
+ this.trigger('item', item);
99
+ }
76
100
 
77
101
  if (this.state == 'completed') {
78
102
  this._completed = true;
79
- this.trigger('completed');
103
+ this.trigger('completed', this);
80
104
  }
81
105
  if (this.state == 'error') {
82
106
  this._error = true;
83
- this.trigger('error');
107
+ this.trigger('error', this);
84
108
  }
85
109
 
86
110
  if (['completed', 'error'].includes(this.state)) {
87
- this.trigger('finished');
88
- // Just in case there are some straggler events, wait a few seconds
89
- setTimeout(() => this.#socket.disconnect(), 5000);
111
+ if (this.progress?.children?.jobs) {
112
+ this.progress.children.jobs = this.progress.children.jobs.filter(
113
+ (it) => it.state != 'active'
114
+ );
115
+ }
116
+ this.trigger('finished', this);
117
+ this.#socket.disconnect();
90
118
  }
91
119
  }
92
120
  }
@@ -97,10 +125,10 @@ export const Job = class {
97
125
  }
98
126
  }
99
127
 
100
- trigger(event) {
128
+ trigger(event, data) {
101
129
  this.checkEvent(event);
102
130
  for (const cb of this.#callbacks[event]) {
103
- cb({ ...this });
131
+ cb(data);
104
132
  }
105
133
  }
106
134
 
package/src/fetchfox.js CHANGED
@@ -7,7 +7,7 @@ export const FetchFox = class {
7
7
  this.apiKey = apiKey;
8
8
  this.host = host;
9
9
 
10
- const fns = [crawl, extract, scrape];
10
+ const fns = [visit, crawl, extract, scrape];
11
11
 
12
12
  for (const fn of fns) {
13
13
  this[fn.name] = function (args) {
package/src/index.js CHANGED
@@ -1,11 +1,14 @@
1
- export * from './scrape.js';
1
+ export * from './visit.js';
2
2
  export * from './crawl.js';
3
3
  export * from './extract.js';
4
+ export * from './scrape.js';
5
+
4
6
  export * from './jobs.js';
5
7
  export * from './user.js';
6
8
  export * from './credits.js';
7
9
  export * from './proxy.js';
8
10
  export * from './urls.js';
11
+
9
12
  export { Job } from './detach.js';
10
13
  export { call } from './api.js';
11
14
  export { FetchFox } from './fetchfox.js';
package/src/visit.js ADDED
@@ -0,0 +1,11 @@
1
+ import { call } from './api.js';
2
+ import { Job } from './detach.js';
3
+
4
+ export async function visit(args) {
5
+ return call('POST', '/api/visit', args);
6
+ }
7
+
8
+ visit.detach = async (args) => {
9
+ const data = await call('POST', '/api/visit', { ...args, detach: true });
10
+ return new Job(data.jobId, args);
11
+ };
@@ -37,18 +37,29 @@ test('use fetchfox object for extract detach @fetchfox @sanity', async () => {
37
37
  apiKey: process.env.FETCHFOX_API_KEY,
38
38
  });
39
39
  const job = await fox.extract.detach({
40
- urls: ['https://pokemondb.net/pokedex/bulbasaur'],
40
+ urls: [
41
+ 'https://pokemondb.net/pokedex/bulbasaur',
42
+ 'https://pokemondb.net/pokedex/charmander',
43
+ ],
41
44
  template: 'name and number',
42
45
  });
43
46
 
44
47
  let count = 0;
48
+
45
49
  job.on('progress', (data) => {
46
50
  count++;
47
51
  });
48
52
 
53
+ let itemCount = 0;
54
+ job.on('item', (item) => {
55
+ itemCount++;
56
+ console.log('got item:', item);
57
+ });
58
+
49
59
  await job.finished();
50
60
 
51
61
  expect(count).toBeGreaterThan(0);
62
+ expect(itemCount).toBe(2);
52
63
  }, 30_000);
53
64
 
54
65
  test('invalid key fails @fetchfox @sanity', async () => {
package/src/#index.js# DELETED
@@ -1,13 +0,0 @@
1
- export * from './scrape.js';
2
- export * from './crawl.js';
3
- export * from './extract.js';
4
- export * from './jobs.js';
5
- export * from './user.js';
6
- export * from './credits.js';
7
- export * from './proxy.js';
8
- export * from './urls.js';
9
-
10
- export { Job } from './detach.js';
11
- export { call } from './api.js';
12
- export { FetchFox } from './fetchfox.js';
13
- export { configure, host, ws, apiKey } from './configure.js';