instauto 9.1.2 → 9.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/README.md +1 -1
  2. package/package.json +1 -1
  3. package/src/index.js +81 -64
package/README.md CHANGED
@@ -25,7 +25,7 @@ Now there is a GUI application for those who don't want to code: [SimpleInstaBot
25
25
 
26
26
  You can run this code for example once every day using cron or pm2 or similar
27
27
 
28
- See [index.js](https://github.com/mifi/instauto/blob/master/index.js) for available options.
28
+ See [index.js](https://github.com/mifi/instauto/blob/master/src/index.js) for available options.
29
29
 
30
30
  ## Supported functionality
31
31
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "instauto",
3
- "version": "9.1.2",
3
+ "version": "9.1.5",
4
4
  "description": "Instagram automation library written in Node.js",
5
5
  "main": "src/index.js",
6
6
  "scripts": {
package/src/index.js CHANGED
@@ -45,6 +45,8 @@ const Instauto = async (db, browser, options) => {
45
45
  followUserMinFollowers = null,
46
46
  followUserMinFollowing = null,
47
47
 
48
+ shouldFollowUser = null,
49
+
48
50
  dontUnfollowUntilTimeElapsed = 3 * 24 * 60 * 60 * 1000,
49
51
 
50
52
  excludeUsers = [],
@@ -58,6 +60,7 @@ const Instauto = async (db, browser, options) => {
58
60
  } = options;
59
61
 
60
62
  let myUsername = myUsernameIn;
63
+ const userDataCache = {};
61
64
 
62
65
  assert(cookiesPath);
63
66
  assert(db);
@@ -73,7 +76,6 @@ const Instauto = async (db, browser, options) => {
73
76
 
74
77
  // State
75
78
  let page;
76
- let graphqlUserMissing = false;
77
79
 
78
80
  async function takeScreenshot() {
79
81
  if (!screenshotOnError) return;
@@ -169,15 +171,20 @@ const Instauto = async (db, browser, options) => {
169
171
  }
170
172
 
171
173
  async function gotoWithRetry(url) {
174
+ const maxAttempts = 3;
172
175
  for (let attempt = 0; ; attempt += 1) {
173
176
  logger.log(`Goto ${url}`);
174
177
  const response = await page.goto(url);
175
- await sleep(1000);
178
+ await sleep(2000);
176
179
  const status = response.status();
177
180
 
178
181
  // https://www.reddit.com/r/Instagram/comments/kwrt0s/error_560/
179
182
  // https://github.com/mifi/instauto/issues/60
180
- if (![560, 429].includes(status) || attempt > 3) return status;
183
+ if (![560, 429].includes(status)) return status;
184
+
185
+ if (attempt > maxAttempts) {
186
+ throw new Error(`Navigate to user failed after ${maxAttempts} attempts, last status: ${status}`);
187
+ }
181
188
 
182
189
  logger.info(`Got ${status} - Retrying request later...`);
183
190
  if (status === 429) logger.warn('429 Too Many Requests could mean that Instagram suspects you\'re using a bot. You could try to use the Instagram Mobile app from the same IP for a few days first');
@@ -185,73 +192,78 @@ const Instauto = async (db, browser, options) => {
185
192
  }
186
193
  }
187
194
 
188
- async function safeGotoUser(url, checkPageForUsername) {
189
- const status = await gotoWithRetry(url);
190
- if (status === 200) {
191
- if (checkPageForUsername != null) {
192
- // some pages return 200 but nothing there (I think deleted accounts)
193
- // https://github.com/mifi/SimpleInstaBot/issues/48
194
- // example: https://www.instagram.com/victorialarson__/
195
- // so we check if the page has the user's name on it
196
- return page.evaluate((username) => window.find(username), checkPageForUsername);
197
- }
198
- return true;
199
- }
200
- if (status === 404) {
201
- logger.log('User not found');
202
- return false;
203
- }
204
- throw new Error(`Navigate to user failed with status ${status}`);
195
+ const getUserPageUrl = (username) => `${instagramBaseUrl}/${encodeURIComponent(username)}`;
196
+
197
+ function isAlreadyOnUserPage(username) {
198
+ const url = getUserPageUrl(username);
199
+ // optimization: already on URL? (ignore trailing slash)
200
+ return (page.url().replace(/\/$/, '') === url.replace(/\/$/, ''));
205
201
  }
206
202
 
207
203
  async function navigateToUser(username) {
208
- const url = `${instagramBaseUrl}/${encodeURIComponent(username)}`;
209
- if (page.url().replace(/\/$/, '') === url.replace(/\/$/, '')) return true; // optimization: already on URL? (ignore trailing slash)
204
+ if (isAlreadyOnUserPage(username)) return true;
205
+
210
206
  // logger.log('navigating from', page.url(), 'to', url);
211
207
  logger.log(`Navigating to user ${username}`);
212
- return safeGotoUser(url, username);
208
+
209
+ const url = getUserPageUrl(username);
210
+ const status = await gotoWithRetry(url);
211
+ if (status === 404) {
212
+ logger.warn('User page returned 404');
213
+ return false;
214
+ }
215
+
216
+ if (status === 200) {
217
+ // some pages return 200 but nothing there (I think deleted accounts)
218
+ // https://github.com/mifi/SimpleInstaBot/issues/48
219
+ // example: https://www.instagram.com/victorialarson__/
220
+ // so we check if the page has the user's name on it
221
+ const foundUsernameOnPage = await page.evaluate((u) => window.find(u), username);
222
+ if (!foundUsernameOnPage) logger.warn(`Cannot find "${username}" on page`);
223
+ return foundUsernameOnPage;
224
+ }
225
+
226
+ throw new Error(`Navigate to user failed with status ${status}`);
213
227
  }
214
228
 
215
229
  async function navigateToUserWithCheck(username) {
216
230
  if (!(await navigateToUser(username))) throw new Error('User not found');
217
231
  }
218
232
 
219
- async function getPageJson() {
220
- return JSON.parse(await (await (await page.$('pre')).getProperty('textContent')).jsonValue());
221
- }
222
-
223
233
  async function navigateToUserAndGetData(username) {
224
- // https://github.com/mifi/SimpleInstaBot/issues/36
225
- if (graphqlUserMissing) {
226
- // https://stackoverflow.com/questions/37593025/instagram-api-get-the-userid
227
- // https://stackoverflow.com/questions/17373886/how-can-i-get-a-users-media-from-instagram-without-authenticating-as-a-user
228
- const found = await safeGotoUser(`${instagramBaseUrl}/${encodeURIComponent(username)}?__a=1`);
229
- if (!found) throw new Error('User not found');
230
-
231
- const json = await getPageJson();
234
+ const cachedUserData = userDataCache[username];
232
235
 
233
- const { user } = json.graphql;
236
+ if (isAlreadyOnUserPage(username)) {
237
+ // assume we have data
238
+ return cachedUserData;
239
+ }
234
240
 
241
+ if (cachedUserData != null) {
242
+ // if we already have userData, just navigate
235
243
  await navigateToUserWithCheck(username);
236
- return user;
244
+ return cachedUserData;
237
245
  }
238
246
 
239
- await navigateToUserWithCheck(username);
240
-
241
- // eslint-disable-next-line no-underscore-dangle
242
- const sharedData = await page.evaluate(() => window._sharedData);
243
- try {
244
- // eslint-disable-next-line prefer-destructuring
245
- return sharedData.entry_data.ProfilePage[0].graphql.user;
247
+ // intercept special XHR network request that fetches user's data and store it in a cache
248
+ // TODO fallback to DOM to get user ID if this request fails?
249
+ // https://github.com/mifi/SimpleInstaBot/issues/125#issuecomment-1145354294
250
+ const [foundResponse] = await Promise.all([
251
+ page.waitForResponse((response) => {
252
+ const request = response.request();
253
+ return request.method() === 'GET' && new RegExp(`https:\\/\\/i\\.instagram\\.com\\/api\\/v1\\/users\\/web_profile_info\\/\\?username=${encodeURIComponent(username.toLowerCase())}`).test(request.url());
254
+ }),
255
+ navigateToUserWithCheck(username),
256
+ // page.waitForNavigation({ waitUntil: 'networkidle0' }),
257
+ ]);
258
+
259
+ const json = JSON.parse(await foundResponse.text());
260
+ const userData = json.data.user;
261
+ userDataCache[username] = userData;
262
+ return userData;
263
+ }
246
264
 
247
- // JSON.parse(Array.from(document.getElementsByTagName('script')).find(el => el.innerHTML.startsWith('window.__additionalDataLoaded(\'feed\',')).innerHTML.replace(/^window.__additionalDataLoaded\('feed',({.*})\);$/, '$1'));
248
- // JSON.parse(Array.from(document.getElementsByTagName('script')).find(el => el.innerHTML.startsWith('window._sharedData')).innerHTML.replace(/^window._sharedData ?= ?({.*});$/, '$1'));
249
- // Array.from(document.getElementsByTagName('a')).find(el => el.attributes?.href?.value.includes(`${username}/followers`)).innerText
250
- } catch (err) {
251
- logger.warn('Missing graphql in page, falling back to alternative method...');
252
- graphqlUserMissing = true; // Store as state so we don't have to do this every time from now on.
253
- return navigateToUserAndGetData(username); // Now try again with alternative method
254
- }
265
+ async function getPageJson() {
266
+ return JSON.parse(await (await (await page.$('pre')).getProperty('textContent')).jsonValue());
255
267
  }
256
268
 
257
269
  async function isActionBlocked() {
@@ -321,7 +333,7 @@ const Instauto = async (db, browser, options) => {
321
333
  }
322
334
 
323
335
  async function followUser(username) {
324
- await navigateToUserWithCheck(username);
336
+ await navigateToUserAndGetData(username);
325
337
  const elementHandle = await findFollowButton();
326
338
 
327
339
  if (!elementHandle) {
@@ -363,7 +375,7 @@ const Instauto = async (db, browser, options) => {
363
375
  // See https://github.com/timgrossmann/InstaPy/pull/2345
364
376
  // https://github.com/timgrossmann/InstaPy/issues/2355
365
377
  async function unfollowUser(username) {
366
- await navigateToUserWithCheck(username);
378
+ await navigateToUserAndGetData(username);
367
379
  logger.log(`Unfollowing user ${username}`);
368
380
 
369
381
  const res = { username, time: new Date().getTime() };
@@ -562,7 +574,7 @@ const Instauto = async (db, browser, options) => {
562
574
  async function likeUserImages({ username, likeImagesMin, likeImagesMax } = {}) {
563
575
  if (!likeImagesMin || !likeImagesMax || likeImagesMax < likeImagesMin || likeImagesMin < 1) throw new Error('Invalid arguments');
564
576
 
565
- await navigateToUserWithCheck(username);
577
+ await navigateToUserAndGetData(username);
566
578
 
567
579
  logger.log(`Liking ${likeImagesMin}-${likeImagesMax} user images`);
568
580
  try {
@@ -581,11 +593,10 @@ const Instauto = async (db, browser, options) => {
581
593
  logger.log('Skipping already followed user', username);
582
594
  return false;
583
595
  }
596
+
584
597
  const graphqlUser = await navigateToUserAndGetData(username);
585
598
 
586
- const followedByCount = graphqlUser.edge_followed_by.count;
587
- const followsCount = graphqlUser.edge_follow.count;
588
- const isPrivate = graphqlUser.is_private;
599
+ const { edge_followed_by: { count: followedByCount }, edge_follow: { count: followsCount }, is_private: isPrivate, is_verified: isVerified, is_business_account: isBusinessAccount, is_professional_account: isProfessionalAccount, full_name: fullName, biography, profile_pic_url_hd: profilePicUrlHd, external_url: externalUrl, business_category_name: businessCategoryName, category_name: categoryName } = graphqlUser;
589
600
 
590
601
  // logger.log('followedByCount:', followedByCount, 'followsCount:', followsCount);
591
602
 
@@ -611,6 +622,10 @@ const Instauto = async (db, browser, options) => {
611
622
  logger.log('User has too many followers compared to follows or opposite, skipping');
612
623
  return false;
613
624
  }
625
+ if (shouldFollowUser !== null && (typeof shouldFollowUser === 'function' && !shouldFollowUser({ username, isVerified, isBusinessAccount, isProfessionalAccount, fullName, biography, profilePicUrlHd, externalUrl, businessCategoryName, categoryName }) === true)) {
626
+ logger.log(`Custom follow logic returned false for ${username}, skipping`);
627
+ return false;
628
+ }
614
629
 
615
630
  await followUser(username);
616
631
 
@@ -632,9 +647,9 @@ const Instauto = async (db, browser, options) => {
632
647
 
633
648
  let numFollowedForThisUser = 0;
634
649
 
635
- const userData = await navigateToUserAndGetData(username);
650
+ const { id: userId } = await navigateToUserAndGetData(username);
636
651
 
637
- for await (const followersBatch of getFollowersOrFollowingGenerator({ userId: userData.id, getFollowers: true })) {
652
+ for await (const followersBatch of getFollowersOrFollowingGenerator({ userId, getFollowers: true })) {
638
653
  logger.log('User followers batch', followersBatch);
639
654
 
640
655
  for (const follower of followersBatch) {
@@ -706,6 +721,8 @@ const Instauto = async (db, browser, options) => {
706
721
  const userFound = await navigateToUser(username);
707
722
 
708
723
  if (!userFound) {
724
+ // to avoid repeatedly unfollowing failed users, flag them as already unfollowed
725
+ logger.log('User not found for unfollow');
709
726
  await addPrevUnfollowedUser({ username, time: new Date().getTime(), noActionTaken: true });
710
727
  await sleep(3000);
711
728
  } else {
@@ -740,6 +757,8 @@ const Instauto = async (db, browser, options) => {
740
757
  }
741
758
  }
742
759
 
760
+ logger.log('Done with unfollowing', i, j);
761
+
743
762
  return j;
744
763
  }
745
764
 
@@ -937,16 +956,14 @@ const Instauto = async (db, browser, options) => {
937
956
  throw new Error('Don\'t know what\'s my username');
938
957
  }
939
958
 
940
- const myUserData = await navigateToUserAndGetData(myUsername);
941
- const myUserId = myUserData.id;
959
+ const { id: myUserId } = await navigateToUserAndGetData(myUsername);
942
960
 
943
961
  // --- END OF INITIALIZATION
944
962
 
945
963
  async function doesUserFollowMe(username) {
946
964
  try {
947
965
  logger.info('Checking if user', username, 'follows us');
948
- const userData = await navigateToUserAndGetData(username);
949
- const userId = userData.id;
966
+ const { id: userId } = await navigateToUserAndGetData(username);
950
967
 
951
968
  const elementHandles = await page.$x("//a[contains(.,' following')][contains(@href,'/following')]");
952
969
  if (elementHandles.length === 0) throw new Error('Following button not found');