apify 2.3.1-beta.4 → 3.0.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. package/README.md +6 -5
  2. package/package.json +69 -128
  3. package/build/actor.d.ts +0 -113
  4. package/build/actor.d.ts.map +0 -1
  5. package/build/actor.js +0 -582
  6. package/build/actor.js.map +0 -1
  7. package/build/apify.d.ts +0 -752
  8. package/build/apify.d.ts.map +0 -1
  9. package/build/apify.js +0 -877
  10. package/build/apify.js.map +0 -1
  11. package/build/autoscaling/autoscaled_pool.d.ts +0 -384
  12. package/build/autoscaling/autoscaled_pool.d.ts.map +0 -1
  13. package/build/autoscaling/autoscaled_pool.js +0 -557
  14. package/build/autoscaling/autoscaled_pool.js.map +0 -1
  15. package/build/autoscaling/snapshotter.d.ts +0 -278
  16. package/build/autoscaling/snapshotter.d.ts.map +0 -1
  17. package/build/autoscaling/snapshotter.js +0 -447
  18. package/build/autoscaling/snapshotter.js.map +0 -1
  19. package/build/autoscaling/system_status.d.ts +0 -224
  20. package/build/autoscaling/system_status.d.ts.map +0 -1
  21. package/build/autoscaling/system_status.js +0 -228
  22. package/build/autoscaling/system_status.js.map +0 -1
  23. package/build/browser_launchers/browser_launcher.d.ts +0 -154
  24. package/build/browser_launchers/browser_launcher.d.ts.map +0 -1
  25. package/build/browser_launchers/browser_launcher.js +0 -160
  26. package/build/browser_launchers/browser_launcher.js.map +0 -1
  27. package/build/browser_launchers/browser_plugin.d.ts +0 -23
  28. package/build/browser_launchers/browser_plugin.d.ts.map +0 -1
  29. package/build/browser_launchers/browser_plugin.js +0 -25
  30. package/build/browser_launchers/browser_plugin.js.map +0 -1
  31. package/build/browser_launchers/playwright_launcher.d.ts +0 -131
  32. package/build/browser_launchers/playwright_launcher.d.ts.map +0 -1
  33. package/build/browser_launchers/playwright_launcher.js +0 -150
  34. package/build/browser_launchers/playwright_launcher.js.map +0 -1
  35. package/build/browser_launchers/puppeteer_launcher.d.ts +0 -153
  36. package/build/browser_launchers/puppeteer_launcher.d.ts.map +0 -1
  37. package/build/browser_launchers/puppeteer_launcher.js +0 -197
  38. package/build/browser_launchers/puppeteer_launcher.js.map +0 -1
  39. package/build/cache_container.d.ts +0 -31
  40. package/build/cache_container.d.ts.map +0 -1
  41. package/build/cache_container.js +0 -48
  42. package/build/cache_container.js.map +0 -1
  43. package/build/configuration.d.ts +0 -226
  44. package/build/configuration.d.ts.map +0 -1
  45. package/build/configuration.js +0 -325
  46. package/build/configuration.js.map +0 -1
  47. package/build/constants.d.ts +0 -37
  48. package/build/constants.d.ts.map +0 -1
  49. package/build/constants.js +0 -41
  50. package/build/constants.js.map +0 -1
  51. package/build/crawlers/basic_crawler.d.ts +0 -443
  52. package/build/crawlers/basic_crawler.d.ts.map +0 -1
  53. package/build/crawlers/basic_crawler.js +0 -664
  54. package/build/crawlers/basic_crawler.js.map +0 -1
  55. package/build/crawlers/browser_crawler.d.ts +0 -512
  56. package/build/crawlers/browser_crawler.d.ts.map +0 -1
  57. package/build/crawlers/browser_crawler.js +0 -540
  58. package/build/crawlers/browser_crawler.js.map +0 -1
  59. package/build/crawlers/cheerio_crawler.d.ts +0 -931
  60. package/build/crawlers/cheerio_crawler.d.ts.map +0 -1
  61. package/build/crawlers/cheerio_crawler.js +0 -913
  62. package/build/crawlers/cheerio_crawler.js.map +0 -1
  63. package/build/crawlers/crawler_extension.d.ts +0 -10
  64. package/build/crawlers/crawler_extension.d.ts.map +0 -1
  65. package/build/crawlers/crawler_extension.js +0 -19
  66. package/build/crawlers/crawler_extension.js.map +0 -1
  67. package/build/crawlers/crawler_utils.d.ts +0 -34
  68. package/build/crawlers/crawler_utils.d.ts.map +0 -1
  69. package/build/crawlers/crawler_utils.js +0 -87
  70. package/build/crawlers/crawler_utils.js.map +0 -1
  71. package/build/crawlers/playwright_crawler.d.ts +0 -448
  72. package/build/crawlers/playwright_crawler.d.ts.map +0 -1
  73. package/build/crawlers/playwright_crawler.js +0 -299
  74. package/build/crawlers/playwright_crawler.js.map +0 -1
  75. package/build/crawlers/puppeteer_crawler.d.ts +0 -425
  76. package/build/crawlers/puppeteer_crawler.d.ts.map +0 -1
  77. package/build/crawlers/puppeteer_crawler.js +0 -299
  78. package/build/crawlers/puppeteer_crawler.js.map +0 -1
  79. package/build/crawlers/statistics.d.ts +0 -185
  80. package/build/crawlers/statistics.d.ts.map +0 -1
  81. package/build/crawlers/statistics.js +0 -331
  82. package/build/crawlers/statistics.js.map +0 -1
  83. package/build/enqueue_links/click_elements.d.ts +0 -179
  84. package/build/enqueue_links/click_elements.d.ts.map +0 -1
  85. package/build/enqueue_links/click_elements.js +0 -434
  86. package/build/enqueue_links/click_elements.js.map +0 -1
  87. package/build/enqueue_links/enqueue_links.d.ts +0 -117
  88. package/build/enqueue_links/enqueue_links.d.ts.map +0 -1
  89. package/build/enqueue_links/enqueue_links.js +0 -163
  90. package/build/enqueue_links/enqueue_links.js.map +0 -1
  91. package/build/enqueue_links/shared.d.ts +0 -42
  92. package/build/enqueue_links/shared.d.ts.map +0 -1
  93. package/build/enqueue_links/shared.js +0 -121
  94. package/build/enqueue_links/shared.js.map +0 -1
  95. package/build/errors.d.ts +0 -29
  96. package/build/errors.d.ts.map +0 -1
  97. package/build/errors.js +0 -38
  98. package/build/errors.js.map +0 -1
  99. package/build/events.d.ts +0 -11
  100. package/build/events.d.ts.map +0 -1
  101. package/build/events.js +0 -147
  102. package/build/events.js.map +0 -1
  103. package/build/index.d.ts +0 -4
  104. package/build/index.d.ts.map +0 -1
  105. package/build/index.js +0 -7
  106. package/build/index.js.map +0 -1
  107. package/build/main.d.ts +0 -179
  108. package/build/main.d.ts.map +0 -1
  109. package/build/main.js +0 -81
  110. package/build/main.js.map +0 -1
  111. package/build/playwright_utils.d.ts +0 -9
  112. package/build/playwright_utils.d.ts.map +0 -1
  113. package/build/playwright_utils.js +0 -90
  114. package/build/playwright_utils.js.map +0 -1
  115. package/build/proxy_configuration.d.ts +0 -411
  116. package/build/proxy_configuration.d.ts.map +0 -1
  117. package/build/proxy_configuration.js +0 -517
  118. package/build/proxy_configuration.js.map +0 -1
  119. package/build/pseudo_url.d.ts +0 -86
  120. package/build/pseudo_url.d.ts.map +0 -1
  121. package/build/pseudo_url.js +0 -153
  122. package/build/pseudo_url.js.map +0 -1
  123. package/build/puppeteer_request_interception.d.ts +0 -8
  124. package/build/puppeteer_request_interception.d.ts.map +0 -1
  125. package/build/puppeteer_request_interception.js +0 -235
  126. package/build/puppeteer_request_interception.js.map +0 -1
  127. package/build/puppeteer_utils.d.ts +0 -250
  128. package/build/puppeteer_utils.d.ts.map +0 -1
  129. package/build/puppeteer_utils.js +0 -551
  130. package/build/puppeteer_utils.js.map +0 -1
  131. package/build/request.d.ts +0 -180
  132. package/build/request.d.ts.map +0 -1
  133. package/build/request.js +0 -261
  134. package/build/request.js.map +0 -1
  135. package/build/request_list.d.ts +0 -581
  136. package/build/request_list.d.ts.map +0 -1
  137. package/build/request_list.js +0 -826
  138. package/build/request_list.js.map +0 -1
  139. package/build/serialization.d.ts +0 -5
  140. package/build/serialization.d.ts.map +0 -1
  141. package/build/serialization.js +0 -139
  142. package/build/serialization.js.map +0 -1
  143. package/build/session_pool/errors.d.ts +0 -11
  144. package/build/session_pool/errors.d.ts.map +0 -1
  145. package/build/session_pool/errors.js +0 -18
  146. package/build/session_pool/errors.js.map +0 -1
  147. package/build/session_pool/events.d.ts +0 -5
  148. package/build/session_pool/events.d.ts.map +0 -1
  149. package/build/session_pool/events.js +0 -6
  150. package/build/session_pool/events.js.map +0 -1
  151. package/build/session_pool/session.d.ts +0 -286
  152. package/build/session_pool/session.d.ts.map +0 -1
  153. package/build/session_pool/session.js +0 -355
  154. package/build/session_pool/session.js.map +0 -1
  155. package/build/session_pool/session_pool.d.ts +0 -280
  156. package/build/session_pool/session_pool.d.ts.map +0 -1
  157. package/build/session_pool/session_pool.js +0 -393
  158. package/build/session_pool/session_pool.js.map +0 -1
  159. package/build/session_pool/session_utils.d.ts +0 -4
  160. package/build/session_pool/session_utils.d.ts.map +0 -1
  161. package/build/session_pool/session_utils.js +0 -24
  162. package/build/session_pool/session_utils.js.map +0 -1
  163. package/build/stealth/hiding_tricks.d.ts +0 -22
  164. package/build/stealth/hiding_tricks.d.ts.map +0 -1
  165. package/build/stealth/hiding_tricks.js +0 -308
  166. package/build/stealth/hiding_tricks.js.map +0 -1
  167. package/build/stealth/stealth.d.ts +0 -56
  168. package/build/stealth/stealth.d.ts.map +0 -1
  169. package/build/stealth/stealth.js +0 -125
  170. package/build/stealth/stealth.js.map +0 -1
  171. package/build/storages/dataset.d.ts +0 -288
  172. package/build/storages/dataset.d.ts.map +0 -1
  173. package/build/storages/dataset.js +0 -480
  174. package/build/storages/dataset.js.map +0 -1
  175. package/build/storages/key_value_store.d.ts +0 -243
  176. package/build/storages/key_value_store.d.ts.map +0 -1
  177. package/build/storages/key_value_store.js +0 -462
  178. package/build/storages/key_value_store.js.map +0 -1
  179. package/build/storages/request_queue.d.ts +0 -318
  180. package/build/storages/request_queue.d.ts.map +0 -1
  181. package/build/storages/request_queue.js +0 -636
  182. package/build/storages/request_queue.js.map +0 -1
  183. package/build/storages/storage_manager.d.ts +0 -87
  184. package/build/storages/storage_manager.d.ts.map +0 -1
  185. package/build/storages/storage_manager.js +0 -150
  186. package/build/storages/storage_manager.js.map +0 -1
  187. package/build/tsconfig.tsbuildinfo +0 -1
  188. package/build/typedefs.d.ts +0 -146
  189. package/build/typedefs.d.ts.map +0 -1
  190. package/build/typedefs.js +0 -88
  191. package/build/typedefs.js.map +0 -1
  192. package/build/utils.d.ts +0 -175
  193. package/build/utils.d.ts.map +0 -1
  194. package/build/utils.js +0 -731
  195. package/build/utils.js.map +0 -1
  196. package/build/utils_log.d.ts +0 -41
  197. package/build/utils_log.d.ts.map +0 -1
  198. package/build/utils_log.js +0 -192
  199. package/build/utils_log.js.map +0 -1
  200. package/build/utils_request.d.ts +0 -77
  201. package/build/utils_request.d.ts.map +0 -1
  202. package/build/utils_request.js +0 -385
  203. package/build/utils_request.js.map +0 -1
  204. package/build/utils_social.d.ts +0 -210
  205. package/build/utils_social.d.ts.map +0 -1
  206. package/build/utils_social.js +0 -787
  207. package/build/utils_social.js.map +0 -1
  208. package/build/validators.d.ts +0 -23
  209. package/build/validators.d.ts.map +0 -1
  210. package/build/validators.js +0 -29
  211. package/build/validators.js.map +0 -1
@@ -1,787 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.socialUtils = void 0;
4
- const tslib_1 = require("tslib");
5
- /* eslint-disable no-continue */
6
- const underscore_1 = (0, tslib_1.__importDefault)(require("underscore"));
7
- const cheerio_1 = (0, tslib_1.__importDefault)(require("cheerio"));
8
- const utils_log_1 = (0, tslib_1.__importDefault)(require("./utils_log"));
9
- const utils_1 = require("./utils");
10
- // Regex inspired by https://zapier.com/blog/extract-links-email-phone-regex/
11
- // eslint-disable-next-line max-len
12
- const EMAIL_REGEX_STRING = '(?:[a-z0-9!#$%&\'*+/=?^_`{|}~-]+(?:\\.[a-z0-9!#$%&\'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\\])';
13
- /**
14
- * Regular expression to exactly match a single email address.
15
- * It has the following form: `/^...$/i`.
16
- * @type {RegExp}
17
- * @memberOf social
18
- */
19
- const EMAIL_REGEX = new RegExp(`^${EMAIL_REGEX_STRING}$`, 'i');
20
- /**
21
- * Regular expression to find multiple email addresses in a text.
22
- * It has the following form: `/.../ig`.
23
- * @type {RegExp}
24
- * @memberOf social
25
- */
26
- const EMAIL_REGEX_GLOBAL = new RegExp(EMAIL_REGEX_STRING, 'ig');
27
- const EMAIL_URL_PREFIX_REGEX = /^mailto:/i;
28
- /**
29
- * The function extracts email addresses from a plain text.
30
- * Note that the function preserves the order of emails and keeps duplicates.
31
- * @param {string} text Text to search in.
32
- * @return {string[]} Array of email addresses found.
33
- * If no emails are found, the function returns an empty array.
34
- * @memberOf social
35
- */
36
- const emailsFromText = (text) => {
37
- if (!underscore_1.default.isString(text))
38
- return [];
39
- return text.match(EMAIL_REGEX_GLOBAL) || [];
40
- };
41
- /**
42
- * The function extracts email addresses from a list of URLs.
43
- * Basically it looks for all `mailto:` URLs and returns valid email addresses from them.
44
- * Note that the function preserves the order of emails and keeps duplicates.
45
- * @param {string[]} urls Array of URLs.
46
- * @return {string[]} Array of email addresses found.
47
- * If no emails are found, the function returns an empty array.
48
- * @memberOf social
49
- */
50
- const emailsFromUrls = (urls) => {
51
- if (!Array.isArray(urls))
52
- throw new Error('The "urls" parameter must be an array');
53
- const emails = [];
54
- for (const url of urls) {
55
- if (!url)
56
- continue;
57
- if (!EMAIL_URL_PREFIX_REGEX.test(url))
58
- continue;
59
- const email = url.replace(EMAIL_URL_PREFIX_REGEX, '').trim();
60
- if (EMAIL_REGEX.test(email))
61
- emails.push(email);
62
- }
63
- return emails;
64
- };
65
- // Supports URLs starting with `tel://`, `tel:/` and `tel:`, and similarly `phone`, `telephone` and `callto`
66
- const PHONE_URL_PREFIX_REGEX = /^(tel|phone|telephone|callto):(\/)?(\/)?/i;
67
- // It's pretty much impossible (and unmaintainable) to have just one large regular expression for all possible phone numbers.
68
- // So here we define various regular expressions for typical phone number patterns, which are then used to compile
69
- // a single large regular expression. Add more patterns as needed.
70
- // NOTE: The patterns are tested in the order as written below, so the longer ones should be before the shorter ones!
71
- const PHONE_REGEXS_STRINGS = [
72
- // 775123456
73
- '[0-9]{6,15}',
74
- // 1(413)555-2378 or 1(413)555.2378 or 1 (413) 555-2378 or 1 (413) 555 2378 or (303) 494-2320
75
- '([0-9]{1,4}( )?)?\\([0-9]{2,4}\\)( )?[0-9]{2,4}(( )?(-|.))?( )?[0-9]{2,6}',
76
- // 1(262) 955-95-79 or 1(262)955.95.79
77
- '([0-9]{1,4}( )?)?\\([0-9]{2,4}\\)( )?[0-9]{2,4}(( )?(-|.))?( )?[0-9]{2,6}',
78
- // (51) 5667-9987 or (19)94138-9398
79
- '\\([0-9]{2}\\)( )?[0-9]{4,5}-[0-9]{4}',
80
- // 413-577-1234-564
81
- '[0-9]{2,4}-[0-9]{2,4}-[0-9]{2,4}-[0-9]{2,6}',
82
- // 413-577-1234
83
- '[0-9]{2,4}-[0-9]{2,4}-[0-9]{2,6}',
84
- // 413-577
85
- '[0-9]{2,4}-[0-9]{2,6}',
86
- // 413.577.1234.564
87
- '[0-9]{2,4}\\.[0-9]{2,4}\\.[0-9]{2,4}\\.[0-9]{2,6}',
88
- // 413.577.1234
89
- '[0-9]{2,4}\\.[0-9]{2,4}\\.[0-9]{2,6}',
90
- // 413.577
91
- '[0-9]{2,4}\\.[0-9]{2,6}',
92
- // 413 577 1234 564
93
- '[0-9]{2,4} [0-9]{2,4} [0-9]{2,4} [0-9]{2,6}',
94
- // 413 577 1234
95
- '[0-9]{2,4} [0-9]{2,4} [0-9]{2,6}',
96
- // 123 4567
97
- '[0-9]{2,4} [0-9]{3,8}',
98
- ];
99
- // All phones might be prefixed with '+' or '00'
100
- for (let i = 0; i < PHONE_REGEXS_STRINGS.length; i++) {
101
- PHONE_REGEXS_STRINGS[i] = `(00|\\+)?${PHONE_REGEXS_STRINGS[i]}`;
102
- }
103
- // The minimum number of digits a phone number can contain.
104
- // That's because the PHONE_REGEXS_STRINGS patterns are quite wide and report a lot of false positives.
105
- const PHONE_MIN_DIGITS = 7;
106
- // These are patterns that might be matched by PHONE_REGEXS_STRINGS,
107
- // but which are most likely not phone numbers. Add more patterns as needed.
108
- const SKIP_PHONE_REGEXS = [
109
- // 2018-11-10
110
- '^[0-9]{4}-[0-9]{2}-[0-9]{2}$',
111
- ];
112
- const PHONE_REGEX_GLOBAL = new RegExp(`(${PHONE_REGEXS_STRINGS.join('|')})`, 'ig');
113
- const PHONE_REGEX = new RegExp(`^(${PHONE_REGEXS_STRINGS.join('|')})$`, 'i');
114
- const SKIP_PHONE_REGEX = new RegExp(`^(${SKIP_PHONE_REGEXS.join('|')})$`, 'i');
115
- /**
116
- * The function attempts to extract phone numbers from a text. Please note that
117
- * the results might not be accurate, since phone numbers appear in a large variety of formats and conventions.
118
- * If you encounter some problems, please [file an issue](https://github.com/apify/apify-js/issues).
119
- * @param {string} text Text to search the phone numbers in.
120
- * @return {string[]} Array of phone numbers found.
121
- * If no phone numbers are found, the function returns an empty array.
122
- * @memberOf social
123
- */
124
- const phonesFromText = (text) => {
125
- if (!underscore_1.default.isString(text))
126
- return [];
127
- let phones = text.match(PHONE_REGEX_GLOBAL) || [];
128
- phones = phones.filter((phone) => {
129
- if (!phone)
130
- return false;
131
- // Skip too short phones, they are most likely incorrect
132
- if (phone.match(/[0-9]/g).length < PHONE_MIN_DIGITS)
133
- return false;
134
- // Skip phone numbers matching specific patterns
135
- if (SKIP_PHONE_REGEX.test(phone))
136
- return false;
137
- return true;
138
- });
139
- return phones;
140
- };
141
- /**
142
- * Finds phone number links in an array of URLs and extracts the phone numbers from them.
143
- * Note that the phone number links look like `tel://123456789`, `tel:/123456789` or `tel:123456789`.
144
- * @param {string[]} urls Array of URLs.
145
- * @return {string[]} Array of phone numbers found.
146
- * If no phone numbers are found, the function returns an empty array.
147
- * @memberOf social
148
- */
149
- const phonesFromUrls = (urls) => {
150
- if (!Array.isArray(urls))
151
- throw new Error('The "urls" parameter must be an array');
152
- const phones = [];
153
- for (const url of urls) {
154
- if (!url)
155
- continue;
156
- if (!PHONE_URL_PREFIX_REGEX.test(url))
157
- continue;
158
- const phone = url.replace(PHONE_URL_PREFIX_REGEX, '').trim();
159
- if (PHONE_REGEX.test(phone))
160
- phones.push(phone);
161
- }
162
- return phones;
163
- };
164
- // NOTEs about the regular expressions
165
- // - They have just a single matching group for the profile username, all other groups are non-matching
166
- // - They use negative lookbehind and lookahead assertions, which are only supported in Node 8+.
167
- // They are used to prevent matching URLs in strings like "blahttps://www.example.com"
168
- // eslint-disable-next-line max-len
169
- const LINKEDIN_REGEX_STRING = '(?<!\\w)(?:(?:http(?:s)?:\\/\\/)?(?:(?:(?:[a-z]+\\.)?linkedin\\.com\\/(?:in|company)\\/)([a-z0-9\\-_%=]{2,60})(?![a-z0-9\\-_%=])))(?:\\/)?';
170
- // eslint-disable-next-line max-len
171
- const INSTAGRAM_REGEX_STRING = '(?<!\\w)(?:http(?:s)?:\\/\\/)?(?:(?:www\\.)?(?:instagram\\.com|instagr\\.am)\\/)(?!explore|_n|_u)([a-z0-9_.]{2,30})(?![a-z0-9_.])(?:/)?';
172
- const TWITTER_RESERVED_PATHS = 'oauth|account|tos|privacy|signup|home|hashtag|search|login|widgets|i|settings|start|share|intent|oct';
173
- // eslint-disable-next-line max-len, quotes
174
- const TWITTER_REGEX_STRING = `(?<!\\w)(?:http(?:s)?:\\/\\/)?(?:www.)?(?:twitter.com)\\/(?!(?:${TWITTER_RESERVED_PATHS})(?:[\\'\\"\\?\\.\\/]|$))([a-z0-9_]{1,15})(?![a-z0-9_])(?:/)?`;
175
- // eslint-disable-next-line max-len, quotes
176
- const FACEBOOK_RESERVED_PATHS = 'rsrc\\.php|apps|groups|events|l\\.php|friends|images|photo.php|chat|ajax|dyi|common|policies|login|recover|reg|help|security|messages|marketplace|pages|live|bookmarks|games|fundraisers|saved|gaming|salesgroups|jobs|people|ads|ad_campaign|weather|offers|recommendations|crisisresponse|onthisday|developers|settings|connect|business|plugins|intern|sharer';
177
- // eslint-disable-next-line max-len, quotes
178
- const FACEBOOK_REGEX_STRING = `(?<!\\w)(?:http(?:s)?:\\/\\/)?(?:www.)?(?:facebook.com|fb.com)\\/(?!(?:${FACEBOOK_RESERVED_PATHS})(?:[\\'\\"\\?\\.\\/]|$))(profile\\.php\\?id\\=[0-9]{3,20}|(?!profile\\.php)[a-z0-9\\.]{5,51})(?![a-z0-9\\.])(?:/)?`;
179
- // eslint-disable-next-line max-len, quotes
180
- const YOUTUBE_REGEX_STRING = '(?<!\\w)(?:https?:\\/\\/)?(?:youtu\\.be\\/|(?:www\\.|m\\.)?youtube\\.com(?:\\/(?:watch|v|embed|user|c(?:hannel)?)(?:\\.php)?)?(?:\\?[^ ]*v=|\\/))([a-zA-Z0-9\\-_]{2,100})';
181
- // eslint-disable-next-line max-len, quotes
182
- const TIKTOK_REGEX_STRING = '(?<!\\w)(?:http(?:s)?:\\/\\/)?(?:(?:www|m)\\.)?(?:tiktok\\.com)\\/(((?:(?:v|embed|trending)(?:\\?shareId=|\\/))[0-9]{2,50}(?![0-9]))|(?:@)[a-z0-9\\-_\\.]+((?:\\/video\\/)[0-9]{2,50}(?![0-9]))?)(?:\\/)?';
183
- // eslint-disable-next-line max-len, quotes
184
- const PINTEREST_REGEX_STRING = '(?<!\\w)(?:http(?:s)?:\\/\\/)?(?:(?:(?:(?:www\\.)?pinterest(?:\\.com|(?:\\.[a-z]{2}){1,2}))|(?:[a-z]{2})\\.pinterest\\.com)(?:\\/))((pin\\/[0-9]{2,50})|((?!pin)[a-z0-9\\-_\\.]+(\\/[a-z0-9\\-_\\.]+)?))(?:\\/)?';
185
- // eslint-disable-next-line max-len, quotes
186
- const DISCORD_REGEX_STRING = '(?<!\\w)(?:https?:\\/\\/)?(?:www\\.)?((?:(?:(?:canary|ptb).)?(?:discord|discordapp)\\.com\\/channels(?:\\/)[0-9]{2,50}(\\/[0-9]{2,50})*)|(?:(?:(?:canary|ptb).)?(?:discord\\.(?:com|me|li|gg|io)|discordapp\\.com)(?:\\/invite)?)\\/(?!channels)[a-z0-9\\-_]{2,50})(?:\\/)?';
187
- /** @type RegExp */
188
- let LINKEDIN_REGEX;
189
- /** @type RegExp */
190
- let LINKEDIN_REGEX_GLOBAL;
191
- /** @type RegExp */
192
- let INSTAGRAM_REGEX;
193
- /** @type RegExp */
194
- let INSTAGRAM_REGEX_GLOBAL;
195
- /** @type RegExp */
196
- let TWITTER_REGEX;
197
- /** @type RegExp */
198
- let TWITTER_REGEX_GLOBAL;
199
- /** @type RegExp */
200
- let FACEBOOK_REGEX;
201
- /** @type RegExp */
202
- let FACEBOOK_REGEX_GLOBAL;
203
- /** @type RegExp */
204
- let YOUTUBE_REGEX;
205
- /** @type RegExp */
206
- let YOUTUBE_REGEX_GLOBAL;
207
- /** @type RegExp */
208
- let TIKTOK_REGEX;
209
- /** @type RegExp */
210
- let TIKTOK_REGEX_GLOBAL;
211
- /** @type RegExp */
212
- let PINTEREST_REGEX;
213
- /** @type RegExp */
214
- let PINTEREST_REGEX_GLOBAL;
215
- let DISCORD_REGEX;
216
- /** @type RegExp */
217
- let DISCORD_REGEX_GLOBAL;
218
- try {
219
- /**
220
- * Regular expression to exactly match a single LinkedIn profile URL.
221
- * It has the following form: `/^...$/i` and matches URLs such as:
222
- * ```
223
- * https://www.linkedin.com/in/alan-turing
224
- * en.linkedin.com/in/alan-turing
225
- * linkedin.com/in/alan-turing
226
- * https://www.linkedin.com/company/linkedin/
227
- * ```
228
- *
229
- * The regular expression does NOT match URLs with additional
230
- * subdirectories or query parameters, such as:
231
- * ```
232
- * https://www.linkedin.com/in/linus-torvalds/latest-activity
233
- * ```
234
- *
235
- * Example usage:
236
- * ```
237
- * if (Apify.utils.social.LINKEDIN_REGEX.test('https://www.linkedin.com/in/alan-turing')) {
238
- * console.log('Match!');
239
- * }
240
- * ```
241
- * @type {RegExp}
242
- * @memberOf social
243
- */
244
- LINKEDIN_REGEX = new RegExp(`^${LINKEDIN_REGEX_STRING}$`, 'i');
245
- /**
246
- * Regular expression to find multiple LinkedIn profile URLs in a text or HTML.
247
- * It has the following form: `/.../ig` and matches URLs such as:
248
- * ```
249
- * https://www.linkedin.com/in/alan-turing
250
- * en.linkedin.com/in/alan-turing
251
- * linkedin.com/in/alan-turing
252
- * https://www.linkedin.com/company/linkedin/
253
- * ```
254
- *
255
- * If the profile URL contains subdirectories or query parameters, the regular expression
256
- * extracts just the base part of the profile URL. For example, from text such as:
257
- * ```
258
- * https://www.linkedin.com/in/linus-torvalds/latest-activity
259
- * ```
260
- * the expression extracts just the following base URL:
261
- * ```
262
- * https://www.linkedin.com/in/linus-torvalds
263
- * ```
264
- *
265
- * Example usage:
266
- * ```
267
- * const matches = text.match(Apify.utils.social.LINKEDIN_REGEX_GLOBAL);
268
- * if (matches) console.log(`${matches.length} LinkedIn profiles found!`);
269
- * ```
270
- * @type {RegExp}
271
- * @memberOf social
272
- */
273
- LINKEDIN_REGEX_GLOBAL = new RegExp(LINKEDIN_REGEX_STRING, 'ig');
274
- /**
275
- * Regular expression to exactly match a single Instagram profile URL.
276
- * It has the following form: `/^...$/i` and matches URLs such as:
277
- * ```
278
- * https://www.instagram.com/old_prague
279
- * www.instagram.com/old_prague/
280
- * instagr.am/old_prague
281
- * ```
282
- *
283
- * The regular expression does NOT match URLs with additional
284
- * subdirectories or query parameters, such as:
285
- * ```
286
- * https://www.instagram.com/cristiano/followers
287
- * ```
288
- *
289
- * It also does NOT match the following URLs:
290
- * ```
291
- * https://www.instagram.com/explore/
292
- * https://www.instagram.com/_n/
293
- * https://www.instagram.com/_u/
294
- * ```
295
- *
296
- * Example usage:
297
- * ```
298
- * if (Apify.utils.social.INSTAGRAM_REGEX.test('https://www.instagram.com/old_prague')) {
299
- * console.log('Match!');
300
- * }
301
- * ```
302
- * @type {RegExp}
303
- * @memberOf social
304
- */
305
- INSTAGRAM_REGEX = new RegExp(`^${INSTAGRAM_REGEX_STRING}$`, 'i');
306
- /**
307
- * Regular expression to find multiple Instagram profile URLs in a text or HTML.
308
- * It has the following form: `/.../ig` and matches URLs such as:
309
- * ```
310
- * https://www.instagram.com/old_prague
311
- * www.instagram.com/old_prague/
312
- * instagr.am/old_prague
313
- * ```
314
- *
315
- * If the profile URL contains subdirectories or query parameters, the regular expression
316
- * extracts just the base part of the profile URL. For example, from text such as:
317
- * ```
318
- * https://www.instagram.com/cristiano/followers
319
- * ```
320
- * the expression extracts just the following base URL:
321
- * ```
322
- * https://www.instagram.com/cristiano
323
- * ```
324
- *
325
- * The regular expression does NOT match the following URLs:
326
- * ```
327
- * https://www.instagram.com/explore/
328
- * https://www.instagram.com/_n/
329
- * https://www.instagram.com/_u/
330
- * ```
331
- *
332
- * Example usage:
333
- * ```
334
- * const matches = text.match(Apify.utils.social.INSTAGRAM_REGEX_GLOBAL);
335
- * if (matches) console.log(`${matches.length} Instagram profiles found!`);
336
- * ```
337
- * @type {RegExp}
338
- * @memberOf social
339
- */
340
- INSTAGRAM_REGEX_GLOBAL = new RegExp(INSTAGRAM_REGEX_STRING, 'ig');
341
- /**
342
- * Regular expression to exactly match a single Twitter profile URL.
343
- * It has the following form: `/^...$/i` and matches URLs such as:
344
- * ```
345
- * https://www.twitter.com/apify
346
- * twitter.com/apify
347
- * ```
348
- *
349
- * The regular expression does NOT match URLs with additional
350
- * subdirectories or query parameters, such as:
351
- * ```
352
- * https://www.twitter.com/realdonaldtrump/following
353
- * ```
354
- *
355
- * Example usage:
356
- * ```
357
- * if (Apify.utils.social.TWITTER_REGEX.test('https://www.twitter.com/apify')) {
358
- * console.log('Match!');
359
- * }
360
- * ```
361
- * @type {RegExp}
362
- * @memberOf social
363
- */
364
- TWITTER_REGEX = new RegExp(`^${TWITTER_REGEX_STRING}$`, 'i');
365
- /**
366
- * Regular expression to find multiple Twitter profile URLs in a text or HTML.
367
- * It has the following form: `/.../ig` and matches URLs such as:
368
- * ```
369
- * https://www.twitter.com/apify
370
- * twitter.com/apify
371
- * ```
372
- *
373
- * If the profile URL contains subdirectories or query parameters, the regular expression
374
- * extracts just the base part of the profile URL. For example, from text such as:
375
- * ```
376
- * https://www.twitter.com/realdonaldtrump/following
377
- * ```
378
- * the expression extracts only the following base URL:
379
- * ```
380
- * https://www.twitter.com/realdonaldtrump
381
- * ```
382
- *
383
- * Example usage:
384
- * ```
385
- * const matches = text.match(Apify.utils.social.TWITTER_REGEX_GLOBAL);
386
- * if (matches) console.log(`${matches.length} Twitter profiles found!`);
387
- * ```
388
- * @type {RegExp}
389
- * @memberOf social
390
- */
391
- TWITTER_REGEX_GLOBAL = new RegExp(TWITTER_REGEX_STRING, 'ig');
392
- /**
393
- * Regular expression to exactly match a single Facebook profile URL.
394
- * It has the following form: `/^...$/i` and matches URLs such as:
395
- * ```
396
- * https://www.facebook.com/apifytech
397
- * facebook.com/apifytech
398
- * fb.com/apifytech
399
- * https://www.facebook.com/profile.php?id=123456789
400
- * ```
401
- *
402
- * The regular expression does NOT match URLs with additional
403
- * subdirectories or query parameters, such as:
404
- * ```
405
- * https://www.facebook.com/apifytech/photos
406
- * ```
407
- *
408
- * Example usage:
409
- * ```
410
- * if (Apify.utils.social.FACEBOOK_REGEX.test('https://www.facebook.com/apifytech')) {
411
- * console.log('Match!');
412
- * }
413
- * ```
414
- * @type {RegExp}
415
- * @memberOf social
416
- */
417
- FACEBOOK_REGEX = new RegExp(`^${FACEBOOK_REGEX_STRING}$`, 'i');
418
- /**
419
- * Regular expression to find multiple Facebook profile URLs in a text or HTML.
420
- * It has the following form: `/.../ig` and matches URLs such as:
421
- * ```
422
- * https://www.facebook.com/apifytech
423
- * facebook.com/apifytech
424
- * fb.com/apifytech
425
- * ```
426
- *
427
- * If the profile URL contains subdirectories or query parameters, the regular expression
428
- * extracts just the base part of the profile URL. For example, from text such as:
429
- * ```
430
- * https://www.facebook.com/apifytech/photos
431
- * ```
432
- * the expression extracts only the following base URL:
433
- * ```
434
- * https://www.facebook.com/apifytech
435
- * ```
436
- *
437
- * Example usage:
438
- * ```
439
- * const matches = text.match(Apify.utils.social.FACEBOOK_REGEX_GLOBAL);
440
- * if (matches) console.log(`${matches.length} Facebook profiles found!`);
441
- * ```
442
- * @type {RegExp}
443
- * @memberOf social
444
- */
445
- FACEBOOK_REGEX_GLOBAL = new RegExp(FACEBOOK_REGEX_STRING, 'ig');
446
- /**
447
- * Regular expression to exactly match a single Youtube channel, user or video URL.
448
- * It has the following form: `/^...$/i` and matches URLs such as:
449
- * ```
450
- * https://www.youtube.com/watch?v=kM7YfhfkiEE
451
- * https://youtu.be/kM7YfhfkiEE
452
- * https://www.youtube.com/c/TrapNation
453
- * https://www.youtube.com/channel/UCklie6BM0fhFvzWYqQVoCTA
454
- * https://www.youtube.com/user/pewdiepie
455
- * ```
456
- *
457
- * Please note that this won't match URLs like https://www.youtube.com/pewdiepie that redirect to /user or /channel.
458
- *
459
- * Example usage:
460
- * ```
461
- * if (Apify.utils.social.YOUTUBE_REGEX.test('https://www.youtube.com/watch?v=kM7YfhfkiEE')) {
462
- * console.log('Match!');
463
- * }
464
- * ```
465
- * @type {RegExp}
466
- * @memberOf social
467
- */
468
- YOUTUBE_REGEX = new RegExp(`^${YOUTUBE_REGEX_STRING}$`, 'i');
469
- /**
470
- * Regular expression to find multiple Youtube channel, user or video URLs in a text or HTML.
471
- * It has the following form: `/.../ig` and matches URLs such as:
472
- * ```
473
- * https://www.youtube.com/watch?v=kM7YfhfkiEE
474
- * https://youtu.be/kM7YfhfkiEE
475
- * https://www.youtube.com/c/TrapNation
476
- * https://www.youtube.com/channel/UCklie6BM0fhFvzWYqQVoCTA
477
- * https://www.youtube.com/user/pewdiepie
478
- * ```
479
- *
480
- * Please note that this won't match URLs like https://www.youtube.com/pewdiepie that redirect to /user or /channel.
481
- *
482
- * Example usage:
483
- * ```
484
- * const matches = text.match(Apify.utils.social.YOUTUBE_REGEX_GLOBAL);
485
- * if (matches) console.log(`${matches.length} Youtube videos found!`);
486
- * ```
487
- * @type {RegExp}
488
- * @memberOf social
489
- */
490
- YOUTUBE_REGEX_GLOBAL = new RegExp(YOUTUBE_REGEX_STRING, 'ig');
491
- /**
492
- * Regular expression to exactly match a Tiktok video or user account.
493
- * It has the following form: `/^...$/i` and matches URLs such as:
494
- * ```
495
- * https://www.tiktok.com/trending?shareId=123456789
496
- * https://www.tiktok.com/embed/123456789
497
- * https://m.tiktok.com/v/123456789
498
- * https://www.tiktok.com/@user
499
- * https://www.tiktok.com/@user-account.pro
500
- * https://www.tiktok.com/@user/video/123456789
501
- * ```
502
- *
503
- * Example usage:
504
- * ```
505
- * if (Apify.utils.social.TIKTOK_REGEX.test('https://www.tiktok.com/@user')) {
506
- * console.log('Match!');
507
- * }
508
- * ```
509
- * @type {RegExp}
510
- * @memberOf social
511
- */
512
- TIKTOK_REGEX = new RegExp(`^${TIKTOK_REGEX_STRING}$`, 'i');
513
- /**
514
- * Regular expression to find multiple Tiktok videos or user accounts in a text or HTML.
515
- * It has the following form: `/.../ig` and matches URLs such as:
516
- * ```
517
- * https://www.tiktok.com/trending?shareId=123456789
518
- * https://www.tiktok.com/embed/123456789
519
- * https://m.tiktok.com/v/123456789
520
- * https://www.tiktok.com/@user
521
- * https://www.tiktok.com/@user-account.pro
522
- * https://www.tiktok.com/@user/video/123456789
523
- * ```
524
- *
525
- * Example usage:
526
- * ```
527
- * const matches = text.match(Apify.utils.social.TIKTOK_REGEX_GLOBAL);
528
- * if (matches) console.log(`${matches.length} TikTok videos and users found!`);
529
- * ```
530
- * @type {RegExp}
531
- * @memberOf social
532
- */
533
- TIKTOK_REGEX_GLOBAL = new RegExp(TIKTOK_REGEX_STRING, 'ig');
534
- /**
535
- * Regular expression to exactly match a Pinterest pin, user or user's board.
536
- * It has the following form: `/^...$/i` and matches URLs such as:
537
- * ```
538
- * https://pinterest.com/pin/123456789
539
- * https://www.pinterest.cz/pin/123456789
540
- * https://www.pinterest.com/user
541
- * https://uk.pinterest.com/user
542
- * https://www.pinterest.co.uk/user
543
- * pinterest.com/user_name.gold
544
- * https://cz.pinterest.com/user/board
545
- * ```
546
- *
547
- * Example usage:
548
- * ```
549
- * if (Apify.utils.social.PINTEREST_REGEX.test('https://www.pinterest.com/user')) {
550
- * console.log('Match!');
551
- * }
552
- * ```
553
- * @type {RegExp}
554
- * @memberOf social
555
- */
556
- PINTEREST_REGEX = new RegExp(`^${PINTEREST_REGEX_STRING}$`, 'i');
557
- /**
558
- * Regular expression to find multiple Pinterest pins, users or boards in a text or HTML.
559
- * It has the following form: `/.../ig` and matches URLs such as:
560
- * ```
561
- * https://pinterest.com/pin/123456789
562
- * https://www.pinterest.cz/pin/123456789
563
- * https://www.pinterest.com/user
564
- * https://uk.pinterest.com/user
565
- * https://www.pinterest.co.uk/user
566
- * pinterest.com/user_name.gold
567
- * https://cz.pinterest.com/user/board
568
- * ```
569
- *
570
- * Example usage:
571
- * ```
572
- * const matches = text.match(Apify.utils.social.PINTEREST_REGEX_GLOBAL);
573
- * if (matches) console.log(`${matches.length} Pinterest pins, users and boards found!`);
574
- * ```
575
- * @type {RegExp}
576
- * @memberOf social
577
- */
578
- PINTEREST_REGEX_GLOBAL = new RegExp(PINTEREST_REGEX_STRING, 'ig');
579
- /**
580
- * Regular expression to exactly match a Discord invite or channel.
581
- * It has the following form: `/^...$/i` and matches URLs such as:
582
- * ```
583
- * https://discord.gg/discord-developers
584
- * https://discord.com/invite/jyEM2PRvMU
585
- * https://discordapp.com/channels/1234
586
- * https://discord.com/channels/1234/1234
587
- * discord.gg/discord-developers
588
- * ```
589
- *
590
- * Example usage:
591
- * ```
592
- * if (Apify.utils.social.DISCORD_REGEX.test('https://discord.gg/discord-developers')) {
593
- * console.log('Match!');
594
- * }
595
- * ```
596
- * @type {RegExp}
597
- * @memberOf social
598
- */
599
- DISCORD_REGEX = new RegExp(`^${DISCORD_REGEX_STRING}$`, 'i');
600
- /**
601
- * Regular expression to find multiple Discord channels or invites in a text or HTML.
602
- * It has the following form: `/.../ig` and matches URLs such as:
603
- * ```
604
- * https://discord.gg/discord-developers
605
- * https://discord.com/invite/jyEM2PRvMU
606
- * https://discordapp.com/channels/1234
607
- * https://discord.com/channels/1234/1234
608
- * discord.gg/discord-developers
609
- * ```
610
- *
611
- * Example usage:
612
- * ```
613
- * const matches = text.match(Apify.utils.social.DISCORD_REGEX_GLOBAL);
614
- * if (matches) console.log(`${matches.length} Discord channels found!`);
615
- * ```
616
- * @type {RegExp}
617
- * @memberOf social
618
- */
619
- DISCORD_REGEX_GLOBAL = new RegExp(DISCORD_REGEX_STRING, 'ig');
620
- }
621
- catch (e) {
622
- // Older versions of Node don't support negative lookbehind and lookahead expressions.
623
- // Show warning instead of failing.
624
- if (e && e.message && e.message.includes('Invalid group')) {
625
- // eslint-disable-next-line max-len
626
- utils_log_1.default.warning(`Your version of Node.js (${process.version}) doesn't support the regular expression syntax used by Apify.utils.social tools. The tools will not work. Please upgrade your Node.js to the latest version.`);
627
- }
628
- else {
629
- throw e;
630
- }
631
- }
632
- /**
633
- * Representation of social handles parsed from a HTML page.
634
- *
635
- * The object has the following structure:
636
- *
637
- * ```
638
- * {
639
- * emails: String[],
640
- * phones: String[],
641
- * phonesUncertain: String[],
642
- * linkedIns: String[],
643
- * twitters: String[],
644
- * instagrams: String[],
645
- * facebooks: String[],
646
- * youtubes: String[],
647
- * tiktoks: String[],
648
- * pinterests: String[],
649
- * discords: String[],
650
- * }
651
- * ```
652
- * @typedef SocialHandles
653
- * @property {string[]} emails
654
- * @property {string[]} phones
655
- * @property {string[]} phonesUncertain
656
- * @property {string[]} linkedIns
657
- * @property {string[]} twitters
658
- * @property {string[]} instagrams
659
- * @property {string[]} facebooks
660
- * @property {string[]} youtubes
661
- * @property {string[]} tiktoks
662
- * @property {string[]} pinterests
663
- * @property {string[]} discords
664
- */
665
- /**
666
- * The function attempts to extract emails, phone numbers and social profile URLs from a HTML document,
667
- * specifically LinkedIn, Twitter, Instagram, Facebook, YouTube, TikTok, Pinterest and Discord URLs.
668
- * The function removes duplicates from the resulting arrays and sorts the items alphabetically.
669
- *
670
- * Note that the `phones` field contains phone numbers extracted from the special phone links
671
- * such as `[call us](tel:+1234556789)` (see {@link social#phonesFromUrls})
672
- * and potentially other sources with high certainty, while `phonesUncertain` contains phone numbers
673
- * extracted from the plain text, which might be very inaccurate.
674
- *
675
- * **Example usage:**
676
- * ```javascript
677
- * const Apify = require('apify');
678
- *
679
- * const browser = await Apify.launchPuppeteer();
680
- * const page = await browser.newPage();
681
- * await page.goto('http://www.example.com');
682
- * const html = await page.content();
683
- *
684
- * const result = Apify.utils.social.parseHandlesFromHtml(html);
685
- * console.log('Social handles:');
686
- * console.dir(result);
687
- * ```
688
- *
689
- * @param {string} html HTML text
690
- * @param {*|null} [data] Optional object which will receive the `text` and `$` properties
691
- * that contain text content of the HTML and `cheerio` object, respectively. This is an optimization
692
- * so that the caller doesn't need to parse the HTML document again, if needed.
693
- * @return {SocialHandles} An object with the social handles.
694
- *
695
- * @memberOf social
696
- */
697
- const parseHandlesFromHtml = (html, data = null) => {
698
- const result = {
699
- emails: [],
700
- phones: [],
701
- phonesUncertain: [],
702
- linkedIns: [],
703
- twitters: [],
704
- instagrams: [],
705
- facebooks: [],
706
- youtubes: [],
707
- tiktoks: [],
708
- pinterests: [],
709
- discords: [],
710
- };
711
- // TODO: maybe extract phone numbers from JSON+LD
712
- if (!underscore_1.default.isString(html))
713
- return result;
714
- const $ = cheerio_1.default.load(html, { decodeEntities: true });
715
- if (data)
716
- data.$ = $;
717
- const text = utils_1.publicUtils.htmlToText($);
718
- if (data)
719
- data.text = text;
720
- // Find all <a> links with href tag
721
- const linkUrls = [];
722
- $('a[href]').each((index, elem) => {
723
- if (elem)
724
- linkUrls.push($(elem).attr('href'));
725
- });
726
- result.emails = emailsFromUrls(linkUrls).concat(emailsFromText(text));
727
- result.phones = phonesFromUrls(linkUrls);
728
- result.phonesUncertain = phonesFromText(text);
729
- // Note that these regexps extract just the base profile path. For example for
730
- // https://www.linkedin.com/in/carl-newman-123456a/detail/recent-activity/
731
- // they match just:
732
- // https://www.linkedin.com/in/carl-newman-123456a
733
- result.linkedIns = html.match(LINKEDIN_REGEX_GLOBAL) || [];
734
- result.twitters = html.match(TWITTER_REGEX_GLOBAL) || [];
735
- result.instagrams = html.match(INSTAGRAM_REGEX_GLOBAL) || [];
736
- result.facebooks = html.match(FACEBOOK_REGEX_GLOBAL) || [];
737
- result.youtubes = html.match(YOUTUBE_REGEX_GLOBAL) || [];
738
- result.tiktoks = html.match(TIKTOK_REGEX_GLOBAL) || [];
739
- result.pinterests = html.match(PINTEREST_REGEX_GLOBAL) || [];
740
- result.discords = html.match(DISCORD_REGEX_GLOBAL) || [];
741
- // Sort and deduplicate handles
742
- // eslint-disable-next-line guard-for-in, no-restricted-syntax
743
- for (const key in result) {
744
- result[key].sort();
745
- result[key] = underscore_1.default.uniq(result[key], true);
746
- }
747
- return result;
748
- };
749
- /**
750
- * A namespace that contains various utilities to help you extract social handles
751
- * from text, URLs and HTML documents.
752
- *
753
- * **Example usage:**
754
- *
755
- * ```javascript
756
- * const Apify = require('apify');
757
- *
758
- * const emails = Apify.utils.social.emailsFromText('alice@example.com bob@example.com');
759
- * ```
760
- * @namespace social
761
- */
762
- exports.socialUtils = {
763
- emailsFromText,
764
- emailsFromUrls,
765
- phonesFromText,
766
- phonesFromUrls,
767
- parseHandlesFromHtml,
768
- EMAIL_REGEX,
769
- EMAIL_REGEX_GLOBAL,
770
- LINKEDIN_REGEX,
771
- LINKEDIN_REGEX_GLOBAL,
772
- INSTAGRAM_REGEX,
773
- INSTAGRAM_REGEX_GLOBAL,
774
- TWITTER_REGEX,
775
- TWITTER_REGEX_GLOBAL,
776
- FACEBOOK_REGEX,
777
- FACEBOOK_REGEX_GLOBAL,
778
- YOUTUBE_REGEX,
779
- YOUTUBE_REGEX_GLOBAL,
780
- TIKTOK_REGEX,
781
- TIKTOK_REGEX_GLOBAL,
782
- PINTEREST_REGEX,
783
- PINTEREST_REGEX_GLOBAL,
784
- DISCORD_REGEX,
785
- DISCORD_REGEX_GLOBAL,
786
- };
787
- //# sourceMappingURL=utils_social.js.map