apify 2.3.1-beta.4 → 3.0.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (211) hide show
  1. package/README.md +6 -5
  2. package/package.json +69 -128
  3. package/build/actor.d.ts +0 -113
  4. package/build/actor.d.ts.map +0 -1
  5. package/build/actor.js +0 -582
  6. package/build/actor.js.map +0 -1
  7. package/build/apify.d.ts +0 -752
  8. package/build/apify.d.ts.map +0 -1
  9. package/build/apify.js +0 -877
  10. package/build/apify.js.map +0 -1
  11. package/build/autoscaling/autoscaled_pool.d.ts +0 -384
  12. package/build/autoscaling/autoscaled_pool.d.ts.map +0 -1
  13. package/build/autoscaling/autoscaled_pool.js +0 -557
  14. package/build/autoscaling/autoscaled_pool.js.map +0 -1
  15. package/build/autoscaling/snapshotter.d.ts +0 -278
  16. package/build/autoscaling/snapshotter.d.ts.map +0 -1
  17. package/build/autoscaling/snapshotter.js +0 -447
  18. package/build/autoscaling/snapshotter.js.map +0 -1
  19. package/build/autoscaling/system_status.d.ts +0 -224
  20. package/build/autoscaling/system_status.d.ts.map +0 -1
  21. package/build/autoscaling/system_status.js +0 -228
  22. package/build/autoscaling/system_status.js.map +0 -1
  23. package/build/browser_launchers/browser_launcher.d.ts +0 -154
  24. package/build/browser_launchers/browser_launcher.d.ts.map +0 -1
  25. package/build/browser_launchers/browser_launcher.js +0 -160
  26. package/build/browser_launchers/browser_launcher.js.map +0 -1
  27. package/build/browser_launchers/browser_plugin.d.ts +0 -23
  28. package/build/browser_launchers/browser_plugin.d.ts.map +0 -1
  29. package/build/browser_launchers/browser_plugin.js +0 -25
  30. package/build/browser_launchers/browser_plugin.js.map +0 -1
  31. package/build/browser_launchers/playwright_launcher.d.ts +0 -131
  32. package/build/browser_launchers/playwright_launcher.d.ts.map +0 -1
  33. package/build/browser_launchers/playwright_launcher.js +0 -150
  34. package/build/browser_launchers/playwright_launcher.js.map +0 -1
  35. package/build/browser_launchers/puppeteer_launcher.d.ts +0 -153
  36. package/build/browser_launchers/puppeteer_launcher.d.ts.map +0 -1
  37. package/build/browser_launchers/puppeteer_launcher.js +0 -197
  38. package/build/browser_launchers/puppeteer_launcher.js.map +0 -1
  39. package/build/cache_container.d.ts +0 -31
  40. package/build/cache_container.d.ts.map +0 -1
  41. package/build/cache_container.js +0 -48
  42. package/build/cache_container.js.map +0 -1
  43. package/build/configuration.d.ts +0 -226
  44. package/build/configuration.d.ts.map +0 -1
  45. package/build/configuration.js +0 -325
  46. package/build/configuration.js.map +0 -1
  47. package/build/constants.d.ts +0 -37
  48. package/build/constants.d.ts.map +0 -1
  49. package/build/constants.js +0 -41
  50. package/build/constants.js.map +0 -1
  51. package/build/crawlers/basic_crawler.d.ts +0 -443
  52. package/build/crawlers/basic_crawler.d.ts.map +0 -1
  53. package/build/crawlers/basic_crawler.js +0 -664
  54. package/build/crawlers/basic_crawler.js.map +0 -1
  55. package/build/crawlers/browser_crawler.d.ts +0 -512
  56. package/build/crawlers/browser_crawler.d.ts.map +0 -1
  57. package/build/crawlers/browser_crawler.js +0 -540
  58. package/build/crawlers/browser_crawler.js.map +0 -1
  59. package/build/crawlers/cheerio_crawler.d.ts +0 -931
  60. package/build/crawlers/cheerio_crawler.d.ts.map +0 -1
  61. package/build/crawlers/cheerio_crawler.js +0 -913
  62. package/build/crawlers/cheerio_crawler.js.map +0 -1
  63. package/build/crawlers/crawler_extension.d.ts +0 -10
  64. package/build/crawlers/crawler_extension.d.ts.map +0 -1
  65. package/build/crawlers/crawler_extension.js +0 -19
  66. package/build/crawlers/crawler_extension.js.map +0 -1
  67. package/build/crawlers/crawler_utils.d.ts +0 -34
  68. package/build/crawlers/crawler_utils.d.ts.map +0 -1
  69. package/build/crawlers/crawler_utils.js +0 -87
  70. package/build/crawlers/crawler_utils.js.map +0 -1
  71. package/build/crawlers/playwright_crawler.d.ts +0 -448
  72. package/build/crawlers/playwright_crawler.d.ts.map +0 -1
  73. package/build/crawlers/playwright_crawler.js +0 -299
  74. package/build/crawlers/playwright_crawler.js.map +0 -1
  75. package/build/crawlers/puppeteer_crawler.d.ts +0 -425
  76. package/build/crawlers/puppeteer_crawler.d.ts.map +0 -1
  77. package/build/crawlers/puppeteer_crawler.js +0 -299
  78. package/build/crawlers/puppeteer_crawler.js.map +0 -1
  79. package/build/crawlers/statistics.d.ts +0 -185
  80. package/build/crawlers/statistics.d.ts.map +0 -1
  81. package/build/crawlers/statistics.js +0 -331
  82. package/build/crawlers/statistics.js.map +0 -1
  83. package/build/enqueue_links/click_elements.d.ts +0 -179
  84. package/build/enqueue_links/click_elements.d.ts.map +0 -1
  85. package/build/enqueue_links/click_elements.js +0 -434
  86. package/build/enqueue_links/click_elements.js.map +0 -1
  87. package/build/enqueue_links/enqueue_links.d.ts +0 -117
  88. package/build/enqueue_links/enqueue_links.d.ts.map +0 -1
  89. package/build/enqueue_links/enqueue_links.js +0 -163
  90. package/build/enqueue_links/enqueue_links.js.map +0 -1
  91. package/build/enqueue_links/shared.d.ts +0 -42
  92. package/build/enqueue_links/shared.d.ts.map +0 -1
  93. package/build/enqueue_links/shared.js +0 -121
  94. package/build/enqueue_links/shared.js.map +0 -1
  95. package/build/errors.d.ts +0 -29
  96. package/build/errors.d.ts.map +0 -1
  97. package/build/errors.js +0 -38
  98. package/build/errors.js.map +0 -1
  99. package/build/events.d.ts +0 -11
  100. package/build/events.d.ts.map +0 -1
  101. package/build/events.js +0 -147
  102. package/build/events.js.map +0 -1
  103. package/build/index.d.ts +0 -4
  104. package/build/index.d.ts.map +0 -1
  105. package/build/index.js +0 -7
  106. package/build/index.js.map +0 -1
  107. package/build/main.d.ts +0 -179
  108. package/build/main.d.ts.map +0 -1
  109. package/build/main.js +0 -81
  110. package/build/main.js.map +0 -1
  111. package/build/playwright_utils.d.ts +0 -9
  112. package/build/playwright_utils.d.ts.map +0 -1
  113. package/build/playwright_utils.js +0 -90
  114. package/build/playwright_utils.js.map +0 -1
  115. package/build/proxy_configuration.d.ts +0 -411
  116. package/build/proxy_configuration.d.ts.map +0 -1
  117. package/build/proxy_configuration.js +0 -517
  118. package/build/proxy_configuration.js.map +0 -1
  119. package/build/pseudo_url.d.ts +0 -86
  120. package/build/pseudo_url.d.ts.map +0 -1
  121. package/build/pseudo_url.js +0 -153
  122. package/build/pseudo_url.js.map +0 -1
  123. package/build/puppeteer_request_interception.d.ts +0 -8
  124. package/build/puppeteer_request_interception.d.ts.map +0 -1
  125. package/build/puppeteer_request_interception.js +0 -235
  126. package/build/puppeteer_request_interception.js.map +0 -1
  127. package/build/puppeteer_utils.d.ts +0 -250
  128. package/build/puppeteer_utils.d.ts.map +0 -1
  129. package/build/puppeteer_utils.js +0 -551
  130. package/build/puppeteer_utils.js.map +0 -1
  131. package/build/request.d.ts +0 -180
  132. package/build/request.d.ts.map +0 -1
  133. package/build/request.js +0 -261
  134. package/build/request.js.map +0 -1
  135. package/build/request_list.d.ts +0 -581
  136. package/build/request_list.d.ts.map +0 -1
  137. package/build/request_list.js +0 -826
  138. package/build/request_list.js.map +0 -1
  139. package/build/serialization.d.ts +0 -5
  140. package/build/serialization.d.ts.map +0 -1
  141. package/build/serialization.js +0 -139
  142. package/build/serialization.js.map +0 -1
  143. package/build/session_pool/errors.d.ts +0 -11
  144. package/build/session_pool/errors.d.ts.map +0 -1
  145. package/build/session_pool/errors.js +0 -18
  146. package/build/session_pool/errors.js.map +0 -1
  147. package/build/session_pool/events.d.ts +0 -5
  148. package/build/session_pool/events.d.ts.map +0 -1
  149. package/build/session_pool/events.js +0 -6
  150. package/build/session_pool/events.js.map +0 -1
  151. package/build/session_pool/session.d.ts +0 -286
  152. package/build/session_pool/session.d.ts.map +0 -1
  153. package/build/session_pool/session.js +0 -355
  154. package/build/session_pool/session.js.map +0 -1
  155. package/build/session_pool/session_pool.d.ts +0 -280
  156. package/build/session_pool/session_pool.d.ts.map +0 -1
  157. package/build/session_pool/session_pool.js +0 -393
  158. package/build/session_pool/session_pool.js.map +0 -1
  159. package/build/session_pool/session_utils.d.ts +0 -4
  160. package/build/session_pool/session_utils.d.ts.map +0 -1
  161. package/build/session_pool/session_utils.js +0 -24
  162. package/build/session_pool/session_utils.js.map +0 -1
  163. package/build/stealth/hiding_tricks.d.ts +0 -22
  164. package/build/stealth/hiding_tricks.d.ts.map +0 -1
  165. package/build/stealth/hiding_tricks.js +0 -308
  166. package/build/stealth/hiding_tricks.js.map +0 -1
  167. package/build/stealth/stealth.d.ts +0 -56
  168. package/build/stealth/stealth.d.ts.map +0 -1
  169. package/build/stealth/stealth.js +0 -125
  170. package/build/stealth/stealth.js.map +0 -1
  171. package/build/storages/dataset.d.ts +0 -288
  172. package/build/storages/dataset.d.ts.map +0 -1
  173. package/build/storages/dataset.js +0 -480
  174. package/build/storages/dataset.js.map +0 -1
  175. package/build/storages/key_value_store.d.ts +0 -243
  176. package/build/storages/key_value_store.d.ts.map +0 -1
  177. package/build/storages/key_value_store.js +0 -462
  178. package/build/storages/key_value_store.js.map +0 -1
  179. package/build/storages/request_queue.d.ts +0 -318
  180. package/build/storages/request_queue.d.ts.map +0 -1
  181. package/build/storages/request_queue.js +0 -636
  182. package/build/storages/request_queue.js.map +0 -1
  183. package/build/storages/storage_manager.d.ts +0 -87
  184. package/build/storages/storage_manager.d.ts.map +0 -1
  185. package/build/storages/storage_manager.js +0 -150
  186. package/build/storages/storage_manager.js.map +0 -1
  187. package/build/tsconfig.tsbuildinfo +0 -1
  188. package/build/typedefs.d.ts +0 -146
  189. package/build/typedefs.d.ts.map +0 -1
  190. package/build/typedefs.js +0 -88
  191. package/build/typedefs.js.map +0 -1
  192. package/build/utils.d.ts +0 -175
  193. package/build/utils.d.ts.map +0 -1
  194. package/build/utils.js +0 -731
  195. package/build/utils.js.map +0 -1
  196. package/build/utils_log.d.ts +0 -41
  197. package/build/utils_log.d.ts.map +0 -1
  198. package/build/utils_log.js +0 -192
  199. package/build/utils_log.js.map +0 -1
  200. package/build/utils_request.d.ts +0 -77
  201. package/build/utils_request.d.ts.map +0 -1
  202. package/build/utils_request.js +0 -385
  203. package/build/utils_request.js.map +0 -1
  204. package/build/utils_social.d.ts +0 -210
  205. package/build/utils_social.d.ts.map +0 -1
  206. package/build/utils_social.js +0 -787
  207. package/build/utils_social.js.map +0 -1
  208. package/build/validators.d.ts +0 -23
  209. package/build/validators.d.ts.map +0 -1
  210. package/build/validators.js +0 -29
  211. package/build/validators.js.map +0 -1
@@ -1,826 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.openRequestList = exports.RequestList = exports.REQUESTS_PERSISTENCE_KEY = exports.STATE_PERSISTENCE_KEY = void 0;
4
- const tslib_1 = require("tslib");
5
- const ow_1 = (0, tslib_1.__importStar)(require("ow"));
6
- const underscore_1 = (0, tslib_1.__importDefault)(require("underscore"));
7
- const constants_1 = require("./constants");
8
- const request_1 = (0, tslib_1.__importDefault)(require("./request")); // eslint-disable-line import/no-duplicates
9
- const events_1 = (0, tslib_1.__importDefault)(require("./events"));
10
- const utils_log_1 = (0, tslib_1.__importDefault)(require("./utils_log"));
11
- const utils_1 = require("./utils");
12
- const key_value_store_1 = require("./storages/key_value_store");
13
- const serialization_1 = require("./serialization");
14
- /* eslint-enable no-unused-vars,import/named,import/no-duplicates,import/order */
15
- exports.STATE_PERSISTENCE_KEY = 'REQUEST_LIST_STATE';
16
- exports.REQUESTS_PERSISTENCE_KEY = 'REQUEST_LIST_REQUESTS';
17
- const CONTENT_TYPE_BINARY = 'application/octet-stream';
18
- /**
19
- * @typedef RequestListOptions
20
- * @property {Array<RequestOptions | Request | { requestsFromUrl: string, regex?: RegExp } | string>} [sources]
21
- * An array of sources of URLs for the {@link RequestList}. It can be either an array of strings,
22
- * plain objects that define at least the `url` property, or an array of {@link Request} instances.
23
- *
24
- * **IMPORTANT:** The `sources` array will be consumed (left empty) after `RequestList` initializes.
25
- * This is a measure to prevent memory leaks in situations when millions of sources are
26
- * added.
27
- *
28
- * Additionally, the `requestsFromUrl` property may be used instead of `url`,
29
- * which will instruct `RequestList` to download the source URLs from a given remote location.
30
- * The URLs will be parsed from the received response.
31
- *
32
- * ```
33
- * [
34
- * // A single URL
35
- * 'http://example.com/a/b',
36
- *
37
- * // Modify Request options
38
- * { method: 'PUT', url: 'https://example.com/put', payload: { foo: 'bar' } },
39
- *
40
- * // Batch import of URLs from a file hosted on the web,
41
- * // where the URLs should be requested using the HTTP POST request
42
- * { method: 'POST', requestsFromUrl: 'http://example.com/urls.txt' },
43
- *
44
- * // Batch import from remote file, using a specific regular expression to extract the URLs.
45
- * { requestsFromUrl: 'http://example.com/urls.txt', regex: /https:\/\/example.com\/.+/ },
46
- *
47
- * // Get list of URLs from a Google Sheets document. Just add "/gviz/tq?tqx=out:csv" to the Google Sheet URL.
48
- * // For details, see https://help.apify.com/en/articles/2906022-scraping-a-list-of-urls-from-a-google-sheets-document
49
- * { requestsFromUrl: 'https://docs.google.com/spreadsheets/d/1GA5sSQhQjB_REes8I5IKg31S-TuRcznWOPjcpNqtxmU/gviz/tq?tqx=out:csv' }
50
- * ]
51
- * ```
52
- * @property {RequestListSourcesFunction} [sourcesFunction]
53
- * A function that will be called to get the sources for the `RequestList`, but only if `RequestList`
54
- * was not able to fetch their persisted version (see {@link RequestListOptions.persistRequestsKey}).
55
- * It must return an `Array` of {@link Request} or {@link RequestOptions}.
56
- *
57
- * This is very useful in a scenario when getting the sources is a resource intensive or time consuming
58
- * task, such as fetching URLs from multiple sitemaps or parsing URLs from large datasets. Using the
59
- * `sourcesFunction` in combination with `persistStateKey` and `persistRequestsKey` will allow you to
60
- * fetch and parse those URLs only once, saving valuable time when your actor migrates or restarts.
61
- *
62
- * If both {@link RequestListOptions.sources} and {@link RequestListOptions.sourcesFunction} are provided,
63
- * the sources returned by the function will be added after the `sources`.
64
- *
65
- * **Example:**
66
- * ```javascript
67
- * // Let's say we want to scrape URLs extracted from sitemaps.
68
- *
69
- * const sourcesFunction = async () => {
70
- * // With super large sitemaps, this operation could take very long
71
- * // and big websites typically have multiple sitemaps.
72
- * const sitemaps = await downloadHugeSitemaps();
73
- * return parseUrlsFromSitemaps(sitemaps);
74
- * }
75
- *
76
- * // Sitemaps can change in real-time, so it's important to persist
77
- * // the URLs we collected. Otherwise we might lose our scraping
78
- * // state in case of an actor migration / failure / time-out.
79
- * const requestList = new RequestList({
80
- * sourcesFunction,
81
- * persistStateKey: 'state-key',
82
- * persistRequestsKey: 'requests-key',
83
- * })
84
- *
85
- * // The sourcesFunction is called now and the Requests are persisted.
86
- * // If something goes wrong and we need to start again, RequestList
87
- * // will load the persisted Requests from storage and will NOT
88
- * // call the sourcesFunction again, saving time and resources.
89
- * await requestList.initialize();
90
- * ```
91
- * @property {ProxyConfiguration} [proxyConfiguration]
92
- * Used to pass the proxy configuration for the `requestsFromUrl` objects.
93
- * Takes advantage of the internal address rotation and authentication process.
94
- * If undefined, the `requestsFromUrl` requests will be made without proxy.
95
- * @property {string} [persistStateKey]
96
- * Identifies the key in the default key-value store under which `RequestList` periodically stores its
97
- * state (i.e. which URLs were crawled and which not).
98
- * If the actor is restarted, `RequestList` will read the state
99
- * and continue where it left off.
100
- *
101
- * If `persistStateKey` is not set, `RequestList` will always start from the beginning,
102
- * and all the source URLs will be crawled again.
103
- * @property {string} [persistRequestsKey]
104
- * Identifies the key in the default key-value store under which the `RequestList` persists its
105
- * Requests during the {@link RequestList#initialize} call.
106
- * This is necessary if `persistStateKey` is set and the source URLs might potentially change,
107
- * to ensure consistency of the source URLs and state object. However, it comes with some
108
- * storage and performance overheads.
109
- *
110
- * If `persistRequestsKey` is not set, {@link RequestList#initialize} will always fetch the sources
111
- * from their origin, check that they are consistent with the restored state (if any)
112
- * and throw an error if they are not.
113
- * @property {RequestListState} [state]
114
- * The state object that the `RequestList` will be initialized from.
115
- * It is in the form as returned by `RequestList.getState()`, such as follows:
116
- *
117
- * ```
118
- * {
119
- * nextIndex: 5,
120
- * nextUniqueKey: 'unique-key-5',
121
- * inProgress: {
122
- * 'unique-key-1': true,
123
- * 'unique-key-4': true,
124
- * },
125
- * }
126
- * ```
127
- *
128
- * Note that the preferred (and simpler) way to persist the state of crawling of the `RequestList`
129
- * is to use the `persistStateKey` option instead.
130
- * @property {boolean} [keepDuplicateUrls=false]
131
- * By default, `RequestList` will deduplicate the provided URLs. Default deduplication is based
132
- * on the `uniqueKey` property of passed source {@link Request} objects.
133
- *
134
- * If the property is not present, it is generated by normalizing the URL. If present, it is kept intact.
135
- * In any case, only one request per `uniqueKey` is added to the `RequestList` resulting in removal
136
- * of duplicate URLs / unique keys.
137
- *
138
- * Setting `keepDuplicateUrls` to `true` will append an additional identifier to the `uniqueKey`
139
- * of each request that does not already include a `uniqueKey`. Therefore, duplicate
140
- * URLs will be kept in the list. It does not protect the user from having duplicates in user set
141
- * `uniqueKey`s however. It is the user's responsibility to ensure uniqueness of their unique keys
142
- * if they wish to keep more than just a single copy in the `RequestList`.
143
- */
144
- /**
145
- * Represents a static list of URLs to crawl.
146
- * The URLs can be provided either in code or parsed from a text file hosted on the web.
147
- * `RequestList` is used by {@link BasicCrawler}, {@link CheerioCrawler}, {@link PuppeteerCrawler}
148
- * and {@link PlaywrightCrawler} as a source of URLs to crawl.
149
- *
150
- * Each URL is represented using an instance of the {@link Request} class.
151
- * The list can only contain unique URLs. More precisely, it can only contain `Request` instances
152
- * with distinct `uniqueKey` properties. By default, `uniqueKey` is generated from the URL, but it can also be overridden.
153
- * To add a single URL to the list multiple times, corresponding {@link Request} objects will need to have different
154
- * `uniqueKey` properties. You can use the `keepDuplicateUrls` option to do this for you when initializing the
155
- * `RequestList` from sources.
156
- *
157
- * Once you create an instance of `RequestList`, you need to call the {@link RequestList#initialize} function
158
- * before the instance can be used. After that, no more URLs can be added to the list.
159
- * Unlike {@link RequestQueue}, `RequestList` is static but it can contain even millions of URLs.
160
- * > Note that `RequestList` can be used together with `RequestQueue` by the same crawler.
161
- * > In such cases, each request from `RequestList` is enqueued into `RequestQueue` first and then consumed from the latter.
162
- * > This is necessary to avoid the same URL being processed more than once (from the list first and then possibly from the queue).
163
- * > In practical terms, such a combination can be useful when there is a large number of initial URLs,
164
- * > but more URLs would be added dynamically by the crawler.
165
- *
166
- * `RequestList` has an internal state where it stores information about which requests were already handled,
167
- * which are in progress and which were reclaimed. The state may be automatically persisted to the default
168
- * {@link KeyValueStore} by setting the `persistStateKey` option so that if the Node.js process is restarted,
169
- * the crawling can continue where it left off. The automated persisting is launched upon receiving the `persistState`
170
- * event that is periodically emitted by {@link events|Apify.events}.
171
- *
172
- * The internal state is closely tied to the provided sources (URLs). If the sources change on actor restart, the state will become corrupted and
173
- * `RequestList` will raise an exception. This typically happens when the sources are a list of URLs downloaded from the web.
174
- * In such a case, use the `persistRequestsKey` option in conjunction with `persistStateKey`,
175
- * to make the `RequestList` store the initial sources to the default key-value store and load them after restart,
176
- * which will prevent any issues that a live list of URLs might cause.
177
- *
178
- * **Basic usage:**
179
- * ```javascript
180
- * // Use a helper function to simplify request list initialization.
181
- * // State and sources are automatically persisted. This is a preferred usage.
182
- * const requestList = await Apify.openRequestList('my-request-list', [
183
- * 'http://www.example.com/page-1',
184
- * { url: 'http://www.example.com/page-2', method: 'POST', userData: { foo: 'bar' }},
185
- * { requestsFromUrl: 'http://www.example.com/my-url-list.txt', userData: { isFromUrl: true } },
186
- * ]);
187
- * ```
188
- *
189
- * **Advanced usage:**
190
- * ```javascript
191
- * // Use the constructor to get more control over the initialization.
192
- * const requestList = new Apify.RequestList({
193
- * sources: [
194
- * // Separate requests
195
- * { url: 'http://www.example.com/page-1', method: 'GET', headers: { ... } },
196
- * { url: 'http://www.example.com/page-2', userData: { foo: 'bar' }},
197
- *
198
- * // Bulk load of URLs from file `http://www.example.com/my-url-list.txt`
199
- * // Note that all URLs must start with http:// or https://
200
- * { requestsFromUrl: 'http://www.example.com/my-url-list.txt', userData: { isFromUrl: true } },
201
- * ],
202
- *
203
- * // Persist the state to avoid re-crawling which can lead to data duplications.
204
- * // Keep in mind that the sources have to be immutable or this will throw an error.
205
- * persistStateKey: 'my-state',
206
- * });
207
- *
208
- * await requestList.initialize();
209
- * ```
210
- */
211
- class RequestList {
212
- /**
213
- * @param {RequestListOptions} options All `RequestList` configuration options
214
- */
215
- constructor(options = {}) {
216
- const { sources, sourcesFunction, persistStateKey, persistRequestsKey, state, proxyConfiguration, keepDuplicateUrls = false, } = options;
217
- if (!(sources || sourcesFunction)) {
218
- throw new ow_1.ArgumentError('At least one of "sources" or "sourcesFunction" must be provided.', this.constructor);
219
- }
220
- (0, ow_1.default)(options, ow_1.default.object.exactShape({
221
- sources: ow_1.default.optional.array,
222
- sourcesFunction: ow_1.default.optional.function,
223
- persistStateKey: ow_1.default.optional.string,
224
- persistRequestsKey: ow_1.default.optional.string,
225
- state: ow_1.default.optional.object.exactShape({
226
- nextIndex: ow_1.default.number,
227
- nextUniqueKey: ow_1.default.string,
228
- inProgress: ow_1.default.object,
229
- }),
230
- keepDuplicateUrls: ow_1.default.optional.boolean,
231
- proxyConfiguration: ow_1.default.optional.object,
232
- }));
233
- this.log = utils_log_1.default.child({ prefix: 'RequestList' });
234
- // Array of all requests from all sources, in the order as they appeared in sources.
235
- // All requests in the array have distinct uniqueKey!
236
- /** @type {Array<Request>} */
237
- this.requests = [];
238
- // Index to the next item in requests array to fetch. All previous requests are either handled or in progress.
239
- this.nextIndex = 0;
240
- // Dictionary, key is Request.uniqueKey, value is corresponding index in the requests array.
241
- this.uniqueKeyToIndex = {};
242
- // Dictionary of requests that were returned by fetchNextRequest().
243
- // The key is uniqueKey, value is true.
244
- // TODO: Change this to Set
245
- this.inProgress = {};
246
- // Dictionary of requests for which reclaimRequest() was called.
247
- // The key is uniqueKey, value is true. TODO: Change this to Set
248
- // Note that reclaimedRequests is always a subset of inProgress!
249
- this.reclaimed = {};
250
- this.persistStateKey = persistStateKey ? `SDK_${persistStateKey}` : persistStateKey;
251
- this.persistRequestsKey = persistRequestsKey ? `SDK_${persistRequestsKey}` : persistRequestsKey;
252
- this.initialState = state;
253
- // If this option is set then all requests will get a pre-generated unique ID and duplicate URLs will be kept in the list.
254
- this.keepDuplicateUrls = keepDuplicateUrls;
255
- // Starts as true because until we handle the first request, the list is effectively persisted by doing nothing.
256
- this.isStatePersisted = true;
257
- // Starts as false because we don't know yet and sources might change in the meantime (eg. download from live list).
258
- this.areRequestsPersisted = false;
259
- this.isLoading = false;
260
- this.isInitialized = false;
261
- // Will be empty after initialization to save memory.
262
- this.sources = sources || [];
263
- this.sourcesFunction = sourcesFunction;
264
- // The proxy configuration used for `requestsFromUrls` requests.
265
- this.proxyConfiguration = proxyConfiguration;
266
- }
267
- /**
268
- * Loads all remote sources of URLs and potentially starts periodic state persistence.
269
- * This function must be called before you can start using the instance in a meaningful way.
270
- *
271
- * @returns {Promise<void>}
272
- */
273
- async initialize() {
274
- if (this.isLoading) {
275
- throw new Error('RequestList sources are already loading or were loaded.');
276
- }
277
- this.isLoading = true;
278
- const [state, persistedRequests] = await this._loadStateAndPersistedRequests();
279
- // Add persisted requests / new sources in a memory efficient way because with very
280
- // large lists, we were running out of memory.
281
- if (persistedRequests) {
282
- await this._addPersistedRequests(persistedRequests);
283
- }
284
- else {
285
- await this._addRequestsFromSources();
286
- }
287
- this._restoreState(state);
288
- this.isInitialized = true;
289
- if (this.persistRequestsKey && !this.areRequestsPersisted)
290
- await this._persistRequests();
291
- if (this.persistStateKey) {
292
- events_1.default.on(constants_1.ACTOR_EVENT_NAMES_EX.PERSIST_STATE, this.persistState.bind(this));
293
- }
294
- }
295
- /**
296
- * Adds previously persisted Requests, as retrieved from the key-value store.
297
- * This needs to be done in a memory efficient way. We should update the input
298
- * to a Stream once apify-client supports streams.
299
- * @param {Buffer} persistedRequests
300
- * @ignore
301
- * @protected
302
- * @internal
303
- */
304
- async _addPersistedRequests(persistedRequests) {
305
- // We don't need the sources so we purge them to
306
- // prevent them from hanging in memory.
307
- for (let i = 0; i < this.sources.length; i++) {
308
- delete this.sources[i];
309
- }
310
- this.sources = [];
311
- this.areRequestsPersisted = true;
312
- const requestStream = (0, serialization_1.createDeserialize)(persistedRequests);
313
- for await (const request of requestStream) {
314
- this._addRequest(request);
315
- }
316
- }
317
- /**
318
- * Add Requests from both options.sources and options.sourcesFunction.
319
- * This function is called only when persisted sources were not loaded.
320
- * We need to avoid keeping both sources and requests in memory
321
- * to reduce memory footprint with very large sources.
322
- * @returns {Promise<void>}
323
- * @ignore
324
- * @protected
325
- * @internal
326
- */
327
- async _addRequestsFromSources() {
328
- // We'll load all sources in sequence to ensure that they get loaded in the right order.
329
- const sourcesCount = this.sources.length;
330
- for (let i = 0; i < sourcesCount; i++) {
331
- const source = this.sources[i];
332
- // Using delete here to drop the original object ASAP to free memory
333
- // .pop would reverse the array and .shift is SLOW.
334
- delete this.sources[i];
335
- if (source.requestsFromUrl) {
336
- const fetchedRequests = await this._fetchRequestsFromUrl(source);
337
- await this._addFetchedRequests(source, fetchedRequests);
338
- }
339
- else {
340
- this._addRequest(source);
341
- }
342
- }
343
- // Drop the original array full of empty indexes.
344
- this.sources = [];
345
- if (this.sourcesFunction) {
346
- try {
347
- const sourcesFromFunction = await this.sourcesFunction();
348
- const sourcesFromFunctionCount = sourcesFromFunction.length;
349
- for (let i = 0; i < sourcesFromFunctionCount; i++) {
350
- const source = sourcesFromFunction.shift();
351
- this._addRequest(source);
352
- }
353
- }
354
- catch (err) {
355
- throw new Error(`Loading requests with sourcesFunction failed.\nCause: ${err.message}`);
356
- }
357
- }
358
- }
359
- /**
360
- * Persists the current state of the `RequestList` into the default {@link KeyValueStore}.
361
- * The state is persisted automatically in regular intervals, but calling this method manually
362
- * is useful in cases where you want to have the most current state available after you pause
363
- * or stop fetching its requests. For example after you pause or abort a crawl. Or just before
364
- * a server migration.
365
- *
366
- * @return {Promise<void>}
367
- */
368
- async persistState() {
369
- if (!this.persistStateKey) {
370
- throw new Error('Cannot persist state. options.persistStateKey is not set.');
371
- }
372
- if (this.isStatePersisted)
373
- return;
374
- try {
375
- await (0, key_value_store_1.setValue)(this.persistStateKey, this.getState());
376
- this.isStatePersisted = true;
377
- }
378
- catch (err) {
379
- this.log.exception(err, 'Attempted to persist state, but failed.');
380
- }
381
- }
382
- /**
383
- * Unlike persistState(), this is used only internally, since the sources
384
- * are automatically persisted at RequestList initialization (if the persistRequestsKey is set),
385
- * but there's no reason to persist it again afterwards, because RequestList is immutable.
386
- *
387
- * @return {Promise<void>}
388
- * @ignore
389
- * @protected
390
- * @internal
391
- */
392
- async _persistRequests() {
393
- const serializedRequests = await (0, serialization_1.serializeArray)(this.requests);
394
- await (0, key_value_store_1.setValue)(this.persistRequestsKey, serializedRequests, { contentType: CONTENT_TYPE_BINARY });
395
- this.areRequestsPersisted = true;
396
- }
397
- /**
398
- * Restores RequestList state from a state object.
399
- *
400
- * @param {RequestListState} state
401
- * @ignore
402
- * @protected
403
- * @internal
404
- */
405
- _restoreState(state) {
406
- // If there's no state it means we've not persisted any (yet).
407
- if (!state)
408
- return;
409
- // Restore previous state.
410
- if (typeof state.nextIndex !== 'number' || state.nextIndex < 0) {
411
- throw new Error('The state object is invalid: nextIndex must be a non-negative number.');
412
- }
413
- if (state.nextIndex > this.requests.length) {
414
- throw new Error('The state object is not consistent with RequestList too few requests loaded.');
415
- }
416
- if (state.nextIndex < this.requests.length
417
- && this.requests[state.nextIndex].uniqueKey !== state.nextUniqueKey) {
418
- throw new Error('The state object is not consistent with RequestList the order of URLs seems to have changed.');
419
- }
420
- const deleteFromInProgress = [];
421
- underscore_1.default.keys(state.inProgress).forEach((uniqueKey) => {
422
- const index = this.uniqueKeyToIndex[uniqueKey];
423
- if (typeof index !== 'number') {
424
- throw new Error('The state object is not consistent with RequestList. Unknown uniqueKey is present in the state.');
425
- }
426
- if (index >= state.nextIndex) {
427
- deleteFromInProgress.push(uniqueKey);
428
- }
429
- });
430
- // WORKAROUND:
431
- // It happened to some users that state object contained something like:
432
- // {
433
- // "nextIndex": 11308,
434
- // "nextUniqueKey": "https://www.anychart.com",
435
- // "inProgress": {
436
- // "https://www.ams360.com": true,
437
- // ...
438
- // "https://www.anychart.com": true,
439
- // }
440
- // Which then caused error "The request is not being processed (uniqueKey: https://www.anychart.com)"
441
- // As a workaround, we just remove all inProgress requests whose index >= nextIndex,
442
- // since they will be crawled again.
443
- if (deleteFromInProgress.length) {
444
- this.log.warning('RequestList\'s in-progress field is not consistent, skipping invalid in-progress entries', {
445
- deleteFromInProgress,
446
- });
447
- underscore_1.default.each(deleteFromInProgress, (uniqueKey) => {
448
- delete state.inProgress[uniqueKey];
449
- });
450
- }
451
- this.nextIndex = state.nextIndex;
452
- this.inProgress = state.inProgress;
453
- // All in-progress requests need to be recrawled
454
- this.reclaimed = underscore_1.default.clone(this.inProgress);
455
- }
456
- /**
457
- * Attempts to load state and requests using the `RequestList` configuration
458
- * and returns a tuple of [state, requests] where each may be null if not loaded.
459
- *
460
- * @return {Promise<Array<(RequestListState|null)>>}
461
- * @ignore
462
- * @protected
463
- * @internal
464
- */
465
- async _loadStateAndPersistedRequests() {
466
- let state;
467
- let persistedRequests;
468
- if (this.initialState) {
469
- state = this.initialState;
470
- this.log.debug('Loaded state from options.state argument.');
471
- }
472
- else if (this.persistStateKey) {
473
- state = (0, key_value_store_1.getValue)(this.persistStateKey);
474
- if (state)
475
- this.log.debug('Loaded state from key value store using the persistStateKey.');
476
- }
477
- if (this.persistRequestsKey) {
478
- persistedRequests = await (0, key_value_store_1.getValue)(this.persistRequestsKey);
479
- if (persistedRequests)
480
- this.log.debug('Loaded requests from key value store using the persistRequestsKey.');
481
- }
482
- // Unwraps "state" promise if needed, otherwise no-op.
483
- return Promise.all([state, persistedRequests]);
484
- }
485
- /**
486
- * Returns an object representing the internal state of the `RequestList` instance.
487
- * Note that the object's fields can change in future releases.
488
- *
489
- * @returns {RequestListState}
490
- */
491
- getState() {
492
- this._ensureIsInitialized();
493
- return {
494
- nextIndex: this.nextIndex,
495
- nextUniqueKey: this.nextIndex < this.requests.length
496
- ? this.requests[this.nextIndex].uniqueKey
497
- : null,
498
- inProgress: this.inProgress,
499
- };
500
- }
501
- /**
502
- * Resolves to `true` if the next call to {@link RequestList#fetchNextRequest} function
503
- * would return `null`, otherwise it resolves to `false`.
504
- * Note that even if the list is empty, there might be some pending requests currently being processed.
505
- *
506
- * @returns {Promise<boolean>}
507
- */
508
- async isEmpty() {
509
- this._ensureIsInitialized();
510
- return !(0, utils_1.getFirstKey)(this.reclaimed) && this.nextIndex >= this.requests.length;
511
- }
512
- /**
513
- * Returns `true` if all requests were already handled and there are no more left.
514
- *
515
- * @returns {Promise<boolean>}
516
- */
517
- async isFinished() {
518
- this._ensureIsInitialized();
519
- return !(0, utils_1.getFirstKey)(this.inProgress) && this.nextIndex >= this.requests.length;
520
- }
521
- /**
522
- * Gets the next {@link Request} to process. First, the function gets a request previously reclaimed
523
- * using the {@link RequestList#reclaimRequest} function, if there is any.
524
- * Otherwise it gets the next request from sources.
525
- *
526
- * The function's `Promise` resolves to `null` if there are no more
527
- * requests to process.
528
- *
529
- * @returns {Promise<(Request|null)>}
530
- */
531
- async fetchNextRequest() {
532
- this._ensureIsInitialized();
533
- // First return reclaimed requests if any.
534
- const uniqueKey = (0, utils_1.getFirstKey)(this.reclaimed);
535
- if (uniqueKey) {
536
- delete this.reclaimed[uniqueKey];
537
- const index = this.uniqueKeyToIndex[uniqueKey];
538
- return this.requests[index];
539
- }
540
- // Otherwise return next request.
541
- if (this.nextIndex < this.requests.length) {
542
- const request = this.requests[this.nextIndex];
543
- this.inProgress[request.uniqueKey] = true;
544
- this.nextIndex++;
545
- this.isStatePersisted = false;
546
- return request;
547
- }
548
- return null;
549
- }
550
- /**
551
- * Marks request as handled after successful processing.
552
- *
553
- * @param {Request} request
554
- * @returns {Promise<void>}
555
- */
556
- async markRequestHandled(request) {
557
- const { uniqueKey } = request;
558
- this._ensureUniqueKeyValid(uniqueKey);
559
- this._ensureInProgressAndNotReclaimed(uniqueKey);
560
- this._ensureIsInitialized();
561
- delete this.inProgress[uniqueKey];
562
- this.isStatePersisted = false;
563
- }
564
- /**
565
- * Reclaims request to the list if its processing failed.
566
- * The request will become available in the next `this.fetchNextRequest()`.
567
- *
568
- * @param {Request} request
569
- * @returns {Promise<void>}
570
- */
571
- async reclaimRequest(request) {
572
- const { uniqueKey } = request;
573
- this._ensureUniqueKeyValid(uniqueKey);
574
- this._ensureInProgressAndNotReclaimed(uniqueKey);
575
- this._ensureIsInitialized();
576
- this.reclaimed[uniqueKey] = true;
577
- }
578
- /**
579
- * Adds all fetched requests from a URL from a remote resource.
580
- *
581
- * @ignore
582
- * @protected
583
- * @internal
584
- */
585
- async _addFetchedRequests(source, fetchedRequests) {
586
- const { requestsFromUrl, regex } = source;
587
- const originalLength = this.requests.length;
588
- fetchedRequests.forEach((request) => this._addRequest(request));
589
- const fetchedCount = fetchedRequests.length;
590
- const importedCount = this.requests.length - originalLength;
591
- this.log.info('Fetched and loaded Requests from a remote resource.', {
592
- requestsFromUrl,
593
- regex,
594
- fetchedCount,
595
- importedCount,
596
- duplicateCount: fetchedCount - importedCount,
597
- sample: JSON.stringify(fetchedRequests.slice(0, 5)),
598
- });
599
- }
600
- /**
601
- * Fetches URLs from requestsFromUrl and returns them in format of list of requests
602
- * @param {*} source
603
- * @return {Promise<Array<RequestOptions>>}
604
- * @ignore
605
- * @protected
606
- * @internal
607
- */
608
- async _fetchRequestsFromUrl(source) {
609
- const { requestsFromUrl, regex, ...sharedOpts } = source;
610
- const { downloadListOfUrls } = utils_1.publicUtils;
611
- // Download remote resource and parse URLs.
612
- let urlsArr;
613
- try {
614
- urlsArr = await downloadListOfUrls({ url: requestsFromUrl, urlRegExp: regex, proxyUrl: this.proxyConfiguration?.newUrl() });
615
- }
616
- catch (err) {
617
- throw new Error(`Cannot fetch a request list from ${requestsFromUrl}: ${err}`);
618
- }
619
- // Skip if resource contained no URLs.
620
- if (!urlsArr.length) {
621
- this.log.warning('list fetched, but it is empty.', { requestsFromUrl, regex });
622
- return [];
623
- }
624
- return urlsArr.map((url) => ({ url, ...sharedOpts }));
625
- }
626
- /**
627
- * Adds given request.
628
- * If the `source` parameter is a string or plain object and not an instance
629
- * of a `Request`, then the function creates a `Request` instance.
630
- *
631
- * @param {(string|Request|object)} source
632
- * @ignore
633
- * @protected
634
- * @internal
635
- */
636
- _addRequest(source) {
637
- let request;
638
- const type = typeof source;
639
- if (type === 'string') {
640
- request = new request_1.default({ url: source });
641
- }
642
- else if (source instanceof request_1.default) {
643
- request = source;
644
- }
645
- else if (source && type === 'object') {
646
- request = new request_1.default(source);
647
- }
648
- else {
649
- throw new Error(`Cannot create Request from type: ${type}`);
650
- }
651
- const hasUniqueKey = !!source.uniqueKey;
652
- // Add index to uniqueKey if duplicates are to be kept
653
- if (this.keepDuplicateUrls && !hasUniqueKey) {
654
- request.uniqueKey += `-${this.requests.length}`;
655
- }
656
- const { uniqueKey } = request;
657
- this._ensureUniqueKeyValid(uniqueKey);
658
- // Skip requests with duplicate uniqueKey
659
- if (!this.uniqueKeyToIndex.hasOwnProperty(uniqueKey)) { // eslint-disable-line no-prototype-builtins
660
- this.uniqueKeyToIndex[uniqueKey] = this.requests.length;
661
- this.requests.push(request);
662
- }
663
- else if (this.keepDuplicateUrls) {
664
- this.log.warning(`Duplicate uniqueKey: ${uniqueKey} found while the keepDuplicateUrls option was set. Check your sources' unique keys.`); // eslint-disable-line max-len
665
- }
666
- }
667
- /**
668
- * Helper function that validates unique key.
669
- * Throws an error if uniqueKey is not a non-empty string.
670
- *
671
- * @ignore
672
- * @protected
673
- * @internal
674
- */
675
- _ensureUniqueKeyValid(uniqueKey) {
676
- if (typeof uniqueKey !== 'string' || !uniqueKey) {
677
- throw new Error('Request object\'s uniqueKey must be a non-empty string');
678
- }
679
- }
680
- /**
681
- * Checks that request is not reclaimed and throws an error if so.
682
- *
683
- * @ignore
684
- * @protected
685
- * @internal
686
- */
687
- _ensureInProgressAndNotReclaimed(uniqueKey) {
688
- if (!this.inProgress[uniqueKey]) {
689
- throw new Error(`The request is not being processed (uniqueKey: ${uniqueKey})`);
690
- }
691
- if (this.reclaimed[uniqueKey]) {
692
- throw new Error(`The request was already reclaimed (uniqueKey: ${uniqueKey})`);
693
- }
694
- }
695
- /**
696
- * Throws an error if request list wasn't initialized.
697
- *
698
- * @ignore
699
- * @protected
700
- * @internal
701
- */
702
- _ensureIsInitialized() {
703
- if (!this.isInitialized) {
704
- throw new Error('RequestList is not initialized; you must call "await requestList.initialize()" before using it!');
705
- }
706
- }
707
- /**
708
- * Returns the total number of unique requests present in the `RequestList`.
709
- *
710
- * @returns {number}
711
- */
712
- length() {
713
- this._ensureIsInitialized();
714
- return this.requests.length;
715
- }
716
- /**
717
- * Returns number of handled requests.
718
- *
719
- * @returns {number}
720
- */
721
- handledCount() {
722
- this._ensureIsInitialized();
723
- return this.nextIndex - underscore_1.default.size(this.inProgress);
724
- }
725
- }
726
- exports.RequestList = RequestList;
727
- /**
728
- * Opens a request list and returns a promise resolving to an instance
729
- * of the {@link RequestList} class that is already initialized.
730
- *
731
- * {@link RequestList} represents a list of URLs to crawl, which is always stored in memory.
732
- * To enable picking up where left off after a process restart, the request list sources
733
- * are persisted to the key-value store at initialization of the list. Then, while crawling,
734
- * a small state object is regularly persisted to keep track of the crawling status.
735
- *
736
- * For more details and code examples, see the {@link RequestList} class.
737
- *
738
- * **Example usage:**
739
- *
740
- * ```javascript
741
- * const sources = [
742
- * 'https://www.example.com',
743
- * 'https://www.google.com',
744
- * 'https://www.bing.com'
745
- * ];
746
- *
747
- * const requestList = await Apify.openRequestList('my-name', sources);
748
- * ```
749
- *
750
- * @param {string|null} listName
751
- * Name of the request list to be opened. Setting a name enables the `RequestList`'s state to be persisted
752
- * in the key-value store. This is useful in case of a restart or migration. Since `RequestList` is only
753
- * stored in memory, a restart or migration wipes it clean. Setting a name will enable the `RequestList`'s
754
- * state to survive those situations and continue where it left off.
755
- *
756
- * The name will be used as a prefix in key-value store, producing keys such as `NAME-REQUEST_LIST_STATE`
757
- * and `NAME-REQUEST_LIST_SOURCES`.
758
- *
759
- * If `null`, the list will not be persisted and will only be stored in memory. Process restart
760
- * will then cause the list to be crawled again from the beginning. We suggest always using a name.
761
- * @param {RequestListOptions['sources']} sources
762
- * An array of sources of URLs for the {@link RequestList}. It can be either an array of strings,
763
- * plain objects that define at least the `url` property, or an array of {@link Request} instances.
764
- *
765
- * **IMPORTANT:** The `sources` array will be consumed (left empty) after {@link RequestList} initializes.
766
- * This is a measure to prevent memory leaks in situations when millions of sources are
767
- * added.
768
- *
769
- * Additionally, the `requestsFromUrl` property may be used instead of `url`,
770
- * which will instruct {@link RequestList} to download the source URLs from a given remote location.
771
- * The URLs will be parsed from the received response. In this case you can limit the URLs
772
- * using `regex` parameter containing regular expression pattern for URLs to be included.
773
- *
774
- * For details, see the {@link RequestListOptions.sources}
775
- * @param {RequestListOptions} [options]
776
- * The {@link RequestList} options. Note that the `listName` parameter supersedes
777
- * the {@link RequestListOptions.persistStateKey} and {@link RequestListOptions.persistRequestsKey}
778
- * options and the `sources` parameter supersedes the {@link RequestListOptions.sources} option.
779
- * @returns {Promise<RequestList>}
780
- * @memberof module:Apify
781
- * @name openRequestList
782
- * @function
783
- */
784
const openRequestList = async (listName, sources, options = {}) => {
    // Validate arguments: name is a string or null, sources is an array,
    // options is an object that is not an array.
    const ow = ow_1.default;
    ow(listName, ow.any(ow.string, ow.null));
    ow(sources, ow.array);
    ow(options, ow.object.is((v) => !Array.isArray(v)));

    // Persistence keys are derived from the list name; without a name both are
    // explicitly `undefined`, which still overrides any keys given in `options`.
    const persistenceKeys = listName
        ? {
            persistStateKey: `${listName}-${exports.STATE_PERSISTENCE_KEY}`,
            persistRequestsKey: `${listName}-${exports.REQUESTS_PERSISTENCE_KEY}`,
        }
        : { persistStateKey: undefined, persistRequestsKey: undefined };

    // `sources` comes last so the argument wins over `options.sources`.
    const requestList = new RequestList({ ...options, ...persistenceKeys, sources });
    await requestList.initialize();
    return requestList;
};
exports.openRequestList = openRequestList;
798
- /**
799
- * Represents state of a {@link RequestList}. It can be used to resume a {@link RequestList} which has been previously processed.
800
- * You can obtain the state by calling {@link RequestList#getState} and receive an object with
801
- * the following structure:
802
- *
803
- * ```
804
- * {
805
- * nextIndex: 5,
806
- * nextUniqueKey: 'unique-key-5',
807
- * inProgress: {
808
- * 'unique-key-1': true,
809
- * 'unique-key-4': true
810
- * },
811
- * }
812
- * ```
813
- *
814
- * @typedef RequestListState
815
- * @property {number} nextIndex
816
- * Position of the next request to be processed.
817
- * @property {string|null} nextUniqueKey
817
- * Key of the next request to be processed, or `null` when there is no next request.
819
- * @property {Object<string,boolean>} inProgress
820
- * An object mapping request keys to a boolean value representing whether they are being processed at the moment.
821
- */
822
- /**
823
- * @callback RequestListSourcesFunction
824
- * @return {Promise<Array<(RequestOptions|Request|string)>>}
825
- */
826
- //# sourceMappingURL=request_list.js.map