instasave-sdk 1.2.1 → 1.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. package/CHANGELOG.md +47 -0
  2. package/README.md +1 -1
  3. package/dist/{auth-fast.d.ts → auth/auth-manager.d.ts} +4 -12
  4. package/dist/auth/auth-manager.d.ts.map +1 -0
  5. package/dist/{auth-fast.js → auth/auth-manager.js} +26 -68
  6. package/dist/auth/auth-manager.js.map +1 -0
  7. package/dist/auth/index.d.ts +4 -0
  8. package/dist/auth/index.d.ts.map +1 -0
  9. package/dist/auth/index.js +8 -0
  10. package/dist/auth/index.js.map +1 -0
  11. package/dist/auth/session-manager.d.ts +8 -0
  12. package/dist/auth/session-manager.d.ts.map +1 -0
  13. package/dist/auth/session-manager.js +57 -0
  14. package/dist/auth/session-manager.js.map +1 -0
  15. package/dist/browser/browser-pool.d.ts.map +1 -0
  16. package/dist/{browser-pool.js → browser/browser-pool.js} +1 -1
  17. package/dist/browser/browser-pool.js.map +1 -0
  18. package/dist/browser/crawler-manager.d.ts.map +1 -0
  19. package/dist/{crawler-manager.js → browser/crawler-manager.js} +6 -6
  20. package/dist/browser/crawler-manager.js.map +1 -0
  21. package/dist/browser/index.d.ts +3 -0
  22. package/dist/browser/index.d.ts.map +1 -0
  23. package/dist/browser/index.js +8 -0
  24. package/dist/browser/index.js.map +1 -0
  25. package/dist/core/index.d.ts +3 -0
  26. package/dist/core/index.d.ts.map +1 -0
  27. package/dist/core/index.js +8 -0
  28. package/dist/core/index.js.map +1 -0
  29. package/dist/core/media-scraper.d.ts +107 -0
  30. package/dist/core/media-scraper.d.ts.map +1 -0
  31. package/dist/core/media-scraper.js +496 -0
  32. package/dist/core/media-scraper.js.map +1 -0
  33. package/dist/core/workflow-executor.d.ts +17 -0
  34. package/dist/core/workflow-executor.d.ts.map +1 -0
  35. package/dist/core/workflow-executor.js +146 -0
  36. package/dist/core/workflow-executor.js.map +1 -0
  37. package/dist/index.d.ts +13 -132
  38. package/dist/index.d.ts.map +1 -1
  39. package/dist/index.js +31 -635
  40. package/dist/index.js.map +1 -1
  41. package/dist/platforms/instagram/constants.d.ts +25 -0
  42. package/dist/platforms/instagram/constants.d.ts.map +1 -0
  43. package/dist/platforms/instagram/constants.js +34 -0
  44. package/dist/platforms/instagram/constants.js.map +1 -0
  45. package/dist/platforms/instagram/extractors/carousel.d.ts +21 -0
  46. package/dist/platforms/instagram/extractors/carousel.d.ts.map +1 -0
  47. package/dist/platforms/instagram/extractors/carousel.js +64 -0
  48. package/dist/platforms/instagram/extractors/carousel.js.map +1 -0
  49. package/dist/platforms/instagram/extractors/common.d.ts +17 -0
  50. package/dist/platforms/instagram/extractors/common.d.ts.map +1 -0
  51. package/dist/platforms/instagram/extractors/common.js +67 -0
  52. package/dist/platforms/instagram/extractors/common.js.map +1 -0
  53. package/dist/platforms/instagram/extractors/metadata.d.ts +24 -0
  54. package/dist/platforms/instagram/extractors/metadata.d.ts.map +1 -0
  55. package/dist/platforms/instagram/extractors/metadata.js +129 -0
  56. package/dist/platforms/instagram/extractors/metadata.js.map +1 -0
  57. package/dist/platforms/instagram/extractors/profile.d.ts +16 -0
  58. package/dist/platforms/instagram/extractors/profile.d.ts.map +1 -0
  59. package/dist/platforms/instagram/extractors/profile.js +54 -0
  60. package/dist/platforms/instagram/extractors/profile.js.map +1 -0
  61. package/dist/platforms/{instagram.d.ts → instagram/index.d.ts} +2 -2
  62. package/dist/platforms/instagram/index.d.ts.map +1 -0
  63. package/dist/platforms/instagram/index.js +53 -0
  64. package/dist/platforms/instagram/index.js.map +1 -0
  65. package/dist/plugins/index.d.ts +4 -0
  66. package/dist/plugins/index.d.ts.map +1 -0
  67. package/dist/plugins/index.js +21 -0
  68. package/dist/plugins/index.js.map +1 -0
  69. package/dist/plugins/plugin-manager.d.ts +50 -0
  70. package/dist/plugins/plugin-manager.d.ts.map +1 -0
  71. package/dist/{plugins.js → plugins/plugin-manager.js} +4 -4
  72. package/dist/plugins/plugin-manager.js.map +1 -0
  73. package/dist/types/auth.d.ts +15 -0
  74. package/dist/types/auth.d.ts.map +1 -0
  75. package/dist/types/auth.js +3 -0
  76. package/dist/types/auth.js.map +1 -0
  77. package/dist/types/browser.d.ts +28 -0
  78. package/dist/types/browser.d.ts.map +1 -0
  79. package/dist/types/browser.js +3 -0
  80. package/dist/types/browser.js.map +1 -0
  81. package/dist/{types.d.ts → types/common.d.ts} +15 -15
  82. package/dist/types/common.d.ts.map +1 -0
  83. package/dist/{types.js → types/common.js} +1 -1
  84. package/dist/types/common.js.map +1 -0
  85. package/dist/types/index.d.ts +7 -0
  86. package/dist/types/index.d.ts.map +1 -0
  87. package/dist/types/index.js +24 -0
  88. package/dist/types/index.js.map +1 -0
  89. package/dist/types/platform.d.ts +3 -0
  90. package/dist/types/platform.d.ts.map +1 -0
  91. package/dist/types/platform.js +20 -0
  92. package/dist/types/platform.js.map +1 -0
  93. package/dist/types/plugins.d.ts +61 -0
  94. package/dist/types/plugins.d.ts.map +1 -0
  95. package/dist/types/plugins.js +3 -0
  96. package/dist/types/plugins.js.map +1 -0
  97. package/dist/types/workflow.d.ts +17 -0
  98. package/dist/types/workflow.d.ts.map +1 -0
  99. package/dist/types/workflow.js +3 -0
  100. package/dist/types/workflow.js.map +1 -0
  101. package/dist/utils/config/config-parser.d.ts.map +1 -0
  102. package/dist/{config-parser.js → utils/config/config-parser.js} +5 -5
  103. package/dist/utils/config/config-parser.js.map +1 -0
  104. package/dist/utils/config/index.d.ts +2 -0
  105. package/dist/utils/config/index.d.ts.map +1 -0
  106. package/dist/utils/config/index.js +18 -0
  107. package/dist/utils/config/index.js.map +1 -0
  108. package/dist/{file-manager.d.ts → utils/file-manager.d.ts} +1 -1
  109. package/dist/utils/file-manager.d.ts.map +1 -0
  110. package/dist/utils/file-manager.js.map +1 -0
  111. package/dist/{health.d.ts → utils/health/health-checker.d.ts} +1 -1
  112. package/dist/utils/health/health-checker.d.ts.map +1 -0
  113. package/dist/{health.js → utils/health/health-checker.js} +1 -1
  114. package/dist/utils/health/health-checker.js.map +1 -0
  115. package/dist/utils/health/index.d.ts +2 -0
  116. package/dist/utils/health/index.d.ts.map +1 -0
  117. package/dist/utils/health/index.js +18 -0
  118. package/dist/utils/health/index.js.map +1 -0
  119. package/dist/utils/index.d.ts +8 -0
  120. package/dist/utils/index.d.ts.map +1 -0
  121. package/dist/utils/index.js +25 -0
  122. package/dist/utils/index.js.map +1 -0
  123. package/dist/utils/logger/index.d.ts +2 -0
  124. package/dist/utils/logger/index.d.ts.map +1 -0
  125. package/dist/utils/logger/index.js +18 -0
  126. package/dist/utils/logger/index.js.map +1 -0
  127. package/dist/utils/logger/logger.d.ts +44 -0
  128. package/dist/utils/logger/logger.d.ts.map +1 -0
  129. package/dist/utils/logger/logger.js +301 -0
  130. package/dist/utils/logger/logger.js.map +1 -0
  131. package/dist/utils/metrics/index.d.ts +2 -0
  132. package/dist/utils/metrics/index.d.ts.map +1 -0
  133. package/dist/utils/metrics/index.js +18 -0
  134. package/dist/utils/metrics/index.js.map +1 -0
  135. package/dist/{metrics.d.ts → utils/metrics/metrics-collector.d.ts} +1 -1
  136. package/dist/utils/metrics/metrics-collector.d.ts.map +1 -0
  137. package/dist/{metrics.js → utils/metrics/metrics-collector.js} +1 -1
  138. package/dist/utils/metrics/metrics-collector.js.map +1 -0
  139. package/dist/utils/progress-tracker.d.ts +11 -0
  140. package/dist/utils/progress-tracker.d.ts.map +1 -0
  141. package/dist/{progress-tracker.js → utils/progress-tracker.js} +0 -3
  142. package/dist/utils/progress-tracker.js.map +1 -0
  143. package/dist/utils/url-normalizer.d.ts.map +1 -0
  144. package/dist/utils/url-normalizer.js.map +1 -0
  145. package/package.json +1 -1
  146. package/dist/auth-fast.d.ts.map +0 -1
  147. package/dist/auth-fast.js.map +0 -1
  148. package/dist/auth.d.ts +0 -80
  149. package/dist/auth.d.ts.map +0 -1
  150. package/dist/auth.js +0 -370
  151. package/dist/auth.js.map +0 -1
  152. package/dist/benchmark.d.ts +0 -48
  153. package/dist/benchmark.d.ts.map +0 -1
  154. package/dist/benchmark.js +0 -125
  155. package/dist/benchmark.js.map +0 -1
  156. package/dist/browser-pool.d.ts.map +0 -1
  157. package/dist/browser-pool.js.map +0 -1
  158. package/dist/config-parser.d.ts.map +0 -1
  159. package/dist/config-parser.js.map +0 -1
  160. package/dist/crawler-manager.d.ts.map +0 -1
  161. package/dist/crawler-manager.js.map +0 -1
  162. package/dist/file-manager.d.ts.map +0 -1
  163. package/dist/file-manager.js.map +0 -1
  164. package/dist/health.d.ts.map +0 -1
  165. package/dist/health.js.map +0 -1
  166. package/dist/logger.d.ts +0 -22
  167. package/dist/logger.d.ts.map +0 -1
  168. package/dist/logger.js +0 -151
  169. package/dist/logger.js.map +0 -1
  170. package/dist/memory.d.ts +0 -56
  171. package/dist/memory.d.ts.map +0 -1
  172. package/dist/memory.js +0 -144
  173. package/dist/memory.js.map +0 -1
  174. package/dist/metrics.d.ts.map +0 -1
  175. package/dist/metrics.js.map +0 -1
  176. package/dist/parallel.d.ts +0 -59
  177. package/dist/parallel.d.ts.map +0 -1
  178. package/dist/parallel.js +0 -202
  179. package/dist/parallel.js.map +0 -1
  180. package/dist/platforms/instagram.d.ts.map +0 -1
  181. package/dist/platforms/instagram.js +0 -263
  182. package/dist/platforms/instagram.js.map +0 -1
  183. package/dist/plugins.d.ts +0 -132
  184. package/dist/plugins.d.ts.map +0 -1
  185. package/dist/plugins.js.map +0 -1
  186. package/dist/progress-tracker.d.ts +0 -15
  187. package/dist/progress-tracker.d.ts.map +0 -1
  188. package/dist/progress-tracker.js.map +0 -1
  189. package/dist/smart-logger.d.ts +0 -64
  190. package/dist/smart-logger.d.ts.map +0 -1
  191. package/dist/smart-logger.js +0 -149
  192. package/dist/smart-logger.js.map +0 -1
  193. package/dist/test-integration.d.ts +0 -2
  194. package/dist/test-integration.d.ts.map +0 -1
  195. package/dist/test-integration.js +0 -46
  196. package/dist/test-integration.js.map +0 -1
  197. package/dist/types.d.ts.map +0 -1
  198. package/dist/types.js.map +0 -1
  199. package/dist/url-normalizer.d.ts.map +0 -1
  200. package/dist/url-normalizer.js.map +0 -1
  201. package/dist/worker.d.ts +0 -2
  202. package/dist/worker.d.ts.map +0 -1
  203. package/dist/worker.js +0 -23
  204. package/dist/worker.js.map +0 -1
  205. /package/dist/{browser-pool.d.ts → browser/browser-pool.d.ts} +0 -0
  206. /package/dist/{crawler-manager.d.ts → browser/crawler-manager.d.ts} +0 -0
  207. /package/dist/{config-parser.d.ts → utils/config/config-parser.d.ts} +0 -0
  208. /package/dist/{file-manager.js → utils/file-manager.js} +0 -0
  209. /package/dist/{url-normalizer.d.ts → utils/url-normalizer.d.ts} +0 -0
  210. /package/dist/{url-normalizer.js → utils/url-normalizer.js} +0 -0
package/dist/index.js CHANGED
@@ -10,645 +10,41 @@ var __createBinding = (this && this.__createBinding) || (Object.create ? (functi
10
10
  if (k2 === undefined) k2 = k;
11
11
  o[k2] = m[k];
12
12
  }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
35
13
  var __exportStar = (this && this.__exportStar) || function(m, exports) {
36
14
  for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
37
15
  };
38
16
  Object.defineProperty(exports, "__esModule", { value: true });
39
- exports.log = exports.MediaScraper = exports.WorkflowExecutor = void 0;
40
- const logger_1 = require("./logger");
41
- const fs_1 = require("fs");
42
- const path_1 = require("path");
43
- const crawlee_1 = require("crawlee");
44
- const dotenv = __importStar(require("dotenv"));
45
- const auth_fast_1 = require("./auth-fast");
46
- const plugins_1 = require("./plugins");
47
- const metrics_1 = require("./metrics");
48
- const health_1 = require("./health");
49
- const platforms_1 = require("./platforms");
50
- const file_manager_1 = require("./file-manager");
51
- const config_parser_1 = require("./config-parser");
52
- const smart_logger_1 = require("./smart-logger");
53
- const url_normalizer_1 = require("./url-normalizer");
54
- const crawler_manager_1 = require("./crawler-manager");
55
- // Load environment variables
56
- dotenv.config();
57
- // Re-export types for backward compatibility
17
+ exports.log = exports.BrowserPool = exports.CrawlerManager = exports.URLNormalizer = exports.ConfigParser = exports.FileManager = exports.ProgressTracker = exports.BUILTIN_PLATFORMS = exports.HealthChecker = exports.MetricsCollector = exports.PluginManager = exports.FastAuthManager = exports.WorkflowExecutor = exports.MediaScraper = void 0;
18
+ // Main SDK exports - backward compatibility maintained
19
+ var core_1 = require("./core");
20
+ Object.defineProperty(exports, "MediaScraper", { enumerable: true, get: function () { return core_1.MediaScraper; } });
21
+ Object.defineProperty(exports, "WorkflowExecutor", { enumerable: true, get: function () { return core_1.WorkflowExecutor; } });
22
+ var auth_1 = require("./auth");
23
+ Object.defineProperty(exports, "FastAuthManager", { enumerable: true, get: function () { return auth_1.FastAuthManager; } });
24
+ var plugins_1 = require("./plugins");
25
+ Object.defineProperty(exports, "PluginManager", { enumerable: true, get: function () { return plugins_1.PluginManager; } });
26
+ var metrics_1 = require("./utils/metrics");
27
+ Object.defineProperty(exports, "MetricsCollector", { enumerable: true, get: function () { return metrics_1.MetricsCollector; } });
28
+ var health_1 = require("./utils/health");
29
+ Object.defineProperty(exports, "HealthChecker", { enumerable: true, get: function () { return health_1.HealthChecker; } });
30
+ var platforms_1 = require("./platforms");
31
+ Object.defineProperty(exports, "BUILTIN_PLATFORMS", { enumerable: true, get: function () { return platforms_1.BUILTIN_PLATFORMS; } });
32
+ var progress_tracker_1 = require("./utils/progress-tracker");
33
+ Object.defineProperty(exports, "ProgressTracker", { enumerable: true, get: function () { return progress_tracker_1.ProgressTracker; } });
34
+ var file_manager_1 = require("./utils/file-manager");
35
+ Object.defineProperty(exports, "FileManager", { enumerable: true, get: function () { return file_manager_1.FileManager; } });
36
+ var config_1 = require("./utils/config");
37
+ Object.defineProperty(exports, "ConfigParser", { enumerable: true, get: function () { return config_1.ConfigParser; } });
38
+ var url_normalizer_1 = require("./utils/url-normalizer");
39
+ Object.defineProperty(exports, "URLNormalizer", { enumerable: true, get: function () { return url_normalizer_1.URLNormalizer; } });
40
+ var crawler_manager_1 = require("./browser/crawler-manager");
41
+ Object.defineProperty(exports, "CrawlerManager", { enumerable: true, get: function () { return crawler_manager_1.CrawlerManager; } });
42
+ var browser_pool_1 = require("./browser/browser-pool");
43
+ Object.defineProperty(exports, "BrowserPool", { enumerable: true, get: function () { return browser_pool_1.BrowserPool; } });
44
+ var logger_1 = require("./utils/logger");
45
+ Object.defineProperty(exports, "log", { enumerable: true, get: function () { return logger_1.log; } });
46
+ // Re-export all types for backward compatibility
58
47
  __exportStar(require("./types"), exports);
59
- __exportStar(require("./auth-fast"), exports);
60
- __exportStar(require("./plugins"), exports);
61
- __exportStar(require("./metrics"), exports);
62
- __exportStar(require("./health"), exports);
48
+ // Re-export platforms
63
49
  __exportStar(require("./platforms"), exports);
64
- __exportStar(require("./progress-tracker"), exports);
65
- __exportStar(require("./file-manager"), exports);
66
- __exportStar(require("./config-parser"), exports);
67
- __exportStar(require("./smart-logger"), exports);
68
- __exportStar(require("./url-normalizer"), exports);
69
- __exportStar(require("./crawler-manager"), exports);
70
- /**
71
- * Executes predefined workflows for platform interaction automation
72
- * @class WorkflowExecutor
73
- */
74
- class WorkflowExecutor {
75
- /**
76
- * Pauses execution for specified milliseconds
77
- */
78
- sleep(ms) {
79
- return new Promise(resolve => setTimeout(resolve, ms));
80
- }
81
- /**
82
- * Executes a workflow from JSON file or array
83
- */
84
- async executeWorkflow(page, workflowOrPath) {
85
- let workflow;
86
- if (typeof workflowOrPath === 'string') {
87
- workflow = JSON.parse((0, fs_1.readFileSync)(workflowOrPath, 'utf-8'));
88
- }
89
- else {
90
- workflow = workflowOrPath;
91
- }
92
- for (const task of workflow) {
93
- logger_1.log.info(`Executing: ${task.name}`);
94
- try {
95
- switch (task.action) {
96
- case 'clickOutside':
97
- for (let i = 0; i < (task.clicks || 1); i++) {
98
- await page.mouse.click(100, 100);
99
- logger_1.log.info(`✓ Clicked outside modal (${i + 1}/${task.clicks})`);
100
- await this.sleep(500);
101
- }
102
- break;
103
- case 'doubleClick':
104
- let doubleClicked = false;
105
- for (const selector of task.selectors || []) {
106
- try {
107
- await page.waitForSelector(selector, { timeout: 2000 });
108
- await page.click(selector, { clickCount: 2 });
109
- logger_1.log.info(`✓ Double-clicked with selector: ${selector}`);
110
- doubleClicked = true;
111
- break;
112
- }
113
- catch (e) { }
114
- }
115
- if (!doubleClicked && task.fallback === 'findByText' && task.fallbackText) {
116
- const found = await page.evaluate((text) => {
117
- const elements = Array.from(document.querySelectorAll('*'));
118
- const el = elements.find(e => e.textContent?.includes(text));
119
- if (el && el instanceof HTMLElement) {
120
- const event = new MouseEvent('dblclick', { bubbles: true });
121
- el.dispatchEvent(event);
122
- return true;
123
- }
124
- return false;
125
- }, task.fallbackText);
126
- if (found)
127
- logger_1.log.info(`✓ Double-clicked via text: ${task.fallbackText}`);
128
- }
129
- break;
130
- case 'hold':
131
- const holdDuration = task.holdDuration || 1000;
132
- for (const selector of task.selectors || []) {
133
- try {
134
- await page.waitForSelector(selector, { timeout: 2000 });
135
- const element = await page.$(selector);
136
- if (element) {
137
- const box = await element.boundingBox();
138
- if (box) {
139
- await page.mouse.move(box.x + box.width / 2, box.y + box.height / 2);
140
- await page.mouse.down();
141
- await this.sleep(holdDuration);
142
- await page.mouse.up();
143
- logger_1.log.info(`✓ Held mouse button for ${holdDuration}ms on: ${selector}`);
144
- break;
145
- }
146
- }
147
- }
148
- catch (e) { }
149
- }
150
- break;
151
- case 'keypress':
152
- if (task.key) {
153
- // Press modifiers first
154
- if (task.modifiers) {
155
- for (const mod of task.modifiers) {
156
- await page.keyboard.down(mod);
157
- }
158
- }
159
- // Press the main key
160
- await page.keyboard.press(task.key); // Cast to any for flexibility
161
- // Release modifiers
162
- if (task.modifiers) {
163
- for (const mod of task.modifiers.reverse()) {
164
- await page.keyboard.up(mod);
165
- }
166
- }
167
- logger_1.log.info(`✓ Pressed key: ${task.modifiers ? task.modifiers.join('+') + '+' : ''}${task.key}`);
168
- }
169
- break;
170
- case 'click':
171
- default:
172
- let clicked = false;
173
- for (const selector of task.selectors || []) {
174
- try {
175
- await page.waitForSelector(selector, { timeout: 2000 });
176
- await page.click(selector);
177
- logger_1.log.info(`✓ Clicked with selector: ${selector}`);
178
- clicked = true;
179
- break;
180
- }
181
- catch (e) { }
182
- }
183
- if (!clicked && task.fallback === 'findByText' && task.fallbackText) {
184
- const found = await page.evaluate((text) => {
185
- const buttons = Array.from(document.querySelectorAll('button'));
186
- const btn = buttons.find(b => b.textContent?.includes(text));
187
- if (btn) {
188
- btn.click();
189
- return true;
190
- }
191
- return false;
192
- }, task.fallbackText);
193
- if (found)
194
- logger_1.log.info(`✓ Clicked via text: ${task.fallbackText}`);
195
- }
196
- break;
197
- }
198
- if (task.wait)
199
- await this.sleep(task.wait);
200
- }
201
- catch (e) {
202
- if (!task.optional)
203
- throw e;
204
- logger_1.log.info(`⚠ Skipped (optional): ${task.name}`);
205
- }
206
- }
207
- }
208
- }
209
- exports.WorkflowExecutor = WorkflowExecutor;
210
- /**
211
- * Main media scraper class with multi-platform support
212
- * @class MediaScraper
213
- */
214
- class MediaScraper {
215
- /**
216
- * Creates a new MediaScraper instance
217
- */
218
- constructor() {
219
- this.currentScrapeId = null;
220
- this.stepCallback = null;
221
- this.workflowExecutor = new WorkflowExecutor();
222
- this.metricsCollector = new metrics_1.MetricsCollector();
223
- this.healthChecker = new health_1.HealthChecker();
224
- this.healthChecker = new health_1.HealthChecker();
225
- this.pluginManager = new plugins_1.PluginManager();
226
- this.authManager = new auth_fast_1.FastAuthManager();
227
- this.fileManager = new file_manager_1.FileManager();
228
- this.configParser = new config_parser_1.ConfigParser();
229
- this.crawlerManager = crawler_manager_1.CrawlerManager.getInstance();
230
- // Set crawler manager for auth manager
231
- this.authManager.setCrawlerManager(this.crawlerManager);
232
- // Load configuration
233
- this.config = this.configParser.loadConfig();
234
- this.configParser.applyToEnvironment();
235
- // Setup config change listener
236
- this.configParser.onConfigChange((newConfig, oldConfig) => {
237
- this.handleConfigChange(newConfig, oldConfig);
238
- });
239
- // Setup smart logging
240
- smart_logger_1.SmartLogger.setMode(this.config.settings.console_mode || 'Classic');
241
- smart_logger_1.SmartLogger.suppressCrawlee();
242
- smart_logger_1.SmartLogger.config(true);
243
- // Auto-login if credentials are provided in config
244
- this.autoLogin();
245
- // Register built-in platforms
246
- platforms_1.BUILTIN_PLATFORMS.forEach(plugin => this.pluginManager.register(plugin));
247
- }
248
- /**
249
- * Handle configuration changes
250
- */
251
- handleConfigChange(newConfig, oldConfig) {
252
- this.config = newConfig;
253
- this.configParser.applyToEnvironment();
254
- // Update logging mode if changed
255
- if (newConfig.settings.console_mode !== oldConfig.settings.console_mode) {
256
- smart_logger_1.SmartLogger.setMode(newConfig.settings.console_mode || 'Classic');
257
- smart_logger_1.SmartLogger.classic(`Console mode changed to: ${newConfig.settings.console_mode}`);
258
- }
259
- // Re-login if credentials changed
260
- if (newConfig.account.username !== oldConfig.account.username ||
261
- newConfig.account.password !== oldConfig.account.password) {
262
- smart_logger_1.SmartLogger.classic('Credentials changed, attempting re-login...');
263
- this.autoLogin();
264
- }
265
- smart_logger_1.SmartLogger.classic('Configuration reloaded');
266
- }
267
- /**
268
- * Auto-login if credentials are available in config
269
- */
270
- async autoLogin() {
271
- const { auto_login, username, password } = this.config.account;
272
- if (auto_login !== false && username && password) {
273
- try {
274
- smart_logger_1.SmartLogger.debug('🔐 Auto-login enabled, logging in with config credentials...');
275
- await this.login({ username, password });
276
- }
277
- catch (error) {
278
- smart_logger_1.SmartLogger.always('⚠️ Auto-login failed: ' + error);
279
- }
280
- }
281
- else if (auto_login === false) {
282
- smart_logger_1.SmartLogger.debug('🔐 Auto-login disabled in config');
283
- }
284
- }
285
- /**
286
- * Register a plugin (platform or utility)
287
- */
288
- use(plugin) {
289
- this.pluginManager.register(plugin);
290
- }
291
- /**
292
- * Login to Instagram
293
- */
294
- async login(credentials) {
295
- // Use original approach for login - create temporary crawler
296
- const crawler = new crawlee_1.PuppeteerCrawler({
297
- maxRequestRetries: 0,
298
- requestHandlerTimeoutSecs: 60,
299
- launchContext: {
300
- launchOptions: {
301
- executablePath: process.env.PUPPETEER_EXECUTABLE_PATH || undefined,
302
- headless: this.config.settings.headless !== false,
303
- args: ['--no-sandbox', '--disable-setuid-sandbox']
304
- }
305
- },
306
- requestHandler: async ({ page }) => {
307
- logger_1.log.info('🔧 Puppeteer page created, starting login...');
308
- await this.authManager.login(page, credentials);
309
- }
310
- });
311
- try {
312
- await crawler.run(['https://www.instagram.com/accounts/login']);
313
- }
314
- finally {
315
- await crawler.teardown();
316
- }
317
- return this.authManager.isAuthenticated();
318
- }
319
- /**
320
- * Universal scrape method - automatically detects platform
321
- */
322
- async scrape(url, options = {}) {
323
- // Normalize URL for consistent results
324
- const normalizedUrl = url_normalizer_1.URLNormalizer.isInstagramPostUrl(url)
325
- ? url_normalizer_1.URLNormalizer.normalizeInstagramUrl(url)
326
- : url;
327
- smart_logger_1.SmartLogger.debug(`🔗 URL normalized: ${url} → ${normalizedUrl}`);
328
- // Initialize progress callback
329
- const { progressCallback } = options;
330
- // Step 1: Browser launch
331
- if (progressCallback) {
332
- progressCallback({ type: 'browser', percentage: 0, message: 'Launching browser...' });
333
- }
334
- // Find platform plugin for URL
335
- const platformPlugin = this.pluginManager.getPlatformForUrl(normalizedUrl);
336
- if (!platformPlugin) {
337
- throw new Error(`No platform plugin found for URL: ${normalizedUrl}. Supported platforms: ${this.pluginManager.getPlatformPlugins().map(p => p.name).join(', ')}`);
338
- }
339
- smart_logger_1.SmartLogger.platform(platformPlugin.name);
340
- logger_1.log.info(`🔍 Detected platform: ${platformPlugin.name}`);
341
- // Execute onBeforeScrape hooks
342
- await this.pluginManager.executeHook('onBeforeScrape', normalizedUrl, options);
343
- const startTime = Date.now();
344
- const authManager = this.authManager;
345
- const useSession = options.useSession !== false && authManager.isAuthenticated();
346
- return new Promise((resolve, reject) => {
347
- const crawler = new crawlee_1.PuppeteerCrawler({
348
- maxRequestRetries: options.retries || 0,
349
- requestHandlerTimeoutSecs: options.timeout || 60,
350
- launchContext: {
351
- launchOptions: {
352
- executablePath: process.env.PUPPETEER_EXECUTABLE_PATH || undefined,
353
- args: ['--no-sandbox', '--disable-setuid-sandbox']
354
- }
355
- },
356
- // Apply cookies BEFORE navigation
357
- preNavigationHooks: [
358
- async ({ page }) => {
359
- if (useSession) {
360
- logger_1.log.info('🔐 Applying session cookies before navigation...');
361
- await authManager.applyCookies(page);
362
- }
363
- }
364
- ],
365
- requestHandler: async ({ page, request }) => {
366
- try {
367
- logger_1.log.info('Starting scrape operation...');
368
- // Step 2: Navigation
369
- if (progressCallback) {
370
- progressCallback({ type: 'navigation', percentage: 25, message: 'Navigating to post...' });
371
- }
372
- // Set user agent
373
- // Try mobile user agent for better audio detection
374
- await page.setUserAgent('Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1');
375
- // Force fresh content - disable cache and add cache-busting headers
376
- await page.setExtraHTTPHeaders({
377
- 'Cache-Control': 'no-cache, no-store, must-revalidate',
378
- 'Pragma': 'no-cache',
379
- 'Expires': '0'
380
- });
381
- // Clear browser cache to force fresh data
382
- const client = await page.target().createCDPSession();
383
- await client.send('Network.clearBrowserCache');
384
- await client.send('Network.clearBrowserCookies');
385
- // Block unnecessary resources for faster loading
386
- await page.setRequestInterception(true);
387
- page.on('request', (req) => {
388
- const resourceType = req.resourceType();
389
- if (['stylesheet', 'font'].includes(resourceType)) {
390
- req.abort();
391
- }
392
- else {
393
- req.continue();
394
- }
395
- });
396
- // Navigate to the URL
397
- await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
398
- // Check if login is required
399
- const needsLogin = await page.evaluate(() => {
400
- const loginIndicators = [
401
- 'Log in to continue',
402
- 'Login • Instagram',
403
- 'You must log in',
404
- 'This content isn\'t available right now'
405
- ];
406
- const pageText = document.body.innerText;
407
- const currentUrl = window.location.href;
408
- return loginIndicators.some(indicator => pageText.includes(indicator) || currentUrl.includes('accounts/login'));
409
- });
410
- if (needsLogin) {
411
- logger_1.log.info('🔐 Login required for this content');
412
- if (!this.authManager.isAuthenticated()) {
413
- throw new Error('Login required but no authentication available. Please login first via settings.');
414
- }
415
- // Validate session before proceeding
416
- const sessionValid = await this.authManager.validateSession(page);
417
- if (!sessionValid) {
418
- throw new Error('Session expired or invalid. Please login again.');
419
- }
420
- logger_1.log.info('🔐 Applying authenticated session...');
421
- // Navigate to Instagram homepage to establish session
422
- await page.goto('https://www.instagram.com/', { waitUntil: 'domcontentloaded' });
423
- await page.waitForTimeout(2000);
424
- // Navigate back to target URL with session
425
- await page.goto(url, { waitUntil: 'domcontentloaded', timeout: 30000 });
426
- // Verify access was granted
427
- const stillBlocked = await page.evaluate(() => {
428
- const pageText = document.body.innerText;
429
- return pageText.includes('Log in to continue') || pageText.includes('You must log in');
430
- });
431
- if (stillBlocked) {
432
- throw new Error('Content requires login but authentication failed. Content may be private or age-restricted.');
433
- }
434
- logger_1.log.info('✅ Successfully accessed content with authentication');
435
- }
436
- // Navigate to page with cache-busting
437
- const cacheBustUrl = normalizedUrl + (normalizedUrl.includes('?') ? '&' : '?') + '_cb=' + Date.now();
438
- smart_logger_1.SmartLogger.debug(`🔄 Cache-busting URL: ${cacheBustUrl}`);
439
- await page.goto(cacheBustUrl, {
440
- waitUntil: 'domcontentloaded',
441
- timeout: 15000
442
- });
443
- await this.sleep(1000);
444
- // Execute platform workflow if available
445
- if (platformPlugin.workflow) {
446
- await this.workflowExecutor.executeWorkflow(page, platformPlugin.workflow);
447
- }
448
- // Step 3: Extraction
449
- if (progressCallback) {
450
- progressCallback({ type: 'extraction', percentage: 50, message: 'Extracting media...' });
451
- }
452
- // Scrape using platform plugin
453
- let data = await platformPlugin.scrape(page, normalizedUrl);
454
- // Step 4: Metadata
455
- if (progressCallback) {
456
- progressCallback({ type: 'metadata', percentage: 75, message: 'Fetching metadata...' });
457
- }
458
- // Execute onMediaExtracted hook
459
- data.media = await this.pluginManager.executeHook('onMediaExtracted', data.media) || data.media;
460
- // Execute onAfterScrape hook
461
- data = await this.pluginManager.executeHook('onAfterScrape', data) || data;
462
- // Validate extracted data
463
- if (!data.post_id) {
464
- throw new Error('Failed to extract post ID');
465
- }
466
- // Step 5: Complete
467
- if (progressCallback) {
468
- progressCallback({ type: 'complete', percentage: 100, message: 'Done!' });
469
- }
470
- // Save to file if requested
471
- if (options.saveToFile) {
472
- try {
473
- if (options.filenamePattern || options.skipDuplicates) {
474
- // Use new FileManager
475
- const savedFiles = await this.fileManager.saveMediaFiles(data, {
476
- filenamePattern: options.filenamePattern,
477
- skipDuplicates: options.skipDuplicates,
478
- outputPath: options.outputPath
479
- });
480
- logger_1.log.info(`✓ Saved ${savedFiles.length} files with custom naming`);
481
- }
482
- else {
483
- // Use legacy save method
484
- await this.saveToFile(data, options.outputPath);
485
- }
486
- }
487
- catch (error) {
488
- if (error instanceof Error && error.message.includes('Duplicate')) {
489
- logger_1.log.info('⚠️ Skipped duplicate file');
490
- }
491
- else {
492
- throw error;
493
- }
494
- }
495
- }
496
- smart_logger_1.SmartLogger.scrape('start', data.post_id);
497
- smart_logger_1.SmartLogger.scrape('success', data.post_id, { mediaCount: data.media.length });
498
- logger_1.log.info(`✅ Successfully scraped ${data.platform} post: ${data.post_id}`);
499
- // Record metrics
500
- const responseTime = Date.now() - startTime;
501
- this.metricsCollector.recordRequest(true, responseTime, data.media.length, JSON.stringify(data).length);
502
- resolve(data);
503
- }
504
- catch (error) {
505
- logger_1.log.error('Scraping failed:', error);
506
- // Execute onError hook
507
- await this.pluginManager.executeHook('onError', error);
508
- // Record failed metrics
509
- const responseTime = Date.now() - startTime;
510
- this.metricsCollector.recordRequest(false, responseTime, 0, 0, error instanceof Error ? error.message : String(error));
511
- // Ensure crawler cleanup on error
512
- setTimeout(() => crawler.teardown().catch(console.error), 100);
513
- reject(error);
514
- }
515
- },
516
- failedRequestHandler: async ({ request, error }) => {
517
- logger_1.log.error(`Request failed for ${request.url}:`, error);
518
- // Ensure crawler cleanup on failed request
519
- setTimeout(() => crawler.teardown().catch(console.error), 100);
520
- reject(error);
521
- }
522
- });
523
- crawler.run([normalizedUrl])
524
- .then(() => {
525
- // Cleanup crawler after completion
526
- crawler.teardown().catch(console.error);
527
- })
528
- .catch((error) => {
529
- // Cleanup crawler even on error
530
- crawler.teardown().catch(console.error);
531
- reject(error);
532
- });
533
- });
534
- }
535
- async saveToFile(data, outputPath) {
536
- try {
537
- const filename = `${data.platform}_${data.post_id || 'post'}.json`;
538
- const filepath = outputPath ? (0, path_1.join)(outputPath, filename) : filename;
539
- (0, fs_1.writeFileSync)(filepath, JSON.stringify(data, null, 2));
540
- logger_1.log.info(`✓ Saved data to ${filepath}`);
541
- }
542
- catch (error) {
543
- logger_1.log.error('Failed to save file:', error);
544
- throw new Error(`Failed to save data to file: ${error}`);
545
- }
546
- }
547
- /**
548
- * Set callback for step tracking
549
- */
550
- setStepCallback(callback) {
551
- this.stepCallback = callback;
552
- }
553
- /**
554
- * Gets current metrics
555
- */
556
- getMetrics() {
557
- return this.metricsCollector.getMetrics();
558
- }
559
- /**
560
- * Gets health status
561
- */
562
- async getHealth() {
563
- return await this.healthChecker.checkHealth();
564
- }
565
- /**
566
- * Gets Prometheus-formatted metrics
567
- */
568
- getPrometheusMetrics() {
569
- return this.metricsCollector.getPrometheusMetrics();
570
- }
571
- /**
572
- * Get plugin manager for advanced plugin operations
573
- */
574
- getPluginManager() {
575
- return this.pluginManager;
576
- }
577
- /**
578
- * Get auth manager for advanced auth operations
579
- */
580
- getAuthManager() {
581
- return this.authManager;
582
- }
583
- /**
584
- * Get current configuration
585
- */
586
- getConfig() {
587
- return this.config;
588
- }
589
- /**
590
- * Get config parser for advanced config operations
591
- */
592
- getConfigParser() {
593
- return this.configParser;
594
- }
595
- async refreshSession() {
596
- try {
597
- return await this.authManager.refreshSessionWithCrawler();
598
- }
599
- catch (error) {
600
- logger_1.log.error('RefreshSession failed:', error);
601
- throw error;
602
- }
603
- return this.authManager.isAuthenticated();
604
- }
605
- /**
606
- * Get current logged-in username
607
- */
608
- getUsername() {
609
- return this.authManager.getUsername();
610
- }
611
- /**
612
- * Get current logged-in user's display name (real name)
613
- */
614
- getDisplayName() {
615
- return this.authManager.getDisplayName();
616
- }
617
- /**
618
- * Get current logged-in user info
619
- */
620
- getUserInfo() {
621
- return {
622
- username: this.authManager.getUsername(),
623
- displayName: this.authManager.getDisplayName()
624
- };
625
- }
626
- /**
627
- * Logout and clear session data
628
- */
629
- async logout() {
630
- await this.authManager.logout();
631
- logger_1.log.info('🔐 Session cleared successfully');
632
- }
633
- /**
634
- * Clear all cached data and reset SDK state
635
- */
636
- async cleanup() {
637
- await this.logout();
638
- this.configParser.cleanup();
639
- await this.crawlerManager.cleanup();
640
- // Clear any other cached data if needed
641
- logger_1.log.info('🧹 SDK cleanup completed');
642
- }
643
- /**
644
- * Pauses execution for specified milliseconds
645
- */
646
- sleep(ms) {
647
- return new Promise(resolve => setTimeout(resolve, ms));
648
- }
649
- }
650
- exports.MediaScraper = MediaScraper;
651
- // Export logger for external use
652
- var logger_2 = require("./logger");
653
- Object.defineProperty(exports, "log", { enumerable: true, get: function () { return logger_2.log; } });
654
50
  //# sourceMappingURL=index.js.map