instasave-sdk 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +153 -0
- package/dist/auth-fast.d.ts +32 -0
- package/dist/auth-fast.d.ts.map +1 -0
- package/dist/auth-fast.js +505 -0
- package/dist/auth-fast.js.map +1 -0
- package/dist/auth.d.ts +80 -0
- package/dist/auth.d.ts.map +1 -0
- package/dist/auth.js +370 -0
- package/dist/auth.js.map +1 -0
- package/dist/benchmark.d.ts +48 -0
- package/dist/benchmark.d.ts.map +1 -0
- package/dist/benchmark.js +125 -0
- package/dist/benchmark.js.map +1 -0
- package/dist/health.d.ts +28 -0
- package/dist/health.d.ts.map +1 -0
- package/dist/health.js +108 -0
- package/dist/health.js.map +1 -0
- package/dist/index.d.ts +101 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +492 -0
- package/dist/index.js.map +1 -0
- package/dist/logger.d.ts +22 -0
- package/dist/logger.d.ts.map +1 -0
- package/dist/logger.js +151 -0
- package/dist/logger.js.map +1 -0
- package/dist/memory.d.ts +56 -0
- package/dist/memory.d.ts.map +1 -0
- package/dist/memory.js +144 -0
- package/dist/memory.js.map +1 -0
- package/dist/metrics.d.ts +19 -0
- package/dist/metrics.d.ts.map +1 -0
- package/dist/metrics.js +79 -0
- package/dist/metrics.js.map +1 -0
- package/dist/parallel.d.ts +59 -0
- package/dist/parallel.d.ts.map +1 -0
- package/dist/parallel.js +202 -0
- package/dist/parallel.js.map +1 -0
- package/dist/platforms/index.d.ts +7 -0
- package/dist/platforms/index.d.ts.map +1 -0
- package/dist/platforms/index.js +13 -0
- package/dist/platforms/index.js.map +1 -0
- package/dist/platforms/instagram.d.ts +6 -0
- package/dist/platforms/instagram.d.ts.map +1 -0
- package/dist/platforms/instagram.js +189 -0
- package/dist/platforms/instagram.js.map +1 -0
- package/dist/plugins.d.ts +128 -0
- package/dist/plugins.d.ts.map +1 -0
- package/dist/plugins.js +107 -0
- package/dist/plugins.js.map +1 -0
- package/dist/test-integration.d.ts +2 -0
- package/dist/test-integration.d.ts.map +1 -0
- package/dist/test-integration.js +46 -0
- package/dist/test-integration.js.map +1 -0
- package/dist/types.d.ts +75 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +6 -0
- package/dist/types.js.map +1 -0
- package/dist/worker.d.ts +2 -0
- package/dist/worker.d.ts.map +1 -0
- package/dist/worker.js +23 -0
- package/dist/worker.js.map +1 -0
- package/package.json +56 -0
package/dist/parallel.js
ADDED
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.ParallelProcessor = void 0;
|
|
4
|
+
const logger_1 = require("./logger");
|
|
5
|
+
const worker_threads_1 = require("worker_threads");
|
|
6
|
+
const path_1 = require("path");
|
|
7
|
+
/**
 * Parallel processing manager for Instagram scraping.
 *
 * Owns a fixed-size pool of worker threads, each running `worker.js` from this
 * directory. A task is sent to a worker as `{ taskId, url, options }`; the
 * worker is expected to answer with `{ taskId, type, data, error }` where
 * `type` is `'success'` or `'error'`.
 *
 * Task records are kept in `this.tasks` after they finish so that
 * `getStats()` can report them; they are never evicted by this class.
 *
 * @class ParallelProcessor
 */
class ParallelProcessor {
    /**
     * Creates a new ParallelProcessor and eagerly spawns its worker threads.
     * @param maxWorkers - Maximum number of worker threads (default: 4)
     * @throws {RangeError} If `maxWorkers` is not a finite number greater than
     * zero (an empty pool could never finish a task).
     */
    constructor(maxWorkers = 4) {
        const requested = Number(maxWorkers);
        if (!Number.isFinite(requested) || requested <= 0) {
            throw new RangeError(`maxWorkers must be a finite number greater than zero (received ${String(maxWorkers)})`);
        }
        this.tasks = new Map();
        this.maxWorkers = maxWorkers;
        this.workerPool = {
            workers: [],
            availableWorkers: [],
            busyWorkers: new Set()
        };
        // Pool slots whose thread died; they are respawned lazily on next use.
        this.crashedWorkers = new Set();
        // Wake-up callbacks of executeTasks() calls that are still in flight.
        this.schedulers = new Set();
        this.isShutDown = false;
        this.initializeWorkerPool();
    }
    /**
     * Initializes the worker thread pool: one thread per slot, all idle.
     */
    initializeWorkerPool() {
        for (let workerId = 0; workerId < this.maxWorkers; workerId++) {
            this.spawnWorker(workerId);
            this.workerPool.availableWorkers.push(workerId);
        }
    }
    /**
     * Starts a worker thread for the given pool slot and wires its events.
     * Also used to replace a thread that crashed.
     * @param workerId - Index of the pool slot
     * @returns The newly created Worker
     */
    spawnWorker(workerId) {
        const worker = new worker_threads_1.Worker((0, path_1.join)(__dirname, 'worker.js'));
        this.workerPool.workers[workerId] = worker;
        this.crashedWorkers.delete(workerId);
        // Events from a thread that has since been replaced are ignored.
        worker.on('message', (message) => {
            if (this.workerPool.workers[workerId] === worker) {
                this.handleWorkerMessage(workerId, message);
            }
        });
        worker.on('error', (error) => {
            if (this.workerPool.workers[workerId] === worker) {
                this.handleWorkerError(workerId, error);
            }
        });
        return worker;
    }
    /**
     * Handles messages from worker threads.
     *
     * Only `'success'` and `'error'` messages end a task. Any other message is
     * ignored so that a worker is never handed a second task while it is still
     * working on the first one.
     * @param workerId - Pool slot the message came from
     * @param message - `{ taskId, type, data, error }` sent by the worker
     */
    handleWorkerMessage(workerId, message) {
        const { taskId, type, data, error } = message ?? {};
        if (type !== 'success' && type !== 'error') {
            return;
        }
        const task = this.tasks.get(taskId);
        if (task && !this.isSettled(task)) {
            if (type === 'success') {
                task.status = 'completed';
                task.result = data;
                task.endTime = Date.now();
            }
            else {
                this.failTask(task, error);
            }
        }
        // The worker finished something, so free it even if the task record is
        // unknown; otherwise the slot would stay busy forever.
        this.releaseWorker(workerId);
        this.notifySchedulers();
    }
    /**
     * Handles worker errors (the worker thread's 'error' event).
     *
     * Fails only the task that was running on that worker, marks the slot as
     * crashed so a fresh thread is started before it is used again, and frees
     * the slot.
     * @param workerId - Pool slot whose thread failed
     * @param error - The error reported by the thread
     */
    handleWorkerError(workerId, error) {
        logger_1.log.error(`Worker ${workerId} encountered an error:`, error);
        const reason = error?.message ?? String(error);
        for (const task of this.tasks.values()) {
            if (task.status === 'running' && task.workerId === workerId) {
                this.failTask(task, `Worker error: ${reason}`);
            }
        }
        this.crashedWorkers.add(workerId);
        this.releaseWorker(workerId);
        this.notifySchedulers();
    }
    /**
     * Scrapes multiple Instagram posts in parallel
     * @param urls - Array of Instagram post URLs
     * @param options - Scraping options
     * @returns Promise<InstagramPostData[]> - results of the tasks that
     * produced one, in input order. Failed tasks are logged and omitted, so
     * the array may be shorter than `urls`.
     */
    async scrapeMultiple(urls, options = {}) {
        const taskIds = [];
        // Create tasks
        for (const url of urls) {
            const taskId = this.generateTaskId();
            this.tasks.set(taskId, {
                id: taskId,
                url,
                options,
                status: 'pending'
            });
            taskIds.push(taskId);
        }
        // Execute tasks
        await this.executeTasks(taskIds);
        // Collect results
        const results = [];
        for (const taskId of taskIds) {
            const task = this.tasks.get(taskId);
            if (task?.result) {
                results.push(task.result);
            }
            else if (task?.status === 'failed') {
                logger_1.log.error(`Scrape failed for ${task.url}:`, task.error);
            }
        }
        return results;
    }
    /**
     * Executes tasks using available workers.
     *
     * Resolves once every listed task is completed or failed. Scheduling is
     * driven by worker events: each finished task immediately frees its worker
     * for the next pending task. The promise never rejects.
     * @param taskIds - Ids of tasks already stored in `this.tasks`
     */
    async executeTasks(taskIds) {
        if (taskIds.length === 0) {
            return;
        }
        return new Promise((resolve) => {
            // Tasks only move pending -> running -> settled, so a forward-only
            // cursor finds the next pending task without rescanning the list.
            let cursor = 0;
            const nextPendingId = () => {
                while (cursor < taskIds.length) {
                    if (this.tasks.get(taskIds[cursor])?.status === 'pending') {
                        return taskIds[cursor];
                    }
                    cursor++;
                }
                return undefined;
            };
            const assignTask = () => {
                if (this.workerPool.availableWorkers.length === 0) {
                    return false;
                }
                const pendingTaskId = nextPendingId();
                if (pendingTaskId === undefined) {
                    return false;
                }
                const workerId = this.workerPool.availableWorkers.pop();
                const task = this.tasks.get(pendingTaskId);
                task.status = 'running';
                task.startTime = Date.now();
                task.workerId = workerId;
                this.workerPool.busyWorkers.add(workerId);
                try {
                    if (this.crashedWorkers.has(workerId)) {
                        this.spawnWorker(workerId);
                    }
                    // Send task to worker
                    this.workerPool.workers[workerId].postMessage({
                        taskId: pendingTaskId,
                        url: task.url,
                        options: task.options
                    });
                }
                catch (error) {
                    // Spawning or posting failed (e.g. options that cannot be
                    // cloned): fail this task instead of leaving it running.
                    this.failTask(task, `Worker error: ${error?.message ?? String(error)}`);
                    this.releaseWorker(workerId);
                }
                return true;
            };
            const allSettled = () => taskIds.every((id) => {
                const task = this.tasks.get(id);
                return !task || this.isSettled(task);
            });
            const schedule = () => {
                if (this.isShutDown) {
                    // No worker will ever pick these up.
                    for (const id of taskIds) {
                        const task = this.tasks.get(id);
                        if (task && !this.isSettled(task)) {
                            this.failTask(task, 'Processor shut down before the task finished');
                        }
                    }
                }
                let assigned = true;
                while (assigned) {
                    assigned = assignTask();
                }
                if (allSettled()) {
                    this.schedulers.delete(schedule);
                    resolve();
                }
            };
            this.schedulers.add(schedule);
            schedule();
        });
    }
    /**
     * Gets processing statistics
     * @returns Task counts per status over every task ever created by this
     * instance, plus the current number of idle and busy workers.
     */
    getStats() {
        const counts = new Map([
            ['completed', 0],
            ['failed', 0],
            ['running', 0],
            ['pending', 0]
        ]);
        for (const task of this.tasks.values()) {
            if (counts.has(task.status)) {
                counts.set(task.status, counts.get(task.status) + 1);
            }
        }
        return {
            totalTasks: this.tasks.size,
            completed: counts.get('completed'),
            failed: counts.get('failed'),
            running: counts.get('running'),
            pending: counts.get('pending'),
            availableWorkers: this.workerPool.availableWorkers.length,
            busyWorkers: this.workerPool.busyWorkers.size
        };
    }
    /**
     * Generates unique task ID
     * @returns `task_<epoch ms>_<up to 9 base-36 characters>`
     */
    generateTaskId() {
        return `task_${Date.now()}_${Math.random().toString(36).slice(2, 11)}`;
    }
    /**
     * Shuts down all workers.
     *
     * Tasks that are still pending or running are marked failed first, so any
     * `scrapeMultiple()` call in flight resolves instead of waiting forever.
     */
    async shutdown() {
        this.isShutDown = true;
        for (const task of this.tasks.values()) {
            if (!this.isSettled(task)) {
                this.failTask(task, 'Processor shut down before the task finished');
            }
        }
        this.workerPool.availableWorkers.length = 0;
        this.workerPool.busyWorkers.clear();
        this.notifySchedulers();
        await Promise.all(this.workerPool.workers.map(worker => worker.terminate()));
        logger_1.log.info('🛑 All workers terminated');
    }
    /**
     * Tells whether a task has reached a final state.
     */
    isSettled(task) {
        return task.status === 'completed' || task.status === 'failed';
    }
    /**
     * Marks a task as failed with the given error value.
     */
    failTask(task, reason) {
        task.status = 'failed';
        task.error = reason;
        task.endTime = Date.now();
    }
    /**
     * Moves a worker from busy to available. A worker that was not busy is
     * left alone so its id can never appear twice in the available list.
     */
    releaseWorker(workerId) {
        if (this.workerPool.busyWorkers.delete(workerId) && !this.isShutDown) {
            this.workerPool.availableWorkers.push(workerId);
        }
    }
    /**
     * Wakes every executeTasks() call in flight so it can assign freed
     * workers and check whether it is done.
     */
    notifySchedulers() {
        // Iterate over a copy: a scheduler removes itself when it finishes.
        for (const schedule of [...this.schedulers]) {
            schedule();
        }
    }
}
|
|
201
|
+
exports.ParallelProcessor = ParallelProcessor;
|
|
202
|
+
//# sourceMappingURL=parallel.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parallel.js","sourceRoot":"","sources":["../src/parallel.ts"],"names":[],"mappings":";;;AAAA,qCAA+B;AAE/B,mDAAwC;AACxC,+BAA4B;AAmB5B;;;GAGG;AACH,MAAa,iBAAiB;IAK5B;;;OAGG;IACH,YAAY,aAAqB,CAAC;QAP1B,UAAK,GAA8B,IAAI,GAAG,EAAE,CAAC;QAQnD,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC7B,IAAI,CAAC,UAAU,GAAG;YAChB,OAAO,EAAE,EAAE;YACX,gBAAgB,EAAE,EAAE;YACpB,WAAW,EAAE,IAAI,GAAG,EAAE;SACvB,CAAC;QACF,IAAI,CAAC,oBAAoB,EAAE,CAAC;IAC9B,CAAC;IAED;;OAEG;IACK,oBAAoB;QAC1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;YACzC,MAAM,MAAM,GAAG,IAAI,uBAAM,CAAC,IAAA,WAAI,EAAC,SAAS,EAAE,WAAW,CAAC,CAAC,CAAC;YACxD,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACrC,IAAI,CAAC,UAAU,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YAEzC,MAAM,CAAC,EAAE,CAAC,SAAS,EAAE,CAAC,OAAO,EAAE,EAAE;gBAC/B,IAAI,CAAC,mBAAmB,CAAC,CAAC,EAAE,OAAO,CAAC,CAAC;YACvC,CAAC,CAAC,CAAC;YAEH,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAK,EAAE,EAAE;gBAC3B,YAAG,CAAC,KAAK,CAAC,UAAU,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;gBACvC,IAAI,CAAC,iBAAiB,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;YACnC,CAAC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED;;OAEG;IACK,mBAAmB,CAAC,QAAgB,EAAE,OAAY;QACxD,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,OAAO,CAAC;QAC9C,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;QAEpC,IAAI,CAAC,IAAI;YAAE,OAAO;QAElB,QAAQ,IAAI,EAAE,CAAC;YACb,KAAK,SAAS;gBACZ,IAAI,CAAC,MAAM,GAAG,WAAW,CAAC;gBAC1B,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC;gBACnB,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;gBAC1B,MAAM;YACR,KAAK,OAAO;gBACV,IAAI,CAAC,MAAM,GAAG,QAAQ,CAAC;gBACvB,IAAI,CAAC,KAAK,GAAG,KAAK,CAAC;gBACnB,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;gBAC1B,MAAM;QACV,CAAC;QAED,2BAA2B;QAC3B,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC7C,IAAI,CAAC,UAAU,CAAC,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAClD,CAAC;IAED;;OAEG;IACK,iBAAiB,CAAC,QAAgB,EAAE,KAAY;QACtD,YAAG,CAAC,KAAK,CAAC,UAAU,QAAQ,wBAAwB,EAAE,KAAK,CAAC,CAAC;QAE7D,6DAA6D;QAC7D,KAAK,MAAM,CAAC,MAAM,EAAE,IAAI,CAAC,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,EAAE,EAAE,CAA
C;YAClD,IAAI,IAAI,CAAC,MAAM,KAAK,SAAS,EAAE,CAAC;gBAC9B,IAAI,CAAC,MAAM,GAAG,QAAQ,CAAC;gBACvB,IAAI,CAAC,KAAK,GAAG,iBAAiB,KAAK,CAAC,OAAO,EAAE,CAAC;gBAC9C,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;YAC5B,CAAC;QACH,CAAC;QAED,iCAAiC;QACjC,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC;QAC7C,IAAI,CAAC,UAAU,CAAC,gBAAgB,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC;IAClD,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,cAAc,CAAC,IAAc,EAAE,UAA2B,EAAE;QAChE,MAAM,OAAO,GAAa,EAAE,CAAC;QAE7B,eAAe;QACf,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,MAAM,MAAM,GAAG,IAAI,CAAC,cAAc,EAAE,CAAC;YACrC,MAAM,IAAI,GAAiB;gBACzB,EAAE,EAAE,MAAM;gBACV,GAAG;gBACH,OAAO;gBACP,MAAM,EAAE,SAAS;aAClB,CAAC;YAEF,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YAC7B,OAAO,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACvB,CAAC;QAED,gBAAgB;QAChB,MAAM,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,CAAC;QAEjC,kBAAkB;QAClB,MAAM,OAAO,GAAwB,EAAE,CAAC;QACxC,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC;YACpC,IAAI,IAAI,EAAE,MAAM,EAAE,CAAC;gBACjB,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAC5B,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,YAAY,CAAC,OAAiB;QAC1C,OAAO,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE;YAC7B,IAAI,cAAc,GAAG,CAAC,CAAC;YACvB,MAAM,UAAU,GAAG,OAAO,CAAC,MAAM,CAAC;YAElC,MAAM,eAAe,GAAG,GAAG,EAAE;gBAC3B,IAAI,cAAc,IAAI,UAAU,EAAE,CAAC;oBACjC,OAAO,EAAE,CAAC;gBACZ,CAAC;YACH,CAAC,CAAC;YAEF,MAAM,UAAU,GAAG,GAAG,EAAE;gBACtB,IAAI,IAAI,CAAC,UAAU,CAAC,gBAAgB,CAAC,MAAM,KAAK,CAAC;oBAAE,OAAO;gBAE1D,MAAM,aAAa,GAAG,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC,EAAE;oBACtC,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;oBAChC,OAAO,IAAI,EAAE,MAAM,KAAK,SAAS,CAAC;gBACpC,CAAC,CAAC,CAAC;gBAEH,IAAI,CAAC,aAAa;oBAAE,OAAO;gBAE3B,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,CAAC,gBAAgB,CAAC,GAAG,EAAG,CAAC;gBACzD,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,aAAa,CAAE,CAAC;gBAE5C,IAAI,CAAC,MAAM,GAAG,SAAS,CAAC;gBACxB,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;gBAC5B,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;gBAE1
C,sBAAsB;gBACtB,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,CAAC;oBAC5C,MAAM,EAAE,aAAa;oBACrB,GAAG,EAAE,IAAI,CAAC,GAAG;oBACb,OAAO,EAAE,IAAI,CAAC,OAAO;iBACtB,CAAC,CAAC;YACL,CAAC,CAAC;YAEF,0BAA0B;YAC1B,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,EAAE;gBAC/B,cAAc,GAAG,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE;oBACnC,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;oBAChC,OAAO,IAAI,EAAE,MAAM,KAAK,WAAW,IAAI,IAAI,EAAE,MAAM,KAAK,QAAQ,CAAC;gBACnE,CAAC,CAAC,CAAC,MAAM,CAAC;gBAEV,wCAAwC;gBACxC,UAAU,EAAE,CAAC;gBAEb,IAAI,cAAc,IAAI,UAAU,EAAE,CAAC;oBACjC,aAAa,CAAC,OAAO,CAAC,CAAC;oBACvB,OAAO,EAAE,CAAC;gBACZ,CAAC;YACH,CAAC,EAAE,GAAG,CAAC,CAAC;YAER,gCAAgC;YAChC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,UAAU,EAAE,OAAO,CAAC,MAAM,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC;gBACnE,UAAU,EAAE,CAAC;YACf,CAAC;QACH,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;OAEG;IACH,QAAQ;QACN,MAAM,KAAK,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC;QAC9C,MAAM,SAAS,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,WAAW,CAAC,CAAC,MAAM,CAAC;QACrE,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,QAAQ,CAAC,CAAC,MAAM,CAAC;QAC/D,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,MAAM,CAAC;QACjE,MAAM,OAAO,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,KAAK,SAAS,CAAC,CAAC,MAAM,CAAC;QAEjE,OAAO;YACL,UAAU,EAAE,KAAK,CAAC,MAAM;YACxB,SAAS;YACT,MAAM;YACN,OAAO;YACP,OAAO;YACP,gBAAgB,EAAE,IAAI,CAAC,UAAU,CAAC,gBAAgB,CAAC,MAAM;YACzD,WAAW,EAAE,IAAI,CAAC,UAAU,CAAC,WAAW,CAAC,IAAI;SAC9C,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,cAAc;QACpB,OAAO,QAAQ,IAAI,CAAC,GAAG,EAAE,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,EAAE,CAAC;IACzE,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,QAAQ;QACZ,MAAM,OAAO,CAAC,GAAG,CACf,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC,MAAM,CAAC,SAAS,EAAE,CAAC,CAC1D,CAAC;QACF,YAAG,CAAC,IAAI,CAAC,2BAA2B,CAAC,CAAC;IACxC,CAAC;CACF;AA/ND,8CA+NC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/platforms/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,eAAe,EAAE,MAAM,aAAa,CAAC;AAE9C,OAAO,EAAE,eAAe,EAAE,CAAC;AAG3B,eAAO,MAAM,iBAAiB,+BAE7B,CAAC"}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Platform plugins export
|
|
4
|
+
*/
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
// Both exports are initialised to undefined before the module they depend on is loaded.
exports.InstagramPlugin = void 0;
exports.BUILTIN_PLATFORMS = void 0;
const instagram_1 = require("./instagram");
// Re-export InstagramPlugin through an enumerable getter that reads it from the instagram module on each access.
Object.defineProperty(exports, "InstagramPlugin", {
    enumerable: true,
    get: () => instagram_1.InstagramPlugin
});
// Export all built-in platforms
exports.BUILTIN_PLATFORMS = [instagram_1.InstagramPlugin];
|
|
13
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/platforms/index.ts"],"names":[],"mappings":";AAAA;;GAEG;;;AAEH,2CAA8C;AAErC,gGAFA,2BAAe,OAEA;AAExB,gCAAgC;AACnB,QAAA,iBAAiB,GAAG;IAC/B,2BAAe;CAChB,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"instagram.d.ts","sourceRoot":"","sources":["../../src/platforms/instagram.ts"],"names":[],"mappings":"AAMA,OAAO,EAAE,cAAc,EAAgB,MAAM,YAAY,CAAC;AAG1D;;GAEG;AACH,eAAO,MAAM,eAAe,EAAE,cAgI7B,CAAC"}
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.InstagramPlugin = void 0;
|
|
4
|
+
const logger_1 = require("../logger");
|
|
5
|
+
/**
|
|
6
|
+
* Instagram platform plugin - built-in support for Instagram
|
|
7
|
+
*/
|
|
8
|
+
exports.InstagramPlugin = {
|
|
9
|
+
type: 'platform',
|
|
10
|
+
name: 'instagram',
|
|
11
|
+
version: '1.0.0',
|
|
12
|
+
urlPatterns: [
|
|
13
|
+
/instagram\.com\/p\//,
|
|
14
|
+
/instagram\.com\/reel\//,
|
|
15
|
+
],
|
|
16
|
+
validateUrl(url) {
|
|
17
|
+
return this.urlPatterns.some(pattern => pattern.test(url));
|
|
18
|
+
},
|
|
19
|
+
async scrape(page, url) {
|
|
20
|
+
// Extract post ID from URL
|
|
21
|
+
const urlPath = new URL(url).pathname;
|
|
22
|
+
const postId = urlPath.match(/\/(p|reel)\/([^\/]+)/)?.[2] || '';
|
|
23
|
+
logger_1.log.info('🔍 Target post ID from URL:', postId);
|
|
24
|
+
// Wait for page to fully load
|
|
25
|
+
await page.waitForTimeout(3000);
|
|
26
|
+
// Extract profile name from meta tag
|
|
27
|
+
const profileName = await page.$eval('meta[property="og:title"]', (el) => el.content?.split(' ')[0] || '').catch(() => '');
|
|
28
|
+
// Extract carousel data from JSON
|
|
29
|
+
const carouselData = await page.evaluate(() => {
|
|
30
|
+
const currentUrl = window.location.href;
|
|
31
|
+
const scripts = Array.from(document.querySelectorAll('script[type="application/json"]'));
|
|
32
|
+
for (const script of scripts) {
|
|
33
|
+
try {
|
|
34
|
+
const text = script.textContent;
|
|
35
|
+
if (!text?.includes('carousel_media'))
|
|
36
|
+
continue;
|
|
37
|
+
const json = JSON.parse(text);
|
|
38
|
+
// Debug: log the structure we're working with
|
|
39
|
+
const debugInfo = {
|
|
40
|
+
currentUrl,
|
|
41
|
+
jsonKeys: Object.keys(json),
|
|
42
|
+
hasCarouselMedia: false,
|
|
43
|
+
firstUrls: [],
|
|
44
|
+
extractedPostId: ''
|
|
45
|
+
};
|
|
46
|
+
// Recursively search for carousel_media
|
|
47
|
+
function findCarouselMedia(obj) {
|
|
48
|
+
if (!obj || typeof obj !== 'object')
|
|
49
|
+
return null;
|
|
50
|
+
if (obj.carousel_media && Array.isArray(obj.carousel_media)) {
|
|
51
|
+
debugInfo.hasCarouselMedia = true;
|
|
52
|
+
return obj.carousel_media;
|
|
53
|
+
}
|
|
54
|
+
for (const key in obj) {
|
|
55
|
+
const result = findCarouselMedia(obj[key]);
|
|
56
|
+
if (result)
|
|
57
|
+
return result;
|
|
58
|
+
}
|
|
59
|
+
return null;
|
|
60
|
+
}
|
|
61
|
+
const carouselMedia = findCarouselMedia(json);
|
|
62
|
+
if (carouselMedia) {
|
|
63
|
+
const items = carouselMedia.map((item, index) => ({
|
|
64
|
+
index,
|
|
65
|
+
url: item.image_versions2?.candidates?.[0]?.url || '',
|
|
66
|
+
width: item.original_width,
|
|
67
|
+
height: item.original_height,
|
|
68
|
+
})).filter((item) => item.url);
|
|
69
|
+
// Add first few URLs to debug
|
|
70
|
+
debugInfo.firstUrls = items.slice(0, 3).map((item) => item.url);
|
|
71
|
+
return { items, count: items.length, debug: debugInfo };
|
|
72
|
+
}
|
|
73
|
+
else {
|
|
74
|
+
return { items: [], count: 0, debug: debugInfo };
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
catch (e) {
|
|
78
|
+
logger_1.log.info('JSON parse error:', e);
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
return { items: [], count: 0 };
|
|
82
|
+
});
|
|
83
|
+
logger_1.log.info(`Found ${carouselData.count} media items from JSON`);
|
|
84
|
+
// Extract extended metadata
|
|
85
|
+
const metadata = await extractMetadata(page);
|
|
86
|
+
const mediaData = {
|
|
87
|
+
url,
|
|
88
|
+
post_id: postId,
|
|
89
|
+
platform: 'instagram',
|
|
90
|
+
profile_name: profileName,
|
|
91
|
+
media: carouselData.items,
|
|
92
|
+
metadata
|
|
93
|
+
};
|
|
94
|
+
return mediaData;
|
|
95
|
+
},
|
|
96
|
+
workflow: [
|
|
97
|
+
{
|
|
98
|
+
name: 'Close cookies modal',
|
|
99
|
+
action: 'click',
|
|
100
|
+
selectors: ['button::-p-text(Allow all cookies)'],
|
|
101
|
+
fallback: 'findByText',
|
|
102
|
+
fallbackText: 'Allow all cookies',
|
|
103
|
+
wait: 200,
|
|
104
|
+
optional: true
|
|
105
|
+
},
|
|
106
|
+
{
|
|
107
|
+
name: 'Close auth modal - click outside',
|
|
108
|
+
action: 'clickOutside',
|
|
109
|
+
clicks: 2,
|
|
110
|
+
wait: 500,
|
|
111
|
+
optional: true
|
|
112
|
+
}
|
|
113
|
+
]
|
|
114
|
+
};
|
|
115
|
+
/**
 * Extract extended metadata from Instagram page.
 *
 * Walks every JSON document embedded in a `<script type="application/json">`
 * element and copies the fields it recognizes. When a field occurs more than
 * once, the last occurrence wins. Scripts that are not valid JSON are skipped.
 *
 * @param page - Puppeteer page showing the post
 * @returns Object with `likesCount`, `commentsCount`, `caption`, `timestamp`
 * (ISO string) and `location`, each null when not found, plus `hashtags` and
 * `mentions` (without the leading `#` / `@`) taken from the caption
 */
async function extractMetadata(page) {
    // The callback is executed inside the page, not in Node: everything it
    // needs must be defined within it.
    const metadata = await page.evaluate(() => {
        const result = {
            likesCount: null,
            commentsCount: null,
            caption: null,
            hashtags: [],
            mentions: [],
            timestamp: null,
            location: null
        };
        // Hashtags may contain letters of any script, digits and underscores.
        const hashtagPattern = /#[\p{L}\p{M}\p{N}_]+/gu;
        // Usernames may contain dots, but a trailing dot is sentence punctuation.
        const mentionPattern = /@[A-Za-z0-9_](?:[A-Za-z0-9_.]*[A-Za-z0-9_])?/g;
        // Recursively search for post data
        const collectPostData = (node) => {
            if (!node || typeof node !== 'object') {
                return;
            }
            // Look for edge_media_to_caption (caption)
            const captionText = node.edge_media_to_caption?.edges?.[0]?.node?.text;
            if (typeof captionText === 'string' && captionText !== '') {
                result.caption = captionText;
                // Always recompute both lists so they describe this caption
                // and never keep entries from a previously seen one.
                result.hashtags = (captionText.match(hashtagPattern) ?? []).map((tag) => tag.substring(1));
                result.mentions = (captionText.match(mentionPattern) ?? []).map((mention) => mention.substring(1));
            }
            // Look for like count
            if (node.edge_media_preview_like?.count !== undefined) {
                result.likesCount = node.edge_media_preview_like.count;
            }
            // Look for comment count
            if (node.edge_media_to_comment?.count !== undefined) {
                result.commentsCount = node.edge_media_to_comment.count;
            }
            // Look for timestamp (seconds since the epoch). Values that do not
            // form a valid date are ignored; toISOString() would throw on them
            // and abort the rest of the traversal.
            if (node.taken_at_timestamp) {
                const takenAt = new Date(Number(node.taken_at_timestamp) * 1000);
                if (!Number.isNaN(takenAt.getTime())) {
                    result.timestamp = takenAt.toISOString();
                }
            }
            // Look for location
            if (node.location?.name) {
                result.location = node.location.name;
            }
            // Recurse through object
            for (const key in node) {
                collectPostData(node[key]);
            }
        };
        const scripts = Array.from(document.querySelectorAll('script[type="application/json"]'));
        for (const script of scripts) {
            const text = script.textContent;
            if (!text) {
                continue;
            }
            try {
                collectPostData(JSON.parse(text));
            }
            catch (e) {
                // Best effort: continue to next script
            }
        }
        return result;
    });
    return metadata;
}
|
|
189
|
+
//# sourceMappingURL=instagram.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"instagram.js","sourceRoot":"","sources":["../../src/platforms/instagram.ts"],"names":[],"mappings":";;;AAAA,sCAAgC;AAShC;;GAEG;AACU,QAAA,eAAe,GAAmB;IAC7C,IAAI,EAAE,UAAU;IAChB,IAAI,EAAE,WAAW;IACjB,OAAO,EAAE,OAAO;IAEhB,WAAW,EAAE;QACX,qBAAqB;QACrB,wBAAwB;KACzB;IAED,WAAW,CAAC,GAAW;QACrB,OAAO,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;IAC7D,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,IAAU,EAAE,GAAW;QAClC,2BAA2B;QAC3B,MAAM,OAAO,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC,QAAQ,CAAC;QACtC,MAAM,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,sBAAsB,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAEhE,YAAG,CAAC,IAAI,CAAC,6BAA6B,EAAE,MAAM,CAAC,CAAC;QAEhD,8BAA8B;QAC9B,MAAM,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,CAAC;QAEhC,qCAAqC;QACrC,MAAM,WAAW,GAAG,MAAM,IAAI,CAAC,KAAK,CAClC,2BAA2B,EAC3B,CAAC,EAAmB,EAAE,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CACzD,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,EAAE,CAAC,CAAC;QAElB,kCAAkC;QAClC,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;YAC5C,MAAM,UAAU,GAAG,MAAM,CAAC,QAAQ,CAAC,IAAI,CAAC;YAExC,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,iCAAiC,CAAC,CAAC,CAAC;YAEzF,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;gBAC7B,IAAI,CAAC;oBACH,MAAM,IAAI,GAAG,MAAM,CAAC,WAAW,CAAC;oBAChC,IAAI,CAAC,IAAI,EAAE,QAAQ,CAAC,gBAAgB,CAAC;wBAAE,SAAS;oBAEhD,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;oBAE9B,8CAA8C;oBAC9C,MAAM,SAAS,GAAG;wBAChB,UAAU;wBACV,QAAQ,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC;wBAC3B,gBAAgB,EAAE,KAAK;wBACvB,SAAS,EAAE,EAAc;wBACzB,eAAe,EAAE,EAAE;qBACpB,CAAC;oBAEF,wCAAwC;oBACxC,SAAS,iBAAiB,CAAC,GAAQ;wBACjC,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ;4BAAE,OAAO,IAAI,CAAC;wBAEjD,IAAI,GAAG,CAAC,cAAc,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,EAAE,CAAC;4BAC5D,SAAS,CAAC,gBAAgB,GAAG,IAAI,CAAC;4BAClC,OAAO,GAAG,CAAC,cAAc,CAAC;wBAC5B,CAAC;wBAED,KAAK,MAAM,GAAG,IAAI,GAAG,EAAE,CAAC;4BACtB,MAAM,MAAM,GAAG,iBAAiB,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;4BAC3C,IAAI,MAAM;gCAAE,OAAO,MAAM,CAAC;wBAC5B,CAAC;wBAED,OAAO,IAAI,CAAC;oBACd,CAAC;oBA
ED,MAAM,aAAa,GAAG,iBAAiB,CAAC,IAAI,CAAC,CAAC;oBAC9C,IAAI,aAAa,EAAE,CAAC;wBAClB,MAAM,KAAK,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,IAAS,EAAE,KAAa,EAAE,EAAE,CAAC,CAAC;4BAC7D,KAAK;4BACL,GAAG,EAAE,IAAI,CAAC,eAAe,EAAE,UAAU,EAAE,CAAC,CAAC,CAAC,EAAE,GAAG,IAAI,EAAE;4BACrD,KAAK,EAAE,IAAI,CAAC,cAAc;4BAC1B,MAAM,EAAE,IAAI,CAAC,eAAe;yBAC7B,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAS,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;wBAEpC,8BAA8B;wBAC9B,SAAS,CAAC,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,IAAS,EAAE,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;wBAErE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;oBAC1D,CAAC;yBAAM,CAAC;wBACN,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;oBACnD,CAAC;gBACH,CAAC;gBAAC,OAAO,CAAC,EAAE,CAAC;oBACX,YAAG,CAAC,IAAI,CAAC,mBAAmB,EAAE,CAAC,CAAC,CAAC;gBACnC,CAAC;YACH,CAAC;YAED,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,CAAC;QACjC,CAAC,CAAC,CAAC;QAEH,YAAG,CAAC,IAAI,CAAC,SAAS,YAAY,CAAC,KAAK,wBAAwB,CAAC,CAAC;QAE9D,4BAA4B;QAC5B,MAAM,QAAQ,GAAG,MAAM,eAAe,CAAC,IAAI,CAAC,CAAC;QAE7C,MAAM,SAAS,GAAc;YAC3B,GAAG;YACH,OAAO,EAAE,MAAM;YACf,QAAQ,EAAE,WAAW;YACrB,YAAY,EAAE,WAAW;YACzB,KAAK,EAAE,YAAY,CAAC,KAAuB;YAC3C,QAAQ;SACT,CAAC;QAEF,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,QAAQ,EAAE;QACR;YACE,IAAI,EAAE,qBAAqB;YAC3B,MAAM,EAAE,OAAO;YACf,SAAS,EAAE,CAAC,oCAAoC,CAAC;YACjD,QAAQ,EAAE,YAAY;YACtB,YAAY,EAAE,mBAAmB;YACjC,IAAI,EAAE,GAAG;YACT,QAAQ,EAAE,IAAI;SACf;QACD;YACE,IAAI,EAAE,kCAAkC;YACxC,MAAM,EAAE,cAAc;YACtB,MAAM,EAAE,CAAC;YACT,IAAI,EAAE,GAAG;YACT,QAAQ,EAAE,IAAI;SACf;KACF;CACF,CAAC;AAEF;;GAEG;AACH,KAAK,UAAU,eAAe,CAAC,IAAU;IACvC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,GAAG,EAAE;QACxC,MAAM,MAAM,GAA0B;YACpC,UAAU,EAAE,IAAI;YAChB,aAAa,EAAE,IAAI;YACnB,OAAO,EAAE,IAAI;YACb,QAAQ,EAAE,EAAE;YACZ,QAAQ,EAAE,EAAE;YACZ,SAAS,EAAE,IAAI;YACf,QAAQ,EAAE,IAAI;SACf,CAAC;QAEF,+CAA+C;QAC/C,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,iCAAiC,CAAC,CAAC,CAAC;QAEzF,KAAK,MAAM,MAAM,IAAI,OAAO,EAAE,CAAC;YAC7B,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,MAAM,C
AAC,WAAW,CAAC;gBAChC,IAAI,CAAC,IAAI;oBAAE,SAAS;gBAEpB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;gBAE9B,mCAAmC;gBACnC,SAAS,YAAY,CAAC,GAAQ;oBAC5B,IAAI,CAAC,GAAG,IAAI,OAAO,GAAG,KAAK,QAAQ;wBAAE,OAAO,IAAI,CAAC;oBAEjD,2CAA2C;oBAC3C,IAAI,GAAG,CAAC,qBAAqB,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,EAAE,CAAC;wBACtD,MAAM,CAAC,OAAO,GAAG,GAAG,CAAC,qBAAqB,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC;wBAE9D,mBAAmB;wBACnB,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;4BACnB,MAAM,cAAc,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;4BACrD,IAAI,cAAc,EAAE,CAAC;gCACnB,MAAM,CAAC,QAAQ,GAAG,cAAc,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;4BAChE,CAAC;4BAED,mBAAmB;4BACnB,MAAM,cAAc,GAAG,MAAM,CAAC,OAAO,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;4BACrD,IAAI,cAAc,EAAE,CAAC;gCACnB,MAAM,CAAC,QAAQ,GAAG,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,EAAE,CAAC,OAAO,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC,CAAC;4BACxE,CAAC;wBACH,CAAC;oBACH,CAAC;oBAED,sBAAsB;oBACtB,IAAI,GAAG,CAAC,uBAAuB,EAAE,KAAK,KAAK,SAAS,EAAE,CAAC;wBACrD,MAAM,CAAC,UAAU,GAAG,GAAG,CAAC,uBAAuB,CAAC,KAAK,CAAC;oBACxD,CAAC;oBAED,yBAAyB;oBACzB,IAAI,GAAG,CAAC,qBAAqB,EAAE,KAAK,KAAK,SAAS,EAAE,CAAC;wBACnD,MAAM,CAAC,aAAa,GAAG,GAAG,CAAC,qBAAqB,CAAC,KAAK,CAAC;oBACzD,CAAC;oBAED,qBAAqB;oBACrB,IAAI,GAAG,CAAC,kBAAkB,EAAE,CAAC;wBAC3B,MAAM,CAAC,SAAS,GAAG,IAAI,IAAI,CAAC,GAAG,CAAC,kBAAkB,GAAG,IAAI,CAAC,CAAC,WAAW,EAAE,CAAC;oBAC3E,CAAC;oBAED,oBAAoB;oBACpB,IAAI,GAAG,CAAC,QAAQ,EAAE,IAAI,EAAE,CAAC;wBACvB,MAAM,CAAC,QAAQ,GAAG,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC;oBACtC,CAAC;oBAED,yBAAyB;oBACzB,KAAK,MAAM,GAAG,IAAI,GAAG,EAAE,CAAC;wBACtB,YAAY,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC;oBACzB,CAAC;oBAED,OAAO,IAAI,CAAC;gBACd,CAAC;gBAED,YAAY,CAAC,IAAI,CAAC,CAAC;YAErB,CAAC;YAAC,OAAO,CAAC,EAAE,CAAC;gBACX,0BAA0B;YAC5B,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC,CAAC,CAAC;IAEH,OAAO,QAAwB,CAAC;AAClC,CAAC"}
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Plugin system for extensibility and multi-platform support
|
|
3
|
+
*/
|
|
4
|
+
import { Page } from 'puppeteer';
|
|
5
|
+
import { MediaData, CarouselItem } from './types';
|
|
6
|
+
/**
|
|
7
|
+
* Scraping options. Every field is optional. This is the type of the `options` argument passed to `PluginHooks.onBeforeScrape`.
|
|
8
|
+
*/
|
|
9
|
+
export interface ScrapingOptions {
|
|
10
|
+
saveToFile?: boolean; // NOTE(review): presumably toggles writing the result to disk - confirm against the scraper
|
|
11
|
+
outputPath?: string; // NOTE(review): presumably where output goes when saveToFile is set - confirm
|
|
12
|
+
headless?: boolean; // NOTE(review): presumably forwarded to the browser launch - confirm
|
|
13
|
+
timeout?: number; // NOTE(review): unit is not visible in this file, presumably milliseconds - confirm
|
|
14
|
+
retries?: number; // NOTE(review): presumably the number of retry attempts - confirm
|
|
15
|
+
useSession?: boolean; // NOTE(review): presumably reuses a saved login session - confirm
|
|
16
|
+
}
|
|
17
|
+
/**
|
|
18
|
+
* Workflow task definition: one entry of a `PlatformPlugin.workflow` array. Only `name` and `action` are required.
|
|
19
|
+
*/
|
|
20
|
+
export interface WorkflowTask {
|
|
21
|
+
name: string; // Label for the step
|
|
22
|
+
action: 'click' | 'clickOutside' | 'doubleClick' | 'hold' | 'keypress'; // Which kind of interaction the step performs
|
|
23
|
+
selectors?: string[]; // NOTE(review): presumably candidate element selectors for the action - confirm how they are tried
|
|
24
|
+
fallback?: string; // NOTE(review): presumably names a fallback strategy used when no selector matches - confirm the accepted values
|
|
25
|
+
fallbackText?: string; // NOTE(review): presumably the text searched for by the fallback - confirm
|
|
26
|
+
clicks?: number; // NOTE(review): presumably how many clicks to perform - confirm
|
|
27
|
+
wait?: number; // NOTE(review): unit is not visible in this file, presumably milliseconds - confirm
|
|
28
|
+
optional?: boolean; // NOTE(review): presumably lets the workflow continue when the step fails - confirm
|
|
29
|
+
holdDuration?: number; // NOTE(review): presumably used by the 'hold' action; unit not visible here - confirm
|
|
30
|
+
key?: string; // NOTE(review): presumably the key for the 'keypress' action - confirm
|
|
31
|
+
modifiers?: ('Control' | 'Shift' | 'Alt' | 'Meta')[]; // NOTE(review): presumably modifier keys held during 'keypress' - confirm
|
|
32
|
+
}
|
|
33
|
+
/**
|
|
34
|
+
* Platform plugin - adds support for a new platform. The literal `type: 'platform'` distinguishes it from `UtilityPlugin` within the `Plugin` union.
|
|
35
|
+
*/
|
|
36
|
+
export interface PlatformPlugin {
|
|
37
|
+
type: 'platform'; // Discriminant of the `Plugin` union
|
|
38
|
+
/** Plugin name (e.g., 'pinterest', 'tiktok') */
|
|
39
|
+
name: string;
|
|
40
|
+
/** Plugin version */
|
|
41
|
+
version: string;
|
|
42
|
+
/** URL patterns this plugin handles */
|
|
43
|
+
urlPatterns: RegExp[];
|
|
44
|
+
/** Validate if URL is supported by this plugin */
|
|
45
|
+
validateUrl(url: string): boolean;
|
|
46
|
+
/** Scrape media from the platform; receives a Puppeteer `Page` and the URL, resolves to the scraped `MediaData` */
|
|
47
|
+
scrape(page: Page, url: string): Promise<MediaData>;
|
|
48
|
+
/** Optional workflow for handling modals, cookies, etc. */
|
|
49
|
+
workflow?: WorkflowTask[];
|
|
50
|
+
}
|
|
51
|
+
/**
|
|
52
|
+
* Utility plugin - extends functionality (logging, caching, analytics, etc.) through the callbacks in `hooks`. The literal `type: 'utility'` distinguishes it from `PlatformPlugin` within the `Plugin` union.
|
|
53
|
+
*/
|
|
54
|
+
export interface UtilityPlugin {
|
|
55
|
+
type: 'utility'; // Discriminant of the `Plugin` union
|
|
56
|
+
/** Plugin name */
|
|
57
|
+
name: string;
|
|
58
|
+
/** Plugin version */
|
|
59
|
+
version: string;
|
|
60
|
+
/** Lifecycle hooks; the object is required, each hook inside it is optional */
|
|
61
|
+
hooks: PluginHooks;
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Plugin lifecycle hooks. Every hook is optional and returns a Promise.
|
|
65
|
+
*/
|
|
66
|
+
export interface PluginHooks {
|
|
67
|
+
/** Called before scraping starts; receives the URL and the ScrapingOptions, resolves to nothing */
|
|
68
|
+
onBeforeScrape?: (url: string, options: ScrapingOptions) => Promise<void>;
|
|
69
|
+
/** Called after scraping completes; receives a MediaData and resolves to a MediaData (NOTE(review): presumably the resolved value replaces the input - confirm) */
|
|
70
|
+
onAfterScrape?: (data: MediaData) => Promise<MediaData>;
|
|
71
|
+
/** Called when an error occurs; receives the Error, resolves to nothing */
|
|
72
|
+
onError?: (error: Error) => Promise<void>;
|
|
73
|
+
/** Called after media items are extracted; receives a CarouselItem array and resolves to a CarouselItem array */
|
|
74
|
+
onMediaExtracted?: (media: CarouselItem[]) => Promise<CarouselItem[]>;
|
|
75
|
+
}
|
|
76
|
+
/**
|
|
77
|
+
* Union type for all plugins; the literal `type` field ('platform' or 'utility') discriminates the two members
|
|
78
|
+
*/
|
|
79
|
+
export type Plugin = PlatformPlugin | UtilityPlugin;
|
|
80
|
+
/**
|
|
81
|
+
* Plugin manager for registering and managing plugins (declaration only; method bodies live in the compiled plugins.js)
|
|
82
|
+
*/
|
|
83
|
+
export declare class PluginManager {
|
|
84
|
+
private plugins; // private member: the emitted declaration omits its type, so the storage shape is not visible here
|
|
85
|
+
/**
|
|
86
|
+
* Register a plugin (either a PlatformPlugin or a UtilityPlugin)
|
|
87
|
+
* @param plugin - Plugin to register
|
|
88
|
+
*/
|
|
89
|
+
register(plugin: Plugin): void;
|
|
90
|
+
/**
|
|
91
|
+
* Unregister a plugin
|
|
92
|
+
* @param pluginName - Name of plugin to unregister
|
|
93
|
+
*/
|
|
94
|
+
unregister(pluginName: string): void;
|
|
95
|
+
/**
|
|
96
|
+
* Get platform plugin for a given URL
|
|
97
|
+
* @param url - URL to match against platform patterns
|
|
98
|
+
* @returns PlatformPlugin or null if no match
|
|
99
|
+
*/
|
|
100
|
+
getPlatformForUrl(url: string): PlatformPlugin | null;
|
|
101
|
+
/**
|
|
102
|
+
* Get all registered plugins, platform and utility alike, as an array
|
|
103
|
+
*/
|
|
104
|
+
getPlugins(): Plugin[];
|
|
105
|
+
/**
|
|
106
|
+
* Get all platform plugins as an array
|
|
107
|
+
*/
|
|
108
|
+
getPlatformPlugins(): PlatformPlugin[];
|
|
109
|
+
/**
|
|
110
|
+
* Get all utility plugins as an array
|
|
111
|
+
*/
|
|
112
|
+
getUtilityPlugins(): UtilityPlugin[];
|
|
113
|
+
/**
|
|
114
|
+
* Execute a lifecycle hook across all utility plugins; returns Promise<any>, and what it resolves to is not visible in this declaration
|
|
115
|
+
* @param hookName - Name of the hook to execute; must be a key of PluginHooks
|
|
116
|
+
* @param args - Arguments to pass to the hook; typed as the parameter list of the hook selected by hookName
|
|
117
|
+
*/
|
|
118
|
+
executeHook<K extends keyof PluginHooks>(hookName: K, ...args: Parameters<NonNullable<PluginHooks[K]>>): Promise<any>;
|
|
119
|
+
/**
|
|
120
|
+
* Check if a plugin is registered under the given name
|
|
121
|
+
*/
|
|
122
|
+
hasPlugin(pluginName: string): boolean;
|
|
123
|
+
/**
|
|
124
|
+
* Get a specific plugin by name; the return type allows undefined (NOTE(review): presumably when no plugin has that name - confirm)
|
|
125
|
+
*/
|
|
126
|
+
getPlugin(pluginName: string): Plugin | undefined;
|
|
127
|
+
}
|
|
128
|
+
//# sourceMappingURL=plugins.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"plugins.d.ts","sourceRoot":"","sources":["../src/plugins.ts"],"names":[],"mappings":"AACA;;GAEG;AAEH,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AACjC,OAAO,EAAE,SAAS,EAAE,YAAY,EAAE,MAAM,SAAS,CAAC;AAElD;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,UAAU,CAAC,EAAE,OAAO,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,OAAO,CAAC;CACtB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,OAAO,GAAG,cAAc,GAAG,aAAa,GAAG,MAAM,GAAG,UAAU,CAAC;IACvE,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,QAAQ,CAAC,EAAE,OAAO,CAAC;IAGnB,YAAY,CAAC,EAAE,MAAM,CAAC;IAGtB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,SAAS,CAAC,EAAE,CAAC,SAAS,GAAG,OAAO,GAAG,KAAK,GAAG,MAAM,CAAC,EAAE,CAAC;CACtD;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,UAAU,CAAC;IACjB,gDAAgD;IAChD,IAAI,EAAE,MAAM,CAAC;IACb,qBAAqB;IACrB,OAAO,EAAE,MAAM,CAAC;IAEhB,uCAAuC;IACvC,WAAW,EAAE,MAAM,EAAE,CAAC;IAEtB,kDAAkD;IAClD,WAAW,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC;IAElC,qCAAqC;IACrC,MAAM,CAAC,IAAI,EAAE,IAAI,EAAE,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,SAAS,CAAC,CAAC;IAEpD,2DAA2D;IAC3D,QAAQ,CAAC,EAAE,YAAY,EAAE,CAAC;CAC3B;AAED;;GAEG;AACH,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,SAAS,CAAC;IAChB,kBAAkB;IAClB,IAAI,EAAE,MAAM,CAAC;IACb,qBAAqB;IACrB,OAAO,EAAE,MAAM,CAAC;IAChB,sBAAsB;IACtB,KAAK,EAAE,WAAW,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,WAAW;IAC1B,oCAAoC;IACpC,cAAc,CAAC,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IAE1E,sCAAsC;IACtC,aAAa,CAAC,EAAE,CAAC,IAAI,EAAE,SAAS,KAAK,OAAO,CAAC,SAAS,CAAC,CAAC;IAExD,kCAAkC;IAClC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,KAAK,KAAK,OAAO,CAAC,IAAI,CAAC,CAAC;IAE1C,6CAA6C;IAC7C,gBAAgB,CAAC,EAAE,CAAC,KAAK,EAAE,YAAY,EAAE,KAAK,OAAO,CAAC,YAAY,EAAE,CAAC,CAAC;CACvE;AAED;;GAEG;AACH,MAAM,MAAM,MAAM,GAAG,cAAc,GAAG,aAAa,CAAC;AAEpD;;GAEG;AACH,qBAAa,aAAa;IACxB,OAAO,CAAC,OAAO,CAAkC;IAEjD;;;OAGG;IACH,QAAQ,CAAC,MAAM
,EAAE,MAAM,GAAG,IAAI;IAS9B;;;OAGG;IACH,UAAU,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI;IAQpC;;;;OAIG;IACH,iBAAiB,CAAC,GAAG,EAAE,MAAM,GAAG,cAAc,GAAG,IAAI;IASrD;;OAEG;IACH,UAAU,IAAI,MAAM,EAAE;IAItB;;OAEG;IACH,kBAAkB,IAAI,cAAc,EAAE;IAItC;;OAEG;IACH,iBAAiB,IAAI,aAAa,EAAE;IAIpC;;;;OAIG;IACG,WAAW,CAAC,CAAC,SAAS,MAAM,WAAW,EAC3C,QAAQ,EAAE,CAAC,EACX,GAAG,IAAI,EAAE,UAAU,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,GAC/C,OAAO,CAAC,GAAG,CAAC;IAyBf;;OAEG;IACH,SAAS,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO;IAItC;;OAEG;IACH,SAAS,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,GAAG,SAAS;CAGlD"}
|