puppyproxy 0.0.0-security → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +261 -1
- package/package.json +40 -6
- package/src/PuppyProxy.js +131 -0
- package/src/constants.js +82 -0
- package/src/database/DatabaseUtils.js +9 -0
- package/src/database/ProxyDatabase.js +77 -0
- package/src/index.d.ts +186 -0
- package/src/index.js +2 -0
- package/src/network/RequestClient.js +284 -0
- package/src/providers/IPVanish.js +36 -0
- package/src/providers/Scraper.js +144 -0
- package/src/utils.js +12 -0
- package/src/workers/collector.js +311 -0
package/src/index.d.ts
ADDED
```diff
@@ -0,0 +1,186 @@
+// index.d.ts
+/// <reference types="node" />
+
+import { Agent } from 'http';
+import { WebSocket as WS } from 'ws';
+
+/**
+ * Configuration for PuppyProxy
+ */
+export interface PuppyProxyConfig {
+  storeDir?: string;
+  ipvanish?: {
+    use?: boolean;
+    creds?: {
+      username: string;
+      password: string;
+    };
+    reducedmode?: boolean;
+  };
+  scraper?: {
+    use?: boolean;
+    timeBetweenScrapes?: number;
+    maxProxiesToCheck?: number;
+    limitProxies?: number;
+    timeoutPerProxy?: number;
+    timeoutMax?: number;
+    verbose?: boolean;
+    logStatus?: boolean;
+    proxyTypes?: Array<'socks4' | 'socks5' | 'http' | 'https'>;
+    timeoutFetch?: number;
+    timeoutWs?: number;
+    maxRetries?: number;
+  };
+  LOG?: {
+    connect?: boolean;
+    connectNoProxy?: boolean;
+    response?: boolean;
+    error?: boolean;
+    minor?: boolean;
+  };
+}
+
+/**
+ * Main PuppyProxy class
+ */
+export default class PuppyProxy {
+  constructor(config: PuppyProxyConfig);
+
+  config: PuppyProxyConfig;
+  proxyDB: ProxyDatabase;
+  ipvanish: IPVanish;
+  scraper: Scraper;
+  request: RequestClient;
+
+  /**
+   * Initializes the database and runs the proxy collector.
+   * @param forceCollector Whether to force scraping proxies
+   */
+  init(forceCollector?: boolean): Promise<void>;
+
+  /**
+   * Get an HTTP/SOCKS agent for the given URL.
+   * @param url Target URL
+   * @param preferredAgent 'ipvanish' | 'scraper' | 'none'
+   */
+  getAgent(url: string, preferredAgent?: string): Agent | null;
+
+  /**
+   * Create a ProxyRequest instance
+   */
+  createProxyRequest(url: string, options?: ProxyRequestOptions): ProxyRequest;
+
+  /**
+   * Manually run the proxy collector
+   */
+  collectProxies(force?: boolean): Promise<void>;
+
+  /**
+   * Patches global fetch and WebSocket
+   */
+  createGlobalPatch(
+    fetchOverride?: (url: string, options?: any) => Promise<any>,
+    wsOverride?: (url: string, options?: any) => WS,
+    fetchRule?: (url: string, options?: any) => boolean,
+    wsRule?: (url: string, options?: any) => boolean
+  ): void;
+}
+
+/**
+ * Options for ProxyRequest
+ */
+export interface ProxyRequestOptions {
+  AGENT?: Agent;
+  preferredAgent?: 'ipvanish' | 'scraper' | 'none';
+  autoNewAgent?: boolean;
+  maxRetries?: number;
+}
+
+/**
+ * Request client
+ */
+export class RequestClient {
+  constructor(proxyManager: PuppyProxy, config: PuppyProxyConfig);
+
+  proxyManager: PuppyProxy;
+  config: PuppyProxyConfig;
+  timeoutWs: number;
+  timeoutFetch: number;
+  maxRetries: number;
+
+  fetchBasic(url: string, options?: any): Promise<any>;
+  fetch(url: string, options?: any): Promise<any>;
+  wsBasic(url: string, options?: any): WS;
+  ws(
+    url: string,
+    options?: any,
+    onopen?: (ws: WS, event?: any) => void,
+    onmessage?: (ws: WS, event?: any) => void,
+    onclose?: (ws: WS, event?: any) => void,
+    onerror?: (ws: WS, error?: any) => void
+  ): Promise<WS>;
+
+  createProxyRequest(url: string, options?: ProxyRequestOptions): ProxyRequest;
+}
+
+/**
+ * ProxyRequest for single URL
+ */
+export class ProxyRequest {
+  constructor(requestClient: RequestClient, url: string, options?: ProxyRequestOptions);
+
+  client: RequestClient;
+  url: string;
+  options: ProxyRequestOptions;
+  successes: number;
+
+  success(): void;
+  failure(): void;
+  newAgent(url?: string): Agent | null;
+
+  fetchBasic(url?: string, dataAppend?: any): Promise<any>;
+  fetch(url?: string, dataAppend?: any): Promise<any>;
+  wsBasic(url?: string, dataAppend?: any): WS;
+  ws(
+    url?: string,
+    dataAppend?: any,
+    onopen?: (ws: WS, event?: any) => void,
+    onmessage?: (ws: WS, event?: any) => void,
+    onclose?: (ws: WS, event?: any) => void,
+    onerror?: (ws: WS, error?: any) => void
+  ): Promise<WS>;
+}
+
+/**
+ * Proxy database wrapper
+ */
+export class ProxyDatabase {
+  constructor(config: PuppyProxyConfig);
+
+  config: PuppyProxyConfig;
+  dbPath: string;
+  db: any;
+
+  init(): Promise<void>;
+  dealWithProxies(newProxies: string[], outputPath: string): Promise<void>;
+}
+
+/**
+ * Scraper for external proxies
+ */
+export class Scraper {
+  constructor(config: PuppyProxyConfig, proxyDB: ProxyDatabase);
+
+  runCollector(force?: boolean): Promise<void>;
+  loadScrapedProxies(): Promise<void>;
+  getAgent(url?: string, type?: 'socks' | 'http'): Agent | null;
+}
+
+/**
+ * IPVanish proxy provider
+ */
+export class IPVanish {
+  constructor(config: PuppyProxyConfig);
+
+  getAgent(url: string): Agent | null;
+}
```
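Taken together, the declarations above describe the public surface: construct a `PuppyProxy`, call `init()`, then request through `createProxyRequest` or fetch agents directly. A minimal usage sketch, assuming the package's default export matches the declared `PuppyProxy` class; the option values shown are illustrative only, not recommendations:

```js
// Usage sketch based solely on the index.d.ts declarations above.
// Assumes the default export is the declared PuppyProxy class.
import PuppyProxy from 'puppyproxy';

const proxy = new PuppyProxy({
  storeDir: './store',
  scraper: { use: true, proxyTypes: ['socks5', 'http'], maxRetries: 5 },
  LOG: { error: true },
});

await proxy.init(); // builds the proxy DB and runs the collector

const req = proxy.createProxyRequest('https://example.com', {
  preferredAgent: 'scraper',
  autoNewAgent: true, // rotate to a new agent after a failure
});

const res = await req.fetch(); // retried fetch through the chosen proxy agent
console.log(res.status);
```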
package/src/network/RequestClient.js
ADDED
```diff
@@ -0,0 +1,284 @@
+import log from 'puppylog';
+import id from 'puppyid';
+import { originals } from '../constants.js';
+
+export class ProxyRequest {
+  constructor(requestClient, url, options = {}) {
+    this.client = requestClient;
+    this.url = url;
+    this.options = options;
+    this.newAgent();
+    this.successes = 0;
+  }
+
+  success() { this.successes++; }
+  failure() { if (this.options.autoNewAgent) this.newAgent(); }
+
+  newAgent(url = this.url) {
+    const agent = this.client.proxyManager.getAgent(url, this.options.preferredAgent);
+    this.options.AGENT = agent;
+    this.successes = 0;
+    return agent;
+  }
+
+  async fetchBasic(url = this.url, dataAppend = {}) {
+    let req = this.client.fetchBasic(url, {
+      ...this.options,
+      ...dataAppend,
+    });
+
+    return new Promise((resolve, reject) => {
+      req.then((response) => {
+        if (response.ok) {
+          this.success();
+          resolve(response);
+        } else {
+          this.failure();
+          reject(new Error(`Failed to fetchBasic ${url} with status ${response.status}`));
+        }
+      }).catch((error) => {
+        this.failure();
+        reject(error);
+      });
+    });
+  };
+
+  async fetch(url = this.url, dataAppend = {}) {
+    const retry = async () => {
+      try {
+        const response = await (this.client.fetchBasic(url, {
+          ...this.options,
+          ...dataAppend
+        }));
+        if (response.ok) {
+          this.success();
+          return response;
+        } else {
+          this.failure();
+          throw new Error(`Failed to fetch ${url} status ${response.status}`);
+        }
+      } catch (error) {
+        this.failure();
+        throw error;
+      }
+    };
+
+    let retries = this.options.maxRetries || this.client.maxRetries;
+    for (let attempt = 0; attempt < retries; attempt++) {
+      try {
+        return await retry();
+      } catch (error) {
+        this.client.config.LOG?.error && log.error(id, `ProxyRequest fetch attempt ${attempt + 1} of ${retries} failed for ${url}: ${error.message}`);
+        if (attempt === retries - 1) {
+          throw error;
+        }
+      }
+    }
+  }
+
+  wsBasic(url = this.url, dataAppend = this.options) {
+    return this.client.wsBasic(url, dataAppend);
+  }
+
+  ws(url = this.url, dataAppend = this.options, onopen, onmessage, onclose, onerror) {
+    return this.client.ws(url, dataAppend, onopen, onmessage, onclose, onerror);
+  }
+}
+
+export default class RequestClient {
+  constructor(proxyManager, config) {
+    this.proxyManager = proxyManager;
+    this.config = config;
+    this.timeoutWs = config.timeoutWs || 7000;
+    this.timeoutFetch = config.timeoutFetch || 15000;
+    this.maxRetries = config.maxRetries || 15;
+  }
+
+  fetchBasic (url, options = {}) {
+    let agent = options.AGENT || this.proxyManager.getAgent(url, options.preferredAgent);
+    delete options.preferredAgent;
+
+    if (agent) {
+      Object.assign(options, { agent });
+    };
+
+    return originals.fetch(url, options);
+  }
+
+  async fetch(url, options = {}) {
+    let attempts = 0;
+    const retry = async () => {
+      attempts++;
+      try {
+        let agent = options.AGENT || this.proxyManager.getAgent(url, options.preferredAgent);
+        const fetchOpts = { ...options, agent: agent || undefined };
+        delete fetchOpts.AGENT;
+        delete fetchOpts.preferredAgent;
+
+        const controller = new AbortController();
+        const timeout = setTimeout(() => controller.abort(), this.timeoutFetch);
+        fetchOpts.signal = controller.signal;
+
+        const response = await originals.fetch(url, fetchOpts);
+        clearTimeout(timeout);
+
+        if (response.ok) {
+          if (this.config.LOG?.response) console.log(id, `got ok response ${url}`);
+          return response;
+        } else {
+          if (this.config.LOG?.error) log.warning(id, `Fetch attempt ${attempts} failed ${url} status ${response.status}`);
+          if (attempts < this.maxRetries) return retry();
+          throw new Error(`Failed after ${this.maxRetries} attempts`);
+        }
+      } catch (error) {
+        if (this.config.LOG?.error) log.error(id, `Fetch error ${attempts}: ${error.message}`);
+        if (attempts < this.maxRetries) return retry();
+        throw error;
+      }
+    };
+    return retry();
+  }
+
+  wsBasic (url, options = {}) {
+    let agent = options.AGENT || this.proxyManager.getAgent(url, options.preferredAgent);
+
+    if (agent) {
+      Object.assign(options, { agent });
+    };
+
+    try {
+      const ws = new originals.WebSocket(url, options);
+
+      setTimeout(() => {
+        if (ws.readyState !== originals.WebSocket.OPEN) {
+          console.log("wsBasic timeout, closing ws for", url);
+          ws.close();
+        }
+      }, this.timeoutWs);
+
+      return ws;
+    } catch (error) {
+      console.log("error creating ws for", url);
+
+      throw error;
+    };
+  }
+
+  ws(url, options = {}, onopen, onmessage, onclose, onerror) {
+    return new Promise((resolve, reject) => {
+      let attempts = 0;
+      const retry = async () => {
+        attempts++;
+
+        try {
+          let agent = options.AGENT || this.proxyManager.getAgent(url, options.preferredAgent);
+
+          let attempt = attempts;
+
+          if (agent) {
+            Object.assign(options, { agent });
+          };
+
+          const ws = new originals.WebSocket(url, options);
+
+          ws.destroyCompletely = () => {
+            if (!ws || ws.readyState === undefined) {
+              this.config.LOG.error && log.warning(id, "WebSocket is not initialized or invalid");
+              return;
+            };
+            if (ws && ws.readyState !== originals.WebSocket.CLOSED && ws.readyState !== originals.WebSocket.CLOSING) {
+              try {
+                ws.onclose = null;
+                ws.onerror = null;
+                ws.onmessage = null;
+                ws.onopen = null;
+                ws.removeAllListeners();
+                this.config.LOG.minor && log.pink(id, "destroyed websocket", url, attempt);
+              } catch (error) {
+                this.config.LOG.error && log.error(id, "Error in destroyCompletely", error);
+              };
+            };
+          };
+
+          let opened = false;
+          let errored = false;
+          let message = false;
+
+          setTimeout(() => {
+            if (!opened) ws.close();
+          }, this.timeoutWs);
+
+          ws.onopen = (event) => {
+            opened = true;
+            try {
+              onopen && onopen(ws, event);
+            } catch (error) {
+              this.config.LOG.error && log.error(id, "error in onopen", error);
+            };
+            resolve(ws);
+            return;
+          };
+
+          ws.onerror = (error) => {
+            opened = true; errored = true;
+
+            ws.destroyCompletely();
+
+            try {
+              onerror && onerror(ws, error);
+            } catch (error) {
+              this.config.LOG.error && log.error(id, "error in onerror", error);
+            };
+
+            if (attempts >= this.maxRetries) {
+              this.config.LOG.error && log.error(id, `Failed to connect to ${url} after ${this.maxRetries} attempts`);
+            } else {
+              this.config.LOG.error && log.warning(id, `Retrying connect to ${url} (${attempts}/${this.maxRetries})`);
+              retry();
+            };
+          };
+
+          ws.onclose = (event) => {
+            try {
+              // console.log(id, "ws closed", url, event.code, event.reason, event.wasClean, "o/e/m", opened, errored, message, (!(opened || message)), (options.requireMessage && !message));
+              onclose && onclose(ws, event);
+              if (!errored) {
+                if (!(opened || message)) {
+                  ws.destroyCompletely();
+                  this.config.LOG.error && log.warning(id, `WebSocket closed before opening, erroring: ${url}`);
+                  retry();
+                };
+                if (options.requireMessage && !message) {
+                  ws.destroyCompletely();
+                  this.config.LOG.error && log.warning(id, `WebSocket closed before doing anything: ${url}`);
+                  retry();
+                };
+              };
+            } catch (error) {
+              this.config.LOG.error && log.error(id, "error in onclose", error);
+            };
+          };
+
+          ws.onmessage = (event) => {
+            opened = true; message = true;
+            try {
+              onmessage && onmessage(ws, event);
+            } catch (error) {
+              this.config.LOG.error && log.error(id, "error in onmessage", error);
+            };
+          };
+          return;
+        } catch (error) {
+          if (attempts >= this.maxRetries) {
+            reject(new Error(`Failed to connect to ${url} after ${this.maxRetries} attempts: ${error.message}`));
+          };
+        };
+      };
+      retry();
+    });
+  }
+
+  createProxyRequest(url, options) {
+    return new ProxyRequest(this, url, options);
+  }
+}
```
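`RequestClient.fetch` above wraps each attempt in an `AbortController` timeout and recurses up to `maxRetries`. A standalone sketch of that timeout-plus-retry pattern, using only the global `fetch` available in Node 18+ and no proxy agent; `fetchWithTimeout` and its default values are illustrative, not part of the package:

```js
// Illustrative sketch of the timeout-plus-retry pattern used by RequestClient.fetch.
// Relies only on built-in fetch and AbortController; values are example defaults.
async function fetchWithTimeout(url, { timeoutMs = 15000, maxRetries = 3, ...options } = {}) {
  let lastError;
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    const controller = new AbortController();
    const timer = setTimeout(() => controller.abort(), timeoutMs);
    try {
      const response = await fetch(url, { ...options, signal: controller.signal });
      if (response.ok) return response; // success: stop retrying
      lastError = new Error(`attempt ${attempt} failed with status ${response.status}`);
    } catch (error) {
      lastError = error; // network error or abort
    } finally {
      clearTimeout(timer);
    }
  }
  throw lastError;
}
```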
package/src/providers/IPVanish.js
ADDED
```diff
@@ -0,0 +1,36 @@
+import { SocksProxyAgent } from 'socks-proxy-agent';
+import { IPVANISH_HOSTS } from '../constants.js'; // Moved big list to constants
+import scrambled from 'puppyscrambled'; // For Math.shuffleArray
+
+export default class IPVanish {
+  constructor(config) {
+    this.config = config;
+    this.hosts = [...IPVANISH_HOSTS];
+    this.hostUsage = {};
+  }
+
+  getHost(url) {
+    if (!this.hostUsage[url]) {
+      this.hostUsage[url] = scrambled.shuffleArray(JSON.parse(JSON.stringify(this.hosts)));
+    }
+    const takenHost = this.hostUsage[url].shift();
+    this.hostUsage[url].push(takenHost);
+    return takenHost;
+  }
+
+  getAgent(url) {
+    const conf = this.config.ipvanish;
+    if (!conf || !conf.use) return null;
+
+    if (conf.reducedmode && !(url.includes("googleapis") || url.includes("/game/"))) {
+      return null;
+    }
+
+    const creds = `${conf.creds.username}:${conf.creds.password}`;
+    const socksProxy = `socks5://${creds}@${this.getHost(url)}:1080`;
+
+    const agent = new SocksProxyAgent(socksProxy);
+    agent.USINGSCRAPED = socksProxy;
+    return agent;
+  }
+}
```
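`IPVanish.getHost` keeps a shuffled copy of the host list per target URL and rotates it by shifting a host off the front and pushing it back on the end. A dependency-free sketch of that rotation, with a plain Fisher-Yates shuffle standing in for `puppyscrambled.shuffleArray`:

```js
// Minimal stand-in for the per-URL host rotation used by IPVanish.getHost.
// shuffle() is a generic Fisher-Yates; the real code delegates to puppyscrambled.
function shuffle(items) {
  const copy = [...items];
  for (let i = copy.length - 1; i > 0; i--) {
    const j = Math.floor(Math.random() * (i + 1));
    [copy[i], copy[j]] = [copy[j], copy[i]];
  }
  return copy;
}

const hostUsage = {};
function nextHost(url, hosts) {
  if (!hostUsage[url]) hostUsage[url] = shuffle(hosts); // one shuffled queue per URL
  const host = hostUsage[url].shift(); // take from the front...
  hostUsage[url].push(host);           // ...and requeue at the back
  return host;
}
```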
package/src/providers/Scraper.js
ADDED
```diff
@@ -0,0 +1,144 @@
+import fs from 'node:fs';
+import path from 'path';
+import { Worker } from 'worker_threads';
+import { SocksProxyAgent } from 'socks-proxy-agent';
+import { HttpProxyAgent } from 'http-proxy-agent';
+import { HttpsProxyAgent } from 'https-proxy-agent';
+import log from 'puppylog';
+import id from 'puppyid';
+import { getLastSavedTimestamp } from 'puppymisc';
+import scrambled from 'puppyscrambled';
+
+export default class Scraper {
+  constructor(config, proxyDB) {
+    this.config = config;
+    this.proxyDB = proxyDB; // Reference to ProxyDatabase instance
+    this.hosts = [];
+    this.hostUsage = {};
+
+    // Paths
+    this.storeDir = this.config.storeDir || './store';
+    this.outputPath = path.join(this.storeDir, 'scraped_proxies.json');
+  }
+
+  createAgent(proxyUrl) {
+    if (proxyUrl.startsWith('socks')) return new SocksProxyAgent(proxyUrl);
+    if (proxyUrl.startsWith('http:')) return new HttpProxyAgent(proxyUrl);
+    if (proxyUrl.startsWith('https:')) return new HttpsProxyAgent(proxyUrl);
+    return null;
+  }
+
+  genHostsScraper() {
+    try {
+      if (!fs.existsSync(this.outputPath)) return;
+      const data = fs.readFileSync(this.outputPath, 'utf8');
+      let newHosts = JSON.parse(data);
+
+      if (newHosts.length < 20) {
+        this.hosts = [...new Set(this.hosts.concat(newHosts))];
+        console.log("Scraper hosts merged!", this.hosts.length);
+      } else {
+        this.hosts = newHosts;
+        console.log("Scraper hosts updated!", this.hosts.length);
+      }
+      this.hostUsage = {};
+    } catch (error) {
+      console.error("Error loading scraper hosts:", error);
+      this.hosts = [];
+    }
+  }
+
+  // Alias for init
+  async loadScrapedProxies() {
+    this.genHostsScraper();
+  }
+
+  getHostAny(url = '') {
+    if (!this.hosts || this.hosts.length === 0) {
+      log.error(id, "No hosts available from scraper! IP would be exposed!");
+      // In a library, we shouldn't process.exit. We throw.
+      throw new Error("No hosts available from scraper");
+    }
+
+    if (!this.hostUsage[url]) {
+      this.hostUsage[url] = scrambled.shuffleArray(JSON.parse(JSON.stringify(this.hosts)));
+    }
+
+    const takenHost = this.hostUsage[url].shift();
+    this.hostUsage[url].push(takenHost);
+    return takenHost;
+  }
+
+  getHost(url = '', type = "socks") {
+    let max = 10000;
+    let count = 0;
+    while (count < max) {
+      const host = this.getHostAny(url);
+      if (host.startsWith(type)) {
+        return host;
+      }
+      count++;
+    }
+    log.error(id, `No matching host found that starts with '${type}'`);
+    if (type.startsWith("http")) {
+      return this.getHost(url, "socks");
+    }
+    return null;
+  }
+
+  getAgent(url = '', type = "socks") {
+    const conf = this.config.scraper;
+    if (conf && conf.use && this.hosts && this.hosts.length > 0) {
+      if (conf.reducedmode && !(url.includes("googleapis") || url.includes("/game/"))) {
+        return null;
+      }
+      const socksProxy = this.getHost(url, type);
+      if (!socksProxy) return null;
+
+      const agent = this.createAgent(socksProxy);
+      agent.USINGSCRAPED = socksProxy;
+      return agent;
+    }
+    return null;
+  }
+
+  // Worker Orchestration
+  async runCollector(force = false) {
+    let lastSaved = (Date.now() - getLastSavedTimestamp(this.outputPath)) / 1000;
+    if (lastSaved && !force) {
+      console.log("lastSaved", lastSaved);
+      let timeBetweenScrapes = this.config.scraper.timeBetweenScrapes * 60; // 25 mins
+      if (lastSaved < timeBetweenScrapes) {
+        console.log(`Proxies saved < ${timeBetweenScrapes}s ago, skipping.`);
+        return;
+      }
+    }
+
+    return new Promise((resolve, reject) => {
+      try {
+        // Pointing to the worker file.
+        // Note: In a real npm package, ensure 'src/workers/collector.js' is included
+        const workerUrl = new URL('../workers/collector.js', import.meta.url);
+        const worker = new Worker(workerUrl);
+
+        worker.postMessage(["deploy", { config: this.config }]);
+
+        worker.on('message', async (message) => {
+          worker.terminate();
+          // The worker returns raw proxies, we process them into DB
+          await this.proxyDB.dealWithProxies(message, this.outputPath);
+          this.genHostsScraper(); // Reload memory
+          resolve();
+        });
+
+        worker.on('error', (error) => {
+          console.error("Worker error:", error);
+          reject(error);
+        });
+      } catch (error) {
+        console.error("Proxy collector launch error:", error);
+        reject(error);
+      }
+    });
+  }
+}
```
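`runCollector` above offloads scraping to a `worker_threads` worker and resolves on the first message the worker sends back. A minimal sketch of that one-shot round trip; the worker path and message payload here are placeholders for illustration, not the package's actual collector protocol:

```js
// Illustrative one-shot worker round trip in the style of Scraper.runCollector.
// './collector.js' and the payload shape are placeholders for this sketch.
import { Worker } from 'node:worker_threads';

function runWorkerOnce(workerUrl, payload) {
  return new Promise((resolve, reject) => {
    const worker = new Worker(workerUrl);
    worker.postMessage(payload);
    worker.once('message', (result) => {
      worker.terminate(); // one-shot: stop the worker after its first reply
      resolve(result);
    });
    worker.once('error', reject);
  });
}

// const proxies = await runWorkerOnce(new URL('./collector.js', import.meta.url), ['deploy', { config }]);
```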
package/src/utils.js
ADDED
```diff
@@ -0,0 +1,12 @@
+import { SocksProxyAgent } from 'socks-proxy-agent';
+import { HttpProxyAgent } from 'http-proxy-agent';
+import { HttpsProxyAgent } from 'https-proxy-agent';
+
+export function createBasicAgent(proxyUrl) {
+  if (!proxyUrl) return null;
+
+  if (proxyUrl.startsWith("socks")) return new SocksProxyAgent(proxyUrl);
+  if (proxyUrl.startsWith("https")) return new HttpsProxyAgent(proxyUrl);
+  if (proxyUrl.startsWith("http")) return new HttpProxyAgent(proxyUrl);
+  return null;
+}
```
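`createBasicAgent` simply maps the proxy URL scheme to the matching agent class. A hedged usage sketch with Node's built-in `https` module; the proxy URL shown is a placeholder, not a real endpoint:

```js
// Example of routing a plain https request through an agent built by createBasicAgent.
// 'socks5://127.0.0.1:1080' is a placeholder proxy address.
import https from 'node:https';
import { createBasicAgent } from './utils.js';

const agent = createBasicAgent('socks5://127.0.0.1:1080');
https.get('https://example.com', agent ? { agent } : {}, (res) => {
  console.log('status:', res.statusCode);
  res.resume(); // drain the response so the socket is released
});
```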