rezo 1.0.43 → 1.0.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/index.cjs +6 -6
- package/dist/cache/index.cjs +9 -15
- package/dist/cache/index.js +0 -3
- package/dist/crawler/addon/decodo/index.cjs +1 -0
- package/dist/crawler/addon/decodo/index.js +1 -0
- package/dist/crawler/crawler-options.cjs +1 -0
- package/dist/crawler/crawler-options.js +1 -0
- package/dist/{plugin → crawler}/crawler.cjs +392 -32
- package/dist/{plugin → crawler}/crawler.js +392 -32
- package/dist/crawler/index.cjs +40 -0
- package/dist/{plugin → crawler}/index.js +4 -2
- package/dist/crawler/plugin/file-cacher.cjs +19 -0
- package/dist/crawler/plugin/file-cacher.js +19 -0
- package/dist/crawler/plugin/index.cjs +1 -0
- package/dist/crawler/plugin/index.js +1 -0
- package/dist/crawler/plugin/navigation-history.cjs +43 -0
- package/dist/crawler/plugin/navigation-history.js +43 -0
- package/dist/crawler/plugin/robots-txt.cjs +2 -0
- package/dist/crawler/plugin/robots-txt.js +2 -0
- package/dist/crawler/plugin/url-store.cjs +18 -0
- package/dist/crawler/plugin/url-store.js +18 -0
- package/dist/crawler.d.ts +315 -172
- package/dist/entries/crawler.cjs +5 -5
- package/dist/entries/crawler.js +2 -2
- package/dist/index.cjs +27 -27
- package/dist/internal/agents/index.cjs +10 -10
- package/dist/proxy/index.cjs +4 -4
- package/dist/queue/index.cjs +8 -8
- package/dist/responses/universal/index.cjs +11 -11
- package/package.json +2 -6
- package/dist/cache/file-cacher.cjs +0 -270
- package/dist/cache/file-cacher.js +0 -267
- package/dist/cache/navigation-history.cjs +0 -298
- package/dist/cache/navigation-history.js +0 -296
- package/dist/cache/url-store.cjs +0 -294
- package/dist/cache/url-store.js +0 -291
- package/dist/plugin/addon/decodo/index.cjs +0 -1
- package/dist/plugin/addon/decodo/index.js +0 -1
- package/dist/plugin/crawler-options.cjs +0 -1
- package/dist/plugin/crawler-options.js +0 -1
- package/dist/plugin/index.cjs +0 -36
- /package/dist/{plugin → crawler}/addon/decodo/options.cjs +0 -0
- /package/dist/{plugin → crawler}/addon/decodo/options.js +0 -0
- /package/dist/{plugin → crawler}/addon/decodo/types.cjs +0 -0
- /package/dist/{plugin → crawler}/addon/decodo/types.js +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/index.cjs +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/index.js +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/options.cjs +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/options.js +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/types.cjs +0 -0
- /package/dist/{plugin → crawler}/addon/oxylabs/types.js +0 -0
- /package/dist/{plugin → crawler}/scraper.cjs +0 -0
- /package/dist/{plugin → crawler}/scraper.js +0 -0
package/dist/cache/url-store.js
DELETED
|
@@ -1,291 +0,0 @@
|
|
|
1
|
-
import fs from "node:fs";
|
|
2
|
-
import path from "node:path";
|
|
3
|
-
import { createHash } from "node:crypto";
|
|
4
|
-
/**
 * Identifies the JavaScript runtime executing this module.
 *
 * The `Bun` global is checked before the `Deno` global; when neither is
 * defined the runtime is reported as Node.
 *
 * @returns {"bun" | "deno" | "node"} Name of the detected runtime.
 */
function detectRuntime() {
  const hasGlobal = (name) => typeof globalThis[name] !== "undefined";
  if (hasGlobal("Bun")) {
    return "bun";
  }
  return hasGlobal("Deno") ? "deno" : "node";
}
|
|
13
|
-
/**
 * Opens the SQLite database at `dbPath` and wraps the runtime-specific driver
 * behind a small facade with `run`, `get`, `all` and `close`.
 *
 * - Bun uses `bun:sqlite`.
 * - Deno and Node both use `DatabaseSync` from `node:sqlite`. The previous
 *   Deno branch destructured a `Database` export that `node:sqlite` does not
 *   provide, so it always failed, and it passed bound parameters to `exec()`,
 *   which does not accept them.
 *
 * @param {string} dbPath - Path of the database file handed to the driver.
 * @returns {Promise<{
 *   run: (sql: string, ...params: unknown[]) => unknown,
 *   get: (sql: string, ...params: unknown[]) => unknown,
 *   all: (sql: string, ...params: unknown[]) => unknown[],
 *   close: () => void
 * }>} Facade over the opened database.
 * @throws {Error} When the SQLite module for the current runtime cannot be
 *   loaded or the database cannot be opened.
 */
async function createDatabase(dbPath) {
  const runtime = detectRuntime();

  if (runtime === "bun") {
    const { Database } = await import("bun:sqlite");
    const db = new Database(dbPath);
    return {
      run: (sql, ...params) => db.run(sql, ...params),
      get: (sql, ...params) => db.query(sql).get(...params),
      all: (sql, ...params) => db.query(sql).all(...params),
      close: () => db.close()
    };
  }

  let sqlite;
  try {
    sqlite = await import("node:sqlite");
  } catch (err) {
    if (runtime === "deno") {
      // Keep the historical message, but preserve the underlying failure.
      throw new Error("Deno SQLite support requires Node.js compatibility mode", { cause: err });
    }
    throw err;
  }

  const db = new sqlite.DatabaseSync(dbPath);
  return {
    run: (sql, ...params) => {
      // exec() cannot bind parameters, so parameterised statements are prepared.
      if (params.length === 0) {
        db.exec(sql);
      } else {
        db.prepare(sql).run(...params);
      }
    },
    get: (sql, ...params) => db.prepare(sql).get(...params),
    all: (sql, ...params) => db.prepare(sql).all(...params),
    close: () => db.close()
  };
}
|
|
66
|
-
|
|
67
|
-
/**
 * SQLite-backed record of visited URLs with per-entry expiry.
 *
 * Rows live in a single `urls` table keyed by URL (or by its SHA-256 hex
 * digest when `hashUrls` is enabled) and carry a visit timestamp, an expiry
 * timestamp, a namespace and optional JSON metadata.
 *
 * Once the store is closed, `set()` throws and every other operation returns
 * its "empty" value (`false`, `null`, `0`, `[]`, an empty `Set`, zeroed stats).
 */
export class UrlStore {
  /** Database facade from createDatabase(); null before initialize() and after close(). */
  db = null;
  /** Resolved options (defaults applied). */
  options;
  /** Absolute directory containing the database file. */
  storeDir;
  /** Absolute path of the database file. */
  dbPath;
  closed = false;
  /** In-flight or completed initialization; reset to null when it fails. */
  initPromise = null;

  /**
   * @param {object} [options]
   * @param {string} [options.storeDir="./url-store"] - Directory for the database file (created if missing).
   * @param {string} [options.dbFileName="urls.db"] - Database file name.
   * @param {number} [options.ttl=604800000] - Default entry lifetime in milliseconds.
   * @param {number} [options.maxUrls=0] - Maximum number of rows; 0 disables eviction.
   * @param {boolean} [options.hashUrls=false] - Store SHA-256 digests instead of raw URLs.
   */
  constructor(options = {}) {
    this.options = {
      storeDir: options.storeDir || "./url-store",
      dbFileName: options.dbFileName || "urls.db",
      ttl: options.ttl || 604800000,
      maxUrls: options.maxUrls ?? 0,
      hashUrls: options.hashUrls ?? false
    };
    this.storeDir = path.resolve(this.options.storeDir);
    this.dbPath = path.join(this.storeDir, this.options.dbFileName);
    if (!fs.existsSync(this.storeDir)) {
      fs.mkdirSync(this.storeDir, { recursive: true });
    }
  }

  /**
   * Builds a store and waits for its database to be ready.
   * @param {object} [options] - Same options as the constructor.
   * @returns {Promise<UrlStore>}
   */
  static async create(options = {}) {
    const store = new UrlStore(options);
    await store.initialize();
    return store;
  }

  /**
   * Opens the database and creates the table and indexes if needed.
   * Concurrent callers share one attempt. A failed attempt is forgotten so a
   * later call can retry instead of replaying the cached rejection.
   * @returns {Promise<void>}
   */
  async initialize() {
    if (this.initPromise) {
      return this.initPromise;
    }
    const pending = (async () => {
      const db = await createDatabase(this.dbPath);
      try {
        db.run(`
        CREATE TABLE IF NOT EXISTS urls (
          url TEXT PRIMARY KEY,
          visitedAt INTEGER NOT NULL,
          expiresAt INTEGER NOT NULL,
          namespace TEXT DEFAULT 'default',
          metadata TEXT
        )
      `);
        db.run("CREATE INDEX IF NOT EXISTS idx_expires ON urls(expiresAt)");
        db.run("CREATE INDEX IF NOT EXISTS idx_namespace ON urls(namespace)");
        db.run("CREATE INDEX IF NOT EXISTS idx_visited ON urls(visitedAt)");
      } catch (err) {
        // Do not leak the handle when the schema cannot be created.
        try {
          db.close();
        } catch {
          // Best effort: the schema error below is the one worth reporting.
        }
        throw err;
      }
      this.db = db;
    })();
    this.initPromise = pending;
    // Callers still observe the rejection through `pending`; this side branch
    // only clears the cache so the next call starts a fresh attempt.
    pending.catch(() => {
      if (this.initPromise === pending) {
        this.initPromise = null;
      }
    });
    return pending;
  }

  /**
   * Maps a URL to its primary-key value.
   * @param {string} url
   * @returns {string} The URL itself, or its SHA-256 hex digest when `hashUrls` is set.
   */
  getUrlKey(url) {
    if (this.options.hashUrls) {
      return createHash("sha256").update(url).digest("hex");
    }
    return url;
  }

  /**
   * Inserts or replaces the entry for `url`, then evicts the oldest rows (by
   * visit time) if `maxUrls` is exceeded.
   * @param {string} url
   * @param {string} [namespace="default"]
   * @param {*} [metadata] - JSON-serialisable value; `null`/`undefined` store NULL.
   * @param {number} [ttl] - Lifetime in milliseconds; defaults to `options.ttl`.
   * @returns {Promise<void>}
   * @throws {Error} "UrlStore is closed" when the store has been closed.
   */
  async set(url, namespace = "default", metadata, ttl) {
    if (this.closed) {
      throw new Error("UrlStore is closed");
    }
    await this.initialize();
    if (!this.db) {
      // close() finished while this call was waiting for initialization.
      throw new Error("UrlStore is closed");
    }
    const key = this.getUrlKey(url);
    const now = Date.now();
    const expiresAt = now + (ttl ?? this.options.ttl);
    // Only null/undefined mean "no metadata"; 0, false and "" are real values.
    const metaStr = metadata == null ? null : (JSON.stringify(metadata) ?? null);
    this.db.run(`
      INSERT OR REPLACE INTO urls (url, visitedAt, expiresAt, namespace, metadata)
      VALUES (?, ?, ?, ?, ?)
    `, key, now, expiresAt, namespace, metaStr);
    if (this.options.maxUrls > 0) {
      const count = this.db.get("SELECT COUNT(*) as cnt FROM urls");
      if (count && count.cnt > this.options.maxUrls) {
        const excess = count.cnt - this.options.maxUrls;
        this.db.run(`
          DELETE FROM urls WHERE url IN (
            SELECT url FROM urls ORDER BY visitedAt ASC LIMIT ?
          )
        `, excess);
      }
    }
  }

  /**
   * Reports whether an unexpired entry exists for `url`.
   * @param {string} url
   * @param {string} [namespace] - When given, the entry must be in this namespace.
   * @returns {Promise<boolean>}
   */
  async has(url, namespace) {
    if (this.closed) {
      return false;
    }
    await this.initialize();
    if (!this.db) {
      return false;
    }
    const key = this.getUrlKey(url);
    const now = Date.now();
    const entry = namespace
      ? this.db.get("SELECT url, expiresAt FROM urls WHERE url = ? AND namespace = ?", key, namespace)
      : this.db.get("SELECT url, expiresAt FROM urls WHERE url = ?", key);
    if (!entry) {
      return false;
    }
    return entry.expiresAt >= now;
  }

  /**
   * Batch form of {@link UrlStore#has}; queries in chunks of 100 URLs.
   * @param {string[]} urls
   * @param {string} [namespace]
   * @returns {Promise<Set<string>>} The input URLs that have an unexpired entry.
   */
  async hasMany(urls, namespace) {
    if (this.closed) {
      return new Set();
    }
    await this.initialize();
    const result = new Set();
    if (!this.db) {
      return result;
    }
    const now = Date.now();
    const batchSize = 100;
    for (let i = 0; i < urls.length; i += batchSize) {
      const batch = urls.slice(i, i + batchSize);
      // key -> original URL, so each returned row maps back without scanning.
      const urlByKey = new Map();
      for (const url of batch) {
        const key = this.getUrlKey(url);
        if (!urlByKey.has(key)) {
          urlByKey.set(key, url);
        }
      }
      const keys = [...urlByKey.keys()];
      const placeholders = keys.map(() => "?").join(",");
      const entries = namespace
        ? this.db.all(`SELECT url, expiresAt FROM urls WHERE url IN (${placeholders}) AND namespace = ?`, ...keys, namespace)
        : this.db.all(`SELECT url, expiresAt FROM urls WHERE url IN (${placeholders})`, ...keys);
      for (const entry of entries) {
        if (entry.expiresAt >= now && urlByKey.has(entry.url)) {
          result.add(urlByKey.get(entry.url));
        }
      }
    }
    return result;
  }

  /**
   * Reads and parses the metadata stored for `url`.
   * @param {string} url
   * @param {string} [namespace]
   * @returns {Promise<*>} Parsed metadata, or null when absent or unparsable.
   */
  async getMetadata(url, namespace) {
    if (this.closed) {
      return null;
    }
    await this.initialize();
    if (!this.db) {
      return null;
    }
    const key = this.getUrlKey(url);
    const entry = namespace
      ? this.db.get("SELECT metadata FROM urls WHERE url = ? AND namespace = ?", key, namespace)
      : this.db.get("SELECT metadata FROM urls WHERE url = ?", key);
    if (!entry?.metadata) {
      return null;
    }
    try {
      return JSON.parse(entry.metadata);
    } catch {
      // Corrupt metadata is treated the same as missing metadata.
      return null;
    }
  }

  /**
   * Deletes the entry for `url`.
   * @param {string} url
   * @param {string} [namespace]
   * @returns {Promise<boolean>} false when the store is closed, otherwise true
   *   (whether or not a row existed).
   */
  async delete(url, namespace) {
    if (this.closed) {
      return false;
    }
    await this.initialize();
    if (!this.db) {
      return false;
    }
    const key = this.getUrlKey(url);
    if (namespace) {
      this.db.run("DELETE FROM urls WHERE url = ? AND namespace = ?", key, namespace);
    } else {
      this.db.run("DELETE FROM urls WHERE url = ?", key);
    }
    return true;
  }

  /**
   * Removes every entry, or only those of `namespace` when given.
   * @param {string} [namespace]
   * @returns {Promise<void>}
   */
  async clear(namespace) {
    if (this.closed) {
      return;
    }
    await this.initialize();
    if (!this.db) {
      return;
    }
    if (namespace) {
      this.db.run("DELETE FROM urls WHERE namespace = ?", namespace);
    } else {
      this.db.run("DELETE FROM urls");
    }
  }

  /**
   * Deletes expired entries.
   * @returns {Promise<number>} Difference between the row counts before and after.
   */
  async cleanup() {
    if (this.closed) {
      return 0;
    }
    await this.initialize();
    if (!this.db) {
      return 0;
    }
    const now = Date.now();
    const countBefore = this.db.get("SELECT COUNT(*) as cnt FROM urls");
    this.db.run("DELETE FROM urls WHERE expiresAt < ?", now);
    const countAfter = this.db.get("SELECT COUNT(*) as cnt FROM urls");
    return (countBefore?.cnt || 0) - (countAfter?.cnt || 0);
  }

  /**
   * Lists the stored keys of a namespace (digests when `hashUrls` is enabled).
   * @param {string} [namespace="default"]
   * @param {boolean} [includeExpired=false]
   * @returns {Promise<string[]>}
   */
  async getAll(namespace = "default", includeExpired = false) {
    if (this.closed) {
      return [];
    }
    await this.initialize();
    if (!this.db) {
      return [];
    }
    const now = Date.now();
    const entries = includeExpired
      ? this.db.all("SELECT url FROM urls WHERE namespace = ?", namespace)
      : this.db.all("SELECT url FROM urls WHERE namespace = ? AND expiresAt >= ?", namespace, now);
    return entries.map((e) => e.url);
  }

  /**
   * Counts entries.
   * @param {string} [namespace] - Restricts `total` and `expired` to one namespace.
   * @returns {Promise<{ total: number, expired: number, namespaces: number }>}
   *   `namespaces` is always the number of distinct namespaces in the table.
   */
  async stats(namespace) {
    const empty = { total: 0, expired: 0, namespaces: 0 };
    if (this.closed) {
      return empty;
    }
    await this.initialize();
    if (!this.db) {
      return empty;
    }
    const now = Date.now();
    let total;
    let expired;
    if (namespace) {
      total = this.db.get("SELECT COUNT(*) as cnt FROM urls WHERE namespace = ?", namespace);
      expired = this.db.get("SELECT COUNT(*) as cnt FROM urls WHERE namespace = ? AND expiresAt < ?", namespace, now);
    } else {
      total = this.db.get("SELECT COUNT(*) as cnt FROM urls");
      expired = this.db.get("SELECT COUNT(*) as cnt FROM urls WHERE expiresAt < ?", now);
    }
    const namespaceCount = this.db.get("SELECT COUNT(DISTINCT namespace) as cnt FROM urls");
    return {
      total: total?.cnt || 0,
      expired: expired?.cnt || 0,
      namespaces: namespaceCount?.cnt || 0
    };
  }

  /**
   * Marks the store closed and releases the database handle. Safe to call
   * more than once, and safe to call after (or during) a failed initialization.
   * @returns {Promise<void>}
   */
  async close() {
    if (this.closed) {
      return;
    }
    this.closed = true;
    try {
      await this.initPromise;
    } catch {
      // Initialization failed, so there is no handle to release.
    }
    if (this.db) {
      try {
        this.db.close();
      } catch {
        // Best effort: closing must never throw to the caller.
      }
      this.db = null;
    }
  }

  /** @returns {boolean} Whether close() has been called. */
  get isClosed() {
    return this.closed;
  }

  /** @returns {string} Absolute path of the database file. */
  get path() {
    return this.dbPath;
  }
}
|
|
291
|
-
export default UrlStore;
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
const { Rezo } = require("../../../core/rezo.cjs");
const decodoOptions = require("./options.cjs");

// Re-export the option constants and helpers. Kept as explicit assignments so
// static CommonJS export detection keeps seeing each name.
exports.DECODO_DEVICE_TYPES = decodoOptions.DECODO_DEVICE_TYPES;
exports.DECODO_HEADLESS_MODES = decodoOptions.DECODO_HEADLESS_MODES;
exports.DECODO_COMMON_LOCALES = decodoOptions.DECODO_COMMON_LOCALES;
exports.DECODO_COMMON_COUNTRIES = decodoOptions.DECODO_COMMON_COUNTRIES;
exports.DECODO_EUROPEAN_COUNTRIES = decodoOptions.DECODO_EUROPEAN_COUNTRIES;
exports.DECODO_ASIAN_COUNTRIES = decodoOptions.DECODO_ASIAN_COUNTRIES;
exports.DECODO_US_STATES = decodoOptions.DECODO_US_STATES;
exports.DECODO_COMMON_CITIES = decodoOptions.DECODO_COMMON_CITIES;
exports.getRandomDeviceType = decodoOptions.getRandomDeviceType;
exports.getRandomLocale = decodoOptions.getRandomLocale;
exports.getRandomCountry = decodoOptions.getRandomCountry;
exports.getRandomCity = decodoOptions.getRandomCity;
exports.generateSessionId = decodoOptions.generateSessionId;

const DECODO_ENDPOINT = "https://scraper-api.smartproxy.com/v2/scrape";

/**
 * Client for the Decodo scraping endpoint. Every request is a JSON POST to
 * DECODO_ENDPOINT carrying an HTTP Basic Authorization header.
 */
class Decodo {
  config;
  http;
  authHeader;

  /**
   * @param {object} config - Credentials plus optional request defaults.
   * @throws {Error} When `username` or `password` is missing.
   */
  constructor(config) {
    if (!config.username || !config.password) {
      throw Error("Decodo username and password are required");
    }
    this.config = {
      username: config.username,
      password: config.password,
      deviceType: config.deviceType ?? "desktop",
      locale: config.locale ?? "en-US",
      country: config.country ?? "",
      state: config.state ?? "",
      city: config.city ?? "",
      headless: config.headless ?? undefined,
      headers: config.headers ?? {},
      sessionId: config.sessionId ?? "",
      sessionDuration: config.sessionDuration ?? 0,
      javascript: config.javascript ?? "",
      javascriptWait: config.javascriptWait ?? 0,
      waitForCss: config.waitForCss ?? "",
      timeout: config.timeout ?? 120000
    };
    this.http = new Rezo({ baseURL: DECODO_ENDPOINT, timeout: this.config.timeout });
    const credentials = Buffer.from(`${config.username}:${config.password}`).toString("base64");
    this.authHeader = `Basic ${credentials}`;
  }

  /**
   * Scrapes one URL.
   * @param {string} url - Page to scrape.
   * @param {object} [options] - Per-request overrides merged over the instance config.
   * @returns {Promise<object>} Normalised view of the first result; the full response is under `raw`.
   * @throws {Error} When the response carries `error` or has no results.
   */
  async scrape(url, options) {
    const effective = {
      ...this.config,
      ...options,
      headers: { ...this.config.headers, ...(options?.headers || {}) }
    };
    const requestBody = this.buildRequestBody(url, effective);
    const response = await this.http.postJson(DECODO_ENDPOINT, requestBody, {
      headers: { Authorization: this.authHeader, "Content-Type": "application/json" }
    });
    const payload = response.data;
    if (payload.error) {
      throw Error(`Decodo API error: ${payload.error} (${payload.error_code || "unknown"})`);
    }
    if (!payload.results || payload.results.length === 0) {
      throw Error("Decodo API returned no results");
    }
    const first = payload.results[0];
    return {
      statusCode: first.status_code,
      url: first.url,
      content: first.body,
      cookies: first.cookies || [],
      headers: first.headers || {},
      taskId: payload.id,
      rendered: !!effective.headless,
      country: effective.country || undefined,
      city: effective.city || undefined,
      state: effective.state || undefined,
      deviceType: effective.deviceType,
      contentType: first.content_type,
      contentLength: first.content_length,
      raw: payload
    };
  }

  /**
   * Scrapes URLs one after another, pausing between consecutive requests.
   * @param {string[]} urls
   * @param {object} [options] - Overrides applied to every request.
   * @param {number} [delay=1000] - Pause in milliseconds; values <= 0 disable it.
   * @returns {Promise<object[]>} Results in input order.
   */
  async scrapeMany(urls, options, delay = 1000) {
    const results = [];
    const lastIndex = urls.length - 1;
    for (let index = 0; index < urls.length; index++) {
      results.push(await this.scrape(urls[index], options));
      if (index < lastIndex && delay > 0) {
        await new Promise((resolve) => setTimeout(resolve, delay));
      }
    }
    return results;
  }

  /**
   * Scrapes URLs under one freshly generated session id, 500 ms apart.
   * @param {string[]} urls
   * @param {object} [options]
   * @param {number} [sessionDuration=10] - Sent as the session duration.
   * @returns {Promise<object[]>}
   */
  async scrapeWithSession(urls, options, sessionDuration = 10) {
    const sessionId = `session_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`;
    return this.scrapeMany(urls, { ...options, sessionId, sessionDuration }, 500);
  }

  /**
   * Translates merged options into the snake_case request body; falsy options are omitted.
   * @param {string} url
   * @param {object} options
   * @returns {object}
   */
  buildRequestBody(url, options) {
    const body = { url, return_cookies: true, return_headers: true };
    if (options.deviceType) body.device_type = options.deviceType;
    if (options.headless) body.headless = options.headless;
    if (options.locale) body.locale = options.locale;
    if (options.country) body.country = options.country;
    if (options.state) body.state = options.state;
    if (options.city) body.city = options.city;
    if (options.sessionId) {
      body.session = options.sessionId;
      if (options.sessionDuration) body.session_duration = options.sessionDuration;
    }
    if (options.headers && Object.keys(options.headers).length > 0) {
      body.headers = options.headers;
    }
    if (options.javascript) {
      body.javascript = options.javascript;
      if (options.javascriptWait) body.javascript_wait = options.javascriptWait;
    }
    if (options.waitForCss) body.wait_for_css = options.waitForCss;
    return body;
  }

  /** @returns {object} Copy of the config with the password masked as "***". */
  getConfig() {
    return { ...this.config, password: "***" };
  }

  /**
   * @param {object} overrides
   * @returns {Decodo} New client built from this config plus `overrides`.
   */
  withConfig(overrides) {
    return new Decodo({ ...this.config, ...overrides });
  }

  /**
   * Performs a probe scrape of https://httpbin.org/ip.
   * @returns {Promise<boolean>} true on success.
   * @throws {Error} Wrapping the failure message.
   */
  async testConnection() {
    try {
      await this.scrape("https://httpbin.org/ip");
      return true;
    } catch (err) {
      throw Error(`Decodo connection test failed: ${err.message}`);
    }
  }
}

exports.Decodo = Decodo;
exports.default = Decodo;
module.exports = Object.assign(Decodo, exports);
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
import { Rezo } from "../../../core/rezo.js";

// Pass the option constants and helpers straight through.
export {
  DECODO_DEVICE_TYPES,
  DECODO_HEADLESS_MODES,
  DECODO_COMMON_LOCALES,
  DECODO_COMMON_COUNTRIES,
  DECODO_EUROPEAN_COUNTRIES,
  DECODO_ASIAN_COUNTRIES,
  DECODO_US_STATES,
  DECODO_COMMON_CITIES,
  getRandomDeviceType,
  getRandomLocale,
  getRandomCountry,
  getRandomCity,
  generateSessionId
} from "./options.js";

const DECODO_ENDPOINT = "https://scraper-api.smartproxy.com/v2/scrape";

/**
 * Client for the Decodo scraping endpoint. Every request is a JSON POST to
 * DECODO_ENDPOINT carrying an HTTP Basic Authorization header.
 */
class Decodo {
  config;
  http;
  authHeader;

  /**
   * @param {object} config - Credentials plus optional request defaults.
   * @throws {Error} When `username` or `password` is missing.
   */
  constructor(config) {
    if (!config.username || !config.password) {
      throw Error("Decodo username and password are required");
    }
    this.config = {
      username: config.username,
      password: config.password,
      deviceType: config.deviceType ?? "desktop",
      locale: config.locale ?? "en-US",
      country: config.country ?? "",
      state: config.state ?? "",
      city: config.city ?? "",
      headless: config.headless ?? undefined,
      headers: config.headers ?? {},
      sessionId: config.sessionId ?? "",
      sessionDuration: config.sessionDuration ?? 0,
      javascript: config.javascript ?? "",
      javascriptWait: config.javascriptWait ?? 0,
      waitForCss: config.waitForCss ?? "",
      timeout: config.timeout ?? 120000
    };
    this.http = new Rezo({ baseURL: DECODO_ENDPOINT, timeout: this.config.timeout });
    const credentials = Buffer.from(`${config.username}:${config.password}`).toString("base64");
    this.authHeader = `Basic ${credentials}`;
  }

  /**
   * Scrapes one URL.
   * @param {string} url - Page to scrape.
   * @param {object} [options] - Per-request overrides merged over the instance config.
   * @returns {Promise<object>} Normalised view of the first result; the full response is under `raw`.
   * @throws {Error} When the response carries `error` or has no results.
   */
  async scrape(url, options) {
    const effective = {
      ...this.config,
      ...options,
      headers: { ...this.config.headers, ...(options?.headers || {}) }
    };
    const requestBody = this.buildRequestBody(url, effective);
    const response = await this.http.postJson(DECODO_ENDPOINT, requestBody, {
      headers: { Authorization: this.authHeader, "Content-Type": "application/json" }
    });
    const payload = response.data;
    if (payload.error) {
      throw Error(`Decodo API error: ${payload.error} (${payload.error_code || "unknown"})`);
    }
    if (!payload.results || payload.results.length === 0) {
      throw Error("Decodo API returned no results");
    }
    const first = payload.results[0];
    return {
      statusCode: first.status_code,
      url: first.url,
      content: first.body,
      cookies: first.cookies || [],
      headers: first.headers || {},
      taskId: payload.id,
      rendered: !!effective.headless,
      country: effective.country || undefined,
      city: effective.city || undefined,
      state: effective.state || undefined,
      deviceType: effective.deviceType,
      contentType: first.content_type,
      contentLength: first.content_length,
      raw: payload
    };
  }

  /**
   * Scrapes URLs one after another, pausing between consecutive requests.
   * @param {string[]} urls
   * @param {object} [options] - Overrides applied to every request.
   * @param {number} [delay=1000] - Pause in milliseconds; values <= 0 disable it.
   * @returns {Promise<object[]>} Results in input order.
   */
  async scrapeMany(urls, options, delay = 1000) {
    const results = [];
    const lastIndex = urls.length - 1;
    for (let index = 0; index < urls.length; index++) {
      results.push(await this.scrape(urls[index], options));
      if (index < lastIndex && delay > 0) {
        await new Promise((resolve) => setTimeout(resolve, delay));
      }
    }
    return results;
  }

  /**
   * Scrapes URLs under one freshly generated session id, 500 ms apart.
   * @param {string[]} urls
   * @param {object} [options]
   * @param {number} [sessionDuration=10] - Sent as the session duration.
   * @returns {Promise<object[]>}
   */
  async scrapeWithSession(urls, options, sessionDuration = 10) {
    const sessionId = `session_${Date.now()}_${Math.random().toString(36).substring(2, 11)}`;
    return this.scrapeMany(urls, { ...options, sessionId, sessionDuration }, 500);
  }

  /**
   * Translates merged options into the snake_case request body; falsy options are omitted.
   * @param {string} url
   * @param {object} options
   * @returns {object}
   */
  buildRequestBody(url, options) {
    const body = { url, return_cookies: true, return_headers: true };
    if (options.deviceType) body.device_type = options.deviceType;
    if (options.headless) body.headless = options.headless;
    if (options.locale) body.locale = options.locale;
    if (options.country) body.country = options.country;
    if (options.state) body.state = options.state;
    if (options.city) body.city = options.city;
    if (options.sessionId) {
      body.session = options.sessionId;
      if (options.sessionDuration) body.session_duration = options.sessionDuration;
    }
    if (options.headers && Object.keys(options.headers).length > 0) {
      body.headers = options.headers;
    }
    if (options.javascript) {
      body.javascript = options.javascript;
      if (options.javascriptWait) body.javascript_wait = options.javascriptWait;
    }
    if (options.waitForCss) body.wait_for_css = options.waitForCss;
    return body;
  }

  /** @returns {object} Copy of the config with the password masked as "***". */
  getConfig() {
    return { ...this.config, password: "***" };
  }

  /**
   * @param {object} overrides
   * @returns {Decodo} New client built from this config plus `overrides`.
   */
  withConfig(overrides) {
    return new Decodo({ ...this.config, ...overrides });
  }

  /**
   * Performs a probe scrape of https://httpbin.org/ip.
   * @returns {Promise<boolean>} true on success.
   * @throws {Error} Wrapping the failure message.
   */
  async testConnection() {
    try {
      await this.scrape("https://httpbin.org/ip");
      return true;
    } catch (err) {
      throw Error(`Decodo connection test failed: ${err.message}`);
    }
  }
}

export { Decodo };
export default Decodo;
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
// The previous interop prelude snapshotted an empty namespace object into
// module.exports and only afterwards attached CrawlerOptions to that
// namespace, so the class never reached consumers. Exports are now set directly.
Object.defineProperty(exports, "__esModule", { value: true });

const { RezoQueue } = require("../queue/queue.cjs");
const { Oxylabs } = require("./addon/oxylabs/index.cjs");
const path = require("node:path");
const os = require("node:os");
const { Decodo } = require("./addon/decodo/index.cjs");

// Dotted hostname ending in an alphabetic TLD of at least two letters.
// The isHostName copy of this pattern contained a literal space and therefore
// could never match; both validators now share this one.
const HOSTNAME_PATTERN = /^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/;

/** Returns the configuration list that backs `type`; anything unrecognised maps to proxies. */
function entriesFor(options, type) {
  switch (type) {
    case "headers":
      return options.requestHeaders;
    case "limiters":
      return options.limiters;
    case "oxylabs":
      return options.oxylabs;
    case "decodo":
      return options.decodo;
    default:
      return options.proxies;
  }
}

/** Extracts the usable value (headers, queue, adapter or proxy) from one list entry. */
function payloadOf(type, entry) {
  switch (type) {
    case "headers":
      return entry.headers;
    case "limiters":
      return entry.pqueue;
    case "oxylabs":
    case "decodo":
      return entry.adaptar;
    default:
      return entry.proxy;
  }
}

/**
 * Crawler configuration: scalar request settings plus per-domain / global
 * lists of headers, proxies, rate limiters and Oxylabs / Decodo adapters.
 */
class CrawlerOptions {
  baseUrl;
  adapter;
  enableNavigationHistory;
  sessionId;
  rejectUnauthorized;
  userAgent;
  useRndUserAgent;
  timeout;
  maxRedirects;
  maxRetryAttempts;
  retryDelay;
  retryOnStatusCode;
  forceRevisit;
  retryWithoutProxyOnStatusCode;
  retryOnProxyError;
  maxRetryOnProxyError;
  allowRevisiting;
  enableCache;
  cacheTTL;
  cacheDir;
  throwFatalError;
  debug;
  oxylabs = [];
  decodo = [];
  proxies = [];
  limiters = [];
  requestHeaders = [];
  userAgents = generateUserAgents();

  /**
   * @param {object} [options] - Raw options; every scalar falls back to the
   *   default visible below. `headers`, `oxylabs`, `decodo`, `proxy` and
   *   `limiter` are `{ enable, ... }` groups registered only when enabled.
   */
  constructor(options = {}) {
    this.baseUrl = options.baseUrl || "";
    this.adapter = options.adapter ?? "http";
    this.enableNavigationHistory = options.enableNavigationHistory ?? false;
    this.sessionId = options.sessionId ?? `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    this.rejectUnauthorized = options.rejectUnauthorized ?? true;
    this.userAgent = options.userAgent;
    this.useRndUserAgent = options.useRndUserAgent ?? false;
    this.timeout = options.timeout ?? 30000;
    this.maxRedirects = options.maxRedirects ?? 10;
    this.maxRetryAttempts = options.maxRetryAttempts ?? 3;
    this.retryDelay = options.retryDelay ?? 0;
    this.retryOnStatusCode = options.retryOnStatusCode ?? [408, 429, 500, 502, 503, 504];
    this.forceRevisit = options.forceRevisit ?? false;
    this.retryWithoutProxyOnStatusCode = options.retryWithoutProxyOnStatusCode ?? [407, 403];
    this.retryOnProxyError = options.retryOnProxyError ?? true;
    this.maxRetryOnProxyError = options.maxRetryOnProxyError ?? 3;
    this.allowRevisiting = options.allowRevisiting ?? false;
    this.enableCache = options.enableCache ?? true;
    this.cacheTTL = options.cacheTTL ?? 604800000;
    this.cacheDir = options.cacheDir ?? path.join(os.tmpdir(), "uiniqhtt_cache");
    this.throwFatalError = options.throwFatalError ?? false;
    this.debug = options.debug ?? false;
    this._addHeaders(options.headers);
    this._addOxylabs(options.oxylabs);
    // Previously omitted, so Decodo settings passed to the constructor were dropped.
    this._addDecodo(options.decodo);
    this._addProxies(options.proxy);
    this._addLimiters(options.limiter);
  }

  /**
   * @param {"headers"|"limiters"|"oxylabs"|"decodo"|string} type - Any other value selects proxies.
   * @returns {Array} Distinct `domain` values configured for that list.
   */
  getConfiguredDomains(type) {
    const domains = entriesFor(this, type)
      .filter((entry) => entry.domain)
      .map((entry) => entry.domain);
    return [...new Set(domains)];
  }

  /**
   * Drops every entry whose `domain` equals `domain` from all lists.
   * @param {string|RegExp|Array} domain
   * @returns {this}
   */
  removeDomain(domain) {
    const keep = (entry) => !entry.domain || !this._domainsEqual(entry.domain, domain);
    this.requestHeaders = this.requestHeaders.filter(keep);
    this.proxies = this.proxies.filter(keep);
    this.limiters = this.limiters.filter(keep);
    this.oxylabs = this.oxylabs.filter(keep);
    this.decodo = this.decodo.filter(keep);
    return this;
  }

  /** Element-wise comparison for two arrays, strict equality otherwise. */
  _domainsEqual(a, b) {
    if (Array.isArray(a) && Array.isArray(b)) {
      return a.length === b.length && a.every((item, index) => item === b[index]);
    }
    return a === b;
  }

  /** @returns {object} Per-list `{ total, global, domainSpecific }` counts. */
  getConfigurationSummary() {
    const summarize = (list) => ({
      total: list.length,
      global: list.filter((entry) => entry.isGlobal).length,
      domainSpecific: list.filter((entry) => !entry.isGlobal && entry.domain).length
    });
    return {
      headers: summarize(this.requestHeaders),
      proxies: summarize(this.proxies),
      limiters: summarize(this.limiters),
      oxylabs: summarize(this.oxylabs),
      decodo: summarize(this.decodo)
    };
  }

  /** Registers header entries; entries without a domain/global flag or with no headers are skipped. */
  _addHeaders(group) {
    if (!group || !group.enable) return;
    for (const entry of group.httpHeaders) {
      const { domain, isGlobal, headers } = entry;
      if (!domain && !isGlobal) continue;
      if (!headers) continue;
      // The Headers check used to inspect the wrapper entry, so Headers
      // instances were counted as empty and dropped.
      const plain = headers instanceof Headers ? Object.fromEntries(headers.entries()) : headers;
      if (Object.keys(plain).length < 1) continue;
      this.requestHeaders.push({ domain, isGlobal, headers: plain });
    }
  }

  /** Registers proxy entries that carry a non-empty `proxy` object. */
  _addProxies(group) {
    if (!group || !group.enable) return;
    for (const { domain, isGlobal, proxy } of group.proxies) {
      if (!domain && !isGlobal) continue;
      if (!proxy || Object.keys(proxy).length < 1) continue;
      this.proxies.push({ domain, isGlobal, proxy });
    }
  }

  /** Registers limiter entries, building one RezoQueue per entry. */
  _addLimiters(group) {
    if (!group || !group.enable) return;
    for (const { domain, isGlobal, options } of group.limiters) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.limiters.push({ domain, isGlobal, pqueue: new RezoQueue(options) });
    }
  }

  /** Registers Oxylabs entries, building one adapter per entry. */
  _addOxylabs(group) {
    if (!group || !group.enable) return;
    for (const { domain, isGlobal, options } of group.labs) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      // "adaptar" (sic) is the property name read elsewhere; keep it.
      this.oxylabs.push({ domain, isGlobal, adaptar: new Oxylabs(options) });
    }
  }

  /** Registers Decodo entries, building one adapter per entry. */
  _addDecodo(group) {
    if (!group || !group.enable) return;
    for (const { domain, isGlobal, options } of group.labs) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.decodo.push({ domain, isGlobal, adaptar: new Decodo(options) });
    }
  }

  addHeaders(entry) {
    this._addHeaders({ enable: true, httpHeaders: [entry] });
    return this;
  }

  addProxy(entry) {
    this._addProxies({ enable: true, proxies: [entry] });
    return this;
  }

  addLimiter(entry) {
    this._addLimiters({ enable: true, limiters: [entry] });
    return this;
  }

  addOxylabs(entry) {
    this._addOxylabs({ enable: true, labs: [entry] });
    return this;
  }

  addDecodo(entry) {
    this._addDecodo({ enable: true, labs: [entry] });
    return this;
  }

  /** Removes every global entry from all lists. @returns {this} */
  clearGlobalConfigs() {
    const notGlobal = (entry) => !entry.isGlobal;
    if (Array.isArray(this.requestHeaders)) this.requestHeaders = this.requestHeaders.filter(notGlobal);
    if (Array.isArray(this.oxylabs)) this.oxylabs = this.oxylabs.filter(notGlobal);
    if (Array.isArray(this.decodo)) this.decodo = this.decodo.filter(notGlobal);
    if (Array.isArray(this.limiters)) this.limiters = this.limiters.filter(notGlobal);
    if (Array.isArray(this.proxies)) this.proxies = this.proxies.filter(notGlobal);
    return this;
  }

  /**
   * Resolves the configured value of `type` for `url`.
   * Domain-specific entries win; global entries are considered only when
   * `useGlobal` is truthy and nothing matched the domain.
   * @param {string} url - URL or bare hostname.
   * @param {"headers"|"limiters"|"oxylabs"|"decodo"|string} type - Any other value selects proxies.
   * @param {boolean} [useGlobal]
   * @param {boolean} [random] - Pick randomly among candidates instead of the first.
   * @returns {*} The matching value, or null.
   */
  getAdapter(url, type, useGlobal, random) {
    if (!this.getDomainName(url)) return null;
    const list = entriesFor(this, type);
    const pick = (candidates) =>
      payloadOf(type, random ? candidates[this.rnd(0, candidates.length - 1)] : candidates[0]);

    const domainMatches = list.filter((entry) => this._hasDomain(url, entry.domain));
    if (domainMatches.length) return pick(domainMatches);

    if (useGlobal) {
      // The fallback used to pick from *all* entries and then discard the pick
      // unless it happened to be global, hiding globals behind other entries.
      const globals = list.filter((entry) => entry.isGlobal);
      if (globals.length) return pick(globals);
    }
    return null;
  }

  /** @returns {number} Random integer in [min, max]. */
  rnd(min = 0, max = Number.MAX_VALUE) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }

  /**
   * @param {string} url
   * @param {string} type - Same values as getAdapter.
   * @param {boolean} [useGlobal] - Also accept any global entry.
   * @returns {boolean}
   */
  hasDomain(url, type, useGlobal) {
    if (!this.getDomainName(url)) return false;
    const list = entriesFor(this, type);
    if (list.some((entry) => this._hasDomain(url, entry.domain))) return true;
    return Boolean(useGlobal) && list.some((entry) => entry.isGlobal);
  }

  /**
   * Builds the header map for `url`: configured headers, then `extra`, then
   * (optionally) a random user agent.
   * @param {string} url
   * @param {boolean} [useGlobal]
   * @param {Headers|object} [extra] - For plain objects only string values are applied.
   * @param {boolean} [randomUserAgent]
   * @returns {object} Plain object of header name/value pairs.
   */
  pickHeaders(url, useGlobal, extra, randomUserAgent) {
    const configured = this.getAdapter(url, "headers", useGlobal);
    const merged = new Headers(configured ?? {});
    if (extra && extra instanceof Headers) {
      // Object.entries() over the iterator yielded nothing; iterate it directly.
      for (const [name, value] of extra.entries()) merged.set(name, value);
    } else if (extra && typeof extra === "object") {
      for (const [name, value] of Object.entries(extra)) {
        if (typeof value === "string") merged.set(name, value);
      }
    }
    if (randomUserAgent) merged.set("user-agent", this.getRandomUserAgent());
    return Object.fromEntries(merged.entries());
  }

  /**
   * Tests whether `url` matches a configured domain spec.
   * @param {string} url
   * @param {string|RegExp|Array<string|RegExp>} spec - Exact host, `*` wildcard,
   *   regex source or `/source/flags` string, RegExp, or an array of these.
   * @returns {boolean}
   */
  _hasDomain(url, spec) {
    if (!spec) return false;
    const host = this.getDomainName(url);
    if (!host) return false;

    const looksLikeRegex = (text) =>
      /[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(text) ||
      text.startsWith("/") ||
      text.includes(".*") ||
      text.includes(".+");

    const matches = (candidate) => {
      if (candidate instanceof RegExp) return candidate.test(host) || candidate.test(url);
      const text = candidate.toString().trim();
      if (host.toLowerCase() === text.toLowerCase()) return true;
      if (text.includes("*")) {
        // Escape everything, then turn the escaped "*" back into ".*".
        const source = text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").replace(/\\\*/g, ".*");
        const wildcard = new RegExp(`^${source}$`, "i");
        return wildcard.test(host) || wildcard.test(url);
      }
      if (looksLikeRegex(text)) {
        try {
          let source = text;
          let flags = "i";
          const literal = text.match(/^\/(.*)\/(\w*)$/);
          if (literal) {
            source = literal[1];
            flags = literal[2] || "i";
          }
          const pattern = new RegExp(source, flags);
          return pattern.test(host) || pattern.test(url);
        } catch (err) {
          // Not a valid pattern after all: fall back to a substring match.
          return host.toLowerCase().includes(text.toLowerCase());
        }
      }
      const lowerHost = host.toLowerCase();
      const lowerSpec = text.toLowerCase();
      // Subdomain relationship in either direction counts as a match.
      return lowerHost === lowerSpec || lowerHost.endsWith("." + lowerSpec) || lowerSpec.endsWith("." + lowerHost);
    };

    return Array.isArray(spec) ? spec.some(matches) : matches(spec);
  }

  /** @returns {string|null} Hostname of a valid http(s) URL, the input if it is a hostname, else null. */
  getDomainName(url) {
    if (this.isValidUrl(url)) return new URL(url).hostname;
    if (this.isHostName(url)) return url;
    return null;
  }

  /** @returns {boolean} Whether `value` is a dotted hostname of at most 255 characters. */
  isHostName(value) {
    if (!value) return false;
    if (value.length > 255) return false;
    const host = value.trim().toLowerCase();
    return HOSTNAME_PATTERN.test(host) && !host.startsWith("-") && !host.endsWith("-");
  }

  /** @returns {boolean} Whether `value` parses as an http(s) URL with a dotted hostname. */
  isValidUrl(value) {
    if (!value) return false;
    try {
      const parsed = new URL(value.trim());
      if (!parsed.protocol || !["http:", "https:"].includes(parsed.protocol.toLowerCase())) return false;
      if (!parsed.hostname) return false;
      return HOSTNAME_PATTERN.test(parsed.hostname);
    } catch {
      return false;
    }
  }

  /** @returns {string} One entry of the pre-generated user-agent pool. */
  getRandomUserAgent() {
    return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
  }
}

/**
 * Builds a pool of 200 user-agent strings by pairing a random browser with a
 * random OS token.
 * @returns {string[]}
 */
function generateUserAgents() {
  const browsers = [
    { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
    { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
    { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
    { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
    { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
    { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
    { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
    { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
    { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
    { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" }
  ];
  const systems = [
    "Windows NT 10.0",
    "Windows NT 6.1",
    "Macintosh; Intel Mac OS X 10_15_7",
    "Macintosh; Intel Mac OS X 11_4_0",
    "X11; Linux x86_64",
    "X11; Ubuntu; Linux x86_64"
  ];
  const formats = {
    Chrome: (sys, b) => `Mozilla/5.0 (${sys}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36`,
    Firefox: (sys, b) => `Mozilla/5.0 (${sys}; rv:${b.version}) ${b.engine} Firefox/${b.version}`,
    Safari: (sys, b) => `Mozilla/5.0 (${sys}) ${b.engine} (KHTML, like Gecko) Version/${b.version} Safari/605.1.15`,
    Edge: (sys, b) => `Mozilla/5.0 (${sys}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Edg/${b.version}`,
    Opera: (sys, b) => `Mozilla/5.0 (${sys}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 OPR/${b.version}`,
    Vivaldi: (sys, b) => `Mozilla/5.0 (${sys}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Vivaldi/${b.version}`,
    Brave: (sys, b) => `Mozilla/5.0 (${sys}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Brave/${b.version}`,
    Chromium: (sys, b) => `Mozilla/5.0 (${sys}) ${b.engine} (KHTML, like Gecko) Chromium/${b.version} Chrome/${b.version} Safari/537.36`,
    Yandex: (sys, b) => `Mozilla/5.0 (${sys}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} YaBrowser/${b.version} Safari/537.36`,
    Maxthon: (sys, b) => `Mozilla/5.0 (${sys}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Maxthon/${b.version}`
  };
  const pool = [];
  for (let i = 0; i < 200; i++) {
    const browser = browsers[Math.floor(Math.random() * browsers.length)];
    const system = systems[Math.floor(Math.random() * systems.length)];
    pool.push(formats[browser.name](system, browser));
  }
  return pool;
}

exports.CrawlerOptions = CrawlerOptions;
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
import { RezoQueue } from "../queue/queue.js";
import { Oxylabs } from "./addon/oxylabs/index.js";
import path from "node:path";
import os from "node:os";
import { Decodo } from "./addon/decodo/index.js";

/**
 * Returns the per-domain entry list that backs a given adapter type.
 * Any type other than "headers", "limiters", "oxylabs" or "decodo" selects
 * the proxy list.
 *
 * @param {CrawlerOptions} options - Instance whose lists are read.
 * @param {string} type - "headers" | "limiters" | "oxylabs" | "decodo" | anything else (proxies).
 * @returns {Array<object>} The live array stored on the instance (not a copy).
 */
function entriesFor(options, type) {
  switch (type) {
    case "headers":
      return options.requestHeaders;
    case "limiters":
      return options.limiters;
    case "oxylabs":
      return options.oxylabs;
    case "decodo":
      return options.decodo;
    default:
      return options.proxies;
  }
}

/**
 * Extracts the value that `getAdapter` hands back for an entry of a given type.
 *
 * @param {string} type - Same type strings as `entriesFor`.
 * @param {object} entry - One element of the list selected by `type`.
 * @returns {*} `headers`, `pqueue`, `adaptar` or `proxy`, depending on `type`.
 */
function payloadOf(type, entry) {
  switch (type) {
    case "headers":
      return entry.headers;
    case "limiters":
      return entry.pqueue;
    case "oxylabs":
    case "decodo":
      // The stored key is spelled "adaptar"; it is kept because it is part of
      // the shape of the public `oxylabs` / `decodo` arrays.
      return entry.adaptar;
    default:
      return entry.proxy;
  }
}

/**
 * Holds crawler settings plus per-domain / global lists of request headers,
 * proxies, rate limiters and Oxylabs / Decodo adapters, and resolves which
 * entry applies to a given URL.
 */
class CrawlerOptions {
  baseUrl;
  adapter;
  enableNavigationHistory;
  sessionId;
  rejectUnauthorized;
  userAgent;
  useRndUserAgent;
  timeout;
  maxRedirects;
  maxRetryAttempts;
  retryDelay;
  retryOnStatusCode;
  forceRevisit;
  retryWithoutProxyOnStatusCode;
  retryOnProxyError;
  maxRetryOnProxyError;
  allowRevisiting;
  enableCache;
  cacheTTL;
  cacheDir;
  throwFatalError;
  debug;
  oxylabs = [];
  decodo = [];
  proxies = [];
  limiters = [];
  requestHeaders = [];
  userAgents = generateUserAgents();

  /**
   * @param {object} [options={}] - Crawler settings. Missing values fall back to
   *   the defaults assigned below. `headers`, `oxylabs`, `decodo`, `proxy` and
   *   `limiter` are forwarded to the matching `_add*` method.
   */
  constructor(options = {}) {
    this.baseUrl = options.baseUrl || "";
    this.adapter = options.adapter ?? "http";
    this.enableNavigationHistory = options.enableNavigationHistory ?? false;
    this.sessionId =
      options.sessionId ?? `session_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
    this.rejectUnauthorized = options.rejectUnauthorized ?? true;
    this.userAgent = options.userAgent;
    this.useRndUserAgent = options.useRndUserAgent ?? false;
    this.timeout = options.timeout ?? 30000; // presumably milliseconds - TODO confirm
    this.maxRedirects = options.maxRedirects ?? 10;
    this.maxRetryAttempts = options.maxRetryAttempts ?? 3;
    this.retryDelay = options.retryDelay ?? 0;
    this.retryOnStatusCode = options.retryOnStatusCode ?? [408, 429, 500, 502, 503, 504];
    this.forceRevisit = options.forceRevisit ?? false;
    this.retryWithoutProxyOnStatusCode = options.retryWithoutProxyOnStatusCode ?? [407, 403];
    this.retryOnProxyError = options.retryOnProxyError ?? true;
    this.maxRetryOnProxyError = options.maxRetryOnProxyError ?? 3;
    this.allowRevisiting = options.allowRevisiting ?? false;
    this.enableCache = options.enableCache ?? true;
    this.cacheTTL = options.cacheTTL ?? 604800000; // presumably milliseconds - TODO confirm
    this.cacheDir = options.cacheDir ?? path.join(os.tmpdir(), "uiniqhtt_cache");
    this.throwFatalError = options.throwFatalError ?? false;
    this.debug = options.debug ?? false;
    this._addHeaders(options.headers);
    this._addOxylabs(options.oxylabs);
    // NOTE(review): the original constructor never forwarded a Decodo config,
    // although the class stores and resolves Decodo entries everywhere else.
    // Assumes the option key is `decodo` - confirm against the options typings.
    this._addDecodo(options.decodo);
    this._addProxies(options.proxy);
    this._addLimiters(options.limiter);
  }

  /**
   * Lists the distinct `domain` values configured for one adapter type.
   *
   * @param {string} type - "headers" | "limiters" | "oxylabs" | "decodo"; anything else means proxies.
   * @returns {Array<*>} Domain values with duplicates (by identity) removed.
   */
  getConfiguredDomains(type) {
    return entriesFor(this, type)
      .filter((entry) => entry.domain)
      .map((entry) => entry.domain)
      .filter((domain, index, all) => all.indexOf(domain) === index);
  }

  /**
   * Drops every entry whose `domain` equals `domain` (see `_domainsEqual`)
   * from all lists, including Decodo. Entries without a domain are kept.
   *
   * @param {*} domain - Domain value to remove.
   * @returns {this}
   */
  removeDomain(domain) {
    const keep = (entry) => !entry.domain || !this._domainsEqual(entry.domain, domain);
    this.requestHeaders = this.requestHeaders.filter(keep);
    this.proxies = this.proxies.filter(keep);
    this.limiters = this.limiters.filter(keep);
    this.oxylabs = this.oxylabs.filter(keep);
    this.decodo = this.decodo.filter(keep);
    return this;
  }

  /**
   * Compares two domain values: arrays are compared element by element with
   * `===`, everything else with `===` directly.
   *
   * @returns {boolean}
   */
  _domainsEqual(left, right) {
    if (Array.isArray(left) && Array.isArray(right)) {
      return left.length === right.length && left.every((item, index) => item === right[index]);
    }
    return left === right;
  }

  /**
   * Counts total, global and domain-specific entries for each list.
   *
   * @returns {{headers: object, proxies: object, limiters: object, oxylabs: object, decodo: object}}
   *   Each value has the shape `{ total, global, domainSpecific }`.
   */
  getConfigurationSummary() {
    const summarize = (entries) => ({
      total: entries.length,
      global: entries.filter((entry) => entry.isGlobal).length,
      domainSpecific: entries.filter((entry) => !entry.isGlobal && entry.domain).length,
    });
    return {
      headers: summarize(this.requestHeaders),
      proxies: summarize(this.proxies),
      limiters: summarize(this.limiters),
      oxylabs: summarize(this.oxylabs),
      decodo: summarize(this.decodo),
    };
  }

  /**
   * Registers header sets. Entries with neither `domain` nor `isGlobal`, or
   * with missing / empty headers, are skipped. A `Headers` instance is
   * converted to a plain object before it is stored.
   *
   * @param {{enable: boolean, httpHeaders: Array<object>}} [config]
   */
  _addHeaders(config) {
    if (!config || !config.enable) return;
    for (const entry of config.httpHeaders) {
      const { domain, isGlobal, headers } = entry;
      if (!domain && !isGlobal) continue;
      if (!headers) continue;
      // Object.keys() on a Headers instance is always empty, so it has to be
      // converted before the emptiness check.
      const plain = headers instanceof Headers ? Object.fromEntries(headers.entries()) : headers;
      if (Object.keys(plain).length < 1) continue;
      this.requestHeaders.push({ domain, isGlobal, headers: plain });
    }
  }

  /**
   * Registers proxies; entries without a scope or with an empty `proxy` are skipped.
   *
   * @param {{enable: boolean, proxies: Array<object>}} [config]
   */
  _addProxies(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, proxy } of config.proxies) {
      if (!domain && !isGlobal) continue;
      if (!proxy || Object.keys(proxy).length < 1) continue;
      this.proxies.push({ domain, isGlobal, proxy });
    }
  }

  /**
   * Registers limiters; each accepted entry gets a new `RezoQueue` built from
   * its `options`, stored under `pqueue`.
   *
   * @param {{enable: boolean, limiters: Array<object>}} [config]
   */
  _addLimiters(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.limiters) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.limiters.push({ domain, isGlobal, pqueue: new RezoQueue(options) });
    }
  }

  /**
   * Registers Oxylabs adapters; each accepted entry gets a new `Oxylabs`
   * built from its `options`, stored under `adaptar`.
   *
   * @param {{enable: boolean, labs: Array<object>}} [config]
   */
  _addOxylabs(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.oxylabs.push({ domain, isGlobal, adaptar: new Oxylabs(options) });
    }
  }

  /**
   * Registers Decodo adapters; each accepted entry gets a new `Decodo`
   * built from its `options`, stored under `adaptar`.
   *
   * @param {{enable: boolean, labs: Array<object>}} [config]
   */
  _addDecodo(config) {
    if (!config || !config.enable) return;
    for (const { domain, isGlobal, options } of config.labs) {
      if (!domain && !isGlobal) continue;
      if (!options || Object.keys(options).length < 1) continue;
      this.decodo.push({ domain, isGlobal, adaptar: new Decodo(options) });
    }
  }

  /** Adds one header entry. @returns {this} */
  addHeaders(entry) {
    this._addHeaders({ enable: true, httpHeaders: [entry] });
    return this;
  }

  /** Adds one proxy entry. @returns {this} */
  addProxy(entry) {
    this._addProxies({ enable: true, proxies: [entry] });
    return this;
  }

  /** Adds one limiter entry. @returns {this} */
  addLimiter(entry) {
    this._addLimiters({ enable: true, limiters: [entry] });
    return this;
  }

  /** Adds one Oxylabs entry. @returns {this} */
  addOxylabs(entry) {
    this._addOxylabs({ enable: true, labs: [entry] });
    return this;
  }

  /** Adds one Decodo entry. @returns {this} */
  addDecodo(entry) {
    this._addDecodo({ enable: true, labs: [entry] });
    return this;
  }

  /**
   * Removes every entry flagged `isGlobal` from all lists, including Decodo.
   *
   * @returns {this}
   */
  clearGlobalConfigs() {
    const notGlobal = (entry) => !entry.isGlobal;
    if (Array.isArray(this.requestHeaders)) this.requestHeaders = this.requestHeaders.filter(notGlobal);
    if (Array.isArray(this.oxylabs)) this.oxylabs = this.oxylabs.filter(notGlobal);
    if (Array.isArray(this.decodo)) this.decodo = this.decodo.filter(notGlobal);
    if (Array.isArray(this.limiters)) this.limiters = this.limiters.filter(notGlobal);
    if (Array.isArray(this.proxies)) this.proxies = this.proxies.filter(notGlobal);
    return this;
  }

  /**
   * Resolves the configured value (headers object, queue, adapter or proxy)
   * that applies to `url`.
   *
   * Domain-specific matches win. When there are none and `useGlobal` is
   * truthy, the choice is made among entries flagged `isGlobal` only, so a
   * domain-specific entry for some other host can no longer shadow a global one.
   *
   * @param {string} url - URL or bare host name.
   * @param {string} type - "headers" | "limiters" | "oxylabs" | "decodo"; anything else means proxies.
   * @param {boolean} [useGlobal] - Allow falling back to global entries.
   * @param {boolean} [random] - Pick a random candidate instead of the first one.
   * @returns {*} The matching value, or `null` when `url` has no resolvable
   *   host or nothing applies.
   */
  getAdapter(url, type, useGlobal, random) {
    if (!this.getDomainName(url)) return null;
    const entries = entriesFor(this, type);
    const choose = (candidates) =>
      random ? candidates[this.rnd(0, candidates.length - 1)] : candidates[0];

    const domainMatches = entries.filter((entry) => this._hasDomain(url, entry.domain));
    if (domainMatches.length > 0) return payloadOf(type, choose(domainMatches));

    if (useGlobal) {
      const globals = entries.filter((entry) => entry.isGlobal);
      if (globals.length > 0) return payloadOf(type, choose(globals));
    }
    return null;
  }

  /**
   * Random integer in the inclusive range [min, max].
   *
   * @param {number} [min=0]
   * @param {number} [max=Number.MAX_VALUE]
   * @returns {number}
   */
  rnd(min = 0, max = Number.MAX_VALUE) {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }

  /**
   * Tells whether any entry of `type` applies to `url`.
   *
   * @param {string} url - URL or bare host name.
   * @param {string} type - Same type strings as `getAdapter`.
   * @param {boolean} [useGlobal] - Also accept any entry flagged `isGlobal`.
   * @returns {boolean}
   */
  hasDomain(url, type, useGlobal) {
    if (!this.getDomainName(url)) return false;
    const entries = entriesFor(this, type);
    if (entries.some((entry) => this._hasDomain(url, entry.domain))) return true;
    return Boolean(useGlobal) && entries.some((entry) => entry.isGlobal);
  }

  /**
   * Builds the header object for a request to `url`: configured headers first,
   * then `overrides` on top, then optionally a random user agent.
   *
   * @param {string} url - URL or bare host name.
   * @param {boolean} [useGlobal] - Forwarded to `getAdapter`.
   * @param {Headers|object} [overrides] - Extra headers; for plain objects only
   *   string values are copied.
   * @param {boolean} [useRandomUserAgent] - Overwrite `user-agent` with a random one.
   * @returns {Object<string, string>}
   */
  pickHeaders(url, useGlobal, overrides, useRandomUserAgent) {
    const configured = this.getAdapter(url, "headers", useGlobal);
    const merged = new Headers(configured ?? {});
    if (overrides && overrides instanceof Headers) {
      // Iterate the Headers iterator directly; Object.entries() on an iterator
      // yields nothing, which silently dropped every override.
      for (const [name, value] of overrides.entries()) merged.set(name, value);
    } else if (overrides && typeof overrides === "object") {
      for (const [name, value] of Object.entries(overrides)) {
        if (typeof value === "string") merged.set(name, value);
      }
    }
    if (useRandomUserAgent) merged.set("user-agent", this.getRandomUserAgent());
    return Object.fromEntries(merged.entries());
  }

  /**
   * Tests whether `url` falls under a configured `domain` value.
   *
   * A pattern may be a RegExp, an exact host, a `*` wildcard, a regex-looking
   * string (optionally written as `/source/flags`), or a plain domain that
   * matches when either host is a dot-separated suffix of the other.
   *
   * @param {string} url - URL or bare host name.
   * @param {string|RegExp|Array<string|RegExp>} domain - Pattern or list of patterns.
   * @returns {boolean}
   */
  _hasDomain(url, domain) {
    if (!domain) return false;
    const host = this.getDomainName(url);
    if (!host) return false;

    const looksLikeRegex = (text) =>
      /[\^\$\*\+\?\{\}\[\]\(\)\|\\]/.test(text) ||
      text.startsWith("/") ||
      text.includes(".*") ||
      text.includes(".+");

    const matches = (pattern) => {
      if (pattern instanceof RegExp) return pattern.test(host) || pattern.test(url);

      const text = pattern.toString().trim();
      if (host.toLowerCase() === text.toLowerCase()) return true;

      if (text.includes("*")) {
        // Escape everything, then turn the escaped "*" back into ".*".
        const escaped = text.replace(/[.*+?^${}()|[\]\\]/g, "\\$&").replace(/\\\*/g, ".*");
        const wildcard = new RegExp(`^${escaped}$`, "i");
        return wildcard.test(host) || wildcard.test(url);
      }

      if (looksLikeRegex(text)) {
        try {
          let source = text;
          let flags = "i";
          const literal = text.match(/^\/(.*)\/(\w*)$/);
          if (literal) {
            source = literal[1];
            flags = literal[2] || "i";
          }
          const regex = new RegExp(source, flags);
          return regex.test(host) || regex.test(url);
        } catch {
          // Invalid pattern: fall back to a case-insensitive substring test.
          return host.toLowerCase().includes(text.toLowerCase());
        }
      }

      const hostLower = host.toLowerCase();
      const domainLower = text.toLowerCase();
      return (
        hostLower === domainLower ||
        hostLower.endsWith(`.${domainLower}`) ||
        domainLower.endsWith(`.${hostLower}`)
      );
    };

    if (Array.isArray(domain)) {
      for (const pattern of domain) {
        if (matches(pattern)) return true;
      }
      return false;
    }
    return matches(domain);
  }

  /**
   * Returns the host name of an http(s) URL, the input itself when it is a
   * bare host name, or `null` otherwise.
   *
   * @param {string} value
   * @returns {string|null}
   */
  getDomainName(value) {
    if (this.isValidUrl(value)) return new URL(value).hostname;
    if (this.isHostName(value)) return value;
    return null;
  }

  /**
   * Checks whether `value` is a dotted host name ending in an alphabetic
   * label of at least two characters.
   *
   * @param {string} value
   * @returns {boolean}
   */
  isHostName(value) {
    if (!value) return false;
    if (value.length > 255) return false;
    // Same pattern as isValidUrl(); the original had a stray space before the
    // final label, which made every host name fail.
    const pattern = /^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/;
    const candidate = value.trim().toLowerCase();
    return pattern.test(candidate) && !candidate.startsWith("-") && !candidate.endsWith("-");
  }

  /**
   * Checks whether `value` parses as an http: or https: URL whose host is a
   * dotted name (the pattern does not accept IP literals or single-label hosts).
   *
   * @param {string} value
   * @returns {boolean}
   */
  isValidUrl(value) {
    if (!value) return false;
    const candidate = value.trim();
    try {
      const parsed = new URL(candidate);
      if (!parsed.protocol || !["http:", "https:"].includes(parsed.protocol.toLowerCase())) return false;
      if (!parsed.hostname) return false;
      if (!/^(?:(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?\.)+[a-zA-Z]{2,})$/.test(parsed.hostname)) return false;
      return true;
    } catch {
      return false;
    }
  }

  /**
   * Picks one entry of `userAgents` at random.
   *
   * @returns {string}
   */
  getRandomUserAgent() {
    return this.userAgents[Math.floor(Math.random() * this.userAgents.length)];
  }
}

/**
 * Builds 200 user-agent strings, each from a randomly chosen browser and a
 * randomly chosen platform token. Duplicates are possible.
 *
 * @returns {string[]}
 */
function generateUserAgents() {
  const browsers = [
    { name: "Chrome", version: "91.0.4472.124", engine: "AppleWebKit/537.36" },
    { name: "Firefox", version: "89.0", engine: "Gecko/20100101" },
    { name: "Safari", version: "14.1.1", engine: "AppleWebKit/605.1.15" },
    { name: "Edge", version: "91.0.864.59", engine: "AppleWebKit/537.36" },
    { name: "Opera", version: "77.0.4054.277", engine: "AppleWebKit/537.36" },
    { name: "Vivaldi", version: "3.8.2259.42", engine: "AppleWebKit/537.36" },
    { name: "Brave", version: "1.26.74", engine: "AppleWebKit/537.36" },
    { name: "Chromium", version: "91.0.4472.101", engine: "AppleWebKit/537.36" },
    { name: "Yandex", version: "21.5.3.742", engine: "AppleWebKit/537.36" },
    { name: "Maxthon", version: "5.3.8.2000", engine: "AppleWebKit/537.36" },
  ];
  const platforms = [
    "Windows NT 10.0",
    "Windows NT 6.1",
    "Macintosh; Intel Mac OS X 10_15_7",
    "Macintosh; Intel Mac OS X 11_4_0",
    "X11; Linux x86_64",
    "X11; Ubuntu; Linux x86_64",
  ];
  // One formatter per browser name; every template stays on a single line so
  // the generated header value never contains a line break.
  const formatters = {
    Chrome: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36`,
    Firefox: (p, b) => `Mozilla/5.0 (${p}; rv:${b.version}) ${b.engine} Firefox/${b.version}`,
    Safari: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Version/${b.version} Safari/605.1.15`,
    Edge: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Edg/${b.version}`,
    Opera: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 OPR/${b.version}`,
    Vivaldi: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Vivaldi/${b.version}`,
    Brave: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Brave/${b.version}`,
    Chromium: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chromium/${b.version} Chrome/${b.version} Safari/537.36`,
    Yandex: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} YaBrowser/${b.version} Safari/537.36`,
    Maxthon: (p, b) => `Mozilla/5.0 (${p}) ${b.engine} (KHTML, like Gecko) Chrome/${b.version} Safari/537.36 Maxthon/${b.version}`,
  };

  const agents = [];
  for (let count = 0; count < 200; count++) {
    const browser = browsers[Math.floor(Math.random() * browsers.length)];
    const platform = platforms[Math.floor(Math.random() * platforms.length)];
    agents.push(formatters[browser.name](platform, browser));
  }
  return agents;
}

export { CrawlerOptions };
|
package/dist/plugin/index.cjs
DELETED
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
const _mod_pwsonp = require('./crawler.cjs');
|
|
2
|
-
exports.Crawler = _mod_pwsonp.Crawler;;
|
|
3
|
-
const _mod_f0a514 = require('./crawler-options.cjs');
|
|
4
|
-
exports.CrawlerOptions = _mod_f0a514.CrawlerOptions;;
|
|
5
|
-
const _mod_0j6c45 = require('../cache/file-cacher.cjs');
|
|
6
|
-
exports.FileCacher = _mod_0j6c45.FileCacher;;
|
|
7
|
-
const _mod_0ys1f7 = require('../cache/url-store.cjs');
|
|
8
|
-
exports.UrlStore = _mod_0ys1f7.UrlStore;;
|
|
9
|
-
const _mod_bs9mae = require('./addon/oxylabs/index.cjs');
|
|
10
|
-
exports.Oxylabs = _mod_bs9mae.Oxylabs;;
|
|
11
|
-
const _mod_afta57 = require('./addon/oxylabs/options.cjs');
|
|
12
|
-
exports.OXYLABS_BROWSER_TYPES = _mod_afta57.OXYLABS_BROWSER_TYPES;
|
|
13
|
-
exports.OXYLABS_COMMON_LOCALES = _mod_afta57.OXYLABS_COMMON_LOCALES;
|
|
14
|
-
exports.OXYLABS_COMMON_GEO_LOCATIONS = _mod_afta57.OXYLABS_COMMON_GEO_LOCATIONS;
|
|
15
|
-
exports.OXYLABS_US_STATES = _mod_afta57.OXYLABS_US_STATES;
|
|
16
|
-
exports.OXYLABS_EUROPEAN_COUNTRIES = _mod_afta57.OXYLABS_EUROPEAN_COUNTRIES;
|
|
17
|
-
exports.OXYLABS_ASIAN_COUNTRIES = _mod_afta57.OXYLABS_ASIAN_COUNTRIES;
|
|
18
|
-
exports.getRandomOxylabsBrowserType = _mod_afta57.getRandomBrowserType;
|
|
19
|
-
exports.getRandomOxylabsLocale = _mod_afta57.getRandomLocale;
|
|
20
|
-
exports.getRandomOxylabsGeoLocation = _mod_afta57.getRandomGeoLocation;;
|
|
21
|
-
const _mod_8woeb1 = require('./addon/decodo/index.cjs');
|
|
22
|
-
exports.Decodo = _mod_8woeb1.Decodo;;
|
|
23
|
-
const _mod_rpxa81 = require('./addon/decodo/options.cjs');
|
|
24
|
-
exports.DECODO_DEVICE_TYPES = _mod_rpxa81.DECODO_DEVICE_TYPES;
|
|
25
|
-
exports.DECODO_HEADLESS_MODES = _mod_rpxa81.DECODO_HEADLESS_MODES;
|
|
26
|
-
exports.DECODO_COMMON_LOCALES = _mod_rpxa81.DECODO_COMMON_LOCALES;
|
|
27
|
-
exports.DECODO_COMMON_COUNTRIES = _mod_rpxa81.DECODO_COMMON_COUNTRIES;
|
|
28
|
-
exports.DECODO_EUROPEAN_COUNTRIES = _mod_rpxa81.DECODO_EUROPEAN_COUNTRIES;
|
|
29
|
-
exports.DECODO_ASIAN_COUNTRIES = _mod_rpxa81.DECODO_ASIAN_COUNTRIES;
|
|
30
|
-
exports.DECODO_US_STATES = _mod_rpxa81.DECODO_US_STATES;
|
|
31
|
-
exports.DECODO_COMMON_CITIES = _mod_rpxa81.DECODO_COMMON_CITIES;
|
|
32
|
-
exports.getRandomDecodoDeviceType = _mod_rpxa81.getRandomDeviceType;
|
|
33
|
-
exports.getRandomDecodoLocale = _mod_rpxa81.getRandomLocale;
|
|
34
|
-
exports.getRandomDecodoCountry = _mod_rpxa81.getRandomCountry;
|
|
35
|
-
exports.getRandomDecodoCity = _mod_rpxa81.getRandomCity;
|
|
36
|
-
exports.generateDecodoSessionId = _mod_rpxa81.generateSessionId;;
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|