postal-code-scraper 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56)
  1. package/README.md +55 -61
  2. package/dist/index.cjs +392 -0
  3. package/dist/index.d.cts +43 -0
  4. package/dist/index.d.ts +43 -2
  5. package/dist/index.js +355 -20
  6. package/package.json +17 -6
  7. package/.mocharc.json +0 -4
  8. package/build/test/src/index.js +0 -26
  9. package/build/test/src/index.js.map +0 -1
  10. package/build/test/src/scraper/fetchers.js +0 -49
  11. package/build/test/src/scraper/fetchers.js.map +0 -1
  12. package/build/test/src/scraper/parsers.js +0 -63
  13. package/build/test/src/scraper/parsers.js.map +0 -1
  14. package/build/test/src/scraper/queue.js +0 -69
  15. package/build/test/src/scraper/queue.js.map +0 -1
  16. package/build/test/src/scraper/scrapers.js +0 -148
  17. package/build/test/src/scraper/scrapers.js.map +0 -1
  18. package/build/test/src/types.js +0 -3
  19. package/build/test/src/types.js.map +0 -1
  20. package/build/test/src/utils/id-generator.js +0 -33
  21. package/build/test/src/utils/id-generator.js.map +0 -1
  22. package/build/test/src/utils/logger.js +0 -87
  23. package/build/test/src/utils/logger.js.map +0 -1
  24. package/build/test/tests/postal-code-scraper.test.js +0 -14
  25. package/build/test/tests/postal-code-scraper.test.js.map +0 -1
  26. package/dist/scraper/fetchers.d.ts +0 -9
  27. package/dist/scraper/fetchers.js +0 -48
  28. package/dist/scraper/parsers.d.ts +0 -7
  29. package/dist/scraper/parsers.js +0 -62
  30. package/dist/scraper/queue.d.ts +0 -12
  31. package/dist/scraper/queue.js +0 -67
  32. package/dist/scraper/scrapers.d.ts +0 -18
  33. package/dist/scraper/scrapers.js +0 -149
  34. package/dist/types.d.ts +0 -32
  35. package/dist/types.js +0 -2
  36. package/dist/utils/env-config.d.ts +0 -1
  37. package/dist/utils/env-config.js +0 -7
  38. package/dist/utils/id-generator.d.ts +0 -4
  39. package/dist/utils/id-generator.js +0 -26
  40. package/dist/utils/logger.d.ts +0 -33
  41. package/dist/utils/logger.js +0 -86
  42. package/dist/utils/string-utils.d.ts +0 -1
  43. package/dist/utils/string-utils.js +0 -13
  44. package/src/index.ts +0 -2
  45. package/src/scraper/fetchers.ts +0 -30
  46. package/src/scraper/parsers.ts +0 -67
  47. package/src/scraper/queue.ts +0 -55
  48. package/src/scraper/scrapers.ts +0 -143
  49. package/src/types.ts +0 -37
  50. package/src/utils/env-config.ts +0 -3
  51. package/src/utils/id-generator.ts +0 -35
  52. package/src/utils/logger.ts +0 -105
  53. package/src/utils/string-utils.ts +0 -9
  54. package/tests/postal-code-scraper.test.ts +0 -100
  55. package/tests/tsconfig.json +0 -13
  56. package/tsconfig.json +0 -15
package/dist/index.js CHANGED
@@ -1,20 +1,355 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
- for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
- };
16
- Object.defineProperty(exports, "__esModule", { value: true });
17
- exports.PostalCodeScraper = void 0;
18
- __exportStar(require("./types"), exports);
19
- var scrapers_1 = require("./scraper/scrapers");
20
- Object.defineProperty(exports, "PostalCodeScraper", { enumerable: true, get: function () { return scrapers_1.PostalCodeScraper; } });
1
+ // src/scraper/scrapers.ts
2
+ import path from "path";
3
+ import puppeteer from "puppeteer";
4
+
5
+ // src/scraper/queue.ts
6
+ import { load } from "cheerio";
7
+ import pLimit from "p-limit";
8
+
9
+ // src/scraper/parsers.ts
10
+ var Parser = class {
11
+ static parseRegions($, config) {
12
+ return $("h2:contains('Regions')").next(".regions").find("a").map((_index, element) => {
13
+ const path2 = $(element).attr("href");
14
+ const prettyName = $(element).text().trim();
15
+ if (!path2 || !prettyName) return null;
16
+ return {
17
+ name: path2.split("/").filter(Boolean).pop(),
18
+ prettyName,
19
+ path: path2
20
+ };
21
+ }).get().filter(Boolean);
22
+ }
23
+ static parsePostalCodes($, config) {
24
+ const codes = {};
25
+ $(".codes .container").each((_i, element) => {
26
+ const place = $(element).find(".place").text().trim();
27
+ const codesList = $(element).find(".code span").map((_j, el) => $(el).text().trim()).get();
28
+ if (place) {
29
+ const key = config.usePrettyName ? place : place.toLowerCase().replace(/\s+/g, "-");
30
+ codes[key] = codesList;
31
+ }
32
+ });
33
+ return codes;
34
+ }
35
+ static parseCountries($, config) {
36
+ return $(".regions div a").map((_i, element) => {
37
+ const path2 = $(element).attr("href");
38
+ return path2 ? { name: path2.replace(/\//g, ""), prettyName: $(element).text().trim(), path: path2 } : null;
39
+ }).get().filter(Boolean);
40
+ }
41
+ static parseCountryByName($, config, name) {
42
+ const countryElement = $(`.regions div a`).filter((_, el) => $(el).attr("href")?.replace(/\//g, "") === name.toLowerCase().trim());
43
+ if (!countryElement.length) return null;
44
+ const path2 = countryElement.attr("href");
45
+ const prettyName = countryElement.text().trim();
46
+ return path2 && prettyName ? {
47
+ name: path2.replace(/\//g, ""),
48
+ prettyName,
49
+ path: path2
50
+ } : null;
51
+ }
52
+ };
53
+
54
+ // src/utils/env-config.ts
55
+ var getBaseUrl = () => {
56
+ return "https://worldpostalcode.com";
57
+ };
58
+
59
+ // src/scraper/queue.ts
60
+ var ProcessingQueue = class {
61
+ constructor(fetcher, config) {
62
+ this.fetcher = fetcher;
63
+ this.config = config;
64
+ this.limit = pLimit(config.concurrency || 15);
65
+ }
66
+ queue = [];
67
+ visitedUrls = /* @__PURE__ */ new Set();
68
+ limit;
69
+ async process(startRegion, data) {
70
+ this.queue.push({ region: startRegion, currData: data });
71
+ while (this.queue.length > 0) {
72
+ const tasks = this.queue.map((item) => this.limit(() => this.processItem(item)));
73
+ this.queue = [];
74
+ await Promise.all(tasks);
75
+ }
76
+ }
77
+ async processItem(item) {
78
+ const url = `${getBaseUrl()}${item.region.path}`;
79
+ if (this.visitedUrls.has(url)) return;
80
+ this.visitedUrls.add(url);
81
+ this.config.logger?.info(`Fetching: ${url}`);
82
+ try {
83
+ const html = await this.fetcher.fetchWithRetry(url);
84
+ const $ = load(html);
85
+ const regions = Parser.parseRegions($, this.config);
86
+ regions.forEach((region) => {
87
+ const key = this.config.usePrettyName ? region.prettyName : region.name;
88
+ item.currData[key] = {};
89
+ this.queue.push({
90
+ region,
91
+ currData: item.currData[key]
92
+ });
93
+ });
94
+ const codes = Parser.parsePostalCodes($, this.config);
95
+ Object.assign(item.currData, codes);
96
+ } catch (error) {
97
+ this.config.logger?.error(`Error processing ${url}:`, error);
98
+ }
99
+ }
100
+ };
101
+
102
+ // src/scraper/fetchers.ts
103
+ var Fetcher = class {
104
+ constructor(browser, config) {
105
+ this.browser = browser;
106
+ this.config = config;
107
+ }
108
+ async fetchHtml(url) {
109
+ const page = await this.browser.newPage();
110
+ try {
111
+ page.setDefaultNavigationTimeout(6e4);
112
+ await page.goto(url, { waitUntil: "domcontentloaded" });
113
+ return await page.content();
114
+ } finally {
115
+ await page.close();
116
+ }
117
+ }
118
+ async fetchWithRetry(url, retries = this.config.maxRetries || 5) {
119
+ try {
120
+ return await this.fetchHtml(url);
121
+ } catch (error) {
122
+ this.config.logger?.warn(`Retrying (${this.config.maxRetries - retries + 1}) for: ${url}`);
123
+ if (retries > 0) {
124
+ await new Promise((resolve) => setTimeout(resolve, Math.random() * 7e3 + 5e3));
125
+ return this.fetchWithRetry(url, retries - 1);
126
+ }
127
+ throw new Error(`Failed to fetch: ${url} after ${this.config.maxRetries} attempts`);
128
+ }
129
+ }
130
+ };
131
+
132
+ // src/utils/id-generator.ts
133
+ var createRegionIdGenerator = () => {
134
+ const regionRegistry = /* @__PURE__ */ new Map();
135
+ const counterMap = /* @__PURE__ */ new Map();
136
+ return (regions) => {
137
+ const normalized = regions.map(
138
+ (region) => region.trim().toLowerCase().normalize("NFD").replace(/[\u0300-\u036f]/g, "").replace(/\s+/g, "_")
139
+ );
140
+ const compositeKey = normalized.join("|");
141
+ if (regionRegistry.has(compositeKey)) {
142
+ return regionRegistry.get(compositeKey);
143
+ }
144
+ const baseName = normalized[normalized.length - 1];
145
+ const count = (counterMap.get(baseName) || 0) + 1;
146
+ counterMap.set(baseName, count);
147
+ const newId = `${baseName}_${count}`;
148
+ regionRegistry.set(compositeKey, newId);
149
+ return newId;
150
+ };
151
+ };
152
+
153
+ // src/scraper/scrapers.ts
154
+ import { writeFileSync, mkdirSync } from "fs";
155
+ import { load as load2 } from "cheerio";
156
+
157
+ // src/utils/string-utils.ts
158
+ var normalizeString = (str) => {
159
+ return str.trim().toLowerCase().normalize("NFD").replace(/[\u0300-\u036f]/g, "").replace(/\s+/g, "-").replace(/[^a-z0-9.-]/g, "");
160
+ };
161
+
162
+ // src/utils/logger.ts
163
+ var Logger = class _Logger {
164
+ static logLevel = "info";
165
+ static useColors = true;
166
+ static prefix = "[POSTAL-CODE-SCRAPER]";
167
+ static instance;
168
+ static configure(config) {
169
+ if (config.level) this.logLevel = config.level;
170
+ if (config.colors !== void 0) this.useColors = config.colors;
171
+ if (config.prefix) this.prefix = config.prefix;
172
+ if (config.logger) this.instance = config.logger;
173
+ }
174
+ static getInstance() {
175
+ return this.instance || new _Logger();
176
+ }
177
+ static debug(message, ...args) {
178
+ this.log("debug", message, args);
179
+ }
180
+ static info(message, ...args) {
181
+ this.log("info", message, args);
182
+ }
183
+ static warn(message, ...args) {
184
+ this.log("warn", message, args);
185
+ }
186
+ static error(message, ...args) {
187
+ this.log("error", message, args);
188
+ }
189
+ static shouldLog(level) {
190
+ if (this.logLevel === "silent") return false;
191
+ const levels = ["error", "warn", "info", "debug"];
192
+ return levels.indexOf(level) <= levels.indexOf(this.logLevel);
193
+ }
194
+ static log(level, message, args) {
195
+ if (!this.shouldLog(level)) return;
196
+ const logger = this.getInstance();
197
+ const formatted = this.formatMessage(level, message);
198
+ logger[level](formatted, ...args);
199
+ }
200
+ static formatMessage(level, message) {
201
+ const timestamp = (/* @__PURE__ */ new Date()).toISOString();
202
+ const levelColor = this.getLevelColor(level);
203
+ const messageColor = this.useColors ? "\x1B[37m" : "";
204
+ return [
205
+ this.useColors ? "\x1B[90m" : "",
206
+ `${this.prefix} `,
207
+ `${timestamp} `,
208
+ levelColor,
209
+ `[${level.toUpperCase()}]`,
210
+ this.useColors ? "\x1B[0m" : "",
211
+ messageColor,
212
+ ` ${message}`,
213
+ this.useColors ? "\x1B[0m" : ""
214
+ ].join("");
215
+ }
216
+ static getLevelColor(level) {
217
+ if (!this.useColors) return "";
218
+ return {
219
+ error: "\x1B[31m",
220
+ // Red
221
+ warn: "\x1B[33m",
222
+ // Yellow
223
+ info: "\x1B[36m",
224
+ // Cyan
225
+ debug: "\x1B[35m"
226
+ // Magenta
227
+ }[level];
228
+ }
229
+ // Instance methods to implement LoggerInterface
230
+ debug(message, ...args) {
231
+ console.debug(message, ...args);
232
+ }
233
+ info(message, ...args) {
234
+ console.log(message, ...args);
235
+ }
236
+ warn(message, ...args) {
237
+ console.warn(message, ...args);
238
+ }
239
+ error(message, ...args) {
240
+ console.error(message, ...args);
241
+ }
242
+ };
243
+
244
+ // src/scraper/scrapers.ts
245
+ var PostalCodeScraper = class {
246
+ constructor(config = {}) {
247
+ this.config = config;
248
+ this.config = {
249
+ concurrency: 15,
250
+ maxRetries: 5,
251
+ headless: true,
252
+ directory: "src/data",
253
+ logger: Logger,
254
+ usePrettyName: false,
255
+ ...config
256
+ };
257
+ }
258
+ browser;
259
+ queue;
260
+ fetcher;
261
+ async scrapeCountry(countryName) {
262
+ await this.initBrowser();
263
+ try {
264
+ const country = await this.getCountryDetails(countryName);
265
+ if (!country) {
266
+ this.config.logger?.warn(`Country not found: ${countryName}`);
267
+ return null;
268
+ }
269
+ const data = {};
270
+ await this.queue.process(country, data);
271
+ this.saveData(data, `${country.name}-postal-codes.json`, this.config.directory);
272
+ const postalCodeLookup = this.generatePostalCodeLookup(data);
273
+ this.saveData(postalCodeLookup, `${country.name}-lookup.json`, this.config.directory);
274
+ } finally {
275
+ await this.cleanup();
276
+ }
277
+ }
278
+ async scrapeCountries() {
279
+ await this.initBrowser();
280
+ try {
281
+ const countries = await this.getCountriesDetails();
282
+ if (countries.length === 0) {
283
+ this.config.logger?.warn("No countries found.");
284
+ return null;
285
+ }
286
+ for (const country of countries) {
287
+ const key = this.config.usePrettyName ? country.prettyName : country.name;
288
+ const countryData = {};
289
+ this.config.logger?.info(`Processing country: ${key}`);
290
+ await this.queue.process(country, countryData);
291
+ this.saveData(countryData, `${key}-postal-codes.json`, this.config.directory);
292
+ const postalCodeLookup = this.generatePostalCodeLookup(countryData);
293
+ this.saveData(postalCodeLookup, `${key}-lookup.json`, this.config.directory);
294
+ }
295
+ } finally {
296
+ await this.cleanup();
297
+ }
298
+ }
299
+ async initBrowser() {
300
+ this.browser = await puppeteer.launch({ headless: this.config.headless });
301
+ this.fetcher = new Fetcher(this.browser, this.config);
302
+ this.queue = new ProcessingQueue(this.fetcher, this.config);
303
+ }
304
+ async getCountryDetails(name) {
305
+ try {
306
+ const html = await this.fetcher.fetchWithRetry(getBaseUrl());
307
+ return Parser.parseCountryByName(load2(html), this.config, name);
308
+ } catch (error) {
309
+ this.config.logger?.error(`Error fetching country details: ${name}`, error);
310
+ return null;
311
+ }
312
+ }
313
+ async getCountriesDetails() {
314
+ try {
315
+ const html = await this.fetcher.fetchWithRetry(getBaseUrl());
316
+ return Parser.parseCountries(load2(html), this.config);
317
+ } catch (error) {
318
+ this.config.logger?.error("Error fetching countries details", error);
319
+ return [];
320
+ }
321
+ }
322
+ generatePostalCodeLookup(data) {
323
+ return this.buildLookup(data, createRegionIdGenerator());
324
+ }
325
+ buildLookup(regionObj, idGenerator, acc = [], result = { postalCodeMap: {}, regions: {} }) {
326
+ if (Array.isArray(regionObj)) {
327
+ for (const item of regionObj) {
328
+ const id = idGenerator(acc);
329
+ result.postalCodeMap[item] = id;
330
+ result.regions[id] = [...acc];
331
+ }
332
+ } else if (typeof regionObj === "object" && regionObj !== null) {
333
+ for (const [regionKey, regionValue] of Object.entries(regionObj)) {
334
+ this.buildLookup(regionValue, idGenerator, [...acc, regionKey], result);
335
+ }
336
+ }
337
+ return result;
338
+ }
339
+ saveData(data, fileName, directory = "src/data") {
340
+ try {
341
+ mkdirSync(directory, { recursive: true });
342
+ const filePath = path.join(directory, normalizeString(fileName));
343
+ writeFileSync(filePath, JSON.stringify(data, null, 2), { flag: "w" });
344
+ this.config.logger?.info(`Saved data to ${filePath}`);
345
+ } catch (error) {
346
+ this.config.logger?.error(`Error saving data to ${fileName}`, error);
347
+ }
348
+ }
349
+ async cleanup() {
350
+ await this.browser?.close();
351
+ }
352
+ };
353
+ export {
354
+ PostalCodeScraper
355
+ };
package/package.json CHANGED
@@ -1,13 +1,16 @@
1
1
  {
2
2
  "name": "postal-code-scraper",
3
- "version": "1.0.3",
3
+ "version": "1.0.4",
4
4
  "description": "A tool for scraping country data, including regions and their postal codes",
5
- "main": "dist/index.js",
6
- "types": "dist/index.d.ts",
5
+ "main": "./dist/index.cjs",
6
+ "module": "./dist/index.js",
7
+ "types": "./dist/index.d.ts",
8
+ "files": [
9
+ "dist"
10
+ ],
7
11
  "scripts": {
8
- "start": "tsx src/index.ts",
9
12
  "prebuild": "shx rm -rf dist",
10
- "build": "tsc",
13
+ "build": "tsup src/index.ts --format esm,cjs --dts --clean",
11
14
  "pretest": "npm run build",
12
15
  "test": "mocha -r tsx tests/**/*.test.ts",
13
16
  "prepare": "npm run build",
@@ -27,8 +30,9 @@
27
30
  "mocha": "^11.1.0",
28
31
  "sinon": "^19.0.2",
29
32
  "tslib": "^2.8.1",
33
+ "tsup": "^8.5.1",
30
34
  "tsx": "^3.14.0",
31
- "typescript": "^5.8.2"
35
+ "typescript": "^5.9.3"
32
36
  },
33
37
  "dependencies": {
34
38
  "cheerio": "^1.0.0",
@@ -57,5 +61,12 @@
57
61
  },
58
62
  "engines": {
59
63
  "node": ">=18"
64
+ },
65
+ "exports": {
66
+ ".": {
67
+ "types": "./dist/index.d.ts",
68
+ "import": "./dist/index.js",
69
+ "require": "./dist/index.cjs"
70
+ }
60
71
  }
61
72
  }
package/.mocharc.json DELETED
@@ -1,4 +0,0 @@
1
- {
2
- "extension": ["ts"],
3
- "spec": "tests/**/*.test.ts"
4
- }
package/build/test/src/index.js DELETED
@@ -1,26 +0,0 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
- for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
- };
16
- var __importDefault = (this && this.__importDefault) || function (mod) {
17
- return (mod && mod.__esModule) ? mod : { "default": mod };
18
- };
19
- Object.defineProperty(exports, "__esModule", { value: true });
20
- exports.default = exports.PostalCodeScraper = void 0;
21
- __exportStar(require("./types"), exports);
22
- var scrapers_1 = require("./scraper/scrapers");
23
- Object.defineProperty(exports, "PostalCodeScraper", { enumerable: true, get: function () { return scrapers_1.PostalCodeScraper; } });
24
- var scrapers_2 = require("./scraper/scrapers");
25
- Object.defineProperty(exports, "default", { enumerable: true, get: function () { return __importDefault(scrapers_2).default; } });
26
- //# sourceMappingURL=index.js.map
package/build/test/src/index.js.map DELETED
@@ -1 +0,0 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;AAAA,0CAAwB;AACxB,+CAAuD;AAA9C,6GAAA,iBAAiB,OAAA;AAC1B,+CAA6C;AAApC,oHAAA,OAAO,OAAA"}
package/build/test/src/scraper/fetchers.js DELETED
@@ -1,49 +0,0 @@
1
- "use strict";
2
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
- return new (P || (P = Promise))(function (resolve, reject) {
5
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
- step((generator = generator.apply(thisArg, _arguments || [])).next());
9
- });
10
- };
11
- Object.defineProperty(exports, "__esModule", { value: true });
12
- exports.Fetcher = void 0;
13
- class Fetcher {
14
- constructor(browser, config) {
15
- this.browser = browser;
16
- this.config = config;
17
- }
18
- fetchHtml(url) {
19
- return __awaiter(this, void 0, void 0, function* () {
20
- const page = yield this.browser.newPage();
21
- try {
22
- page.setDefaultNavigationTimeout(60000);
23
- yield page.goto(url, { waitUntil: "domcontentloaded" });
24
- return yield page.content();
25
- }
26
- finally {
27
- yield page.close();
28
- }
29
- });
30
- }
31
- fetchWithRetry(url_1) {
32
- return __awaiter(this, arguments, void 0, function* (url, retries = this.config.maxRetries || 5) {
33
- var _a;
34
- try {
35
- return yield this.fetchHtml(url);
36
- }
37
- catch (error) {
38
- (_a = this.config.logger) === null || _a === void 0 ? void 0 : _a.warn(`Retrying (${this.config.maxRetries - retries + 1}) for: ${url}`);
39
- if (retries > 0) {
40
- yield new Promise((resolve) => setTimeout(resolve, Math.random() * 7000 + 5000));
41
- return this.fetchWithRetry(url, retries - 1);
42
- }
43
- throw new Error(`Failed to fetch: ${url} after ${this.config.maxRetries} attempts`);
44
- }
45
- });
46
- }
47
- }
48
- exports.Fetcher = Fetcher;
49
- //# sourceMappingURL=fetchers.js.map
package/build/test/src/scraper/fetchers.js.map DELETED
@@ -1 +0,0 @@
1
- {"version":3,"file":"fetchers.js","sourceRoot":"","sources":["../../../../src/scraper/fetchers.ts"],"names":[],"mappings":";;;;;;;;;;;;AAGA,MAAa,OAAO;IACnB,YAAoB,OAAgB,EAAU,MAAqB;QAA/C,YAAO,GAAP,OAAO,CAAS;QAAU,WAAM,GAAN,MAAM,CAAe;IAAG,CAAC;IAEjE,SAAS,CAAC,GAAW;;YAC1B,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,OAAO,EAAE,CAAC;YAC1C,IAAI,CAAC;gBACJ,IAAI,CAAC,2BAA2B,CAAC,KAAK,CAAC,CAAC;gBACxC,MAAM,IAAI,CAAC,IAAI,CAAC,GAAG,EAAE,EAAE,SAAS,EAAE,kBAAkB,EAAE,CAAC,CAAC;gBACxD,OAAO,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;YAC7B,CAAC;oBAAS,CAAC;gBACV,MAAM,IAAI,CAAC,KAAK,EAAE,CAAC;YACpB,CAAC;QACF,CAAC;KAAA;IAEK,cAAc;6DAAC,GAAW,EAAE,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,UAAU,IAAI,CAAC;;YACtE,IAAI,CAAC;gBACJ,OAAO,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC;YAClC,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBAChB,MAAA,IAAI,CAAC,MAAM,CAAC,MAAM,0CAAE,IAAI,CAAC,aAAa,IAAI,CAAC,MAAM,CAAC,UAAW,GAAG,OAAO,GAAG,CAAC,UAAU,GAAG,EAAE,CAAC,CAAC;gBAC5F,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;oBACjB,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,IAAI,CAAC,MAAM,EAAE,GAAG,IAAI,GAAG,IAAI,CAAC,CAAC,CAAC;oBACjF,OAAO,IAAI,CAAC,cAAc,CAAC,GAAG,EAAE,OAAO,GAAG,CAAC,CAAC,CAAC;gBAC9C,CAAC;gBACD,MAAM,IAAI,KAAK,CAAC,oBAAoB,GAAG,UAAU,IAAI,CAAC,MAAM,CAAC,UAAU,WAAW,CAAC,CAAC;YACrF,CAAC;QACF,CAAC;KAAA;CACD;AA1BD,0BA0BC"}
package/build/test/src/scraper/parsers.js DELETED
@@ -1,63 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.Parser = void 0;
4
- class Parser {
5
- static parseRegions($, config) {
6
- return $("h2:contains('Regions')")
7
- .next(".regions")
8
- .find("a")
9
- .map((_index, element) => {
10
- const path = $(element).attr("href");
11
- const prettyName = $(element).text().trim();
12
- if (!path || !prettyName)
13
- return null;
14
- return {
15
- name: path.split("/").filter(Boolean).pop(),
16
- prettyName,
17
- path,
18
- };
19
- })
20
- .get()
21
- .filter(Boolean);
22
- }
23
- static parsePostalCodes($, config) {
24
- const codes = {};
25
- $(".codes .container").each((_i, element) => {
26
- const place = $(element).find(".place").text().trim();
27
- const codesList = $(element)
28
- .find(".code span")
29
- .map((_j, el) => $(el).text().trim())
30
- .get();
31
- if (place) {
32
- const key = config.usePrettyName ? place : place.toLowerCase().replace(/\s+/g, "-");
33
- codes[key] = codesList;
34
- }
35
- });
36
- return codes;
37
- }
38
- static parseCountries($, config) {
39
- return $(".regions div a")
40
- .map((_i, element) => {
41
- const path = $(element).attr("href");
42
- return path ? { name: path.replace(/\//g, ""), prettyName: $(element).text().trim(), path } : null;
43
- })
44
- .get()
45
- .filter(Boolean);
46
- }
47
- static parseCountryByName($, config, name) {
48
- const countryElement = $(`.regions div a`).filter((_, el) => { var _a; return ((_a = $(el).attr("href")) === null || _a === void 0 ? void 0 : _a.replace(/\//g, "")) === name.toLowerCase().trim(); });
49
- if (!countryElement.length)
50
- return null;
51
- const path = countryElement.attr("href");
52
- const prettyName = countryElement.text().trim();
53
- return path && prettyName
54
- ? {
55
- name: path.replace(/\//g, ""),
56
- prettyName,
57
- path,
58
- }
59
- : null;
60
- }
61
- }
62
- exports.Parser = Parser;
63
- //# sourceMappingURL=parsers.js.map
package/build/test/src/scraper/parsers.js.map DELETED
@@ -1 +0,0 @@
1
- {"version":3,"file":"parsers.js","sourceRoot":"","sources":["../../../../src/scraper/parsers.ts"],"names":[],"mappings":";;;AAEA,MAAa,MAAM;IAClB,MAAM,CAAC,YAAY,CAAC,CAAe,EAAE,MAAqB;QACzD,OAAO,CAAC,CAAC,wBAAwB,CAAC;aAChC,IAAI,CAAC,UAAU,CAAC;aAChB,IAAI,CAAC,GAAG,CAAC;aACT,GAAG,CAAC,CAAC,MAAM,EAAE,OAAO,EAAE,EAAE;YACxB,MAAM,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACrC,MAAM,UAAU,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YAC5C,IAAI,CAAC,IAAI,IAAI,CAAC,UAAU;gBAAE,OAAO,IAAI,CAAC;YAEtC,OAAO;gBACN,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,GAAG,EAAG;gBAC5C,UAAU;gBACV,IAAI;aACJ,CAAC;QACH,CAAC,CAAC;aACD,GAAG,EAAE;aACL,MAAM,CAAC,OAAO,CAAa,CAAC;IAC/B,CAAC;IAED,MAAM,CAAC,gBAAgB,CAAC,CAAe,EAAE,MAAqB;QAC7D,MAAM,KAAK,GAA6B,EAAE,CAAC;QAE3C,CAAC,CAAC,mBAAmB,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,OAAO,EAAE,EAAE;YAC3C,MAAM,KAAK,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;YACtD,MAAM,SAAS,GAAG,CAAC,CAAC,OAAO,CAAC;iBAC1B,IAAI,CAAC,YAAY,CAAC;iBAClB,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;iBACpC,GAAG,EAAE,CAAC;YAER,IAAI,KAAK,EAAE,CAAC;gBACX,MAAM,GAAG,GAAG,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;gBACpF,KAAK,CAAC,GAAG,CAAC,GAAG,SAAS,CAAC;YACxB,CAAC;QACF,CAAC,CAAC,CAAC;QAEH,OAAO,KAAK,CAAC;IACd,CAAC;IAED,MAAM,CAAC,cAAc,CAAC,CAAe,EAAE,MAAqB;QAC3D,OAAO,CAAC,CAAC,gBAAgB,CAAC;aACxB,GAAG,CAAC,CAAC,EAAE,EAAE,OAAO,EAAE,EAAE;YACpB,MAAM,IAAI,GAAG,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACrC,OAAO,IAAI,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QACpG,CAAC,CAAC;aACD,GAAG,EAAE;aACL,MAAM,CAAC,OAAO,CAAa,CAAC;IAC/B,CAAC;IAED,MAAM,CAAC,kBAAkB,CAAC,CAAe,EAAE,MAAqB,EAAE,IAAY;QAC7E,MAAM,cAAc,GAAG,CAAC,CAAC,gBAAgB,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EA
AE,EAAE,EAAE,EAAE,WAAC,OAAA,CAAA,MAAA,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,0CAAE,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,MAAK,IAAI,CAAC,WAAW,EAAE,CAAC,IAAI,EAAE,CAAA,EAAA,CAAC,CAAC;QAEnI,IAAI,CAAC,cAAc,CAAC,MAAM;YAAE,OAAO,IAAI,CAAC;QAExC,MAAM,IAAI,GAAG,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACzC,MAAM,UAAU,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;QAChD,OAAO,IAAI,IAAI,UAAU;YACxB,CAAC,CAAC;gBACA,IAAI,EAAE,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC;gBAC7B,UAAU;gBACV,IAAI;aACH;YACH,CAAC,CAAC,IAAI,CAAC;IACT,CAAC;CACD;AAhED,wBAgEC"}
package/build/test/src/scraper/queue.js DELETED
@@ -1,69 +0,0 @@
1
- "use strict";
2
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
- return new (P || (P = Promise))(function (resolve, reject) {
5
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
- step((generator = generator.apply(thisArg, _arguments || [])).next());
9
- });
10
- };
11
- var __importDefault = (this && this.__importDefault) || function (mod) {
12
- return (mod && mod.__esModule) ? mod : { "default": mod };
13
- };
14
- Object.defineProperty(exports, "__esModule", { value: true });
15
- exports.ProcessingQueue = void 0;
16
- const cheerio_1 = require("cheerio");
17
- const p_limit_1 = __importDefault(require("p-limit"));
18
- const parsers_1 = require("./parsers");
19
- class ProcessingQueue {
20
- constructor(fetcher, config) {
21
- this.fetcher = fetcher;
22
- this.config = config;
23
- this.queue = [];
24
- this.visitedUrls = new Set();
25
- this.limit = (0, p_limit_1.default)(config.concurrency || 15);
26
- }
27
- process(startRegion, data) {
28
- return __awaiter(this, void 0, void 0, function* () {
29
- this.queue.push({ region: startRegion, currData: data });
30
- while (this.queue.length > 0) {
31
- const tasks = this.queue.map((item) => this.limit(() => this.processItem(item)));
32
- this.queue = [];
33
- yield Promise.all(tasks);
34
- }
35
- });
36
- }
37
- processItem(item) {
38
- return __awaiter(this, void 0, void 0, function* () {
39
- var _a, _b;
40
- const url = `${this.config.baseUrl}${item.region.path}`;
41
- if (this.visitedUrls.has(url))
42
- return;
43
- this.visitedUrls.add(url);
44
- (_a = this.config.logger) === null || _a === void 0 ? void 0 : _a.info(`Fetching: ${url}`);
45
- try {
46
- const html = yield this.fetcher.fetchWithRetry(url);
47
- const $ = (0, cheerio_1.load)(html);
48
- // Parse and add new regions to queue
49
- const regions = parsers_1.Parser.parseRegions($, this.config);
50
- regions.forEach((region) => {
51
- const key = this.config.usePrettyName ? region.prettyName : region.name;
52
- item.currData[key] = {};
53
- this.queue.push({
54
- region,
55
- currData: item.currData[key],
56
- });
57
- });
58
- // Parse postal codes
59
- const codes = parsers_1.Parser.parsePostalCodes($, this.config);
60
- Object.assign(item.currData, codes);
61
- }
62
- catch (error) {
63
- (_b = this.config.logger) === null || _b === void 0 ? void 0 : _b.error(`Error processing ${url}:`, error);
64
- }
65
- });
66
- }
67
- }
68
- exports.ProcessingQueue = ProcessingQueue;
69
- //# sourceMappingURL=queue.js.map
package/build/test/src/scraper/queue.js.map DELETED
@@ -1 +0,0 @@
1
- {"version":3,"file":"queue.js","sourceRoot":"","sources":["../../../../src/scraper/queue.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,qCAA+B;AAG/B,sDAA6B;AAC7B,uCAAmC;AAEnC,MAAa,eAAe;IAK3B,YAAoB,OAAgB,EAAU,MAAqB;QAA/C,YAAO,GAAP,OAAO,CAAS;QAAU,WAAM,GAAN,MAAM,CAAe;QAJ3D,UAAK,GAA0B,EAAE,CAAC;QAClC,gBAAW,GAAG,IAAI,GAAG,EAAU,CAAC;QAIvC,IAAI,CAAC,KAAK,GAAG,IAAA,iBAAM,EAAC,MAAM,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC;IAC/C,CAAC;IAEK,OAAO,CAAC,WAAmB,EAAE,IAAS;;YAC3C,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,EAAE,MAAM,EAAE,WAAW,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;YAEzD,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC9B,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;gBACjF,IAAI,CAAC,KAAK,GAAG,EAAE,CAAC;gBAChB,MAAM,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;YAC1B,CAAC;QACF,CAAC;KAAA;IAEa,WAAW,CAAC,IAAyB;;;YAClD,MAAM,GAAG,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,IAAI,EAAE,CAAC;YAExD,IAAI,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC;gBAAE,OAAO;YACtC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YAE1B,MAAA,IAAI,CAAC,MAAM,CAAC,MAAM,0CAAE,IAAI,CAAC,aAAa,GAAG,EAAE,CAAC,CAAC;YAE7C,IAAI,CAAC;gBACJ,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,GAAG,CAAC,CAAC;gBACpD,MAAM,CAAC,GAAG,IAAA,cAAI,EAAC,IAAI,CAAC,CAAC;gBAErB,qCAAqC;gBACrC,MAAM,OAAO,GAAG,gBAAM,CAAC,YAAY,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;gBACpD,OAAO,CAAC,OAAO,CAAC,CAAC,MAAM,EAAE,EAAE;oBAC1B,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,MAAM,CAAC,UAAU,CAAC,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC;oBACxE,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,GAAG,EAAE,CAAC;oBACxB,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC;wBACf,MAAM;wBACN,QAAQ,EAAE,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC;qBAC5B,CAAC,CAAC;gBACJ,CAAC,CAAC,CAAC;gBAEH,qBAAqB;gBACrB,MAAM,KAAK,GAAG,gBAAM,CAAC,gBAAgB,CAAC,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;gBACtD,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;YACrC,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBAChB,MAAA,IAAI,CAAC,MAAM,CAAC,MAAM,0CAAE,KAAK,CAAC,oBAAoB,GAAG,
GAAG,EAAE,KAAK,CAAC,CAAC;YAC9D,CAAC;QACF,CAAC;KAAA;CACD;AAjDD,0CAiDC"}