postal-code-scraper 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. package/.mocharc.json +4 -0
  2. package/LICENSE +21 -0
  3. package/README.md +194 -0
  4. package/build/test/src/index.js +26 -0
  5. package/build/test/src/index.js.map +1 -0
  6. package/build/test/src/scraper/fetchers.js +49 -0
  7. package/build/test/src/scraper/fetchers.js.map +1 -0
  8. package/build/test/src/scraper/parsers.js +63 -0
  9. package/build/test/src/scraper/parsers.js.map +1 -0
  10. package/build/test/src/scraper/queue.js +69 -0
  11. package/build/test/src/scraper/queue.js.map +1 -0
  12. package/build/test/src/scraper/scrapers.js +148 -0
  13. package/build/test/src/scraper/scrapers.js.map +1 -0
  14. package/build/test/src/types.js +3 -0
  15. package/build/test/src/types.js.map +1 -0
  16. package/build/test/src/utils/id-generator.js +33 -0
  17. package/build/test/src/utils/id-generator.js.map +1 -0
  18. package/build/test/src/utils/logger.js +87 -0
  19. package/build/test/src/utils/logger.js.map +1 -0
  20. package/build/test/tests/postal-code-scraper.test.js +14 -0
  21. package/build/test/tests/postal-code-scraper.test.js.map +1 -0
  22. package/dist/index.d.ts +3 -0
  23. package/dist/index.js +25 -0
  24. package/dist/scraper/fetchers.d.ts +9 -0
  25. package/dist/scraper/fetchers.js +48 -0
  26. package/dist/scraper/parsers.d.ts +7 -0
  27. package/dist/scraper/parsers.js +62 -0
  28. package/dist/scraper/queue.d.ts +12 -0
  29. package/dist/scraper/queue.js +67 -0
  30. package/dist/scraper/scrapers.d.ts +19 -0
  31. package/dist/scraper/scrapers.js +149 -0
  32. package/dist/types.d.ts +32 -0
  33. package/dist/types.js +2 -0
  34. package/dist/utils/env-config.d.ts +1 -0
  35. package/dist/utils/env-config.js +7 -0
  36. package/dist/utils/id-generator.d.ts +4 -0
  37. package/dist/utils/id-generator.js +26 -0
  38. package/dist/utils/logger.d.ts +33 -0
  39. package/dist/utils/logger.js +86 -0
  40. package/dist/utils/string-utils.d.ts +1 -0
  41. package/dist/utils/string-utils.js +13 -0
  42. package/package.json +61 -0
  43. package/src/index.ts +3 -0
  44. package/src/scraper/fetchers.ts +30 -0
  45. package/src/scraper/parsers.ts +67 -0
  46. package/src/scraper/queue.ts +55 -0
  47. package/src/scraper/scrapers.ts +143 -0
  48. package/src/types.ts +37 -0
  49. package/src/utils/env-config.ts +3 -0
  50. package/src/utils/id-generator.ts +35 -0
  51. package/src/utils/logger.ts +105 -0
  52. package/src/utils/string-utils.ts +9 -0
  53. package/tests/postal-code-scraper.test.ts +100 -0
  54. package/tests/tsconfig.json +13 -0
  55. package/tsconfig.json +15 -0
@@ -0,0 +1,87 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.Logger = void 0;
4
/**
 * Static logging facade used across the scraper.
 *
 * The static methods (`debug`, `info`, `warn`, `error`) filter by the configured
 * level, decorate the message with prefix, timestamp and level tag, and forward
 * it to a sink object exposing the same four methods. When no sink has been
 * configured, a `Logger` instance (which writes to the console) is used.
 *
 * The static defaults (`logLevel`, `useColors`, `prefix`) are assigned right
 * after the class definition.
 */
class Logger {
    /**
     * Updates the global logger settings. Only the keys that are provided change.
     *
     * @param config Optional settings: `level`, `colors`, `prefix`, `logger`.
     *               Calling without an argument is a no-op.
     */
    static configure(config = {}) {
        if (config.level)
            this.logLevel = config.level;
        if (config.colors !== undefined)
            this.useColors = config.colors;
        // Compare against undefined (not truthiness) so "" can clear the prefix.
        if (config.prefix !== undefined)
            this.prefix = config.prefix;
        if (config.logger)
            this.instance = config.logger;
    }
    /**
     * Returns the sink that receives formatted messages.
     * The console-backed default is created once and then reused.
     */
    static getInstance() {
        if (!this.instance)
            this.instance = new Logger();
        return this.instance;
    }
    static debug(message, ...args) {
        this.log("debug", message, args);
    }
    static info(message, ...args) {
        this.log("info", message, args);
    }
    static warn(message, ...args) {
        this.log("warn", message, args);
    }
    static error(message, ...args) {
        this.log("error", message, args);
    }
    /**
     * Tells whether a message of `level` passes the configured threshold.
     * Levels are ordered from most to least severe; "silent" suppresses all.
     */
    static shouldLog(level) {
        if (this.logLevel === "silent")
            return false;
        const levels = ["error", "warn", "info", "debug"];
        return levels.indexOf(level) <= levels.indexOf(this.logLevel);
    }
    /** Filters, formats and forwards one message to the active sink. */
    static log(level, message, args) {
        if (!this.shouldLog(level))
            return;
        const logger = this.getInstance();
        const formatted = this.formatMessage(level, message);
        logger[level](formatted, ...args);
    }
    /**
     * Builds "<prefix> <ISO timestamp> [LEVEL] <message>", wrapped in ANSI
     * colour codes when colours are enabled. An empty prefix is omitted
     * together with its separator.
     */
    static formatMessage(level, message) {
        const timestamp = new Date().toISOString();
        const reset = this.useColors ? "\x1b[0m" : "";
        return [
            this.useColors ? "\x1b[90m" : "",
            this.prefix ? `${this.prefix} ` : "",
            `${timestamp} `,
            this.getLevelColor(level),
            `[${level.toUpperCase()}]`,
            reset,
            this.useColors ? "\x1b[37m" : "",
            ` ${message}`,
            reset,
        ].join("");
    }
    /** ANSI colour escape for a level, or "" when colours are disabled. */
    static getLevelColor(level) {
        if (!this.useColors)
            return "";
        return {
            error: "\x1b[31m", // Red
            warn: "\x1b[33m", // Yellow
            info: "\x1b[36m", // Cyan
            debug: "\x1b[35m", // Magenta
        }[level];
    }
    // Instance methods to implement LoggerInterface (console-backed sink)
    debug(message, ...args) {
        console.debug(message, ...args);
    }
    info(message, ...args) {
        console.log(message, ...args);
    }
    warn(message, ...args) {
        console.warn(message, ...args);
    }
    error(message, ...args) {
        console.error(message, ...args);
    }
}
83
+ exports.Logger = Logger;
84
+ Logger.logLevel = "info";
85
+ Logger.useColors = true;
86
+ Logger.prefix = "[POSTAL-CODE-SCRAPER]";
87
+ //# sourceMappingURL=logger.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"logger.js","sourceRoot":"","sources":["../../../../src/utils/logger.ts"],"names":[],"mappings":";;;AAUA,MAAa,MAAM;IAMlB,MAAM,CAAC,SAAS,CAAC,MAAyF;QACzG,IAAI,MAAM,CAAC,KAAK;YAAE,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC;QAC/C,IAAI,MAAM,CAAC,MAAM,KAAK,SAAS;YAAE,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC;QAChE,IAAI,MAAM,CAAC,MAAM;YAAE,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC/C,IAAI,MAAM,CAAC,MAAM;YAAE,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC;IAClD,CAAC;IAED,MAAM,CAAC,WAAW;QACjB,OAAO,IAAI,CAAC,QAAQ,IAAI,IAAI,MAAM,EAAE,CAAC;IACtC,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,OAAe,EAAE,GAAG,IAAW;QAC3C,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IAClC,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,OAAe,EAAE,GAAG,IAAW;QAC1C,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IACjC,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,OAAe,EAAE,GAAG,IAAW;QAC1C,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IACjC,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,OAAe,EAAE,GAAG,IAAW;QAC3C,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IAClC,CAAC;IAEO,MAAM,CAAC,SAAS,CAAC,KAAe;QACvC,IAAI,IAAI,CAAC,QAAQ,KAAK,QAAQ;YAAE,OAAO,KAAK,CAAC;QAE7C,MAAM,MAAM,GAAgB,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QAC/D,OAAO,MAAM,CAAC,OAAO,CAAC,KAAkB,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,QAAqB,CAAC,CAAC;IACzF,CAAC;IAEO,MAAM,CAAC,GAAG,CAAC,KAAgB,EAAE,OAAe,EAAE,IAAW;QAChE,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC;YAAE,OAAO;QAEnC,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAClC,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QAErD,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,EAAE,GAAG,IAAI,CAAC,CAAC;IACnC,CAAC;IAEO,MAAM,CAAC,aAAa,CAAC,KAAgB,EAAE,OAAe;QAC7D,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC3C,MAAM,UAAU,GAAG,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QAC7C,MAAM,YAAY,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,CAAC;QAEtD,OAAO;YACN,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE;YAChC,GAAG,IAAI,CAAC,MAAM,GAAG;YACjB,GAAG,SAAS,GAAG;YACf,UAAU;YACV,IAAI,KAAK,CAAC,WAAW,EAAE,GAAG;YAC1B,IAAI,CAAC,SAAS,
CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE;YAC/B,YAAY;YACZ,IAAI,OAAO,EAAE;YACb,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE;SAC/B,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACZ,CAAC;IAEO,MAAM,CAAC,aAAa,CAAC,KAAgB;QAC5C,IAAI,CAAC,IAAI,CAAC,SAAS;YAAE,OAAO,EAAE,CAAC;QAE/B,OAAO;YACN,KAAK,EAAE,UAAU,EAAE,MAAM;YACzB,IAAI,EAAE,UAAU,EAAE,SAAS;YAC3B,IAAI,EAAE,UAAU,EAAE,OAAO;YACzB,KAAK,EAAE,UAAU,EAAE,UAAU;SAC7B,CAAC,KAAK,CAAC,CAAC;IACV,CAAC;IAED,gDAAgD;IAChD,KAAK,CAAC,OAAe,EAAE,GAAG,IAAW;QACpC,OAAO,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,CAAC;IACjC,CAAC;IAED,IAAI,CAAC,OAAe,EAAE,GAAG,IAAW;QACnC,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,CAAC;IAC/B,CAAC;IAED,IAAI,CAAC,OAAe,EAAE,GAAG,IAAW;QACnC,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,CAAC;IAChC,CAAC;IAED,KAAK,CAAC,OAAe,EAAE,GAAG,IAAW;QACpC,OAAO,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,CAAC;IACjC,CAAC;;AA7FF,wBA8FC;AA7Fe,eAAQ,GAAa,MAAM,CAAC;AAC5B,gBAAS,GAAY,IAAI,CAAC;AAC1B,aAAM,GAAW,uBAAuB,CAAC"}
@@ -0,0 +1,14 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ const chai_1 = require("chai");
7
+ const index_1 = __importDefault(require("../src/index"));
8
describe("Postal Code Scraper", () => {
    it("should do something", () => {
        // scrapeCountry() is asynchronous. Returning its promise makes Mocha
        // wait for the scrape and report a rejection as a failure; asserting
        // on the Promise object itself would always pass.
        return index_1.default.scrapeCountry("romania").then((result) => {
            // The scraper resolves with null only when the country is not found.
            (0, chai_1.expect)(result).to.not.equal(null);
        });
    });
});
14
+ //# sourceMappingURL=postal-code-scraper.test.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"postal-code-scraper.test.js","sourceRoot":"","sources":["../../../tests/postal-code-scraper.test.ts"],"names":[],"mappings":";;;;;AAAA,+BAA8B;AAC9B,yDAA6C;AAE7C,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;IACpC,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC9B,MAAM,MAAM,GAAG,eAAiB,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;QAC1D,IAAA,aAAM,EAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC;IACzB,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC,CAAC"}
@@ -0,0 +1,3 @@
1
+ export * from "./types";
2
+ export { PostalCodeScraper } from "./scraper/scrapers";
3
+ export { default } from "./scraper/scrapers";
package/dist/index.js ADDED
@@ -0,0 +1,25 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
+ };
16
+ var __importDefault = (this && this.__importDefault) || function (mod) {
17
+ return (mod && mod.__esModule) ? mod : { "default": mod };
18
+ };
19
+ Object.defineProperty(exports, "__esModule", { value: true });
20
+ exports.default = exports.PostalCodeScraper = void 0;
21
+ __exportStar(require("./types"), exports);
22
+ var scrapers_1 = require("./scraper/scrapers");
23
+ Object.defineProperty(exports, "PostalCodeScraper", { enumerable: true, get: function () { return scrapers_1.PostalCodeScraper; } });
24
+ var scrapers_2 = require("./scraper/scrapers");
25
+ Object.defineProperty(exports, "default", { enumerable: true, get: function () { return __importDefault(scrapers_2).default; } });
@@ -0,0 +1,9 @@
1
+ import { Browser } from "puppeteer";
2
+ import { ScraperConfig } from "../types";
3
+ export declare class Fetcher {
4
+ private browser;
5
+ private config;
6
+ constructor(browser: Browser, config: ScraperConfig);
7
+ fetchHtml(url: string): Promise<string>;
8
+ fetchWithRetry(url: string, retries?: number): Promise<string>;
9
+ }
@@ -0,0 +1,48 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ Object.defineProperty(exports, "__esModule", { value: true });
12
+ exports.Fetcher = void 0;
13
/**
 * Downloads page HTML through a shared browser instance.
 *
 * `browser` must expose `newPage()`; `config` may carry `maxRetries` and a
 * `logger` with a `warn` method.
 */
class Fetcher {
    constructor(browser, config) {
        this.browser = browser;
        this.config = config;
    }
    /**
     * Opens a new page, navigates to `url` and resolves with the page content.
     * The page is always closed, whether or not navigation succeeds.
     *
     * @param url Address to load.
     * @returns Promise resolving to the page's HTML.
     */
    async fetchHtml(url) {
        const page = await this.browser.newPage();
        try {
            page.setDefaultNavigationTimeout(60000);
            await page.goto(url, { waitUntil: "domcontentloaded" });
            return await page.content();
        }
        finally {
            await page.close();
        }
    }
    /**
     * Fetches `url`, retrying after a randomized 5-12 second pause on failure.
     *
     * @param url     Address to load.
     * @param retries Number of retries allowed after the first attempt.
     *                Defaults to `config.maxRetries`, or 5 when that is not a
     *                non-negative integer. A configured 0 disables retrying.
     * @returns Promise resolving to the page's HTML.
     * @throws Error once all attempts failed; the last underlying error is
     *         attached as `cause`.
     */
    async fetchWithRetry(url, retries) {
        const config = this.config || {};
        const configured = config.maxRetries;
        const maxRetries = Number.isInteger(configured) && configured >= 0 ? configured : 5;
        let remaining = typeof retries === "number" ? retries : maxRetries;
        let attempts = 0;
        for (;;) {
            try {
                return await this.fetchHtml(url);
            }
            catch (error) {
                attempts += 1;
                if (remaining <= 0) {
                    // Report the real attempt count and keep the root cause reachable.
                    throw Object.assign(new Error(`Failed to fetch: ${url} after ${attempts} attempts`), { cause: error });
                }
                remaining -= 1;
                const logger = config.logger;
                if (logger != null)
                    logger.warn(`Retrying (${attempts}) for: ${url}`);
                // Randomized back-off to avoid hammering the remote site.
                await new Promise((resolve) => setTimeout(resolve, Math.random() * 7000 + 5000));
            }
        }
    }
}
48
+ exports.Fetcher = Fetcher;
@@ -0,0 +1,7 @@
1
+ import { Region, ScraperConfig } from "../types";
2
+ export declare class Parser {
3
+ static parseRegions($: cheerio.Root, config: ScraperConfig): Region[];
4
+ static parsePostalCodes($: cheerio.Root, config: ScraperConfig): Record<string, string[]>;
5
+ static parseCountries($: cheerio.Root, config: ScraperConfig): Region[];
6
+ static parseCountryByName($: cheerio.Root, config: ScraperConfig, name: string): Region | null;
7
+ }
@@ -0,0 +1,62 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.Parser = void 0;
4
/**
 * Stateless helpers that extract regions, countries and postal codes from a
 * loaded cheerio document (`$`).
 */
class Parser {
    /**
     * Collects the links of the ".regions" element that directly follows the
     * "Regions" heading.
     *
     * @param $      Loaded cheerio root.
     * @param config Scraper configuration (not used here).
     * @returns Regions as `{ name, prettyName, path }`; `name` is the last
     *          non-empty path segment. Links without href, text or a usable
     *          path segment are skipped.
     */
    static parseRegions($, config) {
        return $("h2:contains('Regions')")
            .next(".regions")
            .find("a")
            .map((_index, element) => {
            const link = $(element);
            const path = link.attr("href");
            const prettyName = link.text().trim();
            const name = path ? path.split("/").filter(Boolean).pop() : undefined;
            // A link such as "/" has no segment to use as a name; skip it
            // rather than emit a region whose name is undefined.
            if (!path || !prettyName || !name)
                return null;
            return { name, prettyName, path };
        })
            .get()
            .filter(Boolean);
    }
    /**
     * Maps each place listed under ".codes .container" to its postal codes.
     *
     * @param $      Loaded cheerio root.
     * @param config When `usePrettyName` is set the place text is used as key,
     *               otherwise a lower-cased, dash-separated form of it.
     * @returns Object of place key -> list of codes. When a place occurs more
     *          than once its lists are merged (codes already present are not
     *          repeated) instead of the later one replacing the earlier.
     */
    static parsePostalCodes($, config) {
        const codes = {};
        const usePrettyName = Boolean(config && config.usePrettyName);
        $(".codes .container").each((_i, element) => {
            const container = $(element);
            const place = container.find(".place").text().trim();
            if (!place)
                return;
            const codesList = container
                .find(".code span")
                .map((_j, el) => $(el).text().trim())
                .get();
            const key = usePrettyName ? place : place.toLowerCase().replace(/\s+/g, "-");
            if (!Object.prototype.hasOwnProperty.call(codes, key)) {
                codes[key] = codesList;
                return;
            }
            // Same place seen again: append only codes not recorded yet.
            const merged = codes[key];
            for (const code of codesList) {
                if (!merged.includes(code))
                    merged.push(code);
            }
        });
        return codes;
    }
    /**
     * Lists every country link found under ".regions div a".
     *
     * @param $      Loaded cheerio root.
     * @param config Scraper configuration (not used here).
     * @returns Countries as `{ name, prettyName, path }`; `name` is the href
     *          with all slashes removed.
     */
    static parseCountries($, config) {
        return $(".regions div a")
            .map((_i, element) => {
            const path = $(element).attr("href");
            return path ? { name: path.replace(/\//g, ""), prettyName: $(element).text().trim(), path } : null;
        })
            .get()
            .filter(Boolean);
    }
    /**
     * Finds the country whose href (slashes removed) equals the lower-cased,
     * trimmed `name`.
     *
     * @param $      Loaded cheerio root.
     * @param config Scraper configuration (not used here).
     * @param name   Country identifier to look for.
     * @returns The first matching country, or null when nothing matches or
     *          `name` is not a non-blank string.
     */
    static parseCountryByName($, config, name) {
        if (typeof name !== "string")
            return null;
        const wanted = name.toLowerCase().trim();
        if (!wanted)
            return null;
        // Restrict to the first match: text() on several elements would
        // concatenate all their labels into one prettyName.
        const countryElement = $(`.regions div a`)
            .filter((_, el) => {
            const href = $(el).attr("href");
            return href != null && href.replace(/\//g, "") === wanted;
        })
            .first();
        if (!countryElement.length)
            return null;
        const path = countryElement.attr("href");
        const prettyName = countryElement.text().trim();
        return path && prettyName
            ? {
                name: path.replace(/\//g, ""),
                prettyName,
                path,
            }
            : null;
    }
}
62
+ exports.Parser = Parser;
@@ -0,0 +1,12 @@
1
+ import { Region, ScraperConfig, RegionData } from "../types";
2
+ import { Fetcher } from "./fetchers";
3
+ export declare class ProcessingQueue {
4
+ private fetcher;
5
+ private config;
6
+ private queue;
7
+ private visitedUrls;
8
+ private limit;
9
+ constructor(fetcher: Fetcher, config: ScraperConfig);
10
+ process(startRegion: Region, data: RegionData): Promise<void>;
11
+ private processItem;
12
+ }
@@ -0,0 +1,67 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ var __importDefault = (this && this.__importDefault) || function (mod) {
12
+ return (mod && mod.__esModule) ? mod : { "default": mod };
13
+ };
14
+ Object.defineProperty(exports, "__esModule", { value: true });
15
+ exports.ProcessingQueue = void 0;
16
+ const cheerio_1 = require("cheerio");
17
+ const p_limit_1 = __importDefault(require("p-limit"));
18
+ const parsers_1 = require("./parsers");
19
+ const env_config_1 = require("../utils/env-config");
20
/**
 * Walks a region tree level by level: every queued region page is fetched
 * (at most `config.concurrency`, default 15, at a time), its sub-regions are
 * queued for the next round, and its postal codes are written into the data
 * object attached to the queue item.
 */
class ProcessingQueue {
    constructor(fetcher, config) {
        this.fetcher = fetcher;
        this.config = config;
        this.queue = [];
        this.visitedUrls = new Set();
        this.limit = (0, p_limit_1.default)(config.concurrency || 15);
    }
    /**
     * Processes `startRegion` and everything reachable from it, filling `data`.
     * Resolves once a round leaves the queue empty.
     */
    async process(startRegion, data) {
        this.queue.push({ region: startRegion, currData: data });
        while (this.queue.length > 0) {
            // Schedule the current round, then clear the queue so that the
            // items pushed while this round runs form the next one.
            const pending = this.queue.map((entry) => this.limit(() => this.processItem(entry)));
            this.queue = [];
            await Promise.all(pending);
        }
    }
    /**
     * Fetches one region page unless its URL was already visited, queues the
     * sub-regions it lists and merges its postal codes into `item.currData`.
     * Failures are logged through `config.logger` and not rethrown.
     */
    async processItem(item) {
        const url = `${(0, env_config_1.getBaseUrl)()}${item.region.path}`;
        if (this.visitedUrls.has(url))
            return;
        this.visitedUrls.add(url);
        const infoLogger = this.config.logger;
        if (infoLogger != null)
            infoLogger.info(`Fetching: ${url}`);
        try {
            const html = await this.fetcher.fetchWithRetry(url);
            const $ = (0, cheerio_1.load)(html);
            for (const region of parsers_1.Parser.parseRegions($, this.config)) {
                const key = this.config.usePrettyName ? region.prettyName : region.name;
                const childData = {};
                item.currData[key] = childData;
                this.queue.push({ region, currData: childData });
            }
            Object.assign(item.currData, parsers_1.Parser.parsePostalCodes($, this.config));
        }
        catch (error) {
            const errorLogger = this.config.logger;
            if (errorLogger != null)
                errorLogger.error(`Error processing ${url}:`, error);
        }
    }
}
67
+ exports.ProcessingQueue = ProcessingQueue;
@@ -0,0 +1,19 @@
1
+ import { ScraperConfig } from "../types";
2
+ export declare class PostalCodeScraper {
3
+ private config;
4
+ private browser;
5
+ private queue;
6
+ private fetcher;
7
+ constructor(config?: ScraperConfig);
8
+ scrapeCountry(countryName: string): Promise<null | undefined>;
9
+ scrapeCountries(): Promise<null | undefined>;
10
+ private initBrowser;
11
+ private getCountryDetails;
12
+ private getCountriesDetails;
13
+ private generatePostalCodeLookup;
14
+ private buildLookup;
15
+ private saveData;
16
+ private cleanup;
17
+ }
18
+ declare const _default: PostalCodeScraper;
19
+ export default _default;
@@ -0,0 +1,149 @@
1
+ "use strict";
2
+ var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
+ function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
+ return new (P || (P = Promise))(function (resolve, reject) {
5
+ function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
+ function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
+ function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
9
+ });
10
+ };
11
+ var __importDefault = (this && this.__importDefault) || function (mod) {
12
+ return (mod && mod.__esModule) ? mod : { "default": mod };
13
+ };
14
+ Object.defineProperty(exports, "__esModule", { value: true });
15
+ exports.PostalCodeScraper = void 0;
16
+ const path_1 = __importDefault(require("path"));
17
+ const puppeteer_1 = __importDefault(require("puppeteer"));
18
+ const queue_1 = require("./queue");
19
+ const fetchers_1 = require("./fetchers");
20
+ const id_generator_1 = require("../utils/id-generator");
21
+ const fs_1 = require("fs");
22
+ const cheerio_1 = require("cheerio");
23
+ const parsers_1 = require("./parsers");
24
+ const env_config_1 = require("../utils/env-config");
25
+ const string_utils_1 = require("../utils/string-utils");
26
+ const logger_1 = require("../utils/logger");
27
/**
 * Scrapes postal codes per country and writes two JSON files for each:
 * the nested region data and a flat postal-code lookup.
 */
class PostalCodeScraper {
    /**
     * @param config Optional overrides for the defaults: concurrency 15,
     *               maxRetries 5, headless true, directory "src/data",
     *               the package Logger, usePrettyName false.
     */
    constructor(config = {}) {
        this.config = Object.assign({ concurrency: 15, maxRetries: 5, headless: true, directory: "src/data", logger: logger_1.Logger, usePrettyName: false }, config);
    }
    /**
     * Scrapes a single country and saves its data and lookup files.
     *
     * @param countryName Country identifier as it appears in the site's links.
     * @returns Promise resolving to null when the country is not found,
     *          otherwise to undefined once the files have been written.
     */
    async scrapeCountry(countryName) {
        await this.initBrowser();
        try {
            const country = await this.getCountryDetails(countryName);
            if (!country) {
                const logger = this.config.logger;
                if (logger != null)
                    logger.warn(`Country not found: ${countryName}`);
                return null;
            }
            const data = {};
            await this.queue.process(country, data);
            this.saveData(data, `${country.name}-postal-codes.json`, this.config.directory);
            const postalCodeLookup = this.generatePostalCodeLookup(data);
            this.saveData(postalCodeLookup, `${country.name}-lookup.json`, this.config.directory);
        }
        finally {
            await this.cleanup();
        }
    }
    /**
     * Scrapes every country listed on the base page, one after another.
     *
     * @returns Promise resolving to null when no countries were found,
     *          otherwise to undefined.
     */
    async scrapeCountries() {
        await this.initBrowser();
        try {
            const countries = await this.getCountriesDetails();
            const logger = this.config.logger;
            if (countries.length === 0) {
                if (logger != null)
                    logger.warn("No countries found.");
                return null;
            }
            for (const country of countries) {
                const key = this.config.usePrettyName ? country.prettyName : country.name;
                const countryData = {};
                if (logger != null)
                    logger.info(`Processing country: ${key}`);
                await this.queue.process(country, countryData);
                this.saveData(countryData, `${key}-postal-codes.json`, this.config.directory);
                const postalCodeLookup = this.generatePostalCodeLookup(countryData);
                this.saveData(postalCodeLookup, `${key}-lookup.json`, this.config.directory);
            }
        }
        finally {
            await this.cleanup();
        }
    }
    /** Launches the browser and builds the fetcher and queue that use it. */
    async initBrowser() {
        this.browser = await puppeteer_1.default.launch({ headless: this.config.headless });
        this.fetcher = new fetchers_1.Fetcher(this.browser, this.config);
        this.queue = new queue_1.ProcessingQueue(this.fetcher, this.config);
    }
    /** Loads the base page and looks up one country; resolves to null on failure. */
    async getCountryDetails(name) {
        try {
            const html = await this.fetcher.fetchWithRetry((0, env_config_1.getBaseUrl)());
            return parsers_1.Parser.parseCountryByName((0, cheerio_1.load)(html), this.config, name);
        }
        catch (error) {
            const logger = this.config.logger;
            if (logger != null)
                logger.error(`Error fetching country details: ${name}`, error);
            return null;
        }
    }
    /** Loads the base page and lists all countries; resolves to [] on failure. */
    async getCountriesDetails() {
        try {
            const html = await this.fetcher.fetchWithRetry((0, env_config_1.getBaseUrl)());
            return parsers_1.Parser.parseCountries((0, cheerio_1.load)(html), this.config);
        }
        catch (error) {
            const logger = this.config.logger;
            if (logger != null)
                logger.error("Error fetching countries details", error);
            return [];
        }
    }
    /** Builds the flat lookup for `data` using a fresh region-id generator. */
    generatePostalCodeLookup(data) {
        return this.buildLookup(data, (0, id_generator_1.createRegionIdGenerator)());
    }
    /**
     * Recursively flattens nested region data.
     *
     * @param regionObj   Nested object whose leaves are arrays of postal codes.
     * @param idGenerator Function mapping a region path to its id.
     * @param acc         Region path accumulated so far.
     * @param result      Accumulator, mutated in place and returned.
     * @returns `{ postalCodeMap: code -> region id, regions: id -> path }`.
     */
    buildLookup(regionObj, idGenerator, acc = [], result = { postalCodeMap: {}, regions: {} }) {
        if (Array.isArray(regionObj)) {
            // All codes of one leaf share the same path, so resolve the id once.
            // Empty leaves are skipped so they do not consume an id.
            if (regionObj.length > 0) {
                const id = idGenerator(acc);
                result.regions[id] = [...acc];
                for (const code of regionObj) {
                    result.postalCodeMap[code] = id;
                }
            }
        }
        else if (typeof regionObj === "object" && regionObj !== null) {
            for (const [regionKey, regionValue] of Object.entries(regionObj)) {
                this.buildLookup(regionValue, idGenerator, [...acc, regionKey], result);
            }
        }
        return result;
    }
    /**
     * Writes `data` as pretty-printed JSON into `directory`, creating it when
     * needed. Best effort: failures are logged and not rethrown.
     */
    saveData(data, fileName, directory = "src/data") {
        const logger = this.config.logger;
        try {
            (0, fs_1.mkdirSync)(directory, { recursive: true });
            const filePath = path_1.default.join(directory, (0, string_utils_1.normalizeString)(fileName));
            (0, fs_1.writeFileSync)(filePath, JSON.stringify(data, null, 2), { flag: "w" });
            if (logger != null)
                logger.info(`Saved data to ${filePath}`);
        }
        catch (error) {
            if (logger != null)
                logger.error(`Error saving data to ${fileName}`, error);
        }
    }
    /**
     * Closes the browser, if any, and drops the reference to it.
     * cleanup() runs inside `finally`, so a close failure is logged rather
     * than thrown; throwing here would replace the scrape's own error.
     */
    async cleanup() {
        const browser = this.browser;
        this.browser = undefined;
        if (browser == null)
            return;
        try {
            await browser.close();
        }
        catch (error) {
            const logger = this.config ? this.config.logger : undefined;
            if (logger != null)
                logger.error("Error closing browser", error);
        }
    }
}
148
+ exports.PostalCodeScraper = PostalCodeScraper;
149
+ exports.default = new PostalCodeScraper();
@@ -0,0 +1,32 @@
1
+ export type Region = {
2
+ path: string;
3
+ name: string;
4
+ prettyName: string;
5
+ };
6
+ export type ScraperConfig = {
7
+ usePrettyName?: boolean;
8
+ directory?: string;
9
+ concurrency?: number;
10
+ maxRetries?: number;
11
+ headless?: boolean;
12
+ logger?: any;
13
+ };
14
+ export type ProcessingQueueItem = {
15
+ region: Region;
16
+ currData: RegionData;
17
+ };
18
+ export interface LookupData {
19
+ postalCodeMap: {
20
+ [postalCode: string]: string;
21
+ };
22
+ regions: {
23
+ [code: string]: string[];
24
+ };
25
+ }
26
+ export interface PostalCodeData {
27
+ rawData: RegionData;
28
+ postalCodeLookup: LookupData;
29
+ }
30
+ export interface RegionData {
31
+ [key: string]: RegionData | string[];
32
+ }
package/dist/types.js ADDED
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1 @@
1
+ export declare const getBaseUrl: () => string;
@@ -0,0 +1,7 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.getBaseUrl = void 0;
4
/** Returns the root URL of the site the scraper reads from (no trailing slash). */
const getBaseUrl = () => "https://worldpostalcode.com";
7
+ exports.getBaseUrl = getBaseUrl;
@@ -0,0 +1,4 @@
1
+ export interface RegionIdGenerator {
2
+ (regions: string[]): string;
3
+ }
4
+ export declare const createRegionIdGenerator: () => RegionIdGenerator;
@@ -0,0 +1,26 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createRegionIdGenerator = void 0;
4
/**
 * Creates a generator that assigns a stable id to each region path.
 *
 * Each path segment is normalized (trimmed, lower-cased, diacritics removed,
 * whitespace runs replaced by "_"). The id is "<last segment>_<n>", where n
 * counts the distinct paths seen so far that end in that segment. Asking for
 * the same normalized path again returns the id issued the first time.
 *
 * @returns Function mapping a region path (array of names) to its id. An
 *          empty path is given the base name "root".
 */
const createRegionIdGenerator = () => {
    const regionRegistry = new Map();
    const counterMap = new Map();
    // String() lets non-string segments be handled instead of throwing on trim().
    const normalizeSegment = (region) => String(region)
        .trim()
        .toLowerCase()
        .normalize("NFD")
        .replace(/[\u0300-\u036f]/g, "")
        .replace(/\s+/g, "_");
    return (regions) => {
        const normalized = regions.map((region) => normalizeSegment(region));
        // JSON keeps segment boundaries unambiguous; joining with "|" would
        // make ["a|b", "c"] and ["a", "b|c"] share one registry entry.
        const compositeKey = JSON.stringify(normalized);
        const knownId = regionRegistry.get(compositeKey);
        if (knownId !== undefined) {
            return knownId;
        }
        const baseName = normalized.length > 0 ? normalized[normalized.length - 1] : "root";
        const count = (counterMap.get(baseName) || 0) + 1;
        counterMap.set(baseName, count);
        const newId = `${baseName}_${count}`;
        regionRegistry.set(compositeKey, newId);
        return newId;
    };
};
26
+ exports.createRegionIdGenerator = createRegionIdGenerator;
@@ -0,0 +1,33 @@
1
+ export type LogMethod = "error" | "warn" | "info" | "debug";
2
+ export type LogLevel = LogMethod | "silent";
3
+ export interface LoggerInterface {
4
+ debug(message: string, ...args: any[]): void;
5
+ info(message: string, ...args: any[]): void;
6
+ warn(message: string, ...args: any[]): void;
7
+ error(message: string, ...args: any[]): void;
8
+ }
9
+ export declare class Logger implements LoggerInterface {
10
+ private static logLevel;
11
+ private static useColors;
12
+ private static prefix;
13
+ private static instance;
14
+ static configure(config: {
15
+ level?: LogLevel;
16
+ colors?: boolean;
17
+ prefix?: string;
18
+ logger?: LoggerInterface;
19
+ }): void;
20
+ static getInstance(): LoggerInterface;
21
+ static debug(message: string, ...args: any[]): void;
22
+ static info(message: string, ...args: any[]): void;
23
+ static warn(message: string, ...args: any[]): void;
24
+ static error(message: string, ...args: any[]): void;
25
+ private static shouldLog;
26
+ private static log;
27
+ private static formatMessage;
28
+ private static getLevelColor;
29
+ debug(message: string, ...args: any[]): void;
30
+ info(message: string, ...args: any[]): void;
31
+ warn(message: string, ...args: any[]): void;
32
+ error(message: string, ...args: any[]): void;
33
+ }