postal-code-scraper 1.0.3 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/README.md +55 -61
  2. package/dist/index.cjs +392 -0
  3. package/dist/index.d.cts +43 -0
  4. package/dist/index.d.ts +43 -2
  5. package/dist/index.js +355 -20
  6. package/package.json +17 -6
  7. package/.mocharc.json +0 -4
  8. package/build/test/src/index.js +0 -26
  9. package/build/test/src/index.js.map +0 -1
  10. package/build/test/src/scraper/fetchers.js +0 -49
  11. package/build/test/src/scraper/fetchers.js.map +0 -1
  12. package/build/test/src/scraper/parsers.js +0 -63
  13. package/build/test/src/scraper/parsers.js.map +0 -1
  14. package/build/test/src/scraper/queue.js +0 -69
  15. package/build/test/src/scraper/queue.js.map +0 -1
  16. package/build/test/src/scraper/scrapers.js +0 -148
  17. package/build/test/src/scraper/scrapers.js.map +0 -1
  18. package/build/test/src/types.js +0 -3
  19. package/build/test/src/types.js.map +0 -1
  20. package/build/test/src/utils/id-generator.js +0 -33
  21. package/build/test/src/utils/id-generator.js.map +0 -1
  22. package/build/test/src/utils/logger.js +0 -87
  23. package/build/test/src/utils/logger.js.map +0 -1
  24. package/build/test/tests/postal-code-scraper.test.js +0 -14
  25. package/build/test/tests/postal-code-scraper.test.js.map +0 -1
  26. package/dist/scraper/fetchers.d.ts +0 -9
  27. package/dist/scraper/fetchers.js +0 -48
  28. package/dist/scraper/parsers.d.ts +0 -7
  29. package/dist/scraper/parsers.js +0 -62
  30. package/dist/scraper/queue.d.ts +0 -12
  31. package/dist/scraper/queue.js +0 -67
  32. package/dist/scraper/scrapers.d.ts +0 -18
  33. package/dist/scraper/scrapers.js +0 -149
  34. package/dist/types.d.ts +0 -32
  35. package/dist/types.js +0 -2
  36. package/dist/utils/env-config.d.ts +0 -1
  37. package/dist/utils/env-config.js +0 -7
  38. package/dist/utils/id-generator.d.ts +0 -4
  39. package/dist/utils/id-generator.js +0 -26
  40. package/dist/utils/logger.d.ts +0 -33
  41. package/dist/utils/logger.js +0 -86
  42. package/dist/utils/string-utils.d.ts +0 -1
  43. package/dist/utils/string-utils.js +0 -13
  44. package/src/index.ts +0 -2
  45. package/src/scraper/fetchers.ts +0 -30
  46. package/src/scraper/parsers.ts +0 -67
  47. package/src/scraper/queue.ts +0 -55
  48. package/src/scraper/scrapers.ts +0 -143
  49. package/src/types.ts +0 -37
  50. package/src/utils/env-config.ts +0 -3
  51. package/src/utils/id-generator.ts +0 -35
  52. package/src/utils/logger.ts +0 -105
  53. package/src/utils/string-utils.ts +0 -9
  54. package/tests/postal-code-scraper.test.ts +0 -100
  55. package/tests/tsconfig.json +0 -13
  56. package/tsconfig.json +0 -15
@@ -1,149 +0,0 @@
1
- "use strict";
2
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
- return new (P || (P = Promise))(function (resolve, reject) {
5
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
- step((generator = generator.apply(thisArg, _arguments || [])).next());
9
- });
10
- };
11
- var __importDefault = (this && this.__importDefault) || function (mod) {
12
- return (mod && mod.__esModule) ? mod : { "default": mod };
13
- };
14
- Object.defineProperty(exports, "__esModule", { value: true });
15
- exports.PostalCodeScraper = void 0;
16
- const path_1 = __importDefault(require("path"));
17
- const puppeteer_1 = __importDefault(require("puppeteer"));
18
- const queue_1 = require("./queue");
19
- const fetchers_1 = require("./fetchers");
20
- const id_generator_1 = require("../utils/id-generator");
21
- const fs_1 = require("fs");
22
- const cheerio_1 = require("cheerio");
23
- const parsers_1 = require("./parsers");
24
- const env_config_1 = require("../utils/env-config");
25
- const string_utils_1 = require("../utils/string-utils");
26
- const logger_1 = require("../utils/logger");
27
/**
 * Drives a Puppeteer browser over the site returned by `getBaseUrl()` and
 * writes, for each scraped country, two JSON files: the nested region tree
 * (`<name>-postal-codes.json`) and a lookup table derived from it
 * (`<name>-lookup.json`).
 */
class PostalCodeScraper {
    /**
     * @param config Optional overrides. Options that are not supplied fall
     *               back to the defaults assembled below.
     */
    constructor(config = {}) {
        this.config = config;
        const defaults = {
            concurrency: 15,
            maxRetries: 5,
            headless: true,
            directory: "src/data",
            logger: logger_1.Logger,
            usePrettyName: false,
        };
        this.config = Object.assign(defaults, config);
    }
    /**
     * Scrapes one country and saves its two JSON files.
     *
     * @param countryName Country identifier as matched by `Parser.parseCountryByName`.
     * @returns A promise resolving to `null` when the country is not found,
     *          otherwise to `undefined`.
     */
    scrapeCountry(countryName) {
        return __awaiter(this, void 0, void 0, function* () {
            yield this.initBrowser();
            try {
                const country = yield this.getCountryDetails(countryName);
                if (!country) {
                    const log = this.config.logger;
                    if (log !== null && log !== void 0) {
                        log.warn(`Country not found: ${countryName}`);
                    }
                    return null;
                }
                const tree = {};
                yield this.queue.process(country, tree);
                this.saveData(tree, `${country.name}-postal-codes.json`, this.config.directory);
                const lookup = this.generatePostalCodeLookup(tree);
                this.saveData(lookup, `${country.name}-lookup.json`, this.config.directory);
            }
            finally {
                // The browser is closed whether or not scraping succeeded.
                yield this.cleanup();
            }
        });
    }
    /**
     * Scrapes every country listed on the base page, one after another.
     *
     * @returns A promise resolving to `null` when no countries were found,
     *          otherwise to `undefined`.
     */
    scrapeCountries() {
        return __awaiter(this, void 0, void 0, function* () {
            yield this.initBrowser();
            try {
                const countries = yield this.getCountriesDetails();
                if (countries.length === 0) {
                    const log = this.config.logger;
                    if (log !== null && log !== void 0) {
                        log.warn("No countries found.");
                    }
                    return null;
                }
                for (const country of countries) {
                    // File names follow the display name only when usePrettyName is set.
                    const key = this.config.usePrettyName ? country.prettyName : country.name;
                    const tree = {};
                    const log = this.config.logger;
                    if (log !== null && log !== void 0) {
                        log.info(`Processing country: ${key}`);
                    }
                    yield this.queue.process(country, tree);
                    this.saveData(tree, `${key}-postal-codes.json`, this.config.directory);
                    const lookup = this.generatePostalCodeLookup(tree);
                    this.saveData(lookup, `${key}-lookup.json`, this.config.directory);
                }
            }
            finally {
                yield this.cleanup();
            }
        });
    }
    /** Launches the browser and builds the fetcher and queue that share it. */
    initBrowser() {
        return __awaiter(this, void 0, void 0, function* () {
            const launchOptions = { headless: this.config.headless };
            this.browser = yield puppeteer_1.default.launch(launchOptions);
            this.fetcher = new fetchers_1.Fetcher(this.browser, this.config);
            this.queue = new queue_1.ProcessingQueue(this.fetcher, this.config);
        });
    }
    /**
     * Loads the base page and looks up a single country on it.
     *
     * @returns The parsed country, or `null` when it is missing or the fetch failed.
     */
    getCountryDetails(name) {
        return __awaiter(this, void 0, void 0, function* () {
            try {
                const html = yield this.fetcher.fetchWithRetry((0, env_config_1.getBaseUrl)());
                const $ = (0, cheerio_1.load)(html);
                return parsers_1.Parser.parseCountryByName($, this.config, name);
            }
            catch (error) {
                const log = this.config.logger;
                if (log !== null && log !== void 0) {
                    log.error(`Error fetching country details: ${name}`, error);
                }
                return null;
            }
        });
    }
    /**
     * Loads the base page and parses every country on it.
     *
     * @returns The parsed countries, or an empty array when the fetch failed.
     */
    getCountriesDetails() {
        return __awaiter(this, void 0, void 0, function* () {
            try {
                const html = yield this.fetcher.fetchWithRetry((0, env_config_1.getBaseUrl)());
                const $ = (0, cheerio_1.load)(html);
                return parsers_1.Parser.parseCountries($, this.config);
            }
            catch (error) {
                const log = this.config.logger;
                if (log !== null && log !== void 0) {
                    log.error("Error fetching countries details", error);
                }
                return [];
            }
        });
    }
    /** Builds the lookup table for one region tree using a fresh ID generator. */
    generatePostalCodeLookup(data) {
        const idGenerator = (0, id_generator_1.createRegionIdGenerator)();
        return this.buildLookup(data, idGenerator);
    }
    /**
     * Walks a region tree depth-first. Each array leaf is a list of postal
     * codes; every code is mapped to the ID of the path that leads to it, and
     * that ID is mapped back to the path.
     *
     * @param regionObj   Sub-tree (object) or leaf (array of postal codes).
     * @param idGenerator Maps a region path to its ID.
     * @param acc         Region keys from the root down to `regionObj`.
     * @param result      Accumulator shared by the whole traversal.
     * @returns `result`, filled in place.
     */
    buildLookup(regionObj, idGenerator, acc = [], result = { postalCodeMap: {}, regions: {} }) {
        if (Array.isArray(regionObj)) {
            for (const postalCode of regionObj) {
                const regionId = idGenerator(acc);
                result.postalCodeMap[postalCode] = regionId;
                result.regions[regionId] = [...acc];
            }
            return result;
        }
        if (typeof regionObj === "object" && regionObj !== null) {
            Object.entries(regionObj).forEach(([childKey, childValue]) => {
                this.buildLookup(childValue, idGenerator, [...acc, childKey], result);
            });
        }
        return result;
    }
    /**
     * Serialises `data` as pretty-printed JSON into `directory`, creating the
     * directory if needed. Failures are logged, not thrown.
     *
     * @param fileName  Raw file name; it is passed through `normalizeString`.
     * @param directory Target directory.
     */
    saveData(data, fileName, directory = "src/data") {
        try {
            (0, fs_1.mkdirSync)(directory, { recursive: true });
            const safeName = (0, string_utils_1.normalizeString)(fileName);
            const filePath = path_1.default.join(directory, safeName);
            const json = JSON.stringify(data, null, 2);
            (0, fs_1.writeFileSync)(filePath, json, { flag: "w" });
            const log = this.config.logger;
            if (log !== null && log !== void 0) {
                log.info(`Saved data to ${filePath}`);
            }
        }
        catch (error) {
            const log = this.config.logger;
            if (log !== null && log !== void 0) {
                log.error(`Error saving data to ${fileName}`, error);
            }
        }
    }
    /** Closes the browser if one was launched. */
    cleanup() {
        return __awaiter(this, void 0, void 0, function* () {
            const browser = this.browser;
            yield (browser === null || browser === void 0 ? void 0 : browser.close());
        });
    }
}
148
- exports.PostalCodeScraper = PostalCodeScraper;
149
- exports.default = PostalCodeScraper;
package/dist/types.d.ts DELETED
@@ -1,32 +0,0 @@
1
/** One link found on a listing page: a country or a sub-region. */
export type Region = {
  /** The link's `href`, appended to the base URL when the region is fetched. */
  path: string;
  /** Identifier derived from `path` by the parser. */
  name: string;
  /** The link's visible text. */
  prettyName: string;
};

/** Options accepted by `PostalCodeScraper`; all of them are optional. */
export type ScraperConfig = {
  /** Use `prettyName` instead of `name` for keys in the produced data. */
  usePrettyName?: boolean;
  /** Directory the JSON files are written to. */
  directory?: string;
  /** Limit on the number of pages processed at the same time. */
  concurrency?: number;
  /** Number of retries after a failed page fetch. */
  maxRetries?: number;
  /** Passed to Puppeteer's `launch` as its `headless` option. */
  headless?: boolean;
  /** Object whose `info` / `warn` / `error` methods receive log messages. */
  logger?: any;
};

/** A region tree: each key maps to a nested tree or to a list of postal codes. */
export interface RegionData {
  [key: string]: RegionData | string[];
}

/** A unit of work for the queue: a region and the tree node it fills in. */
export type ProcessingQueueItem = {
  region: Region;
  currData: RegionData;
};

/** Lookup table derived from a region tree. */
export interface LookupData {
  /** Postal code to region ID. */
  postalCodeMap: Record<string, string>;
  /** Region ID to the list of region keys leading to it. */
  regions: Record<string, string[]>;
}

/** A region tree bundled with the lookup table derived from it. */
export interface PostalCodeData {
  rawData: RegionData;
  postalCodeLookup: LookupData;
}
package/dist/types.js DELETED
@@ -1,2 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
@@ -1 +0,0 @@
1
- export declare const getBaseUrl: () => string;
@@ -1,7 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.getBaseUrl = void 0;
4
/**
 * Returns the root URL of the site being scraped.
 *
 * @returns The base URL, without a trailing slash.
 */
const getBaseUrl = () => "https://worldpostalcode.com";
7
- exports.getBaseUrl = getBaseUrl;
@@ -1,4 +0,0 @@
1
/** Maps a region path (outermost region first) to a string identifier. */
export type RegionIdGenerator = (regions: string[]) => string;

/** Creates a new, independent {@link RegionIdGenerator}. */
export declare const createRegionIdGenerator: () => RegionIdGenerator;
@@ -1,26 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.createRegionIdGenerator = void 0;
4
/**
 * Creates a generator that assigns an ID to each distinct region path.
 *
 * Every path segment is normalised (trimmed, lower-cased, diacritics removed,
 * whitespace runs replaced by "_"). The ID is the last normalised segment
 * followed by "_" and a counter that grows each time a different path ends in
 * that same segment. Asking again for an already-seen path returns the ID
 * issued the first time.
 *
 * @returns A function taking the region path (outermost region first) and
 *          returning its ID. An empty path is given the base name "root".
 */
const createRegionIdGenerator = () => {
    // Serialised normalised path -> ID already issued for it.
    const regionRegistry = new Map();
    // Base name -> how many IDs have been issued with that base name.
    const counterMap = new Map();
    const normalizeSegment = (region) => region
        .trim()
        .toLowerCase()
        .normalize("NFD")
        .replace(/[\u0300-\u036f]/g, "")
        .replace(/\s+/g, "_");
    return (regions) => {
        const normalized = regions.map((region) => normalizeSegment(region));
        // JSON keeps segment boundaries unambiguous. Joining with "|" made
        // ["a|b"] and ["a", "b"] share one registry entry.
        const compositeKey = JSON.stringify(normalized);
        const existingId = regionRegistry.get(compositeKey);
        if (existingId !== undefined) {
            return existingId;
        }
        // An empty path has no last segment; without a fallback the ID would
        // start with the literal text "undefined".
        const baseName = normalized.length > 0 ? normalized[normalized.length - 1] : "root";
        const count = (counterMap.get(baseName) || 0) + 1;
        counterMap.set(baseName, count);
        const newId = `${baseName}_${count}`;
        regionRegistry.set(compositeKey, newId);
        return newId;
    };
};
26
- exports.createRegionIdGenerator = createRegionIdGenerator;
@@ -1,33 +0,0 @@
1
/** Names of the logging methods; each is also a severity level. */
export type LogMethod = "error" | "warn" | "info" | "debug";

/** A logging threshold: any {@link LogMethod}, or "silent" to log nothing. */
export type LogLevel = LogMethod | "silent";

/** Contract for a log sink: one method per severity. */
export interface LoggerInterface {
  error(message: string, ...args: any[]): void;
  warn(message: string, ...args: any[]): void;
  info(message: string, ...args: any[]): void;
  debug(message: string, ...args: any[]): void;
}

/**
 * Logger with static, globally configured entry points. Its instance methods
 * also implement {@link LoggerInterface}.
 */
export declare class Logger implements LoggerInterface {
  private static instance;
  private static prefix;
  private static useColors;
  private static logLevel;

  /** Updates the global settings; omitted fields are left unchanged. */
  static configure(config: {
    level?: LogLevel;
    colors?: boolean;
    prefix?: string;
    logger?: LoggerInterface;
  }): void;

  /** Returns the configured sink, or a new `Logger` when none is configured. */
  static getInstance(): LoggerInterface;

  static error(message: string, ...args: any[]): void;
  static warn(message: string, ...args: any[]): void;
  static info(message: string, ...args: any[]): void;
  static debug(message: string, ...args: any[]): void;

  private static getLevelColor;
  private static formatMessage;
  private static log;
  private static shouldLog;

  error(message: string, ...args: any[]): void;
  warn(message: string, ...args: any[]): void;
  info(message: string, ...args: any[]): void;
  debug(message: string, ...args: any[]): void;
}
@@ -1,86 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.Logger = void 0;
4
/**
 * Logger with static, globally configured entry points.
 *
 * The static methods filter by the configured level, format the message
 * (prefix, ISO timestamp, level tag, optional ANSI colours) and forward it to
 * the sink returned by `getInstance()`. The instance methods are the default
 * sink and write straight to the console.
 */
class Logger {
    /**
     * Updates the global settings. Fields that are omitted keep their value.
     *
     * @param config `level`, `colors`, `prefix` and/or `logger` (custom sink).
     */
    static configure(config) {
        if (config.level)
            this.logLevel = config.level;
        if (config.colors !== undefined)
            this.useColors = config.colors;
        // Compare with undefined, as for `colors`, so an empty string can
        // clear the prefix instead of being silently ignored.
        if (config.prefix !== undefined)
            this.prefix = config.prefix;
        if (config.logger)
            this.instance = config.logger;
    }
    /** Returns the configured sink, or a new `Logger` when none is configured. */
    static getInstance() {
        return this.instance || new Logger();
    }
    static debug(message, ...args) {
        this.log("debug", message, args);
    }
    static info(message, ...args) {
        this.log("info", message, args);
    }
    static warn(message, ...args) {
        this.log("warn", message, args);
    }
    static error(message, ...args) {
        this.log("error", message, args);
    }
    /**
     * Tells whether a message of severity `level` passes the configured
     * threshold. "silent" suppresses everything.
     */
    static shouldLog(level) {
        if (this.logLevel === "silent")
            return false;
        // Ordered from most to least severe; a message passes when it is at
        // least as severe as the threshold.
        const levels = ["error", "warn", "info", "debug"];
        return levels.indexOf(level) <= levels.indexOf(this.logLevel);
    }
    /** Filters, formats and forwards one message to the sink. */
    static log(level, message, args) {
        if (!this.shouldLog(level))
            return;
        const logger = this.getInstance();
        const formatted = this.formatMessage(level, message);
        logger[level](formatted, ...args);
    }
    /**
     * Builds "<prefix> <timestamp> [LEVEL] <message>", wrapped in ANSI colour
     * codes when colours are enabled. The prefix and its separator are left
     * out when the prefix is empty.
     */
    static formatMessage(level, message) {
        const timestamp = new Date().toISOString();
        const levelColor = this.getLevelColor(level);
        const messageColor = this.useColors ? "\x1b[37m" : "";
        const prefix = this.prefix ? `${this.prefix} ` : "";
        return [
            this.useColors ? "\x1b[90m" : "",
            prefix,
            `${timestamp} `,
            levelColor,
            `[${level.toUpperCase()}]`,
            this.useColors ? "\x1b[0m" : "",
            messageColor,
            ` ${message}`,
            this.useColors ? "\x1b[0m" : "",
        ].join("");
    }
    /** Returns the ANSI colour code for `level`, or "" when colours are off. */
    static getLevelColor(level) {
        if (!this.useColors)
            return "";
        return {
            error: "\x1b[31m", // Red
            warn: "\x1b[33m", // Yellow
            info: "\x1b[36m", // Cyan
            debug: "\x1b[35m", // Magenta
        }[level];
    }
    // Instance methods to implement LoggerInterface
    debug(message, ...args) {
        console.debug(message, ...args);
    }
    info(message, ...args) {
        console.log(message, ...args);
    }
    warn(message, ...args) {
        console.warn(message, ...args);
    }
    error(message, ...args) {
        console.error(message, ...args);
    }
}
83
- exports.Logger = Logger;
84
- Logger.logLevel = "info";
85
- Logger.useColors = true;
86
- Logger.prefix = "[POSTAL-CODE-SCRAPER]";
@@ -1 +0,0 @@
1
- export declare const normalizeString: (str: string) => string;
@@ -1,13 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.normalizeString = void 0;
4
/**
 * Reduces a string to lower-case ASCII letters, digits, "." and "-".
 *
 * Steps, in order: trim, lower-case, strip combining diacritics, turn each
 * whitespace run into a single "-", then drop every remaining character that
 * is not in the allowed set.
 *
 * @param str Text to normalise.
 * @returns The normalised text.
 */
const normalizeString = (str) => {
    const lowered = str.trim().toLowerCase();
    const withoutAccents = lowered.normalize("NFD").replace(/[\u0300-\u036f]/g, "");
    const hyphenated = withoutAccents.replace(/\s+/g, "-");
    return hyphenated.replace(/[^a-z0-9.-]/g, "");
};
13
- exports.normalizeString = normalizeString;
package/src/index.ts DELETED
@@ -1,2 +0,0 @@
1
- export * from "./types";
2
- export { PostalCodeScraper } from "./scraper/scrapers";
@@ -1,30 +0,0 @@
1
- import { Browser } from "puppeteer";
2
- import { ScraperConfig } from "../types";
3
-
4
/**
 * Loads pages through a shared Puppeteer browser and retries failed loads.
 */
export class Fetcher {
  constructor(private browser: Browser, private config: ScraperConfig) {}

  /**
   * Opens a new page, navigates to `url` and returns the page's HTML.
   * The page is closed whether or not navigation succeeds.
   *
   * @param url Address to load.
   * @returns The page content once `domcontentloaded` has fired.
   */
  async fetchHtml(url: string): Promise<string> {
    const page = await this.browser.newPage();
    try {
      page.setDefaultNavigationTimeout(60000);
      await page.goto(url, { waitUntil: "domcontentloaded" });
      return await page.content();
    } finally {
      await page.close();
    }
  }

  /**
   * Fetches `url`, retrying after a random 5-12 second pause on failure.
   *
   * @param url     Address to load.
   * @param retries Number of retries after the first attempt. Defaults to
   *                `config.maxRetries`, or 5 when that is not set; an explicit
   *                0 disables retries.
   * @returns The page HTML from the first successful attempt.
   * @throws Error when the first attempt and every retry have failed.
   */
  async fetchWithRetry(url: string, retries = this.config.maxRetries ?? 5): Promise<string> {
    for (let attempt = 0; ; attempt++) {
      try {
        return await this.fetchHtml(url);
      } catch (error) {
        // Written as a negated "<" so that a NaN budget ends the loop
        // instead of retrying forever.
        if (!(attempt < retries)) {
          const reason = error instanceof Error ? error.message : String(error);
          throw new Error(`Failed to fetch: ${url} after ${attempt + 1} attempts (last error: ${reason})`);
        }
        // Log only when a retry really follows. The count comes from the
        // local attempt number, so it is right even if maxRetries is unset.
        this.config.logger?.warn(`Retrying (${attempt + 1}) for: ${url}`);
        await new Promise((resolve) => setTimeout(resolve, Math.random() * 7000 + 5000));
      }
    }
  }
}
@@ -1,67 +0,0 @@
1
- import { Region, ScraperConfig } from "../types";
2
-
3
/**
 * Stateless helpers that extract regions, countries and postal codes from a
 * page already loaded into cheerio.
 */
export class Parser {
  /**
   * Reads the links in the `.regions` element that directly follows the
   * "Regions" heading.
   *
   * @param $      Loaded page.
   * @param config Unused here; kept so all parser methods share a signature.
   * @returns One region per link that has an href with at least one path
   *          segment and non-empty text. `name` is the last path segment.
   */
  static parseRegions($: cheerio.Root, config: ScraperConfig): Region[] {
    return $("h2:contains('Regions')")
      .next(".regions")
      .find("a")
      .map((_index, element) => {
        const path = $(element).attr("href");
        const prettyName = $(element).text().trim();
        // A path such as "/" has no segment to use as a name; skip the link
        // rather than emit a region whose name is undefined.
        const name = path?.split("/").filter(Boolean).pop();
        if (!path || !prettyName || !name) return null;

        return { name, prettyName, path };
      })
      .get()
      .filter(Boolean) as Region[];
  }

  /**
   * Collects the postal codes listed in each `.codes .container` element.
   *
   * @param $      Loaded page.
   * @param config `usePrettyName` keeps the place text as the key; otherwise
   *               the key is lower-cased with whitespace runs replaced by "-".
   * @returns Place key to its codes. When a place key occurs more than once,
   *          the code lists are merged without duplicates.
   */
  static parsePostalCodes($: cheerio.Root, config: ScraperConfig): Record<string, string[]> {
    const codes: Record<string, string[]> = {};

    $(".codes .container").each((_i, element) => {
      const place = $(element).find(".place").text().trim();
      const codesList: string[] = $(element)
        .find(".code span")
        .map((_j, el) => $(el).text().trim())
        .get();

      if (!place) return;

      const key = config.usePrettyName ? place : place.toLowerCase().replace(/\s+/g, "-");
      // A repeated place used to overwrite the codes collected earlier.
      // hasOwnProperty (not `in`) so inherited names never count as seen.
      codes[key] = Object.prototype.hasOwnProperty.call(codes, key)
        ? [...new Set([...codes[key], ...codesList])]
        : codesList;
    });

    return codes;
  }

  /**
   * Reads every country link (`.regions div a`).
   *
   * @param $      Loaded page.
   * @param config Unused here; kept so all parser methods share a signature.
   * @returns One region per link that has an href. `name` is the href with
   *          all "/" removed.
   */
  static parseCountries($: cheerio.Root, config: ScraperConfig): Region[] {
    return $(".regions div a")
      .map((_i, element) => {
        const path = $(element).attr("href");
        return path ? { name: path.replace(/\//g, ""), prettyName: $(element).text().trim(), path } : null;
      })
      .get()
      .filter(Boolean) as Region[];
  }

  /**
   * Finds the country link whose href, with all "/" removed, equals `name`
   * after trimming and lower-casing.
   *
   * @param $      Loaded page.
   * @param config Unused here; kept so all parser methods share a signature.
   * @param name   Country identifier to look for.
   * @returns The first matching country, or `null` when there is none or the
   *          link has no href or text.
   */
  static parseCountryByName($: cheerio.Root, config: ScraperConfig, name: string): Region | null {
    // Normalise once instead of once per link.
    const wanted = name.toLowerCase().trim();
    // Keep only the first match: `.text()` on several matched links would
    // concatenate all of their texts into one prettyName.
    const countryElement = $(`.regions div a`)
      .filter((_, el) => $(el).attr("href")?.replace(/\//g, "") === wanted)
      .first();

    if (!countryElement.length) return null;

    const path = countryElement.attr("href");
    const prettyName = countryElement.text().trim();
    return path && prettyName
      ? {
          name: path.replace(/\//g, ""),
          prettyName,
          path,
        }
      : null;
  }
}
@@ -1,55 +0,0 @@
1
- import { load } from "cheerio";
2
- import { Region, ProcessingQueueItem, ScraperConfig, RegionData } from "../types";
3
- import { Fetcher } from "./fetchers";
4
- import pLimit from "p-limit";
5
- import { Parser } from "./parsers";
6
- import { getBaseUrl } from "../utils/env-config";
7
-
8
/**
 * Crawls a region tree level by level: every pending region page is fetched
 * (with a concurrency limit), its sub-regions are queued for the next round
 * and its postal codes are written into the shared data tree.
 */
export class ProcessingQueue {
  private queue: ProcessingQueueItem[] = [];
  private visitedUrls = new Set<string>();
  private limit: ReturnType<typeof pLimit>;

  /**
   * @param fetcher Used to download each region page.
   * @param config  `concurrency` caps parallel page loads (15 when unset or 0).
   */
  constructor(private fetcher: Fetcher, private config: ScraperConfig) {
    this.limit = pLimit(config.concurrency || 15);
  }

  /**
   * Crawls `startRegion` and everything reachable from it, filling `data`
   * in place. Resolves once no region is left in the queue.
   */
  async process(startRegion: Region, data: RegionData): Promise<void> {
    this.queue.push({ region: startRegion, currData: data });

    while (this.queue.length > 0) {
      // Take the current batch and start a fresh queue; items discovered
      // while this batch runs are handled in the next round.
      const batch = this.queue;
      this.queue = [];
      await Promise.all(batch.map((entry) => this.limit(() => this.processItem(entry))));
    }
  }

  /**
   * Fetches one region page, queues its sub-regions and merges its postal
   * codes into the item's data node. URLs already seen are skipped, and
   * errors are logged rather than thrown.
   */
  private async processItem(item: ProcessingQueueItem): Promise<void> {
    const { region, currData } = item;
    const url = `${getBaseUrl()}${region.path}`;

    if (this.visitedUrls.has(url)) {
      return;
    }
    this.visitedUrls.add(url);

    this.config.logger?.info(`Fetching: ${url}`);

    try {
      const $ = load(await this.fetcher.fetchWithRetry(url));

      for (const child of Parser.parseRegions($, this.config)) {
        const key = this.config.usePrettyName ? child.prettyName : child.name;
        // The same object is attached to the tree and handed to the queue,
        // so the child's results land directly in the parent's data.
        const childData: RegionData = {};
        currData[key] = childData;
        this.queue.push({ region: child, currData: childData });
      }

      Object.assign(currData, Parser.parsePostalCodes($, this.config));
    } catch (error) {
      this.config.logger?.error(`Error processing ${url}:`, error);
    }
  }
}
@@ -1,143 +0,0 @@
1
- import path from "path";
2
- import puppeteer, { Browser } from "puppeteer";
3
- import { ProcessingQueue } from "./queue";
4
- import { Fetcher } from "./fetchers";
5
- import { Region, ScraperConfig, LookupData, RegionData } from "../types";
6
- import { createRegionIdGenerator, RegionIdGenerator } from "../utils/id-generator";
7
- import { writeFileSync, mkdirSync } from "fs";
8
- import { load } from "cheerio";
9
- import { Parser } from "./parsers";
10
- import { getBaseUrl } from "../utils/env-config";
11
- import { normalizeString } from "../utils/string-utils";
12
- import { Logger } from "../utils/logger";
13
-
14
/**
 * Drives a Puppeteer browser over the site returned by `getBaseUrl()` and
 * writes, for each scraped country, two JSON files: the nested region tree
 * (`<name>-postal-codes.json`) and a lookup table derived from it
 * (`<name>-lookup.json`).
 */
export class PostalCodeScraper {
  private browser!: Browser;
  private queue!: ProcessingQueue;
  private fetcher!: Fetcher;

  /**
   * @param config Optional overrides. Options that are not supplied fall back
   *               to the defaults listed in the body.
   */
  constructor(private config: ScraperConfig = {}) {
    this.config = {
      concurrency: 15,
      maxRetries: 5,
      headless: true,
      directory: "src/data",
      logger: Logger,
      usePrettyName: false,
      ...config,
    };
  }

  /**
   * Scrapes one country and saves its two JSON files.
   *
   * @param countryName Country identifier as matched by `Parser.parseCountryByName`.
   * @returns `null` when the country is not found, otherwise `undefined`.
   */
  async scrapeCountry(countryName: string) {
    await this.initBrowser();
    try {
      const country = await this.getCountryDetails(countryName);
      if (!country) {
        this.config.logger?.warn(`Country not found: ${countryName}`);
        return null;
      }

      const data: RegionData = {};
      await this.queue.process(country, data);

      // NOTE(review): files here are always named after `country.name`, while
      // scrapeCountries honours `usePrettyName` — confirm this is intended.
      this.saveData(data, `${country.name}-postal-codes.json`, this.config.directory);

      const postalCodeLookup = this.generatePostalCodeLookup(data);
      this.saveData(postalCodeLookup, `${country.name}-lookup.json`, this.config.directory);
    } finally {
      // The browser is closed whether or not scraping succeeded.
      await this.cleanup();
    }
  }

  /**
   * Scrapes every country listed on the base page, one after another.
   *
   * @returns `null` when no countries were found, otherwise `undefined`.
   */
  async scrapeCountries() {
    await this.initBrowser();
    try {
      const countries = await this.getCountriesDetails();
      if (countries.length === 0) {
        this.config.logger?.warn("No countries found.");
        return null;
      }

      for (const country of countries) {
        const key = this.config.usePrettyName ? country.prettyName : country.name;
        const countryData: RegionData = {};
        this.config.logger?.info(`Processing country: ${key}`);

        await this.queue.process(country, countryData);
        this.saveData(countryData, `${key}-postal-codes.json`, this.config.directory);

        const postalCodeLookup = this.generatePostalCodeLookup(countryData);
        this.saveData(postalCodeLookup, `${key}-lookup.json`, this.config.directory);
      }
    } finally {
      await this.cleanup();
    }
  }

  /** Launches the browser and builds the fetcher and queue that share it. */
  private async initBrowser() {
    this.browser = await puppeteer.launch({ headless: this.config.headless });
    this.fetcher = new Fetcher(this.browser, this.config);
    this.queue = new ProcessingQueue(this.fetcher, this.config);
  }

  /**
   * Loads the base page and looks up a single country on it.
   *
   * @returns The parsed country, or `null` when it is missing or the fetch failed.
   */
  private async getCountryDetails(name: string): Promise<Region | null> {
    try {
      const html = await this.fetcher.fetchWithRetry(getBaseUrl());
      return Parser.parseCountryByName(load(html), this.config, name);
    } catch (error) {
      this.config.logger?.error(`Error fetching country details: ${name}`, error);
      return null;
    }
  }

  /**
   * Loads the base page and parses every country on it.
   *
   * @returns The parsed countries, or an empty array when the fetch failed.
   */
  private async getCountriesDetails(): Promise<Region[]> {
    try {
      const html = await this.fetcher.fetchWithRetry(getBaseUrl());
      return Parser.parseCountries(load(html), this.config);
    } catch (error) {
      this.config.logger?.error("Error fetching countries details", error);
      return [];
    }
  }

  /** Builds the lookup table for one region tree using a fresh ID generator. */
  private generatePostalCodeLookup(data: RegionData): LookupData {
    return this.buildLookup(data, createRegionIdGenerator());
  }

  /**
   * Walks a region tree depth-first. Each array leaf is a list of postal
   * codes; every code is mapped to the ID of the path that leads to it, and
   * that ID is mapped back to the path.
   *
   * @param regionObj   Sub-tree (object) or leaf (array of postal codes).
   * @param idGenerator Maps a region path to its ID.
   * @param acc         Region keys from the root down to `regionObj`.
   * @param result      Accumulator shared by the whole traversal.
   * @returns `result`, filled in place.
   */
  private buildLookup(
    regionObj: RegionData | string[],
    idGenerator: RegionIdGenerator,
    acc: string[] = [],
    result: LookupData = { postalCodeMap: {}, regions: {} }
  ): LookupData {
    if (Array.isArray(regionObj)) {
      // All codes in one leaf share the same path, so resolve the ID and copy
      // the path once. Empty leaves are skipped entirely, as before, so the
      // generator is never called for them.
      if (regionObj.length > 0) {
        const id = idGenerator(acc);
        result.regions[id] = [...acc];
        for (const postalCode of regionObj) {
          result.postalCodeMap[postalCode] = id;
        }
      }
    } else if (typeof regionObj === "object" && regionObj !== null) {
      for (const [regionKey, regionValue] of Object.entries(regionObj)) {
        this.buildLookup(regionValue, idGenerator, [...acc, regionKey], result);
      }
    }
    return result;
  }

  /**
   * Serialises `data` as pretty-printed JSON into `directory`, creating the
   * directory if needed. Failures are logged, not thrown.
   *
   * @param data      Any JSON-serialisable value.
   * @param fileName  Raw file name; it is passed through `normalizeString`.
   * @param directory Target directory; `undefined` selects "src/data".
   */
  private saveData(data: unknown, fileName: string, directory: string = "src/data") {
    try {
      mkdirSync(directory, { recursive: true });
      const filePath = path.join(directory, normalizeString(fileName));
      writeFileSync(filePath, JSON.stringify(data, null, 2), { flag: "w" });
      this.config.logger?.info(`Saved data to ${filePath}`);
    } catch (error) {
      this.config.logger?.error(`Error saving data to ${fileName}`, error);
    }
  }

  /** Closes the browser if one was launched. */
  private async cleanup() {
    await this.browser?.close();
  }
}
142
-
143
- export default PostalCodeScraper;