postal-code-scraper 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/README.md +56 -62
  2. package/dist/index.cjs +392 -0
  3. package/dist/index.d.cts +43 -0
  4. package/dist/index.d.ts +43 -3
  5. package/dist/index.js +355 -25
  6. package/package.json +17 -6
  7. package/.mocharc.json +0 -4
  8. package/build/test/src/index.js +0 -26
  9. package/build/test/src/index.js.map +0 -1
  10. package/build/test/src/scraper/fetchers.js +0 -49
  11. package/build/test/src/scraper/fetchers.js.map +0 -1
  12. package/build/test/src/scraper/parsers.js +0 -63
  13. package/build/test/src/scraper/parsers.js.map +0 -1
  14. package/build/test/src/scraper/queue.js +0 -69
  15. package/build/test/src/scraper/queue.js.map +0 -1
  16. package/build/test/src/scraper/scrapers.js +0 -148
  17. package/build/test/src/scraper/scrapers.js.map +0 -1
  18. package/build/test/src/types.js +0 -3
  19. package/build/test/src/types.js.map +0 -1
  20. package/build/test/src/utils/id-generator.js +0 -33
  21. package/build/test/src/utils/id-generator.js.map +0 -1
  22. package/build/test/src/utils/logger.js +0 -87
  23. package/build/test/src/utils/logger.js.map +0 -1
  24. package/build/test/tests/postal-code-scraper.test.js +0 -14
  25. package/build/test/tests/postal-code-scraper.test.js.map +0 -1
  26. package/dist/scraper/fetchers.d.ts +0 -9
  27. package/dist/scraper/fetchers.js +0 -48
  28. package/dist/scraper/parsers.d.ts +0 -7
  29. package/dist/scraper/parsers.js +0 -62
  30. package/dist/scraper/queue.d.ts +0 -12
  31. package/dist/scraper/queue.js +0 -67
  32. package/dist/scraper/scrapers.d.ts +0 -19
  33. package/dist/scraper/scrapers.js +0 -149
  34. package/dist/types.d.ts +0 -32
  35. package/dist/types.js +0 -2
  36. package/dist/utils/env-config.d.ts +0 -1
  37. package/dist/utils/env-config.js +0 -7
  38. package/dist/utils/id-generator.d.ts +0 -4
  39. package/dist/utils/id-generator.js +0 -26
  40. package/dist/utils/logger.d.ts +0 -33
  41. package/dist/utils/logger.js +0 -86
  42. package/dist/utils/string-utils.d.ts +0 -1
  43. package/dist/utils/string-utils.js +0 -13
  44. package/src/index.ts +0 -3
  45. package/src/scraper/fetchers.ts +0 -30
  46. package/src/scraper/parsers.ts +0 -67
  47. package/src/scraper/queue.ts +0 -55
  48. package/src/scraper/scrapers.ts +0 -143
  49. package/src/types.ts +0 -37
  50. package/src/utils/env-config.ts +0 -3
  51. package/src/utils/id-generator.ts +0 -35
  52. package/src/utils/logger.ts +0 -105
  53. package/src/utils/string-utils.ts +0 -9
  54. package/tests/postal-code-scraper.test.ts +0 -100
  55. package/tests/tsconfig.json +0 -13
  56. package/tsconfig.json +0 -15
package/README.md CHANGED
@@ -4,15 +4,15 @@
4
4
 
5
5
  **Postal Code Scraper** is an automated web scraper designed to extract postal code data from countries worldwide. It efficiently fetches postal codes and organizes them into structured JSON files for easy use in applications.
6
6
 
7
- This library uses **Puppeteer** for web scraping and **Cheerio** for HTML parsing, ensuring accurate and efficient data extraction.
7
+ This library uses **Puppeteer** for web scraping, **Cheerio** for HTML parsing, and **p-limit** for concurrency control, ensuring accurate and efficient data extraction.
8
8
 
9
9
  ## 🚀 Features
10
10
 
11
- - Scrape **postal codes** from any country
12
- - Scrape **all countries** in one go
11
+ - Scrape **postal codes** for one country or all countries
13
12
  - Save results as **JSON** files for easy integration
14
- - Configurable settings (concurrency, retries, headless mode, etc.) <- read more below
15
- - Structured **postal code lookup** generation
13
+ - **Region-structured** output (country → region1 → region2 → region3 → ... → postal codes)
14
+ - **Postal-code lookup** output (postal code → region path)
15
+ - Configurable options (concurrency, retries, headless mode, output directory, logging, etc.)
16
16
  - **Fully asynchronous** for optimized performance
17
17
 
18
18
  ## 📦 Installation
@@ -45,50 +45,65 @@ import { PostalCodeScraper } from "postal-code-scraper";
45
45
  const { PostalCodeScraper } = require("postal-code-scraper");
46
46
  ```
47
47
 
48
- ### 2️⃣ **Scrape a Single Country**
48
+ ### 2️⃣ **Instantiate Scraper**
49
49
 
50
50
  ```javascript
51
- async function scrapeSingleCountry() {
52
- await PostalCodeScraper.scrapeCountry("Canada");
53
- }
51
+ const scraper = new PostalCodeScraper();
52
+ ```
53
+
54
+ ### 3️⃣ **Scrape a Single Country**
55
+
56
+ ```javascript
57
+ import { PostalCodeScraper } from "postal-code-scraper";
54
58
 
55
- scrapeSingleCountry();
59
+ const scraper = new PostalCodeScraper();
60
+
61
+ await scraper.scrapeCountry("Romania");
56
62
  ```
57
63
 
58
64
  📌 **Output Files (saved in `src/data` by default):**
59
65
 
60
- - `Canada-postal-codes.json`
61
- - `Canada-lookup.json`
66
+ - `romania-postal-codes.json`
67
+ - `romania-lookup.json`
62
68
 
63
- ### 3️⃣ **Scrape All Countries**
69
+ ### 4️⃣ **Scrape All Countries**
64
70
 
65
71
  ```javascript
66
- async function scrapeAllCountries() {
67
- await PostalCodeScraper.scrapeCountries();
68
- }
72
+ import { PostalCodeScraper } from "postal-code-scraper";
69
73
 
70
- scrapeAllCountries();
74
+ const scraper = new PostalCodeScraper();
75
+
76
+ await scraper.scrapeCountries();
71
77
  ```
72
78
 
73
79
  📌 This will fetch postal codes for **every available country**.
74
80
 
75
- ### 4️⃣ **Customize Scraper Configuration**
81
+ ### 5️⃣ **Customize Scraper Configuration**
82
+
83
+ #### 🛠 Configuration Options
84
+
85
+ | Option | Type | Default | Description |
86
+ | --------------- | --------------- | -------------------------------- | ------------------------------------------------------------ |
87
+ | `directory` | `string` | `src/data` | The directory to save data |
88
+ | `concurrency` | `number` | `15` | Maximum concurrent requests to process |
89
+ | `maxRetries` | `number` | `5` | Number of retries for failed requests |
90
+ | `headless` | `boolean` | `true` | Run Puppeteer in headless mode |
91
+ | `usePrettyName` | `boolean` | `false` | Use country pretty names instead of default names |
92
+ | `logger` | `object` \| `null` | `Logger` (custom implementation) | Handles event logging, can be set to null to disable logging |
76
93
 
77
94
  ```javascript
95
+ import { PostalCodeScraper } from "postal-code-scraper";
96
+
78
97
  const customScraper = new PostalCodeScraper({
79
- concurrency: 10, // Limit concurrent requests
80
- maxRetries: 3, // Max retries per request (if a request fails -> so we don't lose data)
81
- headless: false, // Run Puppeteer in visible mode
82
- usePrettyName: true, // Store data using country pretty names
83
- logger: console // Enable console logging (default is own implemented)
84
- directory: 'src/data' // Choose the folder where you want to save the data
98
+ concurrency: 10, // Limit concurrent requests
99
+ maxRetries: 3, // Max retries per request
100
+ headless: false, // Run Puppeteer in visible mode
101
+ usePrettyName: true, // Store data using country pretty names
102
+ logger: console, // Enable console logging (set to null to disable)
103
+ directory: "src/data", // Output directory
85
104
  });
86
105
 
87
- async function run() {
88
- await customScraper.scrapeCountry("Germany");
89
- }
90
-
91
- run();
106
+ await customScraper.scrapeCountry("Germany");
92
107
  ```
93
108
 
94
109
  ## 📁 Output Data Format
@@ -117,41 +132,21 @@ run();
117
132
 
118
133
  ```json
119
134
  {
120
- "postalCodeMap": {
121
- "337563": "tamasesti_2",
122
- "337564": "valea_4",
123
- "400001": "cluj-napoca_1",
124
- "400002": "cluj-napoca_1",
125
- "400003": "cluj-napoca_1",
126
- },
127
- "regions": {
128
- "cluj-napoca_1": [
129
- "cluj",
130
- "cluj-napoca"
131
- ],
132
- "tamasesti_2": [
133
- "hunedoara",
134
- "tamasesti"
135
- ],
136
- "valea_4": [
137
- "hunedoara",
138
- "valea"
139
- ],
140
- }
135
+ "postalCodeMap": {
136
+ "337563": "tamasesti_2",
137
+ "337564": "valea_4",
138
+ "400001": "cluj-napoca_1",
139
+ "400002": "cluj-napoca_1",
140
+ "400003": "cluj-napoca_1"
141
+ },
142
+ "regions": {
143
+ "cluj-napoca_1": ["cluj", "cluj-napoca"],
144
+ "tamasesti_2": ["hunedoara", "tamasesti"],
145
+ "valea_4": ["hunedoara", "valea"]
146
+ }
141
147
  }
142
148
  ```
143
149
 
144
- ## 🛠 Configuration Options
145
-
146
- | Option | Type | Default | Description |
147
- | --------------- | ----------------------------- | -------------------------------- | ---------------------------------------------------------------------------------------------- |
148
- | `directory` | `string` | `src/data` | The directory to save data |
149
- | `concurrency` | `number` | `15` | Maximum concurrent requests to process |
150
- | `maxRetries` | `number` | `5` | Number of retries for failed requests |
151
- | `headless` | `boolean` | `true` | Run Puppeteer in headless mode |
152
- | `usePrettyName` | `boolean` | `false` | Use country pretty names instead of default names |
153
- | `logger` | `object` `null` | `Logger` (custom implementation) | Handles event logging, can be set to null to disable logging |
154
-
155
150
  ## ❓ FAQs
156
151
 
157
152
  ### **1. Where are the postal code files stored?**
@@ -191,4 +186,3 @@ Contributions are welcome! Feel free to submit a pull request or open an issue.
191
186
  ## 📜 License
192
187
 
193
188
  MIT License © 2024
194
-
package/dist/index.cjs ADDED
@@ -0,0 +1,392 @@
1
+ "use strict";
2
+ var __create = Object.create;
3
+ var __defProp = Object.defineProperty;
4
+ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
+ var __getOwnPropNames = Object.getOwnPropertyNames;
6
+ var __getProtoOf = Object.getPrototypeOf;
7
+ var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __export = (target, all) => {
9
+ for (var name in all)
10
+ __defProp(target, name, { get: all[name], enumerable: true });
11
+ };
12
+ var __copyProps = (to, from, except, desc) => {
13
+ if (from && typeof from === "object" || typeof from === "function") {
14
+ for (let key of __getOwnPropNames(from))
15
+ if (!__hasOwnProp.call(to, key) && key !== except)
16
+ __defProp(to, key, { get: () => from[key], enumerable: !(desc = __getOwnPropDesc(from, key)) || desc.enumerable });
17
+ }
18
+ return to;
19
+ };
20
+ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__getProtoOf(mod)) : {}, __copyProps(
21
+ // If the importer is in node compatibility mode or this is not an ESM
22
+ // file that has been converted to a CommonJS file using a Babel-
23
+ // compatible transform (i.e. "__esModule" has not been set), then set
24
+ // "default" to the CommonJS "module.exports" for node compatibility.
25
+ isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target,
26
+ mod
27
+ ));
28
+ var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
+
30
+ // src/index.ts
31
+ var index_exports = {};
32
+ __export(index_exports, {
33
+ PostalCodeScraper: () => PostalCodeScraper
34
+ });
35
+ module.exports = __toCommonJS(index_exports);
36
+
37
+ // src/scraper/scrapers.ts
38
+ var import_path = __toESM(require("path"), 1);
39
+ var import_puppeteer = __toESM(require("puppeteer"), 1);
40
+
41
+ // src/scraper/queue.ts
42
+ var import_cheerio = require("cheerio");
43
+ var import_p_limit = __toESM(require("p-limit"), 1);
44
+
45
+ // src/scraper/parsers.ts
46
+ var Parser = class {
47
+ static parseRegions($, config) {
48
+ return $("h2:contains('Regions')").next(".regions").find("a").map((_index, element) => {
49
+ const path2 = $(element).attr("href");
50
+ const prettyName = $(element).text().trim();
51
+ if (!path2 || !prettyName) return null;
52
+ return {
53
+ name: path2.split("/").filter(Boolean).pop(),
54
+ prettyName,
55
+ path: path2
56
+ };
57
+ }).get().filter(Boolean);
58
+ }
59
+ static parsePostalCodes($, config) {
60
+ const codes = {};
61
+ $(".codes .container").each((_i, element) => {
62
+ const place = $(element).find(".place").text().trim();
63
+ const codesList = $(element).find(".code span").map((_j, el) => $(el).text().trim()).get();
64
+ if (place) {
65
+ const key = config.usePrettyName ? place : place.toLowerCase().replace(/\s+/g, "-");
66
+ codes[key] = codesList;
67
+ }
68
+ });
69
+ return codes;
70
+ }
71
+ static parseCountries($, config) {
72
+ return $(".regions div a").map((_i, element) => {
73
+ const path2 = $(element).attr("href");
74
+ return path2 ? { name: path2.replace(/\//g, ""), prettyName: $(element).text().trim(), path: path2 } : null;
75
+ }).get().filter(Boolean);
76
+ }
77
+ static parseCountryByName($, config, name) {
78
+ const countryElement = $(`.regions div a`).filter((_, el) => $(el).attr("href")?.replace(/\//g, "") === name.toLowerCase().trim());
79
+ if (!countryElement.length) return null;
80
+ const path2 = countryElement.attr("href");
81
+ const prettyName = countryElement.text().trim();
82
+ return path2 && prettyName ? {
83
+ name: path2.replace(/\//g, ""),
84
+ prettyName,
85
+ path: path2
86
+ } : null;
87
+ }
88
+ };
89
+
90
+ // src/utils/env-config.ts
91
+ var getBaseUrl = () => {
92
+ return "https://worldpostalcode.com";
93
+ };
94
+
95
+ // src/scraper/queue.ts
96
+ var ProcessingQueue = class {
97
+ constructor(fetcher, config) {
98
+ this.fetcher = fetcher;
99
+ this.config = config;
100
+ this.limit = (0, import_p_limit.default)(config.concurrency || 15);
101
+ }
102
+ queue = [];
103
+ visitedUrls = /* @__PURE__ */ new Set();
104
+ limit;
105
+ async process(startRegion, data) {
106
+ this.queue.push({ region: startRegion, currData: data });
107
+ while (this.queue.length > 0) {
108
+ const tasks = this.queue.map((item) => this.limit(() => this.processItem(item)));
109
+ this.queue = [];
110
+ await Promise.all(tasks);
111
+ }
112
+ }
113
+ async processItem(item) {
114
+ const url = `${getBaseUrl()}${item.region.path}`;
115
+ if (this.visitedUrls.has(url)) return;
116
+ this.visitedUrls.add(url);
117
+ this.config.logger?.info(`Fetching: ${url}`);
118
+ try {
119
+ const html = await this.fetcher.fetchWithRetry(url);
120
+ const $ = (0, import_cheerio.load)(html);
121
+ const regions = Parser.parseRegions($, this.config);
122
+ regions.forEach((region) => {
123
+ const key = this.config.usePrettyName ? region.prettyName : region.name;
124
+ item.currData[key] = {};
125
+ this.queue.push({
126
+ region,
127
+ currData: item.currData[key]
128
+ });
129
+ });
130
+ const codes = Parser.parsePostalCodes($, this.config);
131
+ Object.assign(item.currData, codes);
132
+ } catch (error) {
133
+ this.config.logger?.error(`Error processing ${url}:`, error);
134
+ }
135
+ }
136
+ };
137
+
138
+ // src/scraper/fetchers.ts
139
+ var Fetcher = class {
140
+ constructor(browser, config) {
141
+ this.browser = browser;
142
+ this.config = config;
143
+ }
144
+ async fetchHtml(url) {
145
+ const page = await this.browser.newPage();
146
+ try {
147
+ page.setDefaultNavigationTimeout(6e4);
148
+ await page.goto(url, { waitUntil: "domcontentloaded" });
149
+ return await page.content();
150
+ } finally {
151
+ await page.close();
152
+ }
153
+ }
154
+ async fetchWithRetry(url, retries = this.config.maxRetries || 5) {
155
+ try {
156
+ return await this.fetchHtml(url);
157
+ } catch (error) {
158
+ this.config.logger?.warn(`Retrying (${this.config.maxRetries - retries + 1}) for: ${url}`);
159
+ if (retries > 0) {
160
+ await new Promise((resolve) => setTimeout(resolve, Math.random() * 7e3 + 5e3));
161
+ return this.fetchWithRetry(url, retries - 1);
162
+ }
163
+ throw new Error(`Failed to fetch: ${url} after ${this.config.maxRetries} attempts`);
164
+ }
165
+ }
166
+ };
167
+
168
+ // src/utils/id-generator.ts
169
+ var createRegionIdGenerator = () => {
170
+ const regionRegistry = /* @__PURE__ */ new Map();
171
+ const counterMap = /* @__PURE__ */ new Map();
172
+ return (regions) => {
173
+ const normalized = regions.map(
174
+ (region) => region.trim().toLowerCase().normalize("NFD").replace(/[\u0300-\u036f]/g, "").replace(/\s+/g, "_")
175
+ );
176
+ const compositeKey = normalized.join("|");
177
+ if (regionRegistry.has(compositeKey)) {
178
+ return regionRegistry.get(compositeKey);
179
+ }
180
+ const baseName = normalized[normalized.length - 1];
181
+ const count = (counterMap.get(baseName) || 0) + 1;
182
+ counterMap.set(baseName, count);
183
+ const newId = `${baseName}_${count}`;
184
+ regionRegistry.set(compositeKey, newId);
185
+ return newId;
186
+ };
187
+ };
188
+
189
+ // src/scraper/scrapers.ts
190
+ var import_fs = require("fs");
191
+ var import_cheerio2 = require("cheerio");
192
+
193
+ // src/utils/string-utils.ts
194
+ var normalizeString = (str) => {
195
+ return str.trim().toLowerCase().normalize("NFD").replace(/[\u0300-\u036f]/g, "").replace(/\s+/g, "-").replace(/[^a-z0-9.-]/g, "");
196
+ };
197
+
198
+ // src/utils/logger.ts
199
+ var Logger = class _Logger {
200
+ static logLevel = "info";
201
+ static useColors = true;
202
+ static prefix = "[POSTAL-CODE-SCRAPER]";
203
+ static instance;
204
+ static configure(config) {
205
+ if (config.level) this.logLevel = config.level;
206
+ if (config.colors !== void 0) this.useColors = config.colors;
207
+ if (config.prefix) this.prefix = config.prefix;
208
+ if (config.logger) this.instance = config.logger;
209
+ }
210
+ static getInstance() {
211
+ return this.instance || new _Logger();
212
+ }
213
+ static debug(message, ...args) {
214
+ this.log("debug", message, args);
215
+ }
216
+ static info(message, ...args) {
217
+ this.log("info", message, args);
218
+ }
219
+ static warn(message, ...args) {
220
+ this.log("warn", message, args);
221
+ }
222
+ static error(message, ...args) {
223
+ this.log("error", message, args);
224
+ }
225
+ static shouldLog(level) {
226
+ if (this.logLevel === "silent") return false;
227
+ const levels = ["error", "warn", "info", "debug"];
228
+ return levels.indexOf(level) <= levels.indexOf(this.logLevel);
229
+ }
230
+ static log(level, message, args) {
231
+ if (!this.shouldLog(level)) return;
232
+ const logger = this.getInstance();
233
+ const formatted = this.formatMessage(level, message);
234
+ logger[level](formatted, ...args);
235
+ }
236
+ static formatMessage(level, message) {
237
+ const timestamp = (/* @__PURE__ */ new Date()).toISOString();
238
+ const levelColor = this.getLevelColor(level);
239
+ const messageColor = this.useColors ? "\x1B[37m" : "";
240
+ return [
241
+ this.useColors ? "\x1B[90m" : "",
242
+ `${this.prefix} `,
243
+ `${timestamp} `,
244
+ levelColor,
245
+ `[${level.toUpperCase()}]`,
246
+ this.useColors ? "\x1B[0m" : "",
247
+ messageColor,
248
+ ` ${message}`,
249
+ this.useColors ? "\x1B[0m" : ""
250
+ ].join("");
251
+ }
252
+ static getLevelColor(level) {
253
+ if (!this.useColors) return "";
254
+ return {
255
+ error: "\x1B[31m",
256
+ // Red
257
+ warn: "\x1B[33m",
258
+ // Yellow
259
+ info: "\x1B[36m",
260
+ // Cyan
261
+ debug: "\x1B[35m"
262
+ // Magenta
263
+ }[level];
264
+ }
265
+ // Instance methods to implement LoggerInterface
266
+ debug(message, ...args) {
267
+ console.debug(message, ...args);
268
+ }
269
+ info(message, ...args) {
270
+ console.log(message, ...args);
271
+ }
272
+ warn(message, ...args) {
273
+ console.warn(message, ...args);
274
+ }
275
+ error(message, ...args) {
276
+ console.error(message, ...args);
277
+ }
278
+ };
279
+
280
+ // src/scraper/scrapers.ts
281
+ var PostalCodeScraper = class {
282
+ constructor(config = {}) {
283
+ this.config = config;
284
+ this.config = {
285
+ concurrency: 15,
286
+ maxRetries: 5,
287
+ headless: true,
288
+ directory: "src/data",
289
+ logger: Logger,
290
+ usePrettyName: false,
291
+ ...config
292
+ };
293
+ }
294
+ browser;
295
+ queue;
296
+ fetcher;
297
+ async scrapeCountry(countryName) {
298
+ await this.initBrowser();
299
+ try {
300
+ const country = await this.getCountryDetails(countryName);
301
+ if (!country) {
302
+ this.config.logger?.warn(`Country not found: ${countryName}`);
303
+ return null;
304
+ }
305
+ const data = {};
306
+ await this.queue.process(country, data);
307
+ this.saveData(data, `${country.name}-postal-codes.json`, this.config.directory);
308
+ const postalCodeLookup = this.generatePostalCodeLookup(data);
309
+ this.saveData(postalCodeLookup, `${country.name}-lookup.json`, this.config.directory);
310
+ } finally {
311
+ await this.cleanup();
312
+ }
313
+ }
314
+ async scrapeCountries() {
315
+ await this.initBrowser();
316
+ try {
317
+ const countries = await this.getCountriesDetails();
318
+ if (countries.length === 0) {
319
+ this.config.logger?.warn("No countries found.");
320
+ return null;
321
+ }
322
+ for (const country of countries) {
323
+ const key = this.config.usePrettyName ? country.prettyName : country.name;
324
+ const countryData = {};
325
+ this.config.logger?.info(`Processing country: ${key}`);
326
+ await this.queue.process(country, countryData);
327
+ this.saveData(countryData, `${key}-postal-codes.json`, this.config.directory);
328
+ const postalCodeLookup = this.generatePostalCodeLookup(countryData);
329
+ this.saveData(postalCodeLookup, `${key}-lookup.json`, this.config.directory);
330
+ }
331
+ } finally {
332
+ await this.cleanup();
333
+ }
334
+ }
335
+ async initBrowser() {
336
+ this.browser = await import_puppeteer.default.launch({ headless: this.config.headless });
337
+ this.fetcher = new Fetcher(this.browser, this.config);
338
+ this.queue = new ProcessingQueue(this.fetcher, this.config);
339
+ }
340
+ async getCountryDetails(name) {
341
+ try {
342
+ const html = await this.fetcher.fetchWithRetry(getBaseUrl());
343
+ return Parser.parseCountryByName((0, import_cheerio2.load)(html), this.config, name);
344
+ } catch (error) {
345
+ this.config.logger?.error(`Error fetching country details: ${name}`, error);
346
+ return null;
347
+ }
348
+ }
349
+ async getCountriesDetails() {
350
+ try {
351
+ const html = await this.fetcher.fetchWithRetry(getBaseUrl());
352
+ return Parser.parseCountries((0, import_cheerio2.load)(html), this.config);
353
+ } catch (error) {
354
+ this.config.logger?.error("Error fetching countries details", error);
355
+ return [];
356
+ }
357
+ }
358
+ generatePostalCodeLookup(data) {
359
+ return this.buildLookup(data, createRegionIdGenerator());
360
+ }
361
+ buildLookup(regionObj, idGenerator, acc = [], result = { postalCodeMap: {}, regions: {} }) {
362
+ if (Array.isArray(regionObj)) {
363
+ for (const item of regionObj) {
364
+ const id = idGenerator(acc);
365
+ result.postalCodeMap[item] = id;
366
+ result.regions[id] = [...acc];
367
+ }
368
+ } else if (typeof regionObj === "object" && regionObj !== null) {
369
+ for (const [regionKey, regionValue] of Object.entries(regionObj)) {
370
+ this.buildLookup(regionValue, idGenerator, [...acc, regionKey], result);
371
+ }
372
+ }
373
+ return result;
374
+ }
375
+ saveData(data, fileName, directory = "src/data") {
376
+ try {
377
+ (0, import_fs.mkdirSync)(directory, { recursive: true });
378
+ const filePath = import_path.default.join(directory, normalizeString(fileName));
379
+ (0, import_fs.writeFileSync)(filePath, JSON.stringify(data, null, 2), { flag: "w" });
380
+ this.config.logger?.info(`Saved data to ${filePath}`);
381
+ } catch (error) {
382
+ this.config.logger?.error(`Error saving data to ${fileName}`, error);
383
+ }
384
+ }
385
+ async cleanup() {
386
+ await this.browser?.close();
387
+ }
388
+ };
389
+ // Annotate the CommonJS export names for ESM import in node:
390
+ 0 && (module.exports = {
391
+ PostalCodeScraper
392
+ });
@@ -0,0 +1,43 @@
1
+ type Region = {
2
+ path: string;
3
+ name: string;
4
+ prettyName: string;
5
+ };
6
+ type ScraperConfig = {
7
+ usePrettyName?: boolean;
8
+ directory?: string;
9
+ concurrency?: number;
10
+ maxRetries?: number;
11
+ headless?: boolean;
12
+ logger?: any;
13
+ };
14
+ interface LookupData {
15
+ postalCodeMap: {
16
+ [postalCode: string]: string;
17
+ };
18
+ regions: {
19
+ [code: string]: string[];
20
+ };
21
+ }
22
+ interface RegionData {
23
+ [key: string]: RegionData | string[];
24
+ }
25
+
26
+ declare class PostalCodeScraper {
27
+ private config;
28
+ private browser;
29
+ private queue;
30
+ private fetcher;
31
+ constructor(config?: ScraperConfig);
32
+ scrapeCountry(countryName: string): Promise<null | undefined>;
33
+ scrapeCountries(): Promise<null | undefined>;
34
+ private initBrowser;
35
+ private getCountryDetails;
36
+ private getCountriesDetails;
37
+ private generatePostalCodeLookup;
38
+ private buildLookup;
39
+ private saveData;
40
+ private cleanup;
41
+ }
42
+
43
+ export { type LookupData, PostalCodeScraper, type Region, type RegionData, type ScraperConfig };
package/dist/index.d.ts CHANGED
@@ -1,3 +1,43 @@
1
- export * from "./types";
2
- export { PostalCodeScraper } from "./scraper/scrapers";
3
- export { default } from "./scraper/scrapers";
1
+ type Region = {
2
+ path: string;
3
+ name: string;
4
+ prettyName: string;
5
+ };
6
+ type ScraperConfig = {
7
+ usePrettyName?: boolean;
8
+ directory?: string;
9
+ concurrency?: number;
10
+ maxRetries?: number;
11
+ headless?: boolean;
12
+ logger?: any;
13
+ };
14
+ interface LookupData {
15
+ postalCodeMap: {
16
+ [postalCode: string]: string;
17
+ };
18
+ regions: {
19
+ [code: string]: string[];
20
+ };
21
+ }
22
+ interface RegionData {
23
+ [key: string]: RegionData | string[];
24
+ }
25
+
26
+ declare class PostalCodeScraper {
27
+ private config;
28
+ private browser;
29
+ private queue;
30
+ private fetcher;
31
+ constructor(config?: ScraperConfig);
32
+ scrapeCountry(countryName: string): Promise<null | undefined>;
33
+ scrapeCountries(): Promise<null | undefined>;
34
+ private initBrowser;
35
+ private getCountryDetails;
36
+ private getCountriesDetails;
37
+ private generatePostalCodeLookup;
38
+ private buildLookup;
39
+ private saveData;
40
+ private cleanup;
41
+ }
42
+
43
+ export { type LookupData, PostalCodeScraper, type Region, type RegionData, type ScraperConfig };