postal-code-scraper 1.0.2 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. package/README.md +56 -62
  2. package/dist/index.cjs +392 -0
  3. package/dist/index.d.cts +43 -0
  4. package/dist/index.d.ts +43 -3
  5. package/dist/index.js +355 -25
  6. package/package.json +17 -6
  7. package/.mocharc.json +0 -4
  8. package/build/test/src/index.js +0 -26
  9. package/build/test/src/index.js.map +0 -1
  10. package/build/test/src/scraper/fetchers.js +0 -49
  11. package/build/test/src/scraper/fetchers.js.map +0 -1
  12. package/build/test/src/scraper/parsers.js +0 -63
  13. package/build/test/src/scraper/parsers.js.map +0 -1
  14. package/build/test/src/scraper/queue.js +0 -69
  15. package/build/test/src/scraper/queue.js.map +0 -1
  16. package/build/test/src/scraper/scrapers.js +0 -148
  17. package/build/test/src/scraper/scrapers.js.map +0 -1
  18. package/build/test/src/types.js +0 -3
  19. package/build/test/src/types.js.map +0 -1
  20. package/build/test/src/utils/id-generator.js +0 -33
  21. package/build/test/src/utils/id-generator.js.map +0 -1
  22. package/build/test/src/utils/logger.js +0 -87
  23. package/build/test/src/utils/logger.js.map +0 -1
  24. package/build/test/tests/postal-code-scraper.test.js +0 -14
  25. package/build/test/tests/postal-code-scraper.test.js.map +0 -1
  26. package/dist/scraper/fetchers.d.ts +0 -9
  27. package/dist/scraper/fetchers.js +0 -48
  28. package/dist/scraper/parsers.d.ts +0 -7
  29. package/dist/scraper/parsers.js +0 -62
  30. package/dist/scraper/queue.d.ts +0 -12
  31. package/dist/scraper/queue.js +0 -67
  32. package/dist/scraper/scrapers.d.ts +0 -19
  33. package/dist/scraper/scrapers.js +0 -149
  34. package/dist/types.d.ts +0 -32
  35. package/dist/types.js +0 -2
  36. package/dist/utils/env-config.d.ts +0 -1
  37. package/dist/utils/env-config.js +0 -7
  38. package/dist/utils/id-generator.d.ts +0 -4
  39. package/dist/utils/id-generator.js +0 -26
  40. package/dist/utils/logger.d.ts +0 -33
  41. package/dist/utils/logger.js +0 -86
  42. package/dist/utils/string-utils.d.ts +0 -1
  43. package/dist/utils/string-utils.js +0 -13
  44. package/src/index.ts +0 -3
  45. package/src/scraper/fetchers.ts +0 -30
  46. package/src/scraper/parsers.ts +0 -67
  47. package/src/scraper/queue.ts +0 -55
  48. package/src/scraper/scrapers.ts +0 -143
  49. package/src/types.ts +0 -37
  50. package/src/utils/env-config.ts +0 -3
  51. package/src/utils/id-generator.ts +0 -35
  52. package/src/utils/logger.ts +0 -105
  53. package/src/utils/string-utils.ts +0 -9
  54. package/tests/postal-code-scraper.test.ts +0 -100
  55. package/tests/tsconfig.json +0 -13
  56. package/tsconfig.json +0 -15
@@ -1,148 +0,0 @@
1
- "use strict";
2
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
- return new (P || (P = Promise))(function (resolve, reject) {
5
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
- step((generator = generator.apply(thisArg, _arguments || [])).next());
9
- });
10
- };
11
- var __importDefault = (this && this.__importDefault) || function (mod) {
12
- return (mod && mod.__esModule) ? mod : { "default": mod };
13
- };
14
- Object.defineProperty(exports, "__esModule", { value: true });
15
- exports.PostalCodeScraper = void 0;
16
- const puppeteer_1 = __importDefault(require("puppeteer"));
17
- const queue_1 = require("./queue");
18
- const fetchers_1 = require("./fetchers");
19
- const id_generator_1 = require("../utils/id-generator");
20
- const fs_1 = require("fs");
21
- const path_1 = __importDefault(require("path"));
22
- const cheerio_1 = require("cheerio");
23
- const parsers_1 = require("./parsers");
24
/**
 * Scrapes postal codes for one country, or for every country listed on the
 * configured site, and writes the results as JSON files.
 *
 * For each country two files are written into `config.directory`: the nested
 * region / postal-code tree and a flat lookup derived from that tree.
 */
class PostalCodeScraper {
    config;
    browser;
    fetcher;
    queue;

    /**
     * @param config Optional overrides; every key supplied replaces the default
     *   of the same name.
     */
    constructor(config = {}) {
        this.config = {
            baseUrl: "https://worldpostalcode.com",
            concurrency: 15,
            maxRetries: 5,
            headless: true,
            directory: "src/data",
            ...config,
        };
    }

    /**
     * Scrapes a single country and saves its data.
     *
     * @param countryName Country identifier as it appears in the site's links.
     * @returns `null` when the country cannot be found, otherwise `undefined`.
     */
    async scrapeCountry(countryName) {
        await this.initBrowser();
        try {
            const country = await this.getCountryDetails(countryName);
            if (!country) {
                this.config.logger?.warn(`Country not found: ${countryName}`);
                return null;
            }
            await this.scrapeAndSave(country, country.name);
            return undefined;
        }
        finally {
            // Runs on every exit path so the browser is never left running,
            // including the "not found" return and any scraping failure.
            await this.cleanup();
        }
    }

    /**
     * Scrapes every country listed on the base page, one after another.
     *
     * @returns `null` when no countries were found, otherwise `undefined`.
     */
    async scrapeCountries() {
        await this.initBrowser();
        try {
            const countries = await this.getCountriesDetails();
            if (countries.length === 0) {
                this.config.logger?.warn("No countries found.");
                return null;
            }
            for (const country of countries) {
                const key = this.config.usePrettyName ? country.prettyName : country.name;
                this.config.logger?.info(`Processing country: ${key}`);
                await this.scrapeAndSave(country, key);
            }
            return undefined;
        }
        finally {
            await this.cleanup();
        }
    }

    /** Crawls one country and writes both of its output files, named after `fileKey`. */
    async scrapeAndSave(country, fileKey) {
        const data = {};
        await this.queue.process(country, data);
        this.saveData(data, `${fileKey}-postal-codes.json`, this.config.directory);
        const postalCodeLookup = this.generatePostalCodeLookup(data);
        this.saveData(postalCodeLookup, `${fileKey}-lookup.json`, this.config.directory);
    }

    /** Launches the browser and builds the fetcher and queue that depend on it. */
    async initBrowser() {
        this.browser = await puppeteer_1.default.launch({ headless: this.config.headless });
        this.fetcher = new fetchers_1.Fetcher(this.browser, this.config);
        this.queue = new queue_1.ProcessingQueue(this.fetcher, this.config);
    }

    /** Fetches the base page and returns the matching country, or `null` on a miss or error. */
    async getCountryDetails(name) {
        try {
            const html = await this.fetcher.fetchWithRetry(this.config.baseUrl);
            return parsers_1.Parser.parseCountryByName(cheerio_1.load(html), this.config, name);
        }
        catch (error) {
            this.config.logger?.error(`Error fetching country details: ${name}`, error);
            return null;
        }
    }

    /** Fetches the base page and returns every listed country, or `[]` on error. */
    async getCountriesDetails() {
        try {
            const html = await this.fetcher.fetchWithRetry(this.config.baseUrl);
            return parsers_1.Parser.parseCountries(cheerio_1.load(html), this.config);
        }
        catch (error) {
            this.config.logger?.error("Error fetching countries details", error);
            return [];
        }
    }

    /** Builds the flat lookup for a scraped tree using a fresh region-ID generator. */
    generatePostalCodeLookup(data) {
        return this.buildLookup(data, id_generator_1.createRegionIdGenerator());
    }

    /**
     * Recursively flattens a region tree.
     *
     * @param regionObj Either an array of postal codes (a leaf) or an object
     *   whose keys are region names.
     * @param idGenerator Maps a region path (array of names) to an ID.
     * @param acc Region names on the path from the root to `regionObj`.
     * @param result Accumulator shared by the whole recursion.
     * @returns `{ postalCodeMap, regions }`: postal code -> region ID, and
     *   region ID -> region path.
     */
    buildLookup(regionObj, idGenerator, acc = [], result = { postalCodeMap: {}, regions: {} }) {
        if (Array.isArray(regionObj)) {
            for (const item of regionObj) {
                const id = idGenerator(acc);
                result.postalCodeMap[item] = id;
                result.regions[id] = [...acc];
            }
        }
        else if (typeof regionObj === "object" && regionObj !== null) {
            for (const [regionKey, regionValue] of Object.entries(regionObj)) {
                this.buildLookup(regionValue, idGenerator, [...acc, regionKey], result);
            }
        }
        return result;
    }

    /**
     * Turns a string into a lowercase, diacritic-free slug: whitespace runs
     * become "-", and anything other than a-z, 0-9, "." and "-" is dropped.
     */
    normalizeString(str) {
        return str
            .trim()
            .toLowerCase()
            .normalize("NFD")
            .replace(/[\u0300-\u036f]/g, "")
            .replace(/\s+/g, "-")
            .replace(/[^a-z0-9.-]/g, "");
    }

    /**
     * Writes `data` as pretty-printed JSON to `directory`, creating the
     * directory if needed. Failures are logged, not thrown, so one bad write
     * does not abort a multi-country run.
     */
    saveData(data, fileName, directory = "src/data") {
        try {
            fs_1.mkdirSync(directory, { recursive: true });
            const filePath = path_1.default.join(directory, this.normalizeString(fileName));
            fs_1.writeFileSync(filePath, JSON.stringify(data, null, 2), { flag: "w" });
            this.config.logger?.info(`Saved data to ${filePath}`);
        }
        catch (error) {
            this.config.logger?.error(`Error saving data to ${fileName}`, error);
        }
    }

    /** Closes the browser if one was launched. */
    async cleanup() {
        await this.browser?.close();
    }
}
146
- exports.PostalCodeScraper = PostalCodeScraper;
147
- exports.default = new PostalCodeScraper();
148
- //# sourceMappingURL=scrapers.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"scrapers.js","sourceRoot":"","sources":["../../../../src/scraper/scrapers.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;AAAA,0DAA+C;AAC/C,mCAA0C;AAC1C,yCAAqC;AAErC,wDAAmF;AACnF,2BAA8C;AAC9C,gDAAwB;AACxB,qCAA+B;AAC/B,uCAAmC;AAEnC,MAAa,iBAAiB;IAK7B,YAAoB,SAAwB,EAAE;QAA1B,WAAM,GAAN,MAAM,CAAoB;QAC7C,IAAI,CAAC,MAAM,mBACV,OAAO,EAAE,6BAA6B,EACtC,WAAW,EAAE,EAAE,EACf,UAAU,EAAE,CAAC,EACb,QAAQ,EAAE,IAAI,EACd,SAAS,EAAE,UAAU,IAClB,MAAM,CACT,CAAC;IACH,CAAC;IAEK,aAAa,CAAC,WAAmB;;;YACtC,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;YACzB,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,iBAAiB,CAAC,WAAW,CAAC,CAAC;YAC1D,IAAI,CAAC,OAAO,EAAE,CAAC;gBACd,MAAA,IAAI,CAAC,MAAM,CAAC,MAAM,0CAAE,IAAI,CAAC,sBAAsB,WAAW,EAAE,CAAC,CAAC;gBAC9D,OAAO,IAAI,CAAC;YACb,CAAC;YAED,MAAM,IAAI,GAAwB,EAAE,CAAC;YACrC,MAAM,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;YAExC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,GAAG,OAAO,CAAC,IAAI,oBAAoB,EAAE,IAAI,CAAC,MAAM,CAAC,SAAU,CAAC,CAAC;YAEjF,MAAM,gBAAgB,GAAG,IAAI,CAAC,wBAAwB,CAAC,IAAI,CAAC,CAAC;YAC7D,IAAI,CAAC,QAAQ,CAAC,gBAAgB,EAAE,GAAG,OAAO,CAAC,IAAI,cAAc,EAAE,IAAI,CAAC,MAAM,CAAC,SAAU,CAAC,CAAC;YAEvF,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;QACtB,CAAC;KAAA;IAEK,eAAe;;;YACpB,MAAM,IAAI,CAAC,WAAW,EAAE,CAAC;YACzB,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,mBAAmB,EAAE,CAAC;YACnD,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC5B,MAAA,IAAI,CAAC,MAAM,CAAC,MAAM,0CAAE,IAAI,CAAC,qBAAqB,CAAC,CAAC;gBAChD,OAAO,IAAI,CAAC;YACb,CAAC;YAED,KAAK,MAAM,OAAO,IAAI,SAAS,EAAE,CAAC;gBACjC,MAAM,GAAG,GAAG,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,CAAC;gBAC1E,MAAM,WAAW,GAAwB,EAAE,CAAC;gBAC5C,MAAA,IAAI,CAAC,MAAM,CAAC,MAAM,0CAAE,IAAI,CAAC,uBAAuB,GAAG,EAAE,CAAC,CAAC;gBAEvD,MAAM,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,OAAO,EAAE,WAAW,CAAC,CAAC;gBAC/C,IAAI,CAAC,QAAQ,CAAC,WAAW,EAAE,GAAG,GAAG,oBAAoB,EAAE,IAAI,CAAC,MAAM,CAAC,SAAU,CAAC,CAAC;gBAE/E,MAAM,gBAAgB,GAAG,IAAI,CAAC,wBAAwB,CAAC,WAAW,CAAC,CAAC;gBACpE,IAAI,CAAC,QAAQ,CAAC,gBAAgB,EAAE,GAAG,GAAG,cAAc,EAAE,IAAI,CAAC,MAAM,CAAC,SAAU,CAAC,CAAC;YAC/E,CAAC;YACD,MAAM,IAA
I,CAAC,OAAO,EAAE,CAAC;QACtB,CAAC;KAAA;IAEa,WAAW;;YACxB,IAAI,CAAC,OAAO,GAAG,MAAM,mBAAS,CAAC,MAAM,CAAC,EAAE,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC,CAAC;YAC1E,IAAI,CAAC,OAAO,GAAG,IAAI,kBAAO,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;YACtD,IAAI,CAAC,KAAK,GAAG,IAAI,uBAAe,CAAC,IAAI,CAAC,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QAC7D,CAAC;KAAA;IAEa,iBAAiB,CAAC,IAAY;;;YAC3C,IAAI,CAAC;gBACJ,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,OAAQ,CAAC,CAAC;gBACrE,OAAO,gBAAM,CAAC,kBAAkB,CAAC,IAAA,cAAI,EAAC,IAAI,CAAC,EAAE,IAAI,CAAC,MAAM,EAAE,IAAI,CAAC,CAAC;YACjE,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBAChB,MAAA,IAAI,CAAC,MAAM,CAAC,MAAM,0CAAE,KAAK,CAAC,mCAAmC,IAAI,EAAE,EAAE,KAAK,CAAC,CAAC;gBAC5E,OAAO,IAAI,CAAC;YACb,CAAC;QACF,CAAC;KAAA;IAEa,mBAAmB;;;YAChC,IAAI,CAAC;gBACJ,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,IAAI,CAAC,MAAM,CAAC,OAAQ,CAAC,CAAC;gBACrE,OAAO,gBAAM,CAAC,cAAc,CAAC,IAAA,cAAI,EAAC,IAAI,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;YACvD,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBAChB,MAAA,IAAI,CAAC,MAAM,CAAC,MAAM,0CAAE,KAAK,CAAC,kCAAkC,EAAE,KAAK,CAAC,CAAC;gBACrE,OAAO,EAAE,CAAC;YACX,CAAC;QACF,CAAC;KAAA;IAEO,wBAAwB,CAAC,IAAS;QACzC,OAAO,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,IAAA,sCAAuB,GAAE,CAAC,CAAC;IAC1D,CAAC;IAEO,WAAW,CAClB,SAAc,EACd,WAA8B,EAC9B,MAAgB,EAAE,EAClB,SAA2B,EAAE,aAAa,EAAE,EAAE,EAAE,OAAO,EAAE,EAAE,EAAE;QAE7D,IAAI,KAAK,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,CAAC;YAC9B,KAAK,MAAM,IAAI,IAAI,SAAS,EAAE,CAAC;gBAC9B,MAAM,EAAE,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;gBAC5B,MAAM,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC;gBAChC,MAAM,CAAC,OAAO,CAAC,EAAE,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,CAAC;YAC/B,CAAC;QACF,CAAC;aAAM,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,KAAK,IAAI,EAAE,CAAC;YAChE,KAAK,MAAM,CAAC,SAAS,EAAE,WAAW,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,SAAS,CAAC,EAAE,CAAC;gBAClE,IAAI,CAAC,WAAW,CAAC,WAAW,EAAE,WAAW,EAAE,CAAC,GAAG,GAAG,EAAE,SAAS,CAAC,EAAE,MAAM,CAAC,CAAC;YACzE,CAAC;QACF,CAAC;QACD,OAAO,MAAM,CAAC;IACf,CAAC;IAEO,eAAe,CAAC,GAAW;QAClC,OAAO,GAAG;aACR,IAAI,EAAE;aACN,WAAW,EAAE;aACb,SAAS,CAAC,KAAK,CAAC;aAChB,O
AAO,CAAC,kBAAkB,EAAE,EAAE,CAAC;aAC/B,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;aACpB,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC,CAAC;IAC/B,CAAC;IAEO,QAAQ,CAAC,IAAS,EAAE,QAAgB,EAAE,YAAoB,UAAU;;QAC3E,IAAI,CAAC;YACJ,IAAA,cAAS,EAAC,SAAS,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;YAC1C,MAAM,QAAQ,GAAG,cAAI,CAAC,IAAI,CAAC,SAAS,EAAE,IAAI,CAAC,eAAe,CAAC,QAAQ,CAAC,CAAC,CAAC;YACtE,IAAA,kBAAa,EAAC,QAAQ,EAAE,IAAI,CAAC,SAAS,CAAC,IAAI,EAAE,IAAI,EAAE,CAAC,CAAC,EAAE,EAAE,IAAI,EAAE,GAAG,EAAE,CAAC,CAAC;YACtE,MAAA,IAAI,CAAC,MAAM,CAAC,MAAM,0CAAE,IAAI,CAAC,iBAAiB,QAAQ,EAAE,CAAC,CAAC;QACvD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YAChB,MAAA,IAAI,CAAC,MAAM,CAAC,MAAM,0CAAE,KAAK,CAAC,wBAAwB,QAAQ,EAAE,EAAE,KAAK,CAAC,CAAC;QACtE,CAAC;IACF,CAAC;IAEK,OAAO;;;YACZ,MAAM,CAAA,MAAA,IAAI,CAAC,OAAO,0CAAE,KAAK,EAAE,CAAA,CAAC;QAC7B,CAAC;KAAA;CACD;AAnID,8CAmIC;AAED,kBAAe,IAAI,iBAAiB,EAAE,CAAC"}
@@ -1,3 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- //# sourceMappingURL=types.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/types.ts"],"names":[],"mappings":""}
@@ -1,33 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.createRegionIdGenerator = void 0;
4
/**
 * Creates a function that hands out stable, readable IDs for region
 * hierarchies.
 *
 * The returned function takes a hierarchy (array of region names, outermost
 * first) and returns `{lastRegion}_{n}`, where `n` counts how many distinct
 * hierarchies ending in that same last region have been seen. Asking again for
 * the same hierarchy (ignoring case, diacritics and surrounding/inner
 * whitespace differences) returns the ID issued the first time.
 *
 * @returns {(regions: string[]) => string} ID generator with its own registry.
 */
const createRegionIdGenerator = () => {
    // Hierarchy key -> ID already issued for that hierarchy.
    const idsByHierarchy = new Map();
    // Base name -> how many distinct hierarchies have used it so far.
    const usageByBaseName = new Map();

    // Case-, diacritic- and whitespace-insensitive form of one region name.
    const normalizeRegion = (region) => region
        .trim()
        .toLowerCase()
        .normalize("NFD")
        .replace(/[\u0300-\u036f]/g, "") // Remove diacritics
        .replace(/\s+/g, "_");

    return (regions) => {
        const normalized = regions.map(normalizeRegion);

        // JSON keeps segment boundaries unambiguous; a plain "|" join would
        // make ["a|b"] and ["a", "b"] look like the same hierarchy.
        const hierarchyKey = JSON.stringify(normalized);
        const existingId = idsByHierarchy.get(hierarchyKey);
        if (existingId !== undefined) {
            return existingId;
        }

        // The last region (typically the city) names the ID. An empty
        // hierarchy has no last region, so fall back to a fixed name instead
        // of producing the literal "undefined".
        const baseName = normalized.length > 0 ? normalized[normalized.length - 1] : "region";
        const count = (usageByBaseName.get(baseName) || 0) + 1;
        usageByBaseName.set(baseName, count);

        const newId = `${baseName}_${count}`;
        idsByHierarchy.set(hierarchyKey, newId);
        return newId;
    };
};
32
- exports.createRegionIdGenerator = createRegionIdGenerator;
33
- //# sourceMappingURL=id-generator.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"id-generator.js","sourceRoot":"","sources":["../../../../src/utils/id-generator.ts"],"names":[],"mappings":";;;AAIO,MAAM,uBAAuB,GAAG,GAAsB,EAAE;IAC9D,MAAM,cAAc,GAAG,IAAI,GAAG,EAAkB,CAAC;IACjD,MAAM,UAAU,GAAG,IAAI,GAAG,EAAkB,CAAC;IAE7C,OAAO,CAAC,OAAiB,EAAU,EAAE;QACpC,4DAA4D;QAC5D,MAAM,UAAU,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CACzC,MAAM;aACJ,IAAI,EAAE;aACN,WAAW,EAAE;aACb,SAAS,CAAC,KAAK,CAAC;aAChB,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC,CAAC,oBAAoB;aACpD,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CACtB,CAAC;QAEF,yDAAyD;QACzD,MAAM,YAAY,GAAG,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;QAE1C,2CAA2C;QAC3C,IAAI,cAAc,CAAC,GAAG,CAAC,YAAY,CAAC,EAAE,CAAC;YACtC,OAAO,cAAc,CAAC,GAAG,CAAC,YAAY,CAAE,CAAC;QAC1C,CAAC;QAED,oEAAoE;QACpE,MAAM,QAAQ,GAAG,UAAU,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACnD,MAAM,KAAK,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;QAClD,UAAU,CAAC,GAAG,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAC;QAEhC,gDAAgD;QAChD,MAAM,KAAK,GAAG,GAAG,QAAQ,IAAI,KAAK,EAAE,CAAC;QAErC,6BAA6B;QAC7B,cAAc,CAAC,GAAG,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC;QAExC,OAAO,KAAK,CAAC;IACd,CAAC,CAAC;AACH,CAAC,CAAC;AApCW,QAAA,uBAAuB,2BAoClC"}
@@ -1,87 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.Logger = void 0;
4
/**
 * Process-wide logging facade.
 *
 * The static methods filter by level, format the message (prefix, ISO
 * timestamp, level tag, optional ANSI colours) and forward it to a delegate
 * logger. The instance methods are the default delegate and write to the
 * console.
 */
class Logger {
    // Defaults live on the class so it is fully usable as soon as it is defined.
    static logLevel = "info";
    static useColors = true;
    static prefix = "[POSTAL-CODE-SCRAPER]";
    static instance;

    /**
     * Updates global logging settings; omitted keys keep their current value.
     *
     * @param config `level` ("error" | "warn" | "info" | "debug" | "silent"),
     *   `colors` (boolean), `prefix` (string, may be empty) and/or `logger`
     *   (delegate exposing debug/info/warn/error).
     */
    static configure(config) {
        if (config.level)
            this.logLevel = config.level;
        if (config.colors !== undefined)
            this.useColors = config.colors;
        // Compared against undefined so an empty prefix can be configured.
        if (config.prefix !== undefined)
            this.prefix = config.prefix;
        if (config.logger)
            this.instance = config.logger;
    }

    /** Returns the delegate logger, creating the console-backed default once. */
    static getInstance() {
        if (!this.instance) {
            // Cached so that logging does not allocate a new Logger per message.
            this.instance = new Logger();
        }
        return this.instance;
    }

    static debug(message, ...args) {
        this.log("debug", message, args);
    }

    static info(message, ...args) {
        this.log("info", message, args);
    }

    static warn(message, ...args) {
        this.log("warn", message, args);
    }

    static error(message, ...args) {
        this.log("error", message, args);
    }

    /** True when `level` is at least as severe as the configured level. */
    static shouldLog(level) {
        if (this.logLevel === "silent")
            return false;
        // Ordered from most to least severe.
        const levels = ["error", "warn", "info", "debug"];
        return levels.indexOf(level) <= levels.indexOf(this.logLevel);
    }

    /** Filters, formats and forwards one message to the delegate. */
    static log(level, message, args) {
        if (!this.shouldLog(level))
            return;
        const logger = this.getInstance();
        const formatted = this.formatMessage(level, message);
        logger[level](formatted, ...args);
    }

    /** Builds "<prefix> <ISO timestamp> [LEVEL] <message>", coloured when enabled. */
    static formatMessage(level, message) {
        const timestamp = new Date().toISOString();
        const levelColor = this.getLevelColor(level);
        const dim = this.useColors ? "\x1b[90m" : "";
        const white = this.useColors ? "\x1b[37m" : "";
        const reset = this.useColors ? "\x1b[0m" : "";
        return [
            dim,
            // No separator is emitted for an empty prefix.
            this.prefix ? `${this.prefix} ` : "",
            `${timestamp} `,
            levelColor,
            `[${level.toUpperCase()}]`,
            reset,
            white,
            ` ${message}`,
            reset,
        ].join("");
    }

    /** ANSI colour code for a level, or "" when colours are disabled. */
    static getLevelColor(level) {
        if (!this.useColors)
            return "";
        const palette = {
            error: "\x1b[31m", // Red
            warn: "\x1b[33m", // Yellow
            info: "\x1b[36m", // Cyan
            debug: "\x1b[35m", // Magenta
        };
        return palette[level];
    }

    // Instance methods: the console-backed default delegate.
    debug(message, ...args) {
        console.debug(message, ...args);
    }

    info(message, ...args) {
        console.log(message, ...args);
    }

    warn(message, ...args) {
        console.warn(message, ...args);
    }

    error(message, ...args) {
        console.error(message, ...args);
    }
}
83
- exports.Logger = Logger;
84
- Logger.logLevel = "info";
85
- Logger.useColors = true;
86
- Logger.prefix = "[POSTAL-CODE-SCRAPER]";
87
- //# sourceMappingURL=logger.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"logger.js","sourceRoot":"","sources":["../../../../src/utils/logger.ts"],"names":[],"mappings":";;;AAUA,MAAa,MAAM;IAMlB,MAAM,CAAC,SAAS,CAAC,MAAyF;QACzG,IAAI,MAAM,CAAC,KAAK;YAAE,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC,KAAK,CAAC;QAC/C,IAAI,MAAM,CAAC,MAAM,KAAK,SAAS;YAAE,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC;QAChE,IAAI,MAAM,CAAC,MAAM;YAAE,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;QAC/C,IAAI,MAAM,CAAC,MAAM;YAAE,IAAI,CAAC,QAAQ,GAAG,MAAM,CAAC,MAAM,CAAC;IAClD,CAAC;IAED,MAAM,CAAC,WAAW;QACjB,OAAO,IAAI,CAAC,QAAQ,IAAI,IAAI,MAAM,EAAE,CAAC;IACtC,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,OAAe,EAAE,GAAG,IAAW;QAC3C,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IAClC,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,OAAe,EAAE,GAAG,IAAW;QAC1C,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IACjC,CAAC;IAED,MAAM,CAAC,IAAI,CAAC,OAAe,EAAE,GAAG,IAAW;QAC1C,IAAI,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IACjC,CAAC;IAED,MAAM,CAAC,KAAK,CAAC,OAAe,EAAE,GAAG,IAAW;QAC3C,IAAI,CAAC,GAAG,CAAC,OAAO,EAAE,OAAO,EAAE,IAAI,CAAC,CAAC;IAClC,CAAC;IAEO,MAAM,CAAC,SAAS,CAAC,KAAe;QACvC,IAAI,IAAI,CAAC,QAAQ,KAAK,QAAQ;YAAE,OAAO,KAAK,CAAC;QAE7C,MAAM,MAAM,GAAgB,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,CAAC,CAAC;QAC/D,OAAO,MAAM,CAAC,OAAO,CAAC,KAAkB,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,QAAqB,CAAC,CAAC;IACzF,CAAC;IAEO,MAAM,CAAC,GAAG,CAAC,KAAgB,EAAE,OAAe,EAAE,IAAW;QAChE,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC;YAAE,OAAO;QAEnC,MAAM,MAAM,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAClC,MAAM,SAAS,GAAG,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,OAAO,CAAC,CAAC;QAErD,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,EAAE,GAAG,IAAI,CAAC,CAAC;IACnC,CAAC;IAEO,MAAM,CAAC,aAAa,CAAC,KAAgB,EAAE,OAAe;QAC7D,MAAM,SAAS,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAC3C,MAAM,UAAU,GAAG,IAAI,CAAC,aAAa,CAAC,KAAK,CAAC,CAAC;QAC7C,MAAM,YAAY,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE,CAAC;QAEtD,OAAO;YACN,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,EAAE;YAChC,GAAG,IAAI,CAAC,MAAM,GAAG;YACjB,GAAG,SAAS,GAAG;YACf,UAAU;YACV,IAAI,KAAK,CAAC,WAAW,EAAE,GAAG;YAC1B,IAAI,CAAC,SAAS,
CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE;YAC/B,YAAY;YACZ,IAAI,OAAO,EAAE;YACb,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,EAAE;SAC/B,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACZ,CAAC;IAEO,MAAM,CAAC,aAAa,CAAC,KAAgB;QAC5C,IAAI,CAAC,IAAI,CAAC,SAAS;YAAE,OAAO,EAAE,CAAC;QAE/B,OAAO;YACN,KAAK,EAAE,UAAU,EAAE,MAAM;YACzB,IAAI,EAAE,UAAU,EAAE,SAAS;YAC3B,IAAI,EAAE,UAAU,EAAE,OAAO;YACzB,KAAK,EAAE,UAAU,EAAE,UAAU;SAC7B,CAAC,KAAK,CAAC,CAAC;IACV,CAAC;IAED,gDAAgD;IAChD,KAAK,CAAC,OAAe,EAAE,GAAG,IAAW;QACpC,OAAO,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,CAAC;IACjC,CAAC;IAED,IAAI,CAAC,OAAe,EAAE,GAAG,IAAW;QACnC,OAAO,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,CAAC;IAC/B,CAAC;IAED,IAAI,CAAC,OAAe,EAAE,GAAG,IAAW;QACnC,OAAO,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,CAAC;IAChC,CAAC;IAED,KAAK,CAAC,OAAe,EAAE,GAAG,IAAW;QACpC,OAAO,CAAC,KAAK,CAAC,OAAO,EAAE,GAAG,IAAI,CAAC,CAAC;IACjC,CAAC;;AA7FF,wBA8FC;AA7Fe,eAAQ,GAAa,MAAM,CAAC;AAC5B,gBAAS,GAAY,IAAI,CAAC;AAC1B,aAAM,GAAW,uBAAuB,CAAC"}
@@ -1,14 +0,0 @@
1
- "use strict";
2
- var __importDefault = (this && this.__importDefault) || function (mod) {
3
- return (mod && mod.__esModule) ? mod : { "default": mod };
4
- };
5
- Object.defineProperty(exports, "__esModule", { value: true });
6
- const chai_1 = require("chai");
7
- const index_1 = __importDefault(require("../src/index"));
8
describe("Postal Code Scraper", () => {
    // scrapeCountry is asynchronous. Asserting on the un-awaited promise is
    // always truthy, so the result is awaited before it is checked. Mocha
    // waits for the promise returned by the async test function.
    it("should do something", async () => {
        const result = await index_1.default.scrapeCountry("romania");
        // scrapeCountry resolves to null only when the country is not found;
        // a completed scrape resolves to undefined.
        (0, chai_1.expect)(result).to.not.equal(null);
    });
});
14
- //# sourceMappingURL=postal-code-scraper.test.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"postal-code-scraper.test.js","sourceRoot":"","sources":["../../../tests/postal-code-scraper.test.ts"],"names":[],"mappings":";;;;;AAAA,+BAA8B;AAC9B,yDAA6C;AAE7C,QAAQ,CAAC,qBAAqB,EAAE,GAAG,EAAE;IACpC,EAAE,CAAC,qBAAqB,EAAE,GAAG,EAAE;QAC9B,MAAM,MAAM,GAAG,eAAiB,CAAC,aAAa,CAAC,SAAS,CAAC,CAAC;QAC1D,IAAA,aAAM,EAAC,MAAM,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC;IACzB,CAAC,CAAC,CAAC;AACJ,CAAC,CAAC,CAAC"}
@@ -1,9 +0,0 @@
1
- import { Browser } from "puppeteer";
2
- import { ScraperConfig } from "../types";
3
/**
 * Loads pages through a shared Puppeteer browser and returns their HTML.
 */
- export declare class Fetcher {
4
- private browser;
5
- private config;
6
/**
 * @param browser Browser used to open a new page for every fetch.
 * @param config Scraper configuration; the implementation reads `maxRetries` and the optional `logger`.
 */
- constructor(browser: Browser, config: ScraperConfig);
7
/**
 * Opens a new page, navigates to `url` (60 s navigation timeout, waiting for
 * "domcontentloaded"), returns the page content and always closes the page.
 */
- fetchHtml(url: string): Promise<string>;
8
/**
 * Calls `fetchHtml`, retrying after a random 5-12 s pause when it fails.
 * @param retries Remaining retries; defaults to `config.maxRetries`, or 5 when that is falsy.
 * @throws Error once no retries remain.
 */
- fetchWithRetry(url: string, retries?: number): Promise<string>;
9
- }
@@ -1,48 +0,0 @@
1
- "use strict";
2
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
- return new (P || (P = Promise))(function (resolve, reject) {
5
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
- step((generator = generator.apply(thisArg, _arguments || [])).next());
9
- });
10
- };
11
- Object.defineProperty(exports, "__esModule", { value: true });
12
- exports.Fetcher = void 0;
13
/**
 * Loads pages through a shared Puppeteer browser and returns their HTML.
 */
class Fetcher {
    browser;
    config;

    /**
     * @param browser Browser used to open a new page for every fetch.
     * @param config Scraper configuration; `maxRetries` and the optional
     *   `logger` are read here.
     */
    constructor(browser, config) {
        this.browser = browser;
        this.config = config;
    }

    /**
     * Opens a new page, navigates to `url` and returns the page's HTML.
     * The page is closed whether or not navigation succeeds.
     *
     * @param url Absolute URL to load.
     * @returns The serialized page content.
     */
    async fetchHtml(url) {
        const page = await this.browser.newPage();
        try {
            page.setDefaultNavigationTimeout(60000);
            await page.goto(url, { waitUntil: "domcontentloaded" });
            return await page.content();
        }
        finally {
            await page.close();
        }
    }

    /**
     * Fetches `url`, retrying with a randomized pause when a fetch fails.
     *
     * @param url Absolute URL to load.
     * @param retries Retries still allowed. Defaults to `config.maxRetries`,
     *   or 5 when that is not set; an explicit 0 disables retrying.
     * @returns The serialized page content.
     * @throws Error when the fetch fails and no retries remain; the last
     *   underlying failure is attached as `cause`.
     */
    async fetchWithRetry(url, retries = this.config.maxRetries ?? 5) {
        // `??` (not `||`) so that maxRetries: 0 really means "do not retry".
        const retryBudget = this.config.maxRetries ?? 5;
        try {
            return await this.fetchHtml(url);
        }
        catch (error) {
            if (retries <= 0) {
                throw Object.assign(
                    new Error(`Failed to fetch: ${url} after ${retryBudget} attempts`),
                    { cause: error },
                );
            }
            // Logged only when a retry actually follows.
            const attempt = Math.max(retryBudget - retries, 0) + 1;
            this.config.logger?.warn(`Retrying (${attempt}) for: ${url}`);
            // Random 5-12 s pause between attempts.
            await new Promise((resolve) => setTimeout(resolve, Math.random() * 7000 + 5000));
            return this.fetchWithRetry(url, retries - 1);
        }
    }
}
48
- exports.Fetcher = Fetcher;
@@ -1,7 +0,0 @@
1
- import { Region, ScraperConfig } from "../types";
2
/**
 * Static helpers that extract regions, countries and postal codes from a
 * loaded cheerio document.
 */
- export declare class Parser {
3
/** Returns the links inside the `.regions` element that follows the "Regions" heading; links without an href or text are skipped. */
- static parseRegions($: cheerio.Root, config: ScraperConfig): Region[];
4
/**
 * Maps each place found under `.codes .container` to its list of codes.
 * Keys are the place text as-is when `config.usePrettyName` is set, otherwise lowercased with whitespace runs replaced by "-".
 */
- static parsePostalCodes($: cheerio.Root, config: ScraperConfig): Record<string, string[]>;
5
/** Returns one region per `.regions div a` link that has an href; `name` is the href with all slashes removed. */
- static parseCountries($: cheerio.Root, config: ScraperConfig): Region[];
6
/** Finds the `.regions div a` link whose href, with slashes removed, equals the lowercased and trimmed `name`; returns `null` when none matches. */
- static parseCountryByName($: cheerio.Root, config: ScraperConfig, name: string): Region | null;
7
- }
@@ -1,62 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.Parser = void 0;
4
/**
 * Static helpers that extract regions, countries and postal codes from a
 * loaded cheerio document (`$`).
 */
class Parser {
    /**
     * Collects the sub-region links of a page: the anchors inside the
     * `.regions` element that directly follows the "Regions" heading.
     *
     * @param $ Loaded cheerio document.
     * @param config Scraper configuration (not used here).
     * @returns Regions as `{ name, prettyName, path }`; `name` is the last
     *   non-empty segment of the link's href.
     */
    static parseRegions($, config) {
        return $("h2:contains('Regions')")
            .next(".regions")
            .find("a")
            .map((_index, element) => {
                const path = $(element).attr("href");
                const prettyName = $(element).text().trim();
                if (!path || !prettyName)
                    return null;
                const name = path.split("/").filter(Boolean).pop();
                // An href made only of slashes has no segment to name the
                // region with; skip it rather than emit `name: undefined`.
                if (!name)
                    return null;
                return { name, prettyName, path };
            })
            .get()
            .filter(Boolean);
    }

    /**
     * Maps each place listed under `.codes .container` to its postal codes.
     *
     * @param $ Loaded cheerio document.
     * @param config When `usePrettyName` is set the place text is the key as
     *   is; otherwise it is lowercased with whitespace runs replaced by "-".
     * @returns Place key -> array of code strings.
     */
    static parsePostalCodes($, config) {
        const codes = {};
        $(".codes .container").each((_i, element) => {
            const place = $(element).find(".place").text().trim();
            const codesList = $(element)
                .find(".code span")
                .map((_j, el) => $(el).text().trim())
                .get();
            if (place) {
                const key = config.usePrettyName ? place : place.toLowerCase().replace(/\s+/g, "-");
                codes[key] = codesList;
            }
        });
        return codes;
    }

    /**
     * Lists every country link (`.regions div a`) that has an href.
     *
     * @param $ Loaded cheerio document.
     * @param config Scraper configuration (not used here).
     * @returns Countries as `{ name, prettyName, path }`; `name` is the href
     *   with all slashes removed.
     */
    static parseCountries($, config) {
        return $(".regions div a")
            .map((_i, element) => {
                const path = $(element).attr("href");
                return path ? { name: path.replace(/\//g, ""), prettyName: $(element).text().trim(), path } : null;
            })
            .get()
            .filter(Boolean);
    }

    /**
     * Finds one country by name among the `.regions div a` links.
     *
     * @param $ Loaded cheerio document.
     * @param config Scraper configuration (not used here).
     * @param name Country name; compared lowercased and trimmed against each
     *   href with its slashes removed.
     * @returns The first matching country, or `null` when nothing matches or
     *   the match has no href or text.
     */
    static parseCountryByName($, config, name) {
        const wanted = name.toLowerCase().trim();
        const match = $(".regions div a")
            .filter((_i, el) => $(el).attr("href")?.replace(/\//g, "") === wanted)
            // attr() reads only the first matched element but text() joins the
            // text of all of them; narrowing to one keeps path and name in sync.
            .first();
        if (!match.length)
            return null;
        const path = match.attr("href");
        const prettyName = match.text().trim();
        return path && prettyName
            ? { name: path.replace(/\//g, ""), prettyName, path }
            : null;
    }
}
62
- exports.Parser = Parser;
@@ -1,12 +0,0 @@
1
- import { Region, ScraperConfig, RegionData } from "../types";
2
- import { Fetcher } from "./fetchers";
3
/**
 * Crawls a region and its sub-regions, filling a nested data object with the
 * regions and postal codes found on each page.
 */
- export declare class ProcessingQueue {
4
- private fetcher;
5
- private config;
6
- private queue;
7
- private visitedUrls;
8
- private limit;
9
/**
 * @param fetcher Used to download each region page.
 * @param config Scraper configuration; `concurrency` (15 when falsy) caps how many pages are processed at once.
 */
- constructor(fetcher: Fetcher, config: ScraperConfig);
10
/**
 * Processes `startRegion` and every sub-region discovered from it, in batches, until no queued work remains.
 * @param data Object that receives the results; it is mutated in place.
 */
- process(startRegion: Region, data: RegionData): Promise<void>;
11
- private processItem;
12
- }
@@ -1,67 +0,0 @@
1
- "use strict";
2
- var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
3
- function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
4
- return new (P || (P = Promise))(function (resolve, reject) {
5
- function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
6
- function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
7
- function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
8
- step((generator = generator.apply(thisArg, _arguments || [])).next());
9
- });
10
- };
11
- var __importDefault = (this && this.__importDefault) || function (mod) {
12
- return (mod && mod.__esModule) ? mod : { "default": mod };
13
- };
14
- Object.defineProperty(exports, "__esModule", { value: true });
15
- exports.ProcessingQueue = void 0;
16
- const cheerio_1 = require("cheerio");
17
- const p_limit_1 = __importDefault(require("p-limit"));
18
- const parsers_1 = require("./parsers");
19
- const env_config_1 = require("../utils/env-config");
20
/**
 * Crawls a region and its sub-regions, filling a nested data object with the
 * regions and postal codes found on each page.
 *
 * Work is handled in batches: everything currently queued is started (capped
 * by the concurrency limiter), and sub-regions discovered while a batch runs
 * are queued for the next one.
 */
class ProcessingQueue {
    /**
     * @param fetcher Used to download each region page.
     * @param config Scraper configuration; `concurrency` (15 when falsy) caps
     *   how many pages are processed at once.
     */
    constructor(fetcher, config) {
        this.fetcher = fetcher;
        this.config = config;
        this.queue = [];
        this.visitedUrls = new Set();
        this.limit = (0, p_limit_1.default)(config.concurrency || 15);
    }

    /**
     * Processes `startRegion` and everything reachable from it.
     *
     * @param startRegion Region whose page is fetched first.
     * @param data Object that receives the results; mutated in place.
     */
    async process(startRegion, data) {
        this.queue.push({ region: startRegion, currData: data });
        while (this.queue.length > 0) {
            const pending = [];
            for (const entry of this.queue) {
                pending.push(this.limit(() => this.processItem(entry)));
            }
            // Reset before waiting so that newly discovered regions land in
            // the next batch.
            this.queue = [];
            await Promise.all(pending);
        }
    }

    /**
     * Fetches one region page, queues its sub-regions and merges its postal
     * codes into the region's data object. Pages already visited are skipped;
     * failures are logged and do not stop the crawl.
     */
    async processItem(item) {
        const { region, currData } = item;
        const url = `${(0, env_config_1.getBaseUrl)()}${region.path}`;
        if (this.visitedUrls.has(url)) {
            return;
        }
        this.visitedUrls.add(url);
        this.config.logger?.info(`Fetching: ${url}`);
        try {
            const html = await this.fetcher.fetchWithRetry(url);
            const $ = (0, cheerio_1.load)(html);
            for (const child of parsers_1.Parser.parseRegions($, this.config)) {
                const key = this.config.usePrettyName ? child.prettyName : child.name;
                const bucket = {};
                currData[key] = bucket;
                this.queue.push({ region: child, currData: bucket });
            }
            Object.assign(currData, parsers_1.Parser.parsePostalCodes($, this.config));
        }
        catch (error) {
            this.config.logger?.error(`Error processing ${url}:`, error);
        }
    }
}
67
- exports.ProcessingQueue = ProcessingQueue;
@@ -1,19 +0,0 @@
1
- import { ScraperConfig } from "../types";
2
/**
 * Scrapes postal codes for one country or for all listed countries and writes
 * the results as JSON files into the configured directory.
 */
- export declare class PostalCodeScraper {
3
- private config;
4
- private browser;
5
- private queue;
6
- private fetcher;
7
/**
 * @param config Optional overrides. The implementation defaults are baseUrl "https://worldpostalcode.com",
 * concurrency 15, maxRetries 5, headless true and directory "src/data".
 */
- constructor(config?: ScraperConfig);
8
/**
 * Scrapes one country and writes `<name>-postal-codes.json` and `<name>-lookup.json`.
 * Resolves to `null` when the country is not found, otherwise to `undefined`.
 */
- scrapeCountry(countryName: string): Promise<null | undefined>;
9
/**
 * Scrapes every listed country in turn, writing the same two files per country.
 * Resolves to `null` when no countries are found, otherwise to `undefined`.
 */
- scrapeCountries(): Promise<null | undefined>;
10
- private initBrowser;
11
- private getCountryDetails;
12
- private getCountriesDetails;
13
- private generatePostalCodeLookup;
14
- private buildLookup;
15
- private saveData;
16
- private cleanup;
17
- }
18
- declare const _default: PostalCodeScraper;
19
- export default _default;