h56-github-scrapper 1.0.4 → 1.0.6

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/main-scrapping.js CHANGED
@@ -1,120 +1,76 @@
1
+ #!/usr/bin/env node
1
2
  /**
2
- * main-scrapping.js (ESM)
3
+ * main-scrapping.js (ESM, improved)
3
4
  *
4
- * Converted to ECMAScript Modules (ESM) for Node.js.
5
- * - Preserves original logic and features (scraping, retry/backoff, spinner, CLI).
6
- * - Adds optional translator integration (uses translate-engine/translate.js as ESM).
7
- * - Uses dynamic import for third-party runtime deps after optional auto-install.
8
- *
9
- *
10
- * This file is ready to copy/paste. It intentionally preserves behavior and CLI options
11
- * while modernizing module semantics to ESM.
5
+ * - Fully ESM, no createRequire usage.
6
+ * - Does NOT auto-install dependencies. Instead it detects missing runtime deps
7
+ * and prints a clear instruction to install them (safer for CI and terminals).
8
+ * - Keeps previous scraping, retry/backoff, translator lazy-loading, CLI and public API.
9
+ * - Improved export block and CLI detection.
12
10
  */
13
11
 
14
12
  import fs from "fs";
15
13
  import path from "path";
16
- import { spawnSync } from "child_process";
17
14
  import os from "os";
18
- import { createRequire } from "module";
19
15
  import readline from "readline";
20
16
  import { fileURLToPath } from "url";
21
17
 
22
- const require = createRequire(import.meta.url);
18
+ const __filename = fileURLToPath(import.meta.url);
19
+ const __dirname = path.dirname(__filename);
23
20
 
24
- // -------------------------
25
- // Ensure runtime deps (async)
26
- // -------------------------
27
- async function ensureDependencies(deps = []) {
28
- const missing = deps.filter((d) => {
21
+ // Runtime packages required for operation
22
+ const RUNTIME_PKGS = ["axios", "cheerio", "ora", "yargs"];
23
+
24
+ /**
25
+ * Dynamically import runtime packages and return a map of package -> module.
26
+ * If any package is missing, behave differently depending on whether this module
27
+ * is being run as CLI or imported as a library:
28
+ * - CLI: print a helpful message and exit(1)
29
+ * - Library: throw an Error (so consumer can handle)
30
+ */
31
+ async function loadRuntimes() {
32
+ const results = {};
33
+ const missing = [];
34
+
35
+ for (const name of RUNTIME_PKGS) {
29
36
  try {
30
- require.resolve(d);
31
- return false;
32
- } catch (e) {
33
- return true;
37
+ const mod = await import(name);
38
+ results[name] = mod && mod.default ? mod.default : mod;
39
+ } catch (err) {
40
+ missing.push(name);
34
41
  }
35
- });
36
-
37
- if (!missing.length) return;
38
-
39
- console.log(
40
- `Dependencies missing: ${missing.join(
41
- ", "
42
- )}. The script can install them automatically.`
43
- );
44
-
45
- // If running non-interactive environment, install automatically.
46
- let consent = false;
47
- if (process.env.CI || !process.stdin.isTTY) {
48
- consent = true;
49
- console.log("Non-interactive environment detected, installing automatically...");
50
- } else {
51
- const rl = readline.createInterface({
52
- input: process.stdin,
53
- output: process.stdout,
54
- });
42
+ }
55
43
 
56
- // Prompt with 10s timeout; default to yes
57
- const questionPromise = new Promise((resolve) =>
58
- rl.question(`Install missing dependencies now? (Y/n): `, (a) => {
59
- rl.close();
60
- resolve(String(a || "").trim().toLowerCase());
61
- })
62
- );
44
+ if (missing.length) {
45
+ const cmd = `npm install ${missing.join(" ")}`;
46
+ const msg =
47
+ `Missing runtime dependencies: ${missing.join(", ")}.\n` +
48
+ `Please install them before running this script:\n\n ${cmd}\n\n` +
49
+ `If you are in CI and want deterministic installs, declare these dependencies explicitly in your pipeline or package.json.`;
63
50
 
64
- const ans = await Promise.race([
65
- questionPromise,
66
- new Promise((resolve) => setTimeout(() => resolve(null), 10000)),
67
- ]);
51
+ // Detect CLI execution (rough check)
52
+ const isCli = process.argv && process.argv[1] && fileURLToPath(import.meta.url) === process.argv[1];
68
53
 
69
- if (ans === null) {
70
- consent = true;
71
- console.log("\nNo answer, defaulting to install.\n");
54
+ if (isCli) {
55
+ console.error(msg);
56
+ process.exit(1);
72
57
  } else {
73
- consent = !ans || ans === "y" || ans === "yes";
58
+ throw new Error(msg);
74
59
  }
75
60
  }
76
61
 
77
- if (!consent) {
78
- console.error("Cannot proceed without required dependencies. Exiting.");
79
- process.exit(1);
80
- }
81
-
82
- // Run npm install for missing deps
83
- console.log(`Installing: ${missing.join(" ")} ...`);
84
- const npmCmd = process.platform === "win32" ? "npm.cmd" : "npm";
85
- const args = ["install", "--save", ...missing];
86
- const result = spawnSync(npmCmd, args, {
87
- stdio: "inherit",
88
- shell: false,
89
- });
90
-
91
- if (result.error || result.status !== 0) {
92
- console.error("Automatic installation failed. Please run:");
93
- console.error(` npm install ${missing.join(" ")}`);
94
- process.exit(1);
95
- }
96
-
97
- console.log("Dependencies installed, continuing...");
62
+ return results;
98
63
  }
99
64
 
100
- // Ensure runtime modules (same set as original)
101
- await ensureDependencies(["axios", "cheerio", "ora", "yargs"]);
102
-
103
- // Dynamic import of runtime libraries (so they are loaded after potential install)
104
- const axiosModule = await import("axios");
105
- const axios = axiosModule.default || axiosModule;
106
-
107
- const cheerioModule = await import("cheerio");
108
- const cheerio = cheerioModule.default || cheerioModule;
109
-
110
- const oraModule = await import("ora");
111
- const ora = oraModule.default || oraModule;
112
-
113
- const yargsModule = await import("yargs");
114
- const yargs = yargsModule.default || yargsModule;
65
+ // Load runtimes before continuing
66
+ const runtimes = await loadRuntimes();
67
+ const axios = runtimes["axios"];
68
+ const cheerio = runtimes["cheerio"];
69
+ const ora = runtimes["ora"];
70
+ const yargs = runtimes["yargs"];
115
71
 
116
72
  // -------------------------
117
- // Utilities & Config
73
+ // Utilities & Config (unchanged behavior)
118
74
  // -------------------------
119
75
  const DEFAULT_CONFIG = {
120
76
  BASE_URL: "https://github.com",
@@ -158,9 +114,8 @@ function validateUsername(username) {
158
114
  }
159
115
 
160
116
  // -------------------------
161
- // Translator loader (optional, lazy)
117
+ // Translator loader (optional, lazy) - unchanged behavior
162
118
  // -------------------------
163
- // We keep a lazy loader so translator is imported only if requested.
164
119
  let _translatorModule = null; // module namespace
165
120
  let _translatorLoadAttempted = false;
166
121
 
@@ -169,22 +124,27 @@ async function loadTranslatorModule() {
169
124
  if (_translatorLoadAttempted) return null;
170
125
  _translatorLoadAttempted = true;
171
126
  try {
172
- // prefer the package-local wrapper (ESM) if present
173
- // path relative to this file
174
- const wrapperPath = new URL("./translate-engine/translate.js", import.meta.url).pathname;
175
- // attempt dynamic import; if module not present or h56-translator not installed, it will throw
176
- const mod = await import(wrapperPath);
177
- if (mod && typeof mod.translate === "function") {
178
- _translatorModule = mod;
179
- return _translatorModule;
127
+ // prefer package-local wrapper (ESM)
128
+ const wrapperUrl = new URL("./translate-engine/translate.js", import.meta.url).href;
129
+ try {
130
+ const mod = await import(wrapperUrl);
131
+ if (mod && typeof mod.translate === "function") {
132
+ _translatorModule = mod;
133
+ return _translatorModule;
134
+ }
135
+ } catch (_) {
136
+ // wrapper not present or failed - fallthrough to package import
180
137
  }
181
- // fallback: try importing the package directly
138
+
139
+ // fallback: try importing the optional package directly
182
140
  const pkg = await import("h56-translator");
183
- if (pkg && (typeof pkg.translate === "function" || typeof pkg.default === "function")) {
184
- const translateFn = typeof pkg.translate === "function" ? pkg.translate : pkg.default;
141
+ const impl = pkg && (pkg.default || pkg);
142
+ if (impl && (typeof impl.translate === "function" || typeof impl === "function")) {
143
+ const translateFn = typeof impl.translate === "function" ? impl.translate : impl;
185
144
  _translatorModule = { translate: translateFn };
186
145
  return _translatorModule;
187
146
  }
147
+
188
148
  return null;
189
149
  } catch (err) {
190
150
  // translator not available — return null and let callers handle
@@ -193,7 +153,7 @@ async function loadTranslatorModule() {
193
153
  }
194
154
 
195
155
  // -------------------------
196
- // Scraper class
156
+ // Scraper class (logic preserved)
197
157
  // -------------------------
198
158
  class GithubScraper {
199
159
  constructor(opts = {}) {
@@ -210,12 +170,10 @@ class GithubScraper {
210
170
  return res.data;
211
171
  } catch (err) {
212
172
  if (attempt >= this.config.MAX_RETRY) {
213
- // wrap error with url info
214
173
  const e = new Error(`Failed to fetch ${url}: ${err.message}`);
215
174
  e.cause = err;
216
175
  throw e;
217
176
  }
218
- // backoff
219
177
  await sleep(1000 * attempt);
220
178
  return this.requestWithRetry(url, attempt + 1);
221
179
  }
@@ -245,7 +203,6 @@ class GithubScraper {
245
203
  $('div[itemprop="description"]').text().trim() ||
246
204
  "";
247
205
 
248
- // Pulled from header counters (structure may vary by locale/markup)
249
206
  const followersText = $(
250
207
  'a[href$="?tab=followers"], a[href$="?tab=followers"] .text-bold'
251
208
  )
@@ -286,7 +243,6 @@ class GithubScraper {
286
243
  while (true) {
287
244
  const url = `${this.config.BASE_URL}/${username}?page=${page}&tab=repositories`;
288
245
  const $ = await this.fetchPage(url);
289
- // older layout: li[itemprop='owns'], new layout: div[id^=user-repositories-list] li
290
246
  const repoItems =
291
247
  $("li[itemprop='owns']").length > 0
292
248
  ? $("li[itemprop='owns']")
@@ -360,24 +316,6 @@ class GithubScraper {
360
316
  };
361
317
  }
362
318
 
363
- /**
364
- * applyTranslations(result, translateOptions)
365
- *
366
- * Mutates the result object by adding translated fields.
367
- *
368
- * translateOptions (optional) shape:
369
- * {
370
- * lang: string, // target language code (required to perform translations)
371
- * fields?: string[], // list of fields to translate; supported values:
372
- * // 'bio' (profile.bio)
373
- * // 'repo_descriptions' (repo.description)
374
- * // 'repo_names' (repo.name)
375
- * // 'all_repos' (alias for repo_descriptions + repo_names)
376
- * // default: ['bio', 'repo_descriptions']
377
- * perRepoDelay?: number, // ms delay between repo translations (default 120)
378
- * failOnMissing?: boolean // if true, throw when translator is not available (default false)
379
- * }
380
- */
381
319
  async applyTranslations(result, translateOptions = {}) {
382
320
  if (!translateOptions || !translateOptions.lang) return result;
383
321
  const opts = {
@@ -387,7 +325,6 @@ class GithubScraper {
387
325
  ...translateOptions,
388
326
  };
389
327
 
390
- // normalize fields
391
328
  const fields = new Set();
392
329
  for (const f of opts.fields) {
393
330
  if (f === "all_repos") {
@@ -398,7 +335,6 @@ class GithubScraper {
398
335
  }
399
336
  }
400
337
 
401
- // translator function must exist (lazy load)
402
338
  const mod = await loadTranslatorModule();
403
339
  if (!mod || typeof mod.translate !== "function") {
404
340
  const msg =
@@ -408,7 +344,6 @@ class GithubScraper {
408
344
  e.code = "TRANSLATOR_MISSING";
409
345
  throw e;
410
346
  } else {
411
- // attach a note and skip translations
412
347
  result._translation_note = {
413
348
  skipped: true,
414
349
  reason: msg,
@@ -418,7 +353,6 @@ class GithubScraper {
418
353
  }
419
354
  const tfn = mod.translate;
420
355
 
421
- // perform profile translation
422
356
  try {
423
357
  if (fields.has("bio") && result.profile && result.profile.bio) {
424
358
  try {
@@ -433,11 +367,9 @@ class GithubScraper {
433
367
  }
434
368
  }
435
369
  } catch (e) {
436
- // defensive: any translator error should not break the main flow
437
370
  result._translation_profile_error = e && e.message ? e.message : String(e);
438
371
  }
439
372
 
440
- // perform repository translations sequentially (safer)
441
373
  if (Array.isArray(result.repos) && result.repos.length > 0) {
442
374
  for (const repo of result.repos) {
443
375
  try {
@@ -466,7 +398,6 @@ class GithubScraper {
466
398
  }
467
399
  }
468
400
  } catch (e) {
469
- // attach per-repo error but continue
470
401
  repo.translation_internal_error = e && e.message ? e.message : String(e);
471
402
  }
472
403
  await sleep(opts.perRepoDelay);
@@ -476,19 +407,6 @@ class GithubScraper {
476
407
  return result;
477
408
  }
478
409
 
479
- // high-level helper
480
- /**
481
- * scrapeUser(username, opts)
482
- *
483
- * opts:
484
- * spinner: boolean (default true)
485
- * translate: {
486
- * lang: 'en', // target language code (required to enable translations)
487
- * fields: ['bio','repo_descriptions'], // which fields to translate
488
- * perRepoDelay: 120, // ms
489
- * failOnMissing: false // if true, throw when translator missing
490
- * }
491
- */
492
410
  async scrapeUser(username, opts = {}) {
493
411
  if (!validateUsername(username)) {
494
412
  const e = new Error("Invalid GitHub username format");
@@ -507,18 +425,15 @@ class GithubScraper {
507
425
 
508
426
  let result = { profile, repos, stats };
509
427
 
510
- // If translation options provided, attempt to apply translations.
511
428
  if (opts.translate && opts.translate.lang) {
512
429
  if (spinner) spinner.text = "Applying translations...";
513
430
  try {
514
431
  result = await this.applyTranslations(result, opts.translate);
515
432
  } catch (e) {
516
- // translator errors: if failOnMissing requested, rethrow; otherwise attach note
517
433
  if (opts.translate && opts.translate.failOnMissing) {
518
434
  if (spinner) spinner.fail("Failed");
519
435
  throw e;
520
436
  } else {
521
- // attach note and continue
522
437
  result._translation_error = e && e.message ? e.message : String(e);
523
438
  }
524
439
  }
@@ -531,7 +446,6 @@ class GithubScraper {
531
446
  }
532
447
  }
533
448
 
534
- // CLI pretty print
535
449
  static printResult(profile, stats, repos = []) {
536
450
  console.log("\n========== GITHUB ACCOUNT ==========\n");
537
451
  console.log("Username :", profile.username);
@@ -567,11 +481,10 @@ class GithubScraper {
567
481
  }
568
482
 
569
483
  // -------------------------
570
- // Exports (for usage as package/module)
484
+ // Exports (clean ESM)
571
485
  // -------------------------
572
486
  const defaultScraper = new GithubScraper();
573
487
 
574
- // h56translate helper: lazy-call translator module when invoked
575
488
  export async function h56translate(text, targetLang, options) {
576
489
  const mod = await loadTranslatorModule();
577
490
  if (!mod || typeof mod.translate !== "function") {
@@ -582,35 +495,26 @@ export async function h56translate(text, targetLang, options) {
582
495
  return await mod.translate(text, targetLang, options);
583
496
  }
584
497
 
585
- export {
586
- GithubScraper,
587
- defaultScraper,
588
- // convenience wrappers
589
- // note: keep same signatures as before (username, opts)
590
- async function scrapeProfile(username, opts) {
591
- return defaultScraper.scrapeProfile(username, opts);
592
- },
593
- async function scrapeRepos(username, opts) {
594
- return defaultScraper.scrapeRepos(username, opts);
595
- },
596
- async function scrapeUser(username, opts) {
597
- return defaultScraper.scrapeUser(username, opts);
598
- },
599
- function calculateStats(repos) {
600
- return defaultScraper.calculateStats(repos);
601
- },
602
- GithubScraper.printResult as printResult,
603
- };
498
+ export { GithubScraper, defaultScraper };
604
499
 
605
- // The above export block uses named exports compatible with ESM consumers.
606
- // For backwards compatibility with consumers expecting a default CommonJS export,
607
- // also write a small CJS-compatible default export file (optional) when packaging.
500
+ export async function scrapeProfile(username) {
501
+ return defaultScraper.scrapeProfile(username);
502
+ }
503
+ export async function scrapeRepos(username) {
504
+ return defaultScraper.scrapeRepos(username);
505
+ }
506
+ export async function scrapeUser(username, opts) {
507
+ return defaultScraper.scrapeUser(username, opts);
508
+ }
509
+ export function calculateStats(repos) {
510
+ return defaultScraper.calculateStats(repos);
511
+ }
512
+ export const printResult = GithubScraper.printResult;
608
513
 
609
514
  // -------------------------
610
515
  // CLI behavior when run directly
611
516
  // -------------------------
612
- const __filename = fileURLToPath(import.meta.url);
613
- if (process.argv[1] === __filename) {
517
+ if (fileURLToPath(import.meta.url) === process.argv[1]) {
614
518
  (async () => {
615
519
  // build argv using yargs (same API as original)
616
520
  const argv = yargs(process.argv.slice(2))
@@ -649,7 +553,6 @@ if (process.argv[1] === __filename) {
649
553
  process.exit(1);
650
554
  }
651
555
 
652
- // build translate options if requested
653
556
  const translateOpt = argv.lang
654
557
  ? {
655
558
  lang: argv.lang,
@@ -678,7 +581,6 @@ if (process.argv[1] === __filename) {
678
581
  } else {
679
582
  GithubScraper.printResult(result.profile, result.stats, result.repos);
680
583
  if (argv.output) {
681
- // also write JSON file if requested
682
584
  fs.writeFileSync(path.resolve(argv.output), JSON.stringify(result, null, 2) + os.EOL, "utf8");
683
585
  console.log("Written JSON to", argv.output);
684
586
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "h56-github-scrapper",
3
- "version": "1.0.4",
3
+ "version": "1.0.6",
4
4
  "description": "GitHub user scraper",
5
5
  "type": "module",
6
6
  "main": "main-scrapping.js",
@@ -18,8 +18,5 @@
18
18
  },
19
19
  "optionalDependencies": {
20
20
  "h56-translator": "^1.0.0"
21
- },
22
- "scripts": {
23
- "postinstall": "node ./scripts/ensure-external-deps.js"
24
21
  }
25
22
  }
package/readme.md CHANGED
@@ -274,14 +274,6 @@ console.log(r.translatedText);
274
274
  - `h56-translator` adalah dependency opsional. Paket menyediakan:
275
275
  - `translate-engine/translate.ts` (typed wrapper) untuk development/TS.
276
276
  - `translate-engine/translate.js` (ESM wrapper) untuk runtime `import()`.
277
- - `scripts/ensure-external-deps.js` — postinstall helper yang berusaha memasang `h56-translator` jika tidak ada, kecuali di CI (safety).
278
- - Jika Anda menginginkan pemasangan otomatis di CI:
279
- - jalankan: `H56_FORCE_POSTINSTALL=1 npm install`
280
- - Jika translator tidak terpasang:
281
- - `h56translate(...)` akan melempar Error informatif.
282
- - `scrapeUser(..., { translate: {...} })` akan:
283
- - menambahkan `_translation_note` dan melanjutkan (default), atau
284
- - melempar error jika `failOnMissing: true` disetel.
285
277
 
286
278
  ---
287
279
 
@@ -1,57 +0,0 @@
1
- #!/usr/bin/env node
2
- /**
3
- * scripts/ensure-external-deps.js (ESM)
4
- *
5
- * Postinstall helper (ESM). Attempts to install optional dependencies if missing.**/
6
-
7
- import { spawnSync } from "child_process";
8
- import { createRequire } from "module";
9
-
10
- const require = createRequire(import.meta.url);
11
-
12
- const optionalDeps = ["h56-translator"];
13
-
14
- function isInstalled(name) {
15
- try {
16
- require.resolve(name);
17
- return true;
18
- } catch (_) {
19
- return false;
20
- }
21
- }
22
-
23
- function installDeps(deps) {
24
- if (!deps.length) return;
25
- const npmCmd = process.platform === "win32" ? "npm.cmd" : "npm";
26
- const args = ["install", "--no-audit", "--no-fund", "--save", ...deps];
27
- console.log("Installing optional dependencies:", deps.join(", "));
28
- const res = spawnSync(npmCmd, args, { stdio: "inherit" });
29
- if (res.error || res.status !== 0) {
30
- console.error("Failed to install optional dependencies. You can run manually:");
31
- console.error(" npm install " + deps.join(" "));
32
- // do not throw to keep npm install resilient
33
- } else {
34
- console.log("Optional dependencies installed.");
35
- }
36
- }
37
-
38
- (function main() {
39
- try {
40
- const toInstall = optionalDeps.filter((d) => !isInstalled(d));
41
- if (toInstall.length === 0) return;
42
-
43
- // Skip auto-install in CI by default to avoid surprises; allow override.
44
- if (process.env.CI && !process.env.H56_FORCE_POSTINSTALL) {
45
- console.log(
46
- "CI environment detected — skipping automatic installation of optional dependencies.",
47
- "Set H56_FORCE_POSTINSTALL=1 to force installation in CI."
48
- );
49
- return;
50
- }
51
-
52
- installDeps(toInstall);
53
- } catch (err) {
54
- console.error("Postinstall check encountered an error:", err && err.message ? err.message : err);
55
- // do not exit non-zero; keep postinstall resilient
56
- }
57
- })();