h56-github-scrapper 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/main-scrapping.js +109 -97
- package/package.json +2 -1
- package/script/ensure-external-deps.js +53 -20
- package/translate-engine/translate.js +26 -25
package/main-scrapping.js
CHANGED
|
@@ -1,32 +1,30 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* main-scrapping.js
|
|
2
|
+
* main-scrapping.js (ESM)
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
4
|
+
* Converted to ECMAScript Modules (ESM) for Node.js.
|
|
5
|
+
* - Preserves original logic and features (scraping, retry/backoff, spinner, CLI).
|
|
6
|
+
* - Adds optional translator integration (uses translate-engine/translate.js as ESM).
|
|
7
|
+
* - Uses dynamic import for third-party runtime deps after optional auto-install.
|
|
6
8
|
*
|
|
7
|
-
* Features:
|
|
8
|
-
* - Exports programmatic functions: scrapeProfile, scrapeRepos, scrapeUser, calculateStats
|
|
9
|
-
* - CLI entry when run directly: node main-scrapping.js <username> [--json] [--output=file]
|
|
10
|
-
* - Optional translator integration (h56-translator) with selectable fields to translate
|
|
11
|
-
* - Automatic install of missing npm runtime dependencies (asks for consent when needed)
|
|
12
|
-
* - Robust retry/backoff, polite scraping delay, spinner (ora) fallback
|
|
13
|
-
* - Well-structured results and JSON output support
|
|
14
9
|
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
* in package.json. The runtime auto-installer is implemented as a convenience only.
|
|
18
|
-
* - Scraping HTML may break if GitHub changes markup. Consider using GitHub API for production.
|
|
10
|
+
* This file is ready to copy/paste. It intentionally preserves behavior and CLI options
|
|
11
|
+
* while modernizing module semantics to ESM.
|
|
19
12
|
*/
|
|
20
13
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
14
|
+
import fs from "fs";
|
|
15
|
+
import path from "path";
|
|
16
|
+
import { spawnSync } from "child_process";
|
|
17
|
+
import os from "os";
|
|
18
|
+
import { createRequire } from "module";
|
|
19
|
+
import readline from "readline";
|
|
20
|
+
import { fileURLToPath } from "url";
|
|
21
|
+
|
|
22
|
+
const require = createRequire(import.meta.url);
|
|
25
23
|
|
|
26
24
|
// -------------------------
|
|
27
|
-
// Ensure runtime deps
|
|
25
|
+
// Ensure runtime deps (async)
|
|
28
26
|
// -------------------------
|
|
29
|
-
function ensureDependencies(deps = []) {
|
|
27
|
+
async function ensureDependencies(deps = []) {
|
|
30
28
|
const missing = deps.filter((d) => {
|
|
31
29
|
try {
|
|
32
30
|
require.resolve(d);
|
|
@@ -50,45 +48,29 @@ function ensureDependencies(deps = []) {
|
|
|
50
48
|
consent = true;
|
|
51
49
|
console.log("Non-interactive environment detected, installing automatically...");
|
|
52
50
|
} else {
|
|
53
|
-
const rl =
|
|
51
|
+
const rl = readline.createInterface({
|
|
54
52
|
input: process.stdin,
|
|
55
53
|
output: process.stdout,
|
|
56
54
|
});
|
|
57
55
|
|
|
58
|
-
|
|
56
|
+
// Prompt with 10s timeout; default to yes
|
|
57
|
+
const questionPromise = new Promise((resolve) =>
|
|
59
58
|
rl.question(`Install missing dependencies now? (Y/n): `, (a) => {
|
|
60
59
|
rl.close();
|
|
61
|
-
resolve(a.trim().toLowerCase());
|
|
60
|
+
resolve(String(a || "").trim().toLowerCase());
|
|
62
61
|
})
|
|
63
62
|
);
|
|
64
63
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
// Implementing a small blocking wait using child_process to call 'bash -c read -t 10' is platform-dependent.
|
|
70
|
-
// For simplicity here, assume consent if user presses Enter quickly — we'll read synchronously via question with callback, then block via a small busy loop until resolved.
|
|
71
|
-
let resolved = false;
|
|
72
|
-
let ansValue = "";
|
|
73
|
-
answer.then((v) => {
|
|
74
|
-
resolved = true;
|
|
75
|
-
ansValue = v;
|
|
76
|
-
});
|
|
77
|
-
|
|
78
|
-
// Wait (busy-loop) until resolved — acceptable for a small prompt in CLI tool.
|
|
79
|
-
const waitUntil = Date.now() + 10000; // 10s timeout
|
|
80
|
-
while (!resolved && Date.now() < waitUntil) {
|
|
81
|
-
// small sleep
|
|
82
|
-
const start = Date.now();
|
|
83
|
-
while (Date.now() - start < 50) {}
|
|
84
|
-
}
|
|
64
|
+
const ans = await Promise.race([
|
|
65
|
+
questionPromise,
|
|
66
|
+
new Promise((resolve) => setTimeout(() => resolve(null), 10000)),
|
|
67
|
+
]);
|
|
85
68
|
|
|
86
|
-
if (
|
|
87
|
-
// default to yes
|
|
69
|
+
if (ans === null) {
|
|
88
70
|
consent = true;
|
|
89
71
|
console.log("\nNo answer, defaulting to install.\n");
|
|
90
72
|
} else {
|
|
91
|
-
consent = !
|
|
73
|
+
consent = !ans || ans === "y" || ans === "yes";
|
|
92
74
|
}
|
|
93
75
|
}
|
|
94
76
|
|
|
@@ -99,8 +81,9 @@ function ensureDependencies(deps = []) {
|
|
|
99
81
|
|
|
100
82
|
// Run npm install for missing deps
|
|
101
83
|
console.log(`Installing: ${missing.join(" ")} ...`);
|
|
84
|
+
const npmCmd = process.platform === "win32" ? "npm.cmd" : "npm";
|
|
102
85
|
const args = ["install", "--save", ...missing];
|
|
103
|
-
const result = spawnSync(
|
|
86
|
+
const result = spawnSync(npmCmd, args, {
|
|
104
87
|
stdio: "inherit",
|
|
105
88
|
shell: false,
|
|
106
89
|
});
|
|
@@ -114,14 +97,21 @@ function ensureDependencies(deps = []) {
|
|
|
114
97
|
console.log("Dependencies installed, continuing...");
|
|
115
98
|
}
|
|
116
99
|
|
|
117
|
-
//
|
|
118
|
-
ensureDependencies(["axios", "cheerio", "ora", "yargs"]);
|
|
100
|
+
// Ensure runtime modules (same set as original)
|
|
101
|
+
await ensureDependencies(["axios", "cheerio", "ora", "yargs"]);
|
|
119
102
|
|
|
120
|
-
//
|
|
121
|
-
const
|
|
122
|
-
const
|
|
123
|
-
|
|
124
|
-
const
|
|
103
|
+
// Dynamic import of runtime libraries (so they are loaded after potential install)
|
|
104
|
+
const axiosModule = await import("axios");
|
|
105
|
+
const axios = axiosModule.default || axiosModule;
|
|
106
|
+
|
|
107
|
+
const cheerioModule = await import("cheerio");
|
|
108
|
+
const cheerio = cheerioModule.default || cheerioModule;
|
|
109
|
+
|
|
110
|
+
const oraModule = await import("ora");
|
|
111
|
+
const ora = oraModule.default || oraModule;
|
|
112
|
+
|
|
113
|
+
const yargsModule = await import("yargs");
|
|
114
|
+
const yargs = yargsModule.default || yargsModule;
|
|
125
115
|
|
|
126
116
|
// -------------------------
|
|
127
117
|
// Utilities & Config
|
|
@@ -168,21 +158,38 @@ function validateUsername(username) {
|
|
|
168
158
|
}
|
|
169
159
|
|
|
170
160
|
// -------------------------
|
|
171
|
-
// Translator loader (optional)
|
|
161
|
+
// Translator loader (optional, lazy)
|
|
172
162
|
// -------------------------
|
|
173
|
-
//
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
163
|
+
// We keep a lazy loader so translator is imported only if requested.
|
|
164
|
+
let _translatorModule = null; // module namespace
|
|
165
|
+
let _translatorLoadAttempted = false;
|
|
166
|
+
|
|
167
|
+
async function loadTranslatorModule() {
|
|
168
|
+
if (_translatorModule) return _translatorModule;
|
|
169
|
+
if (_translatorLoadAttempted) return null;
|
|
170
|
+
_translatorLoadAttempted = true;
|
|
171
|
+
try {
|
|
172
|
+
// prefer the package-local wrapper (ESM) if present
|
|
173
|
+
// path relative to this file
|
|
174
|
+
const wrapperPath = new URL("./translate-engine/translate.js", import.meta.url).pathname;
|
|
175
|
+
// attempt dynamic import; if module not present or h56-translator not installed, it will throw
|
|
176
|
+
const mod = await import(wrapperPath);
|
|
177
|
+
if (mod && typeof mod.translate === "function") {
|
|
178
|
+
_translatorModule = mod;
|
|
179
|
+
return _translatorModule;
|
|
180
|
+
}
|
|
181
|
+
// fallback: try importing the package directly
|
|
182
|
+
const pkg = await import("h56-translator");
|
|
183
|
+
if (pkg && (typeof pkg.translate === "function" || typeof pkg.default === "function")) {
|
|
184
|
+
const translateFn = typeof pkg.translate === "function" ? pkg.translate : pkg.default;
|
|
185
|
+
_translatorModule = { translate: translateFn };
|
|
186
|
+
return _translatorModule;
|
|
187
|
+
}
|
|
188
|
+
return null;
|
|
189
|
+
} catch (err) {
|
|
190
|
+
// translator not available — return null and let callers handle
|
|
191
|
+
return null;
|
|
182
192
|
}
|
|
183
|
-
} catch (e) {
|
|
184
|
-
// not installed / wrapper not available; keep translatorFn null
|
|
185
|
-
translatorFn = null;
|
|
186
193
|
}
|
|
187
194
|
|
|
188
195
|
// -------------------------
|
|
@@ -391,9 +398,9 @@ class GithubScraper {
|
|
|
391
398
|
}
|
|
392
399
|
}
|
|
393
400
|
|
|
394
|
-
// translator function must exist
|
|
395
|
-
const
|
|
396
|
-
if (!
|
|
401
|
+
// translator function must exist (lazy load)
|
|
402
|
+
const mod = await loadTranslatorModule();
|
|
403
|
+
if (!mod || typeof mod.translate !== "function") {
|
|
397
404
|
const msg =
|
|
398
405
|
"Optional translator is not available. Install 'h56-translator' (and ensure translate-engine/translate.js is present) to enable translations.";
|
|
399
406
|
if (opts.failOnMissing) {
|
|
@@ -409,6 +416,7 @@ class GithubScraper {
|
|
|
409
416
|
return result;
|
|
410
417
|
}
|
|
411
418
|
}
|
|
419
|
+
const tfn = mod.translate;
|
|
412
420
|
|
|
413
421
|
// perform profile translation
|
|
414
422
|
try {
|
|
@@ -559,48 +567,52 @@ class GithubScraper {
|
|
|
559
567
|
}
|
|
560
568
|
|
|
561
569
|
// -------------------------
|
|
562
|
-
// Exports (for
|
|
570
|
+
// Exports (for usage as package/module)
|
|
563
571
|
// -------------------------
|
|
564
572
|
const defaultScraper = new GithubScraper();
|
|
565
573
|
|
|
566
|
-
//
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
if (tmod && typeof tmod.translate === "function") {
|
|
571
|
-
h56translate = tmod.translate;
|
|
572
|
-
} else {
|
|
573
|
-
h56translate = async function () {
|
|
574
|
-
throw new Error(
|
|
575
|
-
"Optional translator module loaded but export shape is not recognized. Ensure 'h56-translator' is installed and compatible."
|
|
576
|
-
);
|
|
577
|
-
};
|
|
578
|
-
}
|
|
579
|
-
} catch (e) {
|
|
580
|
-
h56translate = async function () {
|
|
574
|
+
// h56translate helper: lazy-call translator module when invoked
|
|
575
|
+
export async function h56translate(text, targetLang, options) {
|
|
576
|
+
const mod = await loadTranslatorModule();
|
|
577
|
+
if (!mod || typeof mod.translate !== "function") {
|
|
581
578
|
throw new Error(
|
|
582
|
-
"Optional translator is not available. Install it with `npm install h56-translator` or
|
|
579
|
+
"Optional translator is not available. Install it with `npm install h56-translator` or ensure translate-engine/translate.js exists and is usable."
|
|
583
580
|
);
|
|
584
|
-
}
|
|
581
|
+
}
|
|
582
|
+
return await mod.translate(text, targetLang, options);
|
|
585
583
|
}
|
|
586
584
|
|
|
587
|
-
|
|
585
|
+
export {
|
|
588
586
|
GithubScraper,
|
|
589
587
|
defaultScraper,
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
588
|
+
// convenience wrappers
|
|
589
|
+
// note: keep same signatures as before (username, opts)
|
|
590
|
+
async function scrapeProfile(username, opts) {
|
|
591
|
+
return defaultScraper.scrapeProfile(username, opts);
|
|
592
|
+
},
|
|
593
|
+
async function scrapeRepos(username, opts) {
|
|
594
|
+
return defaultScraper.scrapeRepos(username, opts);
|
|
595
|
+
},
|
|
596
|
+
async function scrapeUser(username, opts) {
|
|
597
|
+
return defaultScraper.scrapeUser(username, opts);
|
|
598
|
+
},
|
|
599
|
+
function calculateStats(repos) {
|
|
600
|
+
return defaultScraper.calculateStats(repos);
|
|
601
|
+
},
|
|
602
|
+
GithubScraper.printResult as printResult,
|
|
597
603
|
};
|
|
598
604
|
|
|
605
|
+
// The above export block uses named exports compatible with ESM consumers.
|
|
606
|
+
// For backwards compatibility with consumers expecting a default CommonJS export,
|
|
607
|
+
// also write a small CJS-compatible default export file (optional) when packaging.
|
|
608
|
+
|
|
599
609
|
// -------------------------
|
|
600
610
|
// CLI behavior when run directly
|
|
601
611
|
// -------------------------
|
|
602
|
-
|
|
612
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
613
|
+
if (process.argv[1] === __filename) {
|
|
603
614
|
(async () => {
|
|
615
|
+
// build argv using yargs (same API as original)
|
|
604
616
|
const argv = yargs(process.argv.slice(2))
|
|
605
617
|
.usage("Usage: $0 <username> [options]")
|
|
606
618
|
.option("json", {
|
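package/package.json
CHANGED
[The package.json hunk (+2 -1) is not present in this capture; the hunks below belong to the next file.]
package/script/ensure-external-deps.js
CHANGED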
|
@@ -1,44 +1,75 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
2
|
+
/**
|
|
3
|
+
* scripts/ensure-external-deps.js (ESM)
|
|
4
|
+
*
|
|
5
|
+
* Postinstall helper: idempotent installer for optional external deps.
|
|
6
|
+
* - Implemented as a full ESM module using top-level await.
|
|
7
|
+
* - Detects missing packages using dynamic `import()` (avoids createRequire / CJS).
|
|
8
|
+
* - Skips automatic install in CI by default; set H56_FORCE_POSTINSTALL=1 to override.
|
|
9
|
+
* - Uses spawnSync to run npm install and keeps the script resilient (won't throw on failure).
|
|
10
|
+
*
|
|
11
|
+
*/
|
|
7
12
|
|
|
8
|
-
|
|
13
|
+
import { spawnSync } from "child_process";
|
|
14
|
+
import path from "path";
|
|
15
|
+
import { fileURLToPath } from "url";
|
|
16
|
+
|
|
17
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
18
|
+
const __dirname = path.dirname(__filename);
|
|
9
19
|
|
|
10
20
|
const optionalDeps = ["h56-translator"];
|
|
11
21
|
|
|
12
|
-
|
|
22
|
+
/**
|
|
23
|
+
* Check whether a package is resolvable via dynamic import.
|
|
24
|
+
* Uses import() which will attempt to load the package; this is the most portable
|
|
25
|
+
* ESM-compatible way to test presence without using CJS helpers.
|
|
26
|
+
*
|
|
27
|
+
* We deliberately avoid `require.resolve` / createRequire to keep file pure ESM.
|
|
28
|
+
*/
|
|
29
|
+
async function isInstalled(name) {
|
|
13
30
|
try {
|
|
14
|
-
|
|
31
|
+
// dynamic import of a bare specifier will resolve via Node's resolver
|
|
32
|
+
await import(name);
|
|
15
33
|
return true;
|
|
16
|
-
} catch (
|
|
34
|
+
} catch (err) {
|
|
35
|
+
// import failed => treat as missing
|
|
17
36
|
return false;
|
|
18
37
|
}
|
|
19
38
|
}
|
|
20
39
|
|
|
40
|
+
/**
|
|
41
|
+
* Run npm install for the given list of packages (synchronous to keep postinstall simple).
|
|
42
|
+
*/
|
|
21
43
|
function installDeps(deps) {
|
|
22
|
-
if (!deps.length) return;
|
|
44
|
+
if (!deps || deps.length === 0) return;
|
|
23
45
|
const npmCmd = process.platform === "win32" ? "npm.cmd" : "npm";
|
|
24
46
|
const args = ["install", "--no-audit", "--no-fund", "--save", ...deps];
|
|
25
|
-
console.log(
|
|
26
|
-
const res = spawnSync(npmCmd, args, { stdio: "inherit" });
|
|
47
|
+
console.log(`Installing optional dependencies: ${deps.join(", ")}`);
|
|
48
|
+
const res = spawnSync(npmCmd, args, { stdio: "inherit", shell: false, cwd: process.cwd() });
|
|
27
49
|
if (res.error || res.status !== 0) {
|
|
28
|
-
console.error("Failed to install optional dependencies.
|
|
50
|
+
console.error("Failed to install optional dependencies automatically. Please run manually:");
|
|
29
51
|
console.error(" npm install " + deps.join(" "));
|
|
30
|
-
// do not throw to
|
|
52
|
+
// keep postinstall resilient: do not throw/exit non-zero to avoid breaking npm install
|
|
31
53
|
} else {
|
|
32
54
|
console.log("Optional dependencies installed.");
|
|
33
55
|
}
|
|
34
56
|
}
|
|
35
57
|
|
|
36
|
-
|
|
58
|
+
async function main() {
|
|
37
59
|
try {
|
|
38
|
-
const toInstall =
|
|
39
|
-
|
|
60
|
+
const toInstall = [];
|
|
61
|
+
for (const dep of optionalDeps) {
|
|
62
|
+
const present = await isInstalled(dep);
|
|
63
|
+
if (!present) toInstall.push(dep);
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
if (toInstall.length === 0) {
|
|
67
|
+
// nothing to do
|
|
68
|
+
// console.log("All optional dependencies present.");
|
|
69
|
+
return;
|
|
70
|
+
}
|
|
40
71
|
|
|
41
|
-
//
|
|
72
|
+
// Safety: skip auto-install in CI unless explicitly forced
|
|
42
73
|
if (process.env.CI && !process.env.H56_FORCE_POSTINSTALL) {
|
|
43
74
|
console.log(
|
|
44
75
|
"CI environment detected — skipping automatic installation of optional dependencies.",
|
|
@@ -49,7 +80,9 @@ function installDeps(deps) {
|
|
|
49
80
|
|
|
50
81
|
installDeps(toInstall);
|
|
51
82
|
} catch (err) {
|
|
83
|
+
// Keep postinstall resilient; log error but don't fail the install process.
|
|
52
84
|
console.error("Postinstall check encountered an error:", err && err.message ? err.message : err);
|
|
53
|
-
// do not exit non-zero; keep postinstall resilient
|
|
54
85
|
}
|
|
55
|
-
}
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
await main();
|
|
package/translate-engine/translate.js
CHANGED
|
@@ -1,18 +1,30 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
1
|
+
/**
|
|
2
|
+
* translate-engine/translate.js (ESM)
|
|
3
|
+
*
|
|
4
|
+
* ESM wrapper for optional dependency 'h56-translator'.
|
|
5
|
+
* Exports a single named function `translate(text, targetLang, options?)`.
|
|
6
|
+
*
|
|
7
|
+
* This file replaces the previous CommonJS wrapper and is ready for ESM usage.
|
|
8
|
+
*
|
|
9
|
+
* NOTE: If you publish this package as ESM (package.json "type": "module"),
|
|
10
|
+
* keep this file as-is. It will attempt to import 'h56-translator' and normalize shapes.
|
|
11
|
+
*/
|
|
6
12
|
|
|
7
|
-
function
|
|
13
|
+
export async function translate(text, targetLang, options) {
|
|
8
14
|
try {
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
if (typeof
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
15
|
+
// dynamic import of the translator package (supports ESM or CJS shapes)
|
|
16
|
+
const mod = await import("h56-translator");
|
|
17
|
+
const m = mod && (mod.default || mod); // handle default interop
|
|
18
|
+
if (typeof m.translate === "function") {
|
|
19
|
+
return await m.translate(text, targetLang, options);
|
|
20
|
+
}
|
|
21
|
+
if (typeof m === "function") {
|
|
22
|
+
// package export is a function
|
|
23
|
+
return await m(text, targetLang, options);
|
|
24
|
+
}
|
|
25
|
+
if (m && typeof m.default === "function") {
|
|
26
|
+
return await m.default(text, targetLang, options);
|
|
27
|
+
}
|
|
16
28
|
throw new Error("h56-translator export shape not recognized");
|
|
17
29
|
} catch (err) {
|
|
18
30
|
const e = new Error(
|
|
@@ -21,15 +33,4 @@ function loadTranslator() {
|
|
|
21
33
|
e.cause = err;
|
|
22
34
|
throw e;
|
|
23
35
|
}
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
/**
|
|
27
|
-
* translate(text, targetLang, options?)
|
|
28
|
-
* Delegates directly to h56-translator.
|
|
29
|
-
*/
|
|
30
|
-
async function translate(text, targetLang, options) {
|
|
31
|
-
const t = loadTranslator();
|
|
32
|
-
return await t.translate(text, targetLang, options);
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
module.exports = { translate };
|
|
36
|
+
}
|