h56-github-scrapper 1.0.2 → 1.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/main-scrapping.js +109 -97
- package/package.json +3 -2
- package/script/ensure-external-deps.js +8 -6
- package/translate-engine/translate.js +26 -25
package/main-scrapping.js
CHANGED
|
@@ -1,32 +1,30 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* main-scrapping.js
|
|
2
|
+
* main-scrapping.js (ESM)
|
|
3
3
|
*
|
|
4
|
-
*
|
|
5
|
-
*
|
|
4
|
+
* Converted to ECMAScript Modules (ESM) for Node.js.
|
|
5
|
+
* - Preserves original logic and features (scraping, retry/backoff, spinner, CLI).
|
|
6
|
+
* - Adds optional translator integration (uses translate-engine/translate.js as ESM).
|
|
7
|
+
* - Uses dynamic import for third-party runtime deps after optional auto-install.
|
|
6
8
|
*
|
|
7
|
-
* Features:
|
|
8
|
-
* - Exports programmatic functions: scrapeProfile, scrapeRepos, scrapeUser, calculateStats
|
|
9
|
-
* - CLI entry when run directly: node main-scrapping.js <username> [--json] [--output=file]
|
|
10
|
-
* - Optional translator integration (h56-translator) with selectable fields to translate
|
|
11
|
-
* - Automatic install of missing npm runtime dependencies (asks for consent when needed)
|
|
12
|
-
* - Robust retry/backoff, polite scraping delay, spinner (ora) fallback
|
|
13
|
-
* - Well-structured results and JSON output support
|
|
14
9
|
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
* in package.json. The runtime auto-installer is implemented as a convenience only.
|
|
18
|
-
* - Scraping HTML may break if GitHub changes markup. Consider using GitHub API for production.
|
|
10
|
+
* This file is ready to copy/paste. It intentionally preserves behavior and CLI options
|
|
11
|
+
* while modernizing module semantics to ESM.
|
|
19
12
|
*/
|
|
20
13
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
14
|
+
import fs from "fs";
|
|
15
|
+
import path from "path";
|
|
16
|
+
import { spawnSync } from "child_process";
|
|
17
|
+
import os from "os";
|
|
18
|
+
import { createRequire } from "module";
|
|
19
|
+
import readline from "readline";
|
|
20
|
+
import { fileURLToPath } from "url";
|
|
21
|
+
|
|
22
|
+
const require = createRequire(import.meta.url);
|
|
25
23
|
|
|
26
24
|
// -------------------------
|
|
27
|
-
// Ensure runtime deps
|
|
25
|
+
// Ensure runtime deps (async)
|
|
28
26
|
// -------------------------
|
|
29
|
-
function ensureDependencies(deps = []) {
|
|
27
|
+
async function ensureDependencies(deps = []) {
|
|
30
28
|
const missing = deps.filter((d) => {
|
|
31
29
|
try {
|
|
32
30
|
require.resolve(d);
|
|
@@ -50,45 +48,29 @@ function ensureDependencies(deps = []) {
|
|
|
50
48
|
consent = true;
|
|
51
49
|
console.log("Non-interactive environment detected, installing automatically...");
|
|
52
50
|
} else {
|
|
53
|
-
const rl =
|
|
51
|
+
const rl = readline.createInterface({
|
|
54
52
|
input: process.stdin,
|
|
55
53
|
output: process.stdout,
|
|
56
54
|
});
|
|
57
55
|
|
|
58
|
-
|
|
56
|
+
// Prompt with 10s timeout; default to yes
|
|
57
|
+
const questionPromise = new Promise((resolve) =>
|
|
59
58
|
rl.question(`Install missing dependencies now? (Y/n): `, (a) => {
|
|
60
59
|
rl.close();
|
|
61
|
-
resolve(a.trim().toLowerCase());
|
|
60
|
+
resolve(String(a || "").trim().toLowerCase());
|
|
62
61
|
})
|
|
63
62
|
);
|
|
64
63
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
// Implementing a small blocking wait using child_process to call 'bash -c read -t 10' is platform-dependent.
|
|
70
|
-
// For simplicity here, assume consent if user presses Enter quickly — we'll read synchronously via question with callback, then block via a small busy loop until resolved.
|
|
71
|
-
let resolved = false;
|
|
72
|
-
let ansValue = "";
|
|
73
|
-
answer.then((v) => {
|
|
74
|
-
resolved = true;
|
|
75
|
-
ansValue = v;
|
|
76
|
-
});
|
|
77
|
-
|
|
78
|
-
// Wait (busy-loop) until resolved — acceptable for a small prompt in CLI tool.
|
|
79
|
-
const waitUntil = Date.now() + 10000; // 10s timeout
|
|
80
|
-
while (!resolved && Date.now() < waitUntil) {
|
|
81
|
-
// small sleep
|
|
82
|
-
const start = Date.now();
|
|
83
|
-
while (Date.now() - start < 50) {}
|
|
84
|
-
}
|
|
64
|
+
const ans = await Promise.race([
|
|
65
|
+
questionPromise,
|
|
66
|
+
new Promise((resolve) => setTimeout(() => resolve(null), 10000)),
|
|
67
|
+
]);
|
|
85
68
|
|
|
86
|
-
if (
|
|
87
|
-
// default to yes
|
|
69
|
+
if (ans === null) {
|
|
88
70
|
consent = true;
|
|
89
71
|
console.log("\nNo answer, defaulting to install.\n");
|
|
90
72
|
} else {
|
|
91
|
-
consent = !
|
|
73
|
+
consent = !ans || ans === "y" || ans === "yes";
|
|
92
74
|
}
|
|
93
75
|
}
|
|
94
76
|
|
|
@@ -99,8 +81,9 @@ function ensureDependencies(deps = []) {
|
|
|
99
81
|
|
|
100
82
|
// Run npm install for missing deps
|
|
101
83
|
console.log(`Installing: ${missing.join(" ")} ...`);
|
|
84
|
+
const npmCmd = process.platform === "win32" ? "npm.cmd" : "npm";
|
|
102
85
|
const args = ["install", "--save", ...missing];
|
|
103
|
-
const result = spawnSync(
|
|
86
|
+
const result = spawnSync(npmCmd, args, {
|
|
104
87
|
stdio: "inherit",
|
|
105
88
|
shell: false,
|
|
106
89
|
});
|
|
@@ -114,14 +97,21 @@ function ensureDependencies(deps = []) {
|
|
|
114
97
|
console.log("Dependencies installed, continuing...");
|
|
115
98
|
}
|
|
116
99
|
|
|
117
|
-
//
|
|
118
|
-
ensureDependencies(["axios", "cheerio", "ora", "yargs"]);
|
|
100
|
+
// Make sure the third-party runtime packages can be resolved before anything
// below tries to load them (same package set as the original implementation).
await ensureDependencies(["axios", "cheerio", "ora", "yargs"]);

// Load the libraries with dynamic import() rather than static imports, so a
// package that was installed by the step above is picked up in this same run.
// The imports stay sequential and in the original order.
const axiosModule = await import("axios");
const cheerioModule = await import("cheerio");
const oraModule = await import("ora");
const yargsModule = await import("yargs");

// Unwrap each namespace: use the default export when there is one, otherwise
// fall back to the namespace object itself.
const axios = axiosModule.default || axiosModule;
const cheerio = cheerioModule.default || cheerioModule;
const ora = oraModule.default || oraModule;
const yargs = yargsModule.default || yargsModule;
|
|
125
115
|
|
|
126
116
|
// -------------------------
|
|
127
117
|
// Utilities & Config
|
|
@@ -168,21 +158,38 @@ function validateUsername(username) {
|
|
|
168
158
|
}
|
|
169
159
|
|
|
170
160
|
// -------------------------
|
|
171
|
-
// Translator loader (optional)
|
|
161
|
+
// Translator loader (optional, lazy)
|
|
172
162
|
// -------------------------
|
|
173
|
-
//
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
163
|
+
// We keep a lazy loader so translator is imported only if requested.
|
|
164
|
+
let _translatorModule = null; // cached translator ({ translate }) once one has loaded
let _translatorLoadAttempted = false; // true after the first load attempt, whatever its outcome

/**
 * Lazily load the optional translator and cache the outcome.
 *
 * Resolution order:
 *   1. the package-local ESM wrapper `./translate-engine/translate.js`;
 *   2. the `h56-translator` package imported directly.
 *
 * Each step has its own try/catch. Previously both steps shared one `try`, so
 * a wrapper that was missing or failed to load jumped straight to the `catch`
 * and the direct-package fallback could never run.
 *
 * Only one load attempt is made per process: after a failed attempt every
 * later call returns null without importing again.
 *
 * @returns {Promise<{translate: Function}|null>} an object exposing
 *   `translate`, or null when no usable translator could be loaded.
 */
async function loadTranslatorModule() {
  if (_translatorModule) return _translatorModule;
  if (_translatorLoadAttempted) return null;
  _translatorLoadAttempted = true;

  // 1) Prefer the package-local wrapper, addressed by URL relative to this file.
  try {
    const wrapperUrl = new URL("./translate-engine/translate.js", import.meta.url).href;
    const mod = await import(wrapperUrl);
    if (mod && typeof mod.translate === "function") {
      _translatorModule = mod;
      return _translatorModule;
    }
  } catch (err) {
    // Wrapper missing or failed to evaluate — fall through to the direct import.
  }

  // 2) Fallback: import the package itself and normalize its export shape.
  try {
    const pkg = await import("h56-translator");
    const def = pkg ? pkg.default : undefined;
    let translateFn = null;
    if (pkg && typeof pkg.translate === "function") {
      translateFn = pkg.translate;
    } else if (def && typeof def.translate === "function") {
      // CJS interop shape: the default export is an object with a translate method.
      // Call it through `def` so the method keeps its receiver.
      translateFn = (text, targetLang, options) => def.translate(text, targetLang, options);
    } else if (typeof def === "function") {
      translateFn = def;
    }
    if (translateFn) {
      _translatorModule = { translate: translateFn };
      return _translatorModule;
    }
  } catch (err) {
    // Translator package not installed or not loadable — callers handle null.
  }

  return null;
}
|
|
187
194
|
|
|
188
195
|
// -------------------------
|
|
@@ -391,9 +398,9 @@ class GithubScraper {
|
|
|
391
398
|
}
|
|
392
399
|
}
|
|
393
400
|
|
|
394
|
-
// translator function must exist
|
|
395
|
-
const
|
|
396
|
-
if (!
|
|
401
|
+
// translator function must exist (lazy load)
|
|
402
|
+
const mod = await loadTranslatorModule();
|
|
403
|
+
if (!mod || typeof mod.translate !== "function") {
|
|
397
404
|
const msg =
|
|
398
405
|
"Optional translator is not available. Install 'h56-translator' (and ensure translate-engine/translate.js is present) to enable translations.";
|
|
399
406
|
if (opts.failOnMissing) {
|
|
@@ -409,6 +416,7 @@ class GithubScraper {
|
|
|
409
416
|
return result;
|
|
410
417
|
}
|
|
411
418
|
}
|
|
419
|
+
const tfn = mod.translate;
|
|
412
420
|
|
|
413
421
|
// perform profile translation
|
|
414
422
|
try {
|
|
@@ -559,48 +567,52 @@ class GithubScraper {
|
|
|
559
567
|
}
|
|
560
568
|
|
|
561
569
|
// -------------------------
|
|
562
|
-
// Exports (for
|
|
570
|
+
// Exports (for usage as package/module)
|
|
563
571
|
// -------------------------
|
|
564
572
|
const defaultScraper = new GithubScraper();
|
|
565
573
|
|
|
566
|
-
//
|
|
567
|
-
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
if (tmod && typeof tmod.translate === "function") {
|
|
571
|
-
h56translate = tmod.translate;
|
|
572
|
-
} else {
|
|
573
|
-
h56translate = async function () {
|
|
574
|
-
throw new Error(
|
|
575
|
-
"Optional translator module loaded but export shape is not recognized. Ensure 'h56-translator' is installed and compatible."
|
|
576
|
-
);
|
|
577
|
-
};
|
|
578
|
-
}
|
|
579
|
-
} catch (e) {
|
|
580
|
-
h56translate = async function () {
|
|
574
|
+
// h56translate helper: lazy-call translator module when invoked
|
|
575
|
+
/**
 * Translate through the optional translator, loading it on first use.
 *
 * @param {*} text - passed through unchanged to the translator.
 * @param {*} targetLang - passed through unchanged to the translator.
 * @param {*} [options] - passed through unchanged to the translator.
 * @returns {Promise<*>} whatever the translator's `translate` resolves to.
 * @throws {Error} when no translator exposing a `translate` function is available.
 */
export async function h56translate(text, targetLang, options) {
  const translator = await loadTranslatorModule();
  const usable = Boolean(translator) && typeof translator.translate === "function";

  if (usable) {
    return await translator.translate(text, targetLang, options);
  }

  throw new Error(
    "Optional translator is not available. Install it with `npm install h56-translator` or ensure translate-engine/translate.js exists and is usable."
  );
}
|
|
586
584
|
|
|
587
|
-
|
|
585
|
+
export {
|
|
588
586
|
GithubScraper,
|
|
589
587
|
defaultScraper,
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
588
|
+
// convenience wrappers
|
|
589
|
+
// note: keep same signatures as before (username, opts)
|
|
590
|
+
async function scrapeProfile(username, opts) {
|
|
591
|
+
return defaultScraper.scrapeProfile(username, opts);
|
|
592
|
+
},
|
|
593
|
+
async function scrapeRepos(username, opts) {
|
|
594
|
+
return defaultScraper.scrapeRepos(username, opts);
|
|
595
|
+
},
|
|
596
|
+
async function scrapeUser(username, opts) {
|
|
597
|
+
return defaultScraper.scrapeUser(username, opts);
|
|
598
|
+
},
|
|
599
|
+
function calculateStats(repos) {
|
|
600
|
+
return defaultScraper.calculateStats(repos);
|
|
601
|
+
},
|
|
602
|
+
GithubScraper.printResult as printResult,
|
|
597
603
|
};
|
|
598
604
|
|
|
605
|
+
// The above export block uses named exports compatible with ESM consumers.
|
|
606
|
+
// For backwards compatibility with consumers expecting a default CommonJS export,
|
|
607
|
+
// also write a small CJS-compatible default export file (optional) when packaging.
|
|
608
|
+
|
|
599
609
|
// -------------------------
|
|
600
610
|
// CLI behavior when run directly
|
|
601
611
|
// -------------------------
|
|
602
|
-
|
|
612
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
613
|
+
if (process.argv[1] === __filename) {
|
|
603
614
|
(async () => {
|
|
615
|
+
// build argv using yargs (same API as original)
|
|
604
616
|
const argv = yargs(process.argv.slice(2))
|
|
605
617
|
.usage("Usage: $0 <username> [options]")
|
|
606
618
|
.option("json", {
|
package/package.json
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "h56-github-scrapper",
|
|
3
|
-
"version": "1.0.
|
|
3
|
+
"version": "1.0.4",
|
|
4
4
|
"description": "GitHub user scraper",
|
|
5
|
+
"type": "module",
|
|
5
6
|
"main": "main-scrapping.js",
|
|
6
7
|
"bin": {
|
|
7
8
|
"h56-github-scrapper": "main-scrapping.js"
|
|
@@ -13,7 +14,7 @@
|
|
|
13
14
|
"cheerio": "^1.0.0-rc.12",
|
|
14
15
|
"ora": "^6.0.0",
|
|
15
16
|
"yargs": "^17.0.0",
|
|
16
|
-
"
|
|
17
|
+
"h56-translator": "^1.0.6"
|
|
17
18
|
},
|
|
18
19
|
"optionalDependencies": {
|
|
19
20
|
"h56-translator": "^1.0.0"
|
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
// - Does not throw on failure to avoid breaking `npm install` completely.
|
|
2
|
+
/**
|
|
3
|
+
* script/ensure-external-deps.js (ESM)
|
|
4
|
+
*
|
|
5
|
+
* Postinstall helper (ESM). Attempts to install optional dependencies if missing. */
|
|
7
6
|
|
|
8
|
-
|
|
7
|
+
import { spawnSync } from "child_process";
|
|
8
|
+
import { createRequire } from "module";
|
|
9
|
+
|
|
10
|
+
const require = createRequire(import.meta.url);
|
|
9
11
|
|
|
10
12
|
const optionalDeps = ["h56-translator"];
|
|
11
13
|
|
|
@@ -1,18 +1,30 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
1
|
+
/**
|
|
2
|
+
* translate-engine/translate.js (ESM)
|
|
3
|
+
*
|
|
4
|
+
* ESM wrapper for optional dependency 'h56-translator'.
|
|
5
|
+
* Exports a single named function `translate(text, targetLang, options?)`.
|
|
6
|
+
*
|
|
7
|
+
* This file replaces the previous CommonJS wrapper and is ready for ESM usage.
|
|
8
|
+
*
|
|
9
|
+
* NOTE: If you publish this package as ESM (package.json "type": "module"),
|
|
10
|
+
* keep this file as-is. It will attempt to import 'h56-translator' and normalize shapes.
|
|
11
|
+
*/
|
|
6
12
|
|
|
7
|
-
function
|
|
13
|
+
export async function translate(text, targetLang, options) {
|
|
8
14
|
try {
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
if (typeof
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
15
|
+
// dynamic import of the translator package (supports ESM or CJS shapes)
|
|
16
|
+
const mod = await import("h56-translator");
|
|
17
|
+
const m = mod && (mod.default || mod); // handle default interop
|
|
18
|
+
if (typeof m.translate === "function") {
|
|
19
|
+
return await m.translate(text, targetLang, options);
|
|
20
|
+
}
|
|
21
|
+
if (typeof m === "function") {
|
|
22
|
+
// package export is a function
|
|
23
|
+
return await m(text, targetLang, options);
|
|
24
|
+
}
|
|
25
|
+
if (m && typeof m.default === "function") {
|
|
26
|
+
return await m.default(text, targetLang, options);
|
|
27
|
+
}
|
|
16
28
|
throw new Error("h56-translator export shape not recognized");
|
|
17
29
|
} catch (err) {
|
|
18
30
|
const e = new Error(
|
|
@@ -21,15 +33,4 @@ function loadTranslator() {
|
|
|
21
33
|
e.cause = err;
|
|
22
34
|
throw e;
|
|
23
35
|
}
|
|
24
|
-
}
|
|
25
|
-
|
|
26
|
-
/**
|
|
27
|
-
* translate(text, targetLang, options?)
|
|
28
|
-
* Delegates directly to h56-translator.
|
|
29
|
-
*/
|
|
30
|
-
async function translate(text, targetLang, options) {
|
|
31
|
-
const t = loadTranslator();
|
|
32
|
-
return await t.translate(text, targetLang, options);
|
|
33
|
-
}
|
|
34
|
-
|
|
35
|
-
module.exports = { translate };
|
|
36
|
+
}
|