h56-github-scrapper 1.0.5 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/main-scrapping.js +83 -181
- package/package.json +1 -4
- package/readme.md +193 -10
- package/script/ensure-external-deps.js +0 -88
package/main-scrapping.js
CHANGED
|
@@ -1,120 +1,76 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
1
2
|
/**
|
|
2
|
-
* main-scrapping.js (ESM)
|
|
3
|
+
* main-scrapping.js (ESM, improved)
|
|
3
4
|
*
|
|
4
|
-
*
|
|
5
|
-
* -
|
|
6
|
-
*
|
|
7
|
-
* -
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
* This file is ready to copy/paste. It intentionally preserves behavior and CLI options
|
|
11
|
-
* while modernizing module semantics to ESM.
|
|
5
|
+
* - Fully ESM, no createRequire usage.
|
|
6
|
+
* - Does NOT auto-install dependencies. Instead it detects missing runtime deps
|
|
7
|
+
* and prints a clear instruction to install them (safer for CI and terminals).
|
|
8
|
+
* - Keeps previous scraping, retry/backoff, translator lazy-loading, CLI and public API.
|
|
9
|
+
* - Improved export block and CLI detection.
|
|
12
10
|
*/
|
|
13
11
|
|
|
14
12
|
import fs from "fs";
|
|
15
13
|
import path from "path";
|
|
16
|
-
import { spawnSync } from "child_process";
|
|
17
14
|
import os from "os";
|
|
18
|
-
import { createRequire } from "module";
|
|
19
15
|
import readline from "readline";
|
|
20
16
|
import { fileURLToPath } from "url";
|
|
21
17
|
|
|
22
|
-
const
|
|
18
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
19
|
+
const __dirname = path.dirname(__filename);
|
|
23
20
|
|
|
24
|
-
//
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
21
|
+
// Runtime packages required for operation
|
|
22
|
+
const RUNTIME_PKGS = ["axios", "cheerio", "ora", "yargs"];
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Dynamically import runtime packages and return a map of package -> module.
|
|
26
|
+
* If any package is missing, behave differently depending on whether this module
|
|
27
|
+
* is being run as CLI or imported as a library:
|
|
28
|
+
* - CLI: print a helpful message and exit(1)
|
|
29
|
+
* - Library: throw an Error (so consumer can handle)
|
|
30
|
+
*/
|
|
31
|
+
async function loadRuntimes() {
|
|
32
|
+
const results = {};
|
|
33
|
+
const missing = [];
|
|
34
|
+
|
|
35
|
+
for (const name of RUNTIME_PKGS) {
|
|
29
36
|
try {
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
} catch (
|
|
33
|
-
|
|
37
|
+
const mod = await import(name);
|
|
38
|
+
results[name] = mod && mod.default ? mod.default : mod;
|
|
39
|
+
} catch (err) {
|
|
40
|
+
missing.push(name);
|
|
34
41
|
}
|
|
35
|
-
}
|
|
36
|
-
|
|
37
|
-
if (!missing.length) return;
|
|
38
|
-
|
|
39
|
-
console.log(
|
|
40
|
-
`Dependencies missing: ${missing.join(
|
|
41
|
-
", "
|
|
42
|
-
)}. The script can install them automatically.`
|
|
43
|
-
);
|
|
44
|
-
|
|
45
|
-
// If running non-interactive environment, install automatically.
|
|
46
|
-
let consent = false;
|
|
47
|
-
if (process.env.CI || !process.stdin.isTTY) {
|
|
48
|
-
consent = true;
|
|
49
|
-
console.log("Non-interactive environment detected, installing automatically...");
|
|
50
|
-
} else {
|
|
51
|
-
const rl = readline.createInterface({
|
|
52
|
-
input: process.stdin,
|
|
53
|
-
output: process.stdout,
|
|
54
|
-
});
|
|
42
|
+
}
|
|
55
43
|
|
|
56
|
-
|
|
57
|
-
const
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
);
|
|
44
|
+
if (missing.length) {
|
|
45
|
+
const cmd = `npm install ${missing.join(" ")}`;
|
|
46
|
+
const msg =
|
|
47
|
+
`Missing runtime dependencies: ${missing.join(", ")}.\n` +
|
|
48
|
+
`Please install them before running this script:\n\n ${cmd}\n\n` +
|
|
49
|
+
`If you are in CI and want deterministic installs, declare these dependencies explicitly in your pipeline or package.json.`;
|
|
63
50
|
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
new Promise((resolve) => setTimeout(() => resolve(null), 10000)),
|
|
67
|
-
]);
|
|
51
|
+
// Detect CLI execution (rough check)
|
|
52
|
+
const isCli = process.argv && process.argv[1] && fileURLToPath(import.meta.url) === process.argv[1];
|
|
68
53
|
|
|
69
|
-
if (
|
|
70
|
-
|
|
71
|
-
|
|
54
|
+
if (isCli) {
|
|
55
|
+
console.error(msg);
|
|
56
|
+
process.exit(1);
|
|
72
57
|
} else {
|
|
73
|
-
|
|
58
|
+
throw new Error(msg);
|
|
74
59
|
}
|
|
75
60
|
}
|
|
76
61
|
|
|
77
|
-
|
|
78
|
-
console.error("Cannot proceed without required dependencies. Exiting.");
|
|
79
|
-
process.exit(1);
|
|
80
|
-
}
|
|
81
|
-
|
|
82
|
-
// Run npm install for missing deps
|
|
83
|
-
console.log(`Installing: ${missing.join(" ")} ...`);
|
|
84
|
-
const npmCmd = process.platform === "win32" ? "npm.cmd" : "npm";
|
|
85
|
-
const args = ["install", "--save", ...missing];
|
|
86
|
-
const result = spawnSync(npmCmd, args, {
|
|
87
|
-
stdio: "inherit",
|
|
88
|
-
shell: false,
|
|
89
|
-
});
|
|
90
|
-
|
|
91
|
-
if (result.error || result.status !== 0) {
|
|
92
|
-
console.error("Automatic installation failed. Please run:");
|
|
93
|
-
console.error(` npm install ${missing.join(" ")}`);
|
|
94
|
-
process.exit(1);
|
|
95
|
-
}
|
|
96
|
-
|
|
97
|
-
console.log("Dependencies installed, continuing...");
|
|
62
|
+
return results;
|
|
98
63
|
}
|
|
99
64
|
|
|
100
|
-
//
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
const
|
|
105
|
-
const
|
|
106
|
-
|
|
107
|
-
const cheerioModule = await import("cheerio");
|
|
108
|
-
const cheerio = cheerioModule.default || cheerioModule;
|
|
109
|
-
|
|
110
|
-
const oraModule = await import("ora");
|
|
111
|
-
const ora = oraModule.default || oraModule;
|
|
112
|
-
|
|
113
|
-
const yargsModule = await import("yargs");
|
|
114
|
-
const yargs = yargsModule.default || yargsModule;
|
|
65
|
+
// Load runtimes before continuing
|
|
66
|
+
const runtimes = await loadRuntimes();
|
|
67
|
+
const axios = runtimes["axios"];
|
|
68
|
+
const cheerio = runtimes["cheerio"];
|
|
69
|
+
const ora = runtimes["ora"];
|
|
70
|
+
const yargs = runtimes["yargs"];
|
|
115
71
|
|
|
116
72
|
// -------------------------
|
|
117
|
-
// Utilities & Config
|
|
73
|
+
// Utilities & Config (unchanged behavior)
|
|
118
74
|
// -------------------------
|
|
119
75
|
const DEFAULT_CONFIG = {
|
|
120
76
|
BASE_URL: "https://github.com",
|
|
@@ -158,9 +114,8 @@ function validateUsername(username) {
|
|
|
158
114
|
}
|
|
159
115
|
|
|
160
116
|
// -------------------------
|
|
161
|
-
// Translator loader (optional, lazy)
|
|
117
|
+
// Translator loader (optional, lazy) - unchanged behavior
|
|
162
118
|
// -------------------------
|
|
163
|
-
// We keep a lazy loader so translator is imported only if requested.
|
|
164
119
|
let _translatorModule = null; // module namespace
|
|
165
120
|
let _translatorLoadAttempted = false;
|
|
166
121
|
|
|
@@ -169,22 +124,27 @@ async function loadTranslatorModule() {
|
|
|
169
124
|
if (_translatorLoadAttempted) return null;
|
|
170
125
|
_translatorLoadAttempted = true;
|
|
171
126
|
try {
|
|
172
|
-
// prefer
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
127
|
+
// prefer package-local wrapper (ESM)
|
|
128
|
+
const wrapperUrl = new URL("./translate-engine/translate.js", import.meta.url).href;
|
|
129
|
+
try {
|
|
130
|
+
const mod = await import(wrapperUrl);
|
|
131
|
+
if (mod && typeof mod.translate === "function") {
|
|
132
|
+
_translatorModule = mod;
|
|
133
|
+
return _translatorModule;
|
|
134
|
+
}
|
|
135
|
+
} catch (_) {
|
|
136
|
+
// wrapper not present or failed - fallthrough to package import
|
|
180
137
|
}
|
|
181
|
-
|
|
138
|
+
|
|
139
|
+
// fallback: try importing the optional package directly
|
|
182
140
|
const pkg = await import("h56-translator");
|
|
183
|
-
|
|
184
|
-
|
|
141
|
+
const impl = pkg && (pkg.default || pkg);
|
|
142
|
+
if (impl && (typeof impl.translate === "function" || typeof impl === "function")) {
|
|
143
|
+
const translateFn = typeof impl.translate === "function" ? impl.translate : impl;
|
|
185
144
|
_translatorModule = { translate: translateFn };
|
|
186
145
|
return _translatorModule;
|
|
187
146
|
}
|
|
147
|
+
|
|
188
148
|
return null;
|
|
189
149
|
} catch (err) {
|
|
190
150
|
// translator not available — return null and let callers handle
|
|
@@ -193,7 +153,7 @@ async function loadTranslatorModule() {
|
|
|
193
153
|
}
|
|
194
154
|
|
|
195
155
|
// -------------------------
|
|
196
|
-
// Scraper class
|
|
156
|
+
// Scraper class (logic preserved)
|
|
197
157
|
// -------------------------
|
|
198
158
|
class GithubScraper {
|
|
199
159
|
constructor(opts = {}) {
|
|
@@ -210,12 +170,10 @@ class GithubScraper {
|
|
|
210
170
|
return res.data;
|
|
211
171
|
} catch (err) {
|
|
212
172
|
if (attempt >= this.config.MAX_RETRY) {
|
|
213
|
-
// wrap error with url info
|
|
214
173
|
const e = new Error(`Failed to fetch ${url}: ${err.message}`);
|
|
215
174
|
e.cause = err;
|
|
216
175
|
throw e;
|
|
217
176
|
}
|
|
218
|
-
// backoff
|
|
219
177
|
await sleep(1000 * attempt);
|
|
220
178
|
return this.requestWithRetry(url, attempt + 1);
|
|
221
179
|
}
|
|
@@ -245,7 +203,6 @@ class GithubScraper {
|
|
|
245
203
|
$('div[itemprop="description"]').text().trim() ||
|
|
246
204
|
"";
|
|
247
205
|
|
|
248
|
-
// Pulled from header counters (structure may vary by locale/markup)
|
|
249
206
|
const followersText = $(
|
|
250
207
|
'a[href$="?tab=followers"], a[href$="?tab=followers"] .text-bold'
|
|
251
208
|
)
|
|
@@ -286,7 +243,6 @@ class GithubScraper {
|
|
|
286
243
|
while (true) {
|
|
287
244
|
const url = `${this.config.BASE_URL}/${username}?page=${page}&tab=repositories`;
|
|
288
245
|
const $ = await this.fetchPage(url);
|
|
289
|
-
// older layout: li[itemprop='owns'], new layout: div[id^=user-repositories-list] li
|
|
290
246
|
const repoItems =
|
|
291
247
|
$("li[itemprop='owns']").length > 0
|
|
292
248
|
? $("li[itemprop='owns']")
|
|
@@ -360,24 +316,6 @@ class GithubScraper {
|
|
|
360
316
|
};
|
|
361
317
|
}
|
|
362
318
|
|
|
363
|
-
/**
|
|
364
|
-
* applyTranslations(result, translateOptions)
|
|
365
|
-
*
|
|
366
|
-
* Mutates the result object by adding translated fields.
|
|
367
|
-
*
|
|
368
|
-
* translateOptions (optional) shape:
|
|
369
|
-
* {
|
|
370
|
-
* lang: string, // target language code (required to perform translations)
|
|
371
|
-
* fields?: string[], // list of fields to translate; supported values:
|
|
372
|
-
* // 'bio' (profile.bio)
|
|
373
|
-
* // 'repo_descriptions' (repo.description)
|
|
374
|
-
* // 'repo_names' (repo.name)
|
|
375
|
-
* // 'all_repos' (alias for repo_descriptions + repo_names)
|
|
376
|
-
* // default: ['bio', 'repo_descriptions']
|
|
377
|
-
* perRepoDelay?: number, // ms delay between repo translations (default 120)
|
|
378
|
-
* failOnMissing?: boolean // if true, throw when translator is not available (default false)
|
|
379
|
-
* }
|
|
380
|
-
*/
|
|
381
319
|
async applyTranslations(result, translateOptions = {}) {
|
|
382
320
|
if (!translateOptions || !translateOptions.lang) return result;
|
|
383
321
|
const opts = {
|
|
@@ -387,7 +325,6 @@ class GithubScraper {
|
|
|
387
325
|
...translateOptions,
|
|
388
326
|
};
|
|
389
327
|
|
|
390
|
-
// normalize fields
|
|
391
328
|
const fields = new Set();
|
|
392
329
|
for (const f of opts.fields) {
|
|
393
330
|
if (f === "all_repos") {
|
|
@@ -398,7 +335,6 @@ class GithubScraper {
|
|
|
398
335
|
}
|
|
399
336
|
}
|
|
400
337
|
|
|
401
|
-
// translator function must exist (lazy load)
|
|
402
338
|
const mod = await loadTranslatorModule();
|
|
403
339
|
if (!mod || typeof mod.translate !== "function") {
|
|
404
340
|
const msg =
|
|
@@ -408,7 +344,6 @@ class GithubScraper {
|
|
|
408
344
|
e.code = "TRANSLATOR_MISSING";
|
|
409
345
|
throw e;
|
|
410
346
|
} else {
|
|
411
|
-
// attach a note and skip translations
|
|
412
347
|
result._translation_note = {
|
|
413
348
|
skipped: true,
|
|
414
349
|
reason: msg,
|
|
@@ -418,7 +353,6 @@ class GithubScraper {
|
|
|
418
353
|
}
|
|
419
354
|
const tfn = mod.translate;
|
|
420
355
|
|
|
421
|
-
// perform profile translation
|
|
422
356
|
try {
|
|
423
357
|
if (fields.has("bio") && result.profile && result.profile.bio) {
|
|
424
358
|
try {
|
|
@@ -433,11 +367,9 @@ class GithubScraper {
|
|
|
433
367
|
}
|
|
434
368
|
}
|
|
435
369
|
} catch (e) {
|
|
436
|
-
// defensive: any translator error should not break the main flow
|
|
437
370
|
result._translation_profile_error = e && e.message ? e.message : String(e);
|
|
438
371
|
}
|
|
439
372
|
|
|
440
|
-
// perform repository translations sequentially (safer)
|
|
441
373
|
if (Array.isArray(result.repos) && result.repos.length > 0) {
|
|
442
374
|
for (const repo of result.repos) {
|
|
443
375
|
try {
|
|
@@ -466,7 +398,6 @@ class GithubScraper {
|
|
|
466
398
|
}
|
|
467
399
|
}
|
|
468
400
|
} catch (e) {
|
|
469
|
-
// attach per-repo error but continue
|
|
470
401
|
repo.translation_internal_error = e && e.message ? e.message : String(e);
|
|
471
402
|
}
|
|
472
403
|
await sleep(opts.perRepoDelay);
|
|
@@ -476,19 +407,6 @@ class GithubScraper {
|
|
|
476
407
|
return result;
|
|
477
408
|
}
|
|
478
409
|
|
|
479
|
-
// high-level helper
|
|
480
|
-
/**
|
|
481
|
-
* scrapeUser(username, opts)
|
|
482
|
-
*
|
|
483
|
-
* opts:
|
|
484
|
-
* spinner: boolean (default true)
|
|
485
|
-
* translate: {
|
|
486
|
-
* lang: 'en', // target language code (required to enable translations)
|
|
487
|
-
* fields: ['bio','repo_descriptions'], // which fields to translate
|
|
488
|
-
* perRepoDelay: 120, // ms
|
|
489
|
-
* failOnMissing: false // if true, throw when translator missing
|
|
490
|
-
* }
|
|
491
|
-
*/
|
|
492
410
|
async scrapeUser(username, opts = {}) {
|
|
493
411
|
if (!validateUsername(username)) {
|
|
494
412
|
const e = new Error("Invalid GitHub username format");
|
|
@@ -507,18 +425,15 @@ class GithubScraper {
|
|
|
507
425
|
|
|
508
426
|
let result = { profile, repos, stats };
|
|
509
427
|
|
|
510
|
-
// If translation options provided, attempt to apply translations.
|
|
511
428
|
if (opts.translate && opts.translate.lang) {
|
|
512
429
|
if (spinner) spinner.text = "Applying translations...";
|
|
513
430
|
try {
|
|
514
431
|
result = await this.applyTranslations(result, opts.translate);
|
|
515
432
|
} catch (e) {
|
|
516
|
-
// translator errors: if failOnMissing requested, rethrow; otherwise attach note
|
|
517
433
|
if (opts.translate && opts.translate.failOnMissing) {
|
|
518
434
|
if (spinner) spinner.fail("Failed");
|
|
519
435
|
throw e;
|
|
520
436
|
} else {
|
|
521
|
-
// attach note and continue
|
|
522
437
|
result._translation_error = e && e.message ? e.message : String(e);
|
|
523
438
|
}
|
|
524
439
|
}
|
|
@@ -531,7 +446,6 @@ class GithubScraper {
|
|
|
531
446
|
}
|
|
532
447
|
}
|
|
533
448
|
|
|
534
|
-
// CLI pretty print
|
|
535
449
|
static printResult(profile, stats, repos = []) {
|
|
536
450
|
console.log("\n========== GITHUB ACCOUNT ==========\n");
|
|
537
451
|
console.log("Username :", profile.username);
|
|
@@ -567,11 +481,10 @@ class GithubScraper {
|
|
|
567
481
|
}
|
|
568
482
|
|
|
569
483
|
// -------------------------
|
|
570
|
-
// Exports (
|
|
484
|
+
// Exports (clean ESM)
|
|
571
485
|
// -------------------------
|
|
572
486
|
const defaultScraper = new GithubScraper();
|
|
573
487
|
|
|
574
|
-
// h56translate helper: lazy-call translator module when invoked
|
|
575
488
|
export async function h56translate(text, targetLang, options) {
|
|
576
489
|
const mod = await loadTranslatorModule();
|
|
577
490
|
if (!mod || typeof mod.translate !== "function") {
|
|
@@ -582,35 +495,26 @@ export async function h56translate(text, targetLang, options) {
|
|
|
582
495
|
return await mod.translate(text, targetLang, options);
|
|
583
496
|
}
|
|
584
497
|
|
|
585
|
-
export {
|
|
586
|
-
GithubScraper,
|
|
587
|
-
defaultScraper,
|
|
588
|
-
// convenience wrappers
|
|
589
|
-
// note: keep same signatures as before (username, opts)
|
|
590
|
-
async function scrapeProfile(username, opts) {
|
|
591
|
-
return defaultScraper.scrapeProfile(username, opts);
|
|
592
|
-
},
|
|
593
|
-
async function scrapeRepos(username, opts) {
|
|
594
|
-
return defaultScraper.scrapeRepos(username, opts);
|
|
595
|
-
},
|
|
596
|
-
async function scrapeUser(username, opts) {
|
|
597
|
-
return defaultScraper.scrapeUser(username, opts);
|
|
598
|
-
},
|
|
599
|
-
function calculateStats(repos) {
|
|
600
|
-
return defaultScraper.calculateStats(repos);
|
|
601
|
-
},
|
|
602
|
-
GithubScraper.printResult as printResult,
|
|
603
|
-
};
|
|
498
|
+
export { GithubScraper, defaultScraper };
|
|
604
499
|
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
500
|
+
export async function scrapeProfile(username) {
|
|
501
|
+
return defaultScraper.scrapeProfile(username);
|
|
502
|
+
}
|
|
503
|
+
export async function scrapeRepos(username) {
|
|
504
|
+
return defaultScraper.scrapeRepos(username);
|
|
505
|
+
}
|
|
506
|
+
export async function scrapeUser(username, opts) {
|
|
507
|
+
return defaultScraper.scrapeUser(username, opts);
|
|
508
|
+
}
|
|
509
|
+
export function calculateStats(repos) {
|
|
510
|
+
return defaultScraper.calculateStats(repos);
|
|
511
|
+
}
|
|
512
|
+
export const printResult = GithubScraper.printResult;
|
|
608
513
|
|
|
609
514
|
// -------------------------
|
|
610
515
|
// CLI behavior when run directly
|
|
611
516
|
// -------------------------
|
|
612
|
-
|
|
613
|
-
if (process.argv[1] === __filename) {
|
|
517
|
+
if (fileURLToPath(import.meta.url) === process.argv[1]) {
|
|
614
518
|
(async () => {
|
|
615
519
|
// build argv using yargs (same API as original)
|
|
616
520
|
const argv = yargs(process.argv.slice(2))
|
|
@@ -649,7 +553,6 @@ if (process.argv[1] === __filename) {
|
|
|
649
553
|
process.exit(1);
|
|
650
554
|
}
|
|
651
555
|
|
|
652
|
-
// build translate options if requested
|
|
653
556
|
const translateOpt = argv.lang
|
|
654
557
|
? {
|
|
655
558
|
lang: argv.lang,
|
|
@@ -678,7 +581,6 @@ if (process.argv[1] === __filename) {
|
|
|
678
581
|
} else {
|
|
679
582
|
GithubScraper.printResult(result.profile, result.stats, result.repos);
|
|
680
583
|
if (argv.output) {
|
|
681
|
-
// also write JSON file if requested
|
|
682
584
|
fs.writeFileSync(path.resolve(argv.output), JSON.stringify(result, null, 2) + os.EOL, "utf8");
|
|
683
585
|
console.log("Written JSON to", argv.output);
|
|
684
586
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "h56-github-scrapper",
|
|
3
|
-
"version": "1.0.5",
|
|
3
|
+
"version": "1.0.7",
|
|
4
4
|
"description": "GitHub user scraper",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "main-scrapping.js",
|
|
@@ -18,8 +18,5 @@
|
|
|
18
18
|
},
|
|
19
19
|
"optionalDependencies": {
|
|
20
20
|
"h56-translator": "^1.0.0"
|
|
21
|
-
},
|
|
22
|
-
"scripts": {
|
|
23
|
-
"postinstall": "node ./scripts/ensure-external-deps.js"
|
|
24
21
|
}
|
|
25
22
|
}
|
package/readme.md
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
[](https://www.npmjs.com/package/h56-github-scrapper)
|
|
4
4
|
[](https://www.npmjs.com/package/h56-github-scrapper)
|
|
5
5
|
[](https://nodejs.org/)
|
|
6
|
-
[](./LICENSE)
|
|
6
|
+
[](./LICENSE)
|
|
7
7
|
[](https://www.typescriptlang.org/)
|
|
8
8
|
[](https://www.npmjs.com/package/h56-translator)
|
|
9
9
|
|
|
@@ -274,14 +274,6 @@ console.log(r.translatedText);
|
|
|
274
274
|
- `h56-translator` adalah dependency opsional. Paket menyediakan:
|
|
275
275
|
- `translate-engine/translate.ts` (typed wrapper) untuk development/TS.
|
|
276
276
|
- `translate-engine/translate.js` (CJS wrapper) untuk runtime require().
|
|
277
|
-
- `scripts/ensure-external-deps.js` — postinstall helper yang berusaha memasang `h56-translator` jika tidak ada, kecuali di CI (safety).
|
|
278
|
-
- Jika Anda menginginkan pemasangan otomatis di CI:
|
|
279
|
-
- jalankan: `H56_FORCE_POSTINSTALL=1 npm install`
|
|
280
|
-
- Jika translator tidak terpasang:
|
|
281
|
-
- `h56translate(...)` akan melempar Error informatif.
|
|
282
|
-
- `scrapeUser(..., { translate: {...} })` akan:
|
|
283
|
-
- menambahkan `_translation_note` dan melanjutkan (default), atau
|
|
284
|
-
- melempar error jika `failOnMissing: true` disetel.
|
|
285
277
|
|
|
286
278
|
---
|
|
287
279
|
|
|
@@ -317,4 +309,195 @@ Kontribusi disambut. Silakan:
|
|
|
317
309
|
- Ikuti style guide dan sertakan deskripsi perubahan pada PR.
|
|
318
310
|
|
|
319
311
|
Changelog singkat (ringkasan):
|
|
320
|
-
- v1.0.0 — Core scraper + optional translator support (h56-translator) + CLI translate flags.
|
|
312
|
+
- v1.0.0 — Core scraper + optional translator support (h56-translator) + CLI translate flags.
|
|
313
|
+
|
|
314
|
+
---
|
|
315
|
+
|
|
316
|
+
## Contoh Implementasi ESM Node.js — Detail lengkap (Tambahan dokumentasi)
|
|
317
|
+
|
|
318
|
+
Bagian ini memberikan panduan langkah demi langkah dan contoh kode ESM (Node.js) yang lebih komprehensif untuk mengimpor paket, mengkonfigurasi scraper, menangani opsi terjemahan (opsional), dan menyimpan hasil full data akun GitHub ke file JSON. Semua contoh menggunakan ESM (".mjs" atau package.json "type": "module") dan Node.js >= 16.
|
|
319
|
+
|
|
320
|
+
Catatan singkat:
|
|
321
|
+
- Jika Anda menginstall paket via npm dan menggunakan ESM, Anda dapat memakai dynamic import atau static import (tergantung cara publish). Contoh di bawah menggunakan dynamic import agar langsung kompatibel dengan berbagai skenario.
|
|
322
|
+
- Contoh juga menunjukkan opsi untuk menangani kasus ketika `h56-translator` tidak tersedia.
|
|
323
|
+
|
|
324
|
+
1) Contoh file: scrape-full-esm.mjs
|
|
325
|
+
- Perintah menjalankan: node scrape-full-esm.mjs <github-username> [--lang=<lang>] [--output=<path>] [--no-spinner]
|
|
326
|
+
- Fungsi: scrape full data (profile, repos, stats), coba terjemahkan bila diminta, simpan ke file JSON atau cetak ke stdout.
|
|
327
|
+
|
|
328
|
+
```js
|
|
329
|
+
// scrape-full-esm.mjs
|
|
330
|
+
// Usage: node scrape-full-esm.mjs <username> [--lang=en] [--output=./result.json] [--no-spinner]
|
|
331
|
+
|
|
332
|
+
import fs from "fs";
|
|
333
|
+
import path from "path";
|
|
334
|
+
import { fileURLToPath } from "url";
|
|
335
|
+
import process from "process";
|
|
336
|
+
|
|
337
|
+
const argv = process.argv.slice(2);
|
|
338
|
+
|
|
339
|
+
// Minimal CLI parsing (boleh ganti dengan yargs jika ingin)
|
|
340
|
+
function parseArgs(args) {
|
|
341
|
+
const out = { _: [] };
|
|
342
|
+
for (const a of args) {
|
|
343
|
+
if (a.startsWith("--lang=")) out.lang = a.split("=")[1];
|
|
344
|
+
else if (a.startsWith("--output=")) out.output = a.split("=")[1];
|
|
345
|
+
else if (a === "--no-spinner") out.noSpinner = true;
|
|
346
|
+
else out._.push(a);
|
|
347
|
+
}
|
|
348
|
+
return out;
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
const parsed = parseArgs(argv);
|
|
352
|
+
const username = parsed._[0];
|
|
353
|
+
|
|
354
|
+
if (!username) {
|
|
355
|
+
console.error("Usage: node scrape-full-esm.mjs <username> [--lang=en] [--output=./res.json] [--no-spinner]");
|
|
356
|
+
process.exit(2);
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
(async () => {
|
|
360
|
+
try {
|
|
361
|
+
// dynamic import library (ESM)
|
|
362
|
+
const pkg = await import("h56-github-scrapper");
|
|
363
|
+
// package exports: scrapeUser, scrapeProfile, scrapeRepos, GithubScraper, h56translate, printResult
|
|
364
|
+
const {
|
|
365
|
+
scrapeUser,
|
|
366
|
+
GithubScraper,
|
|
367
|
+
h56translate,
|
|
368
|
+
printResult,
|
|
369
|
+
} = pkg;
|
|
370
|
+
|
|
371
|
+
// Example: use defaultScraper via scrapeUser (simple)
|
|
372
|
+
const translateOpt = parsed.lang
|
|
373
|
+
? {
|
|
374
|
+
lang: parsed.lang,
|
|
375
|
+
fields: ["bio", "repo_descriptions"], // default fields
|
|
376
|
+
perRepoDelay: 120,
|
|
377
|
+
failOnMissing: false, // don't fail if translator missing
|
|
378
|
+
}
|
|
379
|
+
: undefined;
|
|
380
|
+
|
|
381
|
+
console.log("Scraping user:", username);
|
|
382
|
+
const result = await scrapeUser(username, {
|
|
383
|
+
spinner: !parsed.noSpinner,
|
|
384
|
+
translate: translateOpt,
|
|
385
|
+
});
|
|
386
|
+
|
|
387
|
+
// Pretty-print to console using built-in helper (optional)
|
|
388
|
+
if (!parsed.output) {
|
|
389
|
+
// readable print
|
|
390
|
+
printResult(result.profile, result.stats, result.repos);
|
|
391
|
+
// also output JSON to stdout if desired
|
|
392
|
+
console.log("JSON output:");
|
|
393
|
+
console.log(JSON.stringify(result, null, 2));
|
|
394
|
+
} else {
|
|
395
|
+
const outPath = path.resolve(parsed.output);
|
|
396
|
+
fs.writeFileSync(outPath, JSON.stringify(result, null, 2) + "\n", "utf8");
|
|
397
|
+
console.log("Saved JSON to", outPath);
|
|
398
|
+
}
|
|
399
|
+
} catch (err) {
|
|
400
|
+
console.error("Error scraping:", err && err.message ? err.message : String(err));
|
|
401
|
+
if (err && err.cause && err.cause.message) {
|
|
402
|
+
console.error("Cause:", err.cause.message);
|
|
403
|
+
}
|
|
404
|
+
process.exit(1);
|
|
405
|
+
}
|
|
406
|
+
})();
|
|
407
|
+
```
|
|
408
|
+
|
|
409
|
+
2) Contoh: menggunakan class GithubScraper untuk konfigurasi lanjutan
|
|
410
|
+
- Anda mungkin ingin mengubah timeout, user-agent, atau delay. Gunakan `new GithubScraper({ ... })`.
|
|
411
|
+
|
|
412
|
+
```js
|
|
413
|
+
// scrape-custom-esm.mjs
|
|
414
|
+
import fs from "fs";
|
|
415
|
+
import path from "path";
|
|
416
|
+
const { GithubScraper } = await import("h56-github-scrapper");
|
|
417
|
+
|
|
418
|
+
const scraper = new GithubScraper({
|
|
419
|
+
REQUEST_TIMEOUT: 30000,
|
|
420
|
+
SCRAPE_DELAY: 600,
|
|
421
|
+
MAX_RETRY: 4,
|
|
422
|
+
USER_AGENT: "MyBot/1.0 (+https://example.com/mybot)",
|
|
423
|
+
});
|
|
424
|
+
|
|
425
|
+
async function run(username, outFile) {
|
|
426
|
+
try {
|
|
427
|
+
const result = await scraper.scrapeUser(username, {
|
|
428
|
+
spinner: true,
|
|
429
|
+
translate: { lang: "en", fields: ["bio"], perRepoDelay: 120, failOnMissing: false },
|
|
430
|
+
});
|
|
431
|
+
fs.writeFileSync(path.resolve(outFile), JSON.stringify(result, null, 2) + "\n", "utf8");
|
|
432
|
+
console.log("Saved:", outFile);
|
|
433
|
+
} catch (e) {
|
|
434
|
+
console.error("Failed:", e.message || e);
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
await run("HASYIM56", "./hasyim56-full.json");
|
|
439
|
+
```
|
|
440
|
+
|
|
441
|
+
3) Contoh: memanggil helper terjemahan langsung (h56translate) — menangani ketiadaan translator
|
|
442
|
+
- Helper `h56translate` akan melempar error bila translator tidak tersedia. Tangani dengan try/catch.
|
|
443
|
+
|
|
444
|
+
```js
|
|
445
|
+
// translate-direct.mjs
|
|
446
|
+
const { h56translate } = await import("h56-github-scrapper");
|
|
447
|
+
|
|
448
|
+
async function example() {
|
|
449
|
+
try {
|
|
450
|
+
const r = await h56translate("Halo dunia, ini contoh bio", "en");
|
|
451
|
+
console.log("Translated:", r.translatedText);
|
|
452
|
+
} catch (err) {
|
|
453
|
+
console.warn("Translator helper unavailable:", err.message);
|
|
454
|
+
// fallback: continue tanpa terjemahan
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
await example();
|
|
459
|
+
```
|
|
460
|
+
|
|
461
|
+
4) Praktik terbaik & tips pada implementasi ESM:
|
|
462
|
+
- Pastikan project Anda menggunakan "type": "module" di package.json atau gunakan ekstensi .mjs untuk file ESM.
|
|
463
|
+
- Jika Anda menjalankan pada lingkungan CI, disable spinner (`spinner: false` atau `--no-spinner`) untuk hasil yang bersih.
|
|
464
|
+
- Kelola `SCRAPE_DELAY` dan `perRepoDelay` untuk menghindari rate-limiting dari layanan penerjemah atau beban berlebih ke GitHub.
|
|
465
|
+
- Untuk penggunaan skala besar, simpan hasil terjemahan ke cache (file/db) agar tidak melakukan permintaan ulang terjemahan.
|
|
466
|
+
- Tangani error network dan kasus "Username not found" (kode error: `NOT_FOUND`) saat memanggil `scrapeUser` atau `scrapeProfile`.
|
|
467
|
+
|
|
468
|
+
5) Contoh alur end-to-end (script yang menerima daftar username dan menyimpan masing-masing ke file)
|
|
469
|
+
```js
|
|
470
|
+
// batch-scrape.mjs
|
|
471
|
+
import fs from "fs";
|
|
472
|
+
import path from "path";
|
|
473
|
+
|
|
474
|
+
const { scrapeUser } = await import("h56-github-scrapper");
|
|
475
|
+
|
|
476
|
+
// contoh daftar
|
|
477
|
+
const users = ["octocat", "HASYIM56", "someuser"];
|
|
478
|
+
|
|
479
|
+
for (const u of users) {
|
|
480
|
+
try {
|
|
481
|
+
console.log("Scraping", u);
|
|
482
|
+
const res = await scrapeUser(u, { spinner: false, translate: undefined });
|
|
483
|
+
const out = path.resolve(`./output-${u}.json`);
|
|
484
|
+
fs.writeFileSync(out, JSON.stringify(res, null, 2) + "\n", "utf8");
|
|
485
|
+
console.log("Saved", out);
|
|
486
|
+
} catch (e) {
|
|
487
|
+
console.error("Failed to scrape", u, e.message || e);
|
|
488
|
+
}
|
|
489
|
+
// disarankan memberi delay antar akun untuk sopan-santun
|
|
490
|
+
await new Promise((r) => setTimeout(r, 500));
|
|
491
|
+
}
|
|
492
|
+
```
|
|
493
|
+
|
|
494
|
+
Ringkasan tambahan:
|
|
495
|
+
- Gunakan contoh `scrape-full-esm.mjs` untuk kebutuhan satu akun sederhana.
|
|
496
|
+
- Gunakan `GithubScraper` jika perlu konfigurasi param runtime (timeout, user-agent, delay).
|
|
497
|
+
- Gunakan `h56translate` atau opsi `translate` di `scrapeUser` bila memerlukan terjemahan, dan selalu tangani kemungkinan ketiadaan paket `h56-translator`.
|
|
498
|
+
|
|
499
|
+
---
|
|
500
|
+
|
|
501
|
+
## License
|
|
502
|
+
|
|
503
|
+
MIT
|
|
package/script/ensure-external-deps.js
DELETED
|
@@ -1,88 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
|
-
/**
|
|
3
|
-
* scripts/ensure-external-deps.js (ESM)
|
|
4
|
-
*
|
|
5
|
-
* Postinstall helper: idempotent installer for optional external deps.
|
|
6
|
-
* - Implemented as a full ESM module using top-level await.
|
|
7
|
-
* - Detects missing packages using dynamic `import()` (avoids createRequire / CJS).
|
|
8
|
-
* - Skips automatic install in CI by default; set H56_FORCE_POSTINSTALL=1 to override.
|
|
9
|
-
* - Uses spawnSync to run npm install and keeps the script resilient (won't throw on failure).
|
|
10
|
-
*
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
import { spawnSync } from "child_process";
|
|
14
|
-
import path from "path";
|
|
15
|
-
import { fileURLToPath } from "url";
|
|
16
|
-
|
|
17
|
-
const __filename = fileURLToPath(import.meta.url);
|
|
18
|
-
const __dirname = path.dirname(__filename);
|
|
19
|
-
|
|
20
|
-
const optionalDeps = ["h56-translator"];
|
|
21
|
-
|
|
22
|
-
/**
|
|
23
|
-
* Check whether a package is resolvable via dynamic import.
|
|
24
|
-
* Uses import() which will attempt to load the package; this is the most portable
|
|
25
|
-
* ESM-compatible way to test presence without using CJS helpers.
|
|
26
|
-
*
|
|
27
|
-
* We deliberately avoid `require.resolve` / createRequire to keep file pure ESM.
|
|
28
|
-
*/
|
|
29
|
-
async function isInstalled(name) {
|
|
30
|
-
try {
|
|
31
|
-
// dynamic import of a bare specifier will resolve via Node's resolver
|
|
32
|
-
await import(name);
|
|
33
|
-
return true;
|
|
34
|
-
} catch (err) {
|
|
35
|
-
// import failed => treat as missing
|
|
36
|
-
return false;
|
|
37
|
-
}
|
|
38
|
-
}
|
|
39
|
-
|
|
40
|
-
/**
|
|
41
|
-
* Run npm install for the given list of packages (synchronous to keep postinstall simple).
|
|
42
|
-
*/
|
|
43
|
-
function installDeps(deps) {
|
|
44
|
-
if (!deps || deps.length === 0) return;
|
|
45
|
-
const npmCmd = process.platform === "win32" ? "npm.cmd" : "npm";
|
|
46
|
-
const args = ["install", "--no-audit", "--no-fund", "--save", ...deps];
|
|
47
|
-
console.log(`Installing optional dependencies: ${deps.join(", ")}`);
|
|
48
|
-
const res = spawnSync(npmCmd, args, { stdio: "inherit", shell: false, cwd: process.cwd() });
|
|
49
|
-
if (res.error || res.status !== 0) {
|
|
50
|
-
console.error("Failed to install optional dependencies automatically. Please run manually:");
|
|
51
|
-
console.error(" npm install " + deps.join(" "));
|
|
52
|
-
// keep postinstall resilient: do not throw/exit non-zero to avoid breaking npm install
|
|
53
|
-
} else {
|
|
54
|
-
console.log("Optional dependencies installed.");
|
|
55
|
-
}
|
|
56
|
-
}
|
|
57
|
-
|
|
58
|
-
async function main() {
|
|
59
|
-
try {
|
|
60
|
-
const toInstall = [];
|
|
61
|
-
for (const dep of optionalDeps) {
|
|
62
|
-
const present = await isInstalled(dep);
|
|
63
|
-
if (!present) toInstall.push(dep);
|
|
64
|
-
}
|
|
65
|
-
|
|
66
|
-
if (toInstall.length === 0) {
|
|
67
|
-
// nothing to do
|
|
68
|
-
// console.log("All optional dependencies present.");
|
|
69
|
-
return;
|
|
70
|
-
}
|
|
71
|
-
|
|
72
|
-
// Safety: skip auto-install in CI unless explicitly forced
|
|
73
|
-
if (process.env.CI && !process.env.H56_FORCE_POSTINSTALL) {
|
|
74
|
-
console.log(
|
|
75
|
-
"CI environment detected — skipping automatic installation of optional dependencies.",
|
|
76
|
-
"Set H56_FORCE_POSTINSTALL=1 to force installation in CI."
|
|
77
|
-
);
|
|
78
|
-
return;
|
|
79
|
-
}
|
|
80
|
-
|
|
81
|
-
installDeps(toInstall);
|
|
82
|
-
} catch (err) {
|
|
83
|
-
// Keep postinstall resilient; log error but don't fail the install process.
|
|
84
|
-
console.error("Postinstall check encountered an error:", err && err.message ? err.message : err);
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
|
|
88
|
-
await main();
|