h56-github-scrapper 1.0.6 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/readme.md +193 -2
package/package.json
CHANGED
package/readme.md
CHANGED
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
[](https://www.npmjs.com/package/h56-github-scrapper)
|
|
4
4
|
[](https://www.npmjs.com/package/h56-github-scrapper)
|
|
5
5
|
[](https://nodejs.org/)
|
|
6
|
-
[](./LICENSE)
|
|
6
|
+
[](./LICENSE)
|
|
7
7
|
[](https://www.typescriptlang.org/)
|
|
8
8
|
[](https://www.npmjs.com/package/h56-translator)
|
|
9
9
|
|
|
@@ -309,4 +309,195 @@ Kontribusi disambut. Silakan:
|
|
|
309
309
|
- Ikuti style guide dan sertakan deskripsi perubahan pada PR.
|
|
310
310
|
|
|
311
311
|
Changelog singkat (ringkasan):
|
|
312
|
-
- v1.0.0 — Core scraper + optional translator support (h56-translator) + CLI translate flags.
|
|
312
|
+
- v1.0.0 — Core scraper + optional translator support (h56-translator) + CLI translate flags.
|
|
313
|
+
|
|
314
|
+
---
|
|
315
|
+
|
|
316
|
+
## Contoh Implementasi ESM Node.js — Detail lengkap (Tambahan dokumentasi)
|
|
317
|
+
|
|
318
|
+
Bagian ini memberikan panduan langkah demi langkah dan contoh kode ESM (Node.js) yang lebih komprehensif untuk mengimpor paket, mengkonfigurasi scraper, menangani opsi terjemahan (opsional), dan menyimpan hasil full data akun GitHub ke file JSON. Semua contoh menggunakan ESM (".mjs" atau package.json "type": "module") dan Node.js >= 16.
|
|
319
|
+
|
|
320
|
+
Catatan singkat:
|
|
321
|
+
- Jika Anda menginstal paket via npm dan menggunakan ESM, Anda dapat memakai dynamic import atau static import (tergantung cara paket dipublikasikan). Contoh di bawah menggunakan dynamic import agar langsung kompatibel dengan berbagai skenario.
|
|
322
|
+
- Contoh juga menunjukkan opsi untuk menangani kasus ketika `h56-translator` tidak tersedia.
|
|
323
|
+
|
|
324
|
+
1) Contoh file: scrape-full-esm.mjs
|
|
325
|
+
- Perintah menjalankan: `node scrape-full-esm.mjs <github-username> [--lang=<lang>] [--output=<path>] [--no-spinner]`
|
|
326
|
+
- Fungsi: scrape full data (profile, repos, stats), coba terjemahkan bila diminta, simpan ke file JSON atau cetak ke stdout.
|
|
327
|
+
|
|
328
|
+
```js
|
|
329
|
+
// scrape-full-esm.mjs
|
|
330
|
+
// Usage: node scrape-full-esm.mjs <username> [--lang=en] [--output=./result.json] [--no-spinner]
|
|
331
|
+
|
|
332
|
+
import fs from "fs";
|
|
333
|
+
import path from "path";
|
|
334
|
+
import { fileURLToPath } from "url";
|
|
335
|
+
import process from "process";
|
|
336
|
+
|
|
337
|
+
const argv = process.argv.slice(2);
|
|
338
|
+
|
|
339
|
+
// Minimal CLI parsing (boleh ganti dengan yargs jika ingin)
|
|
340
|
+
function parseArgs(args) {
|
|
341
|
+
const out = { _: [] };
|
|
342
|
+
for (const a of args) {
|
|
343
|
+
if (a.startsWith("--lang=")) out.lang = a.split("=")[1];
|
|
344
|
+
else if (a.startsWith("--output=")) out.output = a.split("=")[1];
|
|
345
|
+
else if (a === "--no-spinner") out.noSpinner = true;
|
|
346
|
+
else out._.push(a);
|
|
347
|
+
}
|
|
348
|
+
return out;
|
|
349
|
+
}
|
|
350
|
+
|
|
351
|
+
const parsed = parseArgs(argv);
|
|
352
|
+
const username = parsed._[0];
|
|
353
|
+
|
|
354
|
+
if (!username) {
|
|
355
|
+
console.error("Usage: node scrape-full-esm.mjs <username> [--lang=en] [--output=./res.json] [--no-spinner]");
|
|
356
|
+
process.exit(2);
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
(async () => {
|
|
360
|
+
try {
|
|
361
|
+
// dynamic import library (ESM)
|
|
362
|
+
const pkg = await import("h56-github-scrapper");
|
|
363
|
+
// package exports: scrapeUser, scrapeProfile, scrapeRepos, GithubScraper, h56translate, printResult
|
|
364
|
+
const {
|
|
365
|
+
scrapeUser,
|
|
366
|
+
GithubScraper,
|
|
367
|
+
h56translate,
|
|
368
|
+
printResult,
|
|
369
|
+
} = pkg;
|
|
370
|
+
|
|
371
|
+
// Example: use defaultScraper via scrapeUser (simple)
|
|
372
|
+
const translateOpt = parsed.lang
|
|
373
|
+
? {
|
|
374
|
+
lang: parsed.lang,
|
|
375
|
+
fields: ["bio", "repo_descriptions"], // default fields
|
|
376
|
+
perRepoDelay: 120,
|
|
377
|
+
failOnMissing: false, // don't fail if translator missing
|
|
378
|
+
}
|
|
379
|
+
: undefined;
|
|
380
|
+
|
|
381
|
+
console.log("Scraping user:", username);
|
|
382
|
+
const result = await scrapeUser(username, {
|
|
383
|
+
spinner: !parsed.noSpinner,
|
|
384
|
+
translate: translateOpt,
|
|
385
|
+
});
|
|
386
|
+
|
|
387
|
+
// Pretty-print to console using built-in helper (optional)
|
|
388
|
+
if (!parsed.output) {
|
|
389
|
+
// readable print
|
|
390
|
+
printResult(result.profile, result.stats, result.repos);
|
|
391
|
+
// also output JSON to stdout if desired
|
|
392
|
+
console.log("JSON output:");
|
|
393
|
+
console.log(JSON.stringify(result, null, 2));
|
|
394
|
+
} else {
|
|
395
|
+
const outPath = path.resolve(parsed.output);
|
|
396
|
+
fs.writeFileSync(outPath, JSON.stringify(result, null, 2) + "\n", "utf8");
|
|
397
|
+
console.log("Saved JSON to", outPath);
|
|
398
|
+
}
|
|
399
|
+
} catch (err) {
|
|
400
|
+
console.error("Error scraping:", err && err.message ? err.message : String(err));
|
|
401
|
+
if (err && err.cause && err.cause.message) {
|
|
402
|
+
console.error("Cause:", err.cause.message);
|
|
403
|
+
}
|
|
404
|
+
process.exit(1);
|
|
405
|
+
}
|
|
406
|
+
})();
|
|
407
|
+
```
|
|
408
|
+
|
|
409
|
+
2) Contoh: menggunakan class GithubScraper untuk konfigurasi lanjutan
|
|
410
|
+
- Anda mungkin ingin mengubah timeout, user-agent, atau delay. Gunakan `new GithubScraper({ ... })`.
|
|
411
|
+
|
|
412
|
+
```js
|
|
413
|
+
// scrape-custom-esm.mjs
|
|
414
|
+
import fs from "fs";
|
|
415
|
+
import path from "path";
|
|
416
|
+
const { GithubScraper } = await import("h56-github-scrapper");
|
|
417
|
+
|
|
418
|
+
const scraper = new GithubScraper({
|
|
419
|
+
REQUEST_TIMEOUT: 30000,
|
|
420
|
+
SCRAPE_DELAY: 600,
|
|
421
|
+
MAX_RETRY: 4,
|
|
422
|
+
USER_AGENT: "MyBot/1.0 (+https://example.com/mybot)",
|
|
423
|
+
});
|
|
424
|
+
|
|
425
|
+
async function run(username, outFile) {
|
|
426
|
+
try {
|
|
427
|
+
const result = await scraper.scrapeUser(username, {
|
|
428
|
+
spinner: true,
|
|
429
|
+
translate: { lang: "en", fields: ["bio"], perRepoDelay: 120, failOnMissing: false },
|
|
430
|
+
});
|
|
431
|
+
fs.writeFileSync(path.resolve(outFile), JSON.stringify(result, null, 2) + "\n", "utf8");
|
|
432
|
+
console.log("Saved:", outFile);
|
|
433
|
+
} catch (e) {
|
|
434
|
+
console.error("Failed:", e.message || e);
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
|
|
438
|
+
await run("HASYIM56", "./hasyim56-full.json");
|
|
439
|
+
```
|
|
440
|
+
|
|
441
|
+
3) Contoh: memanggil helper terjemahan langsung (h56translate) — menangani ketiadaan translator
|
|
442
|
+
- Helper `h56translate` akan melempar error bila translator tidak tersedia. Tangani dengan try/catch.
|
|
443
|
+
|
|
444
|
+
```js
|
|
445
|
+
// translate-direct.mjs
|
|
446
|
+
const { h56translate } = await import("h56-github-scrapper");
|
|
447
|
+
|
|
448
|
+
async function example() {
|
|
449
|
+
try {
|
|
450
|
+
const r = await h56translate("Halo dunia, ini contoh bio", "en");
|
|
451
|
+
console.log("Translated:", r.translatedText);
|
|
452
|
+
} catch (err) {
|
|
453
|
+
console.warn("Translator helper unavailable:", err.message);
|
|
454
|
+
// fallback: continue tanpa terjemahan
|
|
455
|
+
}
|
|
456
|
+
}
|
|
457
|
+
|
|
458
|
+
await example();
|
|
459
|
+
```
|
|
460
|
+
|
|
461
|
+
4) Praktik terbaik & tips pada implementasi ESM:
|
|
462
|
+
- Pastikan project Anda menggunakan "type": "module" di package.json atau gunakan ekstensi .mjs untuk file ESM.
|
|
463
|
+
- Jika Anda menjalankan pada lingkungan CI, nonaktifkan spinner (`spinner: false` atau `--no-spinner`) agar output log tetap bersih.
|
|
464
|
+
- Kelola `SCRAPE_DELAY` dan `perRepoDelay` untuk menghindari rate-limiting dari layanan penerjemah atau beban berlebih ke GitHub.
|
|
465
|
+
- Untuk penggunaan skala besar, simpan hasil terjemahan ke cache (file/db) agar tidak melakukan permintaan ulang terjemahan.
|
|
466
|
+
- Tangani error network dan kasus "Username not found" (kode error: `NOT_FOUND`) saat memanggil `scrapeUser` atau `scrapeProfile`.
|
|
467
|
+
|
|
468
|
+
5) Contoh alur end-to-end (script yang menerima daftar username dan menyimpan masing-masing ke file)
|
|
469
|
+
```js
|
|
470
|
+
// batch-scrape.mjs
|
|
471
|
+
import fs from "fs";
|
|
472
|
+
import path from "path";
|
|
473
|
+
|
|
474
|
+
const { scrapeUser } = await import("h56-github-scrapper");
|
|
475
|
+
|
|
476
|
+
// contoh daftar
|
|
477
|
+
const users = ["octocat", "HASYIM56", "someuser"];
|
|
478
|
+
|
|
479
|
+
for (const u of users) {
|
|
480
|
+
try {
|
|
481
|
+
console.log("Scraping", u);
|
|
482
|
+
const res = await scrapeUser(u, { spinner: false, translate: undefined });
|
|
483
|
+
const out = path.resolve(`./output-${u}.json`);
|
|
484
|
+
fs.writeFileSync(out, JSON.stringify(res, null, 2) + "\n", "utf8");
|
|
485
|
+
console.log("Saved", out);
|
|
486
|
+
} catch (e) {
|
|
487
|
+
console.error("Failed to scrape", u, e.message || e);
|
|
488
|
+
}
|
|
489
|
+
// disarankan memberi delay antar akun untuk sopan-santun
|
|
490
|
+
await new Promise((r) => setTimeout(r, 500));
|
|
491
|
+
}
|
|
492
|
+
```
|
|
493
|
+
|
|
494
|
+
Ringkasan tambahan:
|
|
495
|
+
- Gunakan contoh `scrape-full-esm.mjs` untuk kebutuhan satu akun sederhana.
|
|
496
|
+
- Gunakan `GithubScraper` jika perlu mengonfigurasi parameter runtime (timeout, user-agent, delay).
|
|
497
|
+
- Gunakan `h56translate` atau opsi `translate` di `scrapeUser` bila memerlukan terjemahan, dan selalu tangani kemungkinan ketiadaan paket `h56-translator`.
|
|
498
|
+
|
|
499
|
+
---
|
|
500
|
+
|
|
501
|
+
## License
|
|
502
|
+
|
|
503
|
+
MIT
|