indo-scraper 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +646 -0
- package/index.js +38 -0
- package/indo-scraper.zip +0 -0
- package/package.json +26 -0
- package/src/bmkg/cuaca.js +34 -0
- package/src/bmkg/gempa.js +56 -0
- package/src/downloader/facebook.js +94 -0
- package/src/downloader/gdrive.js +38 -0
- package/src/downloader/instagram.js +62 -0
- package/src/downloader/mediafire.js +30 -0
- package/src/downloader/spotify.js +262 -0
- package/src/downloader/tiktok.js +472 -0
- package/src/finance/bbm.js +51 -0
- package/src/finance/emas.js +46 -0
- package/src/finance/kurs.js +64 -0
- package/src/finance/saham.js +117 -0
- package/src/info/cekno.js +39 -0
- package/src/info/resi.js +82 -0
- package/src/news/antara.js +66 -0
- package/src/news/cnn.js +71 -0
- package/src/news/detik.js +108 -0
- package/src/news/kompas.js +70 -0
- package/src/news/liputan6.js +65 -0
- package/src/news/okezone.js +72 -0
- package/src/news/republika.js +73 -0
- package/src/news/tribun.js +95 -0
- package/src/tools/simsimi.js +69 -0
- package/src/tools/ssweb.js +35 -0
- package/src/utils.js +79 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
const { axios, fetchHTML, ok, fail } = require('../utils')
|
|
2
|
+
|
|
3
|
+
// Headers attached to the TradingView scanner POST requests made in this module.
// NOTE(review): those requests send a JSON string as the body while declaring a
// form-urlencoded content type — presumably what the endpoint accepts; confirm before changing.
const TV_HEADERS = {
  'User-Agent': 'Mozilla/5.0 (Linux; Android 13) AppleWebKit/537.36 Chrome/124.0 Mobile Safari/537.36',
  'Content-Type': 'application/x-www-form-urlencoded',
  Origin: 'https://id.tradingview.com',
  Referer: 'https://id.tradingview.com/',
}
|
|
9
|
+
|
|
10
|
+
/**
 * Indonesian stock quote from TradingView.
 *
 * `ihsg` (any letter case) is read from the IDX-COMPOSITE symbol page by matching the
 * quote JSON embedded in its HTML; every other code is queried as `IDX:<CODE>` through
 * the scanner endpoint.
 *
 * @param {string} kode - ihsg | bbca | bbri | bmri | tlkm | or any other IDX stock code
 * @returns {Promise<*>} the value of `ok(quote)` on success, or of `fail(reason)` when no
 *   quote is found or any step throws (the error is also written with console.log)
 */
const saham = async (kode = 'ihsg') => {
  // Round to 2 decimals based on the value's type, not its truthiness, so a genuine 0
  // (e.g. an unchanged price) is reported as 0. Anything that is not a finite number → null.
  const round2 = (value) => (typeof value === 'number' && Number.isFinite(value) ? +value.toFixed(2) : null)

  try {
    if (kode.toLowerCase() === 'ihsg') {
      const html = await fetchHTML('https://id.tradingview.com/symbols/IDX-COMPOSITE/', {
        Referer: 'https://id.tradingview.com/',
      })
      // Capture groups: 1 close, 2 data_update_time, 3 high, 4 low, 5 open, 6 time, 7 volume.
      const match = html.match(/\{"close":"([\d.]+)","data_update_time":"([\d.]+)","high":"([\d.]+)","low":"([\d.]+)","open":"([\d.]+)","time":"(\d+)"[^}]*"volume":"([\d.]+)"/)
      if (!match) return fail('Data IHSG tidak ditemukan')
      return ok({
        kode: 'IHSG',
        symbol: 'IDX:COMPOSITE',
        open: parseFloat(match[5]),
        high: parseFloat(match[3]),
        low: parseFloat(match[4]),
        close: parseFloat(match[1]),
        volume: parseFloat(match[7]),
        // data_update_time is scaled by 1000 before building the Date, i.e. handled as epoch seconds.
        update: new Date(Number.parseInt(match[2], 10) * 1000).toISOString(),
        sumber: 'TradingView',
      })
    }

    const ticker = kode.toUpperCase()
    const res = await axios.post(
      'https://scanner.tradingview.com/indonesia/scan',
      JSON.stringify({
        symbols: { tickers: [`IDX:${ticker}`] },
        // The order here defines the indices of the `d` array read below.
        columns: [
          'name', 'description', 'close', 'open', 'high', 'low', 'volume',
          'change', 'change_abs',
          'Perf.W', 'Perf.1M', 'Perf.3M', 'Perf.6M', 'Perf.Y', 'Perf.YTD'
        ],
      }),
      { headers: TV_HEADERS, timeout: 10000 }
    )

    if (!res.data?.data?.length) return fail(`Saham ${ticker} tidak ditemukan`)
    const d = res.data.data[0].d
    return ok({
      kode: d[0],
      nama: d[1],
      close: d[2],
      open: d[3],
      high: d[4],
      low: d[5],
      volume: d[6],
      perubahan_pct: round2(d[7]),
      perubahan_abs: round2(d[8]),
      performa: {
        '1W': round2(d[9]),
        '1M': round2(d[10]),
        '3M': round2(d[11]),
        '6M': round2(d[12]),
        '1Y': round2(d[13]),
        'YTD': round2(d[14]),
      },
      sumber: 'TradingView',
    })
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
74
|
+
|
|
75
|
+
/**
 * List Indonesian stocks from the TradingView scanner, sorted by market cap (largest first).
 *
 * @param {number} limit - number of stocks to request (max 906, default 50); sent as the
 *   upper bound of the scanner `range`
 * @returns {Promise<*>} the value of `ok({ total, tampil, data })` on success, or of
 *   `fail(reason)` when the scanner returns no rows or any step throws (the error is also
 *   written with console.log)
 */
const sahamList = async (limit = 50) => {
  // Round to 2 decimals based on the value's type, not its truthiness, so a genuine 0
  // (e.g. an unchanged price) is reported as 0. Anything that is not a finite number → null.
  const round2 = (value) => (typeof value === 'number' && Number.isFinite(value) ? +value.toFixed(2) : null)

  try {
    const res = await axios.post(
      'https://scanner.tradingview.com/indonesia/scan',
      JSON.stringify({
        // The order here defines the indices of each row's `d` array read below.
        columns: [
          'name', 'description', 'close', 'change', 'change_abs',
          'volume', 'market_cap_basic',
          'Perf.W', 'Perf.1M', 'Perf.Y', 'Perf.YTD'
        ],
        sort: { sortBy: 'market_cap_basic', sortOrder: 'desc' },
        range: [0, limit],
      }),
      { headers: TV_HEADERS, timeout: 10000 }
    )

    const rows = res.data?.data
    if (!rows?.length) return fail('Data tidak ditemukan')

    const data = rows.map(({ d }) => ({
      kode: d[0],
      nama: d[1],
      close: d[2],
      perubahan_pct: round2(d[3]),
      perubahan_abs: round2(d[4]),
      volume: d[5],
      market_cap: d[6],
      performa: {
        '1W': round2(d[7]),
        '1M': round2(d[8]),
        '1Y': round2(d[9]),
        'YTD': round2(d[10]),
      },
    }))
    // `total` is the scanner's own totalCount; `tampil` is how many rows were actually returned.
    return ok({ total: res.data.totalCount, tampil: data.length, data })
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
116
|
+
|
|
117
|
+
module.exports = { saham, sahamList }
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
const { ok, fail } = require('../utils')
|
|
2
|
+
|
|
3
|
+
/**
 * Identify the mobile operator of an Indonesian phone number from its 4-digit prefix.
 *
 * Spaces, dashes and dots are stripped, a leading `+62` or `62` is rewritten to `0`,
 * and a missing leading `0` is added before the lookup.
 *
 * @param {string|number} nomor - e.g. 08123456789 or +628123456789
 * @returns {Promise<*>} the value of `ok({ nomor_asli, nomor, prefix, provider, panjang })`,
 *   or of `fail(reason)` when the normalized number is not 10-13 digits or its prefix
 *   is not in the table
 */
const cekNomor = async (nomor) => {
  try {
    const PREFIX = {
      '0811': 'Telkomsel', '0812': 'Telkomsel', '0813': 'Telkomsel',
      '0821': 'Telkomsel', '0822': 'Telkomsel', '0823': 'Telkomsel',
      '0851': 'Telkomsel', '0852': 'Telkomsel', '0853': 'Telkomsel',
      '0814': 'Indosat', '0815': 'Indosat', '0816': 'Indosat',
      '0855': 'Indosat', '0856': 'Indosat', '0857': 'Indosat',
      '0858': 'Indosat', '0828': 'Indosat',
      '0817': 'XL', '0818': 'XL', '0819': 'XL',
      '0859': 'XL', '0877': 'XL', '0878': 'XL',
      '0831': 'AXIS', '0832': 'AXIS', '0833': 'AXIS', '0838': 'AXIS',
      '0881': 'Smartfren', '0882': 'Smartfren', '0883': 'Smartfren',
      '0884': 'Smartfren', '0885': 'Smartfren', '0886': 'Smartfren',
      '0887': 'Smartfren', '0888': 'Smartfren', '0889': 'Smartfren',
      '0895': 'Three', '0896': 'Three', '0897': 'Three',
      '0898': 'Three', '0899': 'Three',
    }

    // Coerce first so numeric input works; null/undefined become '' and fail validation below.
    let no = String(nomor ?? '').replace(/\s|-|\./g, '')
    if (no.startsWith('+62')) no = '0' + no.slice(3)
    if (no.startsWith('62')) no = '0' + no.slice(2)
    if (!no.startsWith('0')) no = '0' + no

    // Enforce digits as well as length, so stray letters are rejected as an invalid
    // number instead of being reported as an unknown prefix.
    if (!/^\d{10,13}$/.test(no)) return fail('Nomor tidak valid (10-13 digit)')

    const prefix = no.slice(0, 4)
    const provider = PREFIX[prefix]
    if (!provider) return fail(`Prefix ${prefix} tidak dikenali`)
    return ok({ nomor_asli: nomor, nomor: no, prefix, provider, panjang: no.length })
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
38
|
+
|
|
39
|
+
module.exports = { cekNomor }
|
package/src/info/resi.js
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
const cloudscraper = require('cloudscraper')
|
|
2
|
+
const { cheerio, ok, fail } = require('../utils')
|
|
3
|
+
|
|
4
|
+
// Courier alias (lower-case) → slug placed in the cek-resi.net URL path by cekResi.
// Aliases not listed here are used as the slug unchanged.
const KURIR_MAP = {
  'jne': 'jne',
  'jnt': 'jnt',
  'j&t': 'jnt',
  'sicepat': 'sicepat',
  'anteraja': 'anteraja',
  'pos': 'pos',
  'wahana': 'wahana',
  'tiki': 'tiki',
  'ninja': 'ninja',
  'lion': 'lion',
  'sap': 'sap',
  'id': 'id-express',
}
|
|
18
|
+
|
|
19
|
+
/**
 * Track a shipment through cek-resi.net (requests go through cloudscraper to get past Cloudflare).
 *
 * Loads the courier page to pick up a CSRF token, posts the tracking form back to the
 * same URL, then reads the history rows out of the returned HTML.
 *
 * @param {string} kurir - jne | jnt | sicepat | anteraja | pos | wahana | tiki | etc.
 *   (case-insensitive; aliases in KURIR_MAP are translated, anything else is used as-is)
 * @param {string} noResi - tracking number
 * @returns {Promise<*>} the value of `ok({ kurir, noResi, status, history })`, or of
 *   `fail(reason)` when no history rows are found or any step throws
 */
const cekResi = async (kurir, noResi) => {
  try {
    const alias = kurir.toLowerCase()
    // Own-property check so inherited keys such as "constructor" are not treated as aliases.
    const code = Object.prototype.hasOwnProperty.call(KURIR_MAP, alias) ? KURIR_MAP[alias] : alias
    // The courier comes from the caller: encode it so "/", "?" or "#" cannot alter the request path.
    const url = `https://cek-resi.net/kurir/${encodeURIComponent(code)}`

    // First request: fetch the courier page through cloudscraper.
    const pageHtml = await cloudscraper.get(url)
    const $page = cheerio.load(pageHtml)

    // CSRF token: hidden form input first, then the meta tag, else an empty string.
    const token = $page('input[name="_token"]').val()
      || $page('meta[name="csrf-token"]').attr('content')
      || ''

    // Second request: submit the tracking form.
    const formHtml = await cloudscraper({
      method: 'POST',
      uri: url,
      form: { resi: noResi, _token: token },
      headers: {
        'Referer': url,
        'Origin': 'https://cek-resi.net',
      },
    })

    const $ = cheerio.load(formHtml)
    const history = []

    // Each row with at least two cells is read as: date, description, optional location.
    $('table tbody tr, .tracking-result tr, .result-tracking tr').each((_, el) => {
      const cols = $(el).find('td')
      if (cols.length < 2) return
      const tanggal = $(cols[0]).text().trim()
      const keterangan = $(cols[1]).text().trim()
      const lokasi = cols.length > 2 ? $(cols[2]).text().trim() : ''
      if (tanggal && keterangan) history.push({ tanggal, keterangan, lokasi })
    })

    if (!history.length) {
      // Include the first 300 characters of the page text so the caller can see what came back.
      const preview = $('body').text().replace(/\s+/g, ' ').trim().slice(0, 300)
      return fail(`Resi tidak ditemukan. Preview: ${preview}`)
    }

    return ok({
      kurir: code,
      noResi,
      // The first parsed row is reported as the current status.
      status: history[0]?.keterangan || '',
      history,
    })
  } catch (e) {
    console.log('[cekResi]', e.message)
    return fail(e)
  }
}
|
|
81
|
+
|
|
82
|
+
module.exports = { cekResi, KURIR_MAP }
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
const { fetchHTML, cheerio, parseLdJson, parseThumbnail, parseParagraphs, ok, fail } = require('../utils')
|
|
2
|
+
|
|
3
|
+
/**
 * Latest headlines from an Antara News channel listing.
 *
 * @param {object} options - { channel: 'nasional', page: 1, limit: 20 }
 *   channel: nasional | hukum | ekonomi | olahraga | hiburan | internasional | tekno | otomotif
 * @returns {Promise<*>} the value of `ok(articles)`, or of `fail(reason)` when nothing
 *   could be parsed or any step throws
 */
const antara = async (options = {}) => {
  const SITE = 'https://www.antaranews.com'
  try {
    const { channel = 'nasional', page = 1, limit = 20 } = options
    const listingHtml = await fetchHTML(`${SITE}/${channel}?page=${page}`, { Referer: SITE })
    const $ = cheerio.load(listingHtml)
    const collected = []

    $('.title-card').each((_, node) => {
      // Returning false stops the iteration once enough items are gathered.
      if (collected.length >= limit) return false

      const card = $(node)
      const anchor = card.find('a').first()
      const link = anchor.attr('href') || ''
      const headline = anchor.text().trim()
      if (!(headline && link)) return

      // Image and date are looked up from the nearest div/article/li around the title.
      const wrapper = card.closest('div, article, li')
      const picture = wrapper.find('img').first()
      const stamp = wrapper.find('time, .timeago, .date').first()

      collected.push({
        title: headline,
        url: link.startsWith('http') ? link : `${SITE}${link}`,
        image: picture.attr('data-src') || picture.attr('src') || null,
        category: channel,
        date: stamp.attr('datetime') || stamp.text().trim() || null,
        source: 'antaranews',
      })
    })

    return collected.length ? ok(collected) : fail('Data tidak ditemukan')
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
40
|
+
|
|
41
|
+
/**
 * Full Antara News article.
 *
 * Fields come from the parsed JSON-LD first, with fallbacks to page selectors or to a
 * regex over the raw HTML.
 *
 * @param {string} url - article URL
 * @returns {Promise<*>} the value of `ok({ title, author, date, category, description,
 *   thumbnail, content, paragraphs })`, or of `fail(error)` when any step throws
 */
const antaraArticle = async (url) => {
  try {
    const html = await fetchHTML(url, { Referer: 'https://www.antaranews.com' })
    const $ = cheerio.load(html)
    // The parsed JSON-LD is no longer dumped to the console on every call (leftover debug output).
    const ld = parseLdJson($, html)
    const paragraphs = parseParagraphs($, 'div.post-content p, .article-body p')
    return ok({
      title: ld?.headline || $('h1.post-title').first().text().trim(),
      author: ld?.author?.name || $('.reporter-name, .author').first().text().trim(),
      date: ld?.datePublished || html.match(/"datePublished"\s*:\s*"([^"]+)"/)?.[1] || '',
      category: ld?.articleSection || '',
      description: ld?.description || html.match(/"description"\s*:\s*"([^"]+)"/)?.[1] || '',
      thumbnail: parseThumbnail(ld),
      content: paragraphs.join('\n\n'),
      paragraphs,
    })
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
65
|
+
|
|
66
|
+
module.exports = { antara, antaraArticle }
|
package/src/news/cnn.js
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
const { fetchHTML, cheerio, parseParagraphs, parseThumbnail, ok, fail } = require('../utils')
|
|
2
|
+
|
|
3
|
+
/**
 * Latest headlines from a CNN Indonesia category page.
 *
 * @param {object} options - { category: 'nasional', limit: 20 }
 *   category: nasional | internasional | ekonomi | olahraga | teknologi | hiburan | gaya_hidup
 * @returns {Promise<*>} the value of `ok(articles)`, or of `fail(reason)` when nothing
 *   could be parsed or any step throws
 */
const cnn = async (options = {}) => {
  // Category name → URL path segment; names not listed are used as the segment directly.
  const CATEGORIES = {
    nasional: 'nasional',
    internasional: 'internasional',
    ekonomi: 'ekonomi',
    olahraga: 'olahraga',
    teknologi: 'teknologi',
    hiburan: 'hiburan',
    gaya_hidup: 'gaya-hidup',
  }
  try {
    const { category = 'nasional', limit = 20 } = options
    const segment = CATEGORIES[category] || category
    const pageHtml = await fetchHTML(`https://www.cnnindonesia.com/${segment}`)
    const $ = cheerio.load(pageHtml)
    const collected = []

    $('article').each((_, node) => {
      if (collected.length >= limit) return false

      const item = $(node)
      const anchor = item.find('a[href*="cnnindonesia.com"]').first()
      const link = anchor.attr('href') || ''
      // Keep only links that contain a digit-run id segment matching this pattern.
      if (!link.match(/\/\d{8}\d+-\d+-\d+\//)) return

      const headline = anchor.text().replace(/\s+/g, ' ').trim()
      if (!headline || headline.length < 10) return

      // The first 14 digits of that segment are read as YYYYMMDDhhmmss and emitted with a +07:00 offset.
      const stamp = link.match(/\/(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})-/)
      let date = null
      if (stamp) {
        const [, year, month, day, hour, minute, second] = stamp
        date = `${year}-${month}-${day}T${hour}:${minute}:${second}+07:00`
      }

      collected.push({
        title: headline,
        url: link,
        image: item.find('img').first().attr('src') || null,
        category,
        date,
        source: 'cnnindonesia',
      })
    })

    return collected.length ? ok(collected) : fail('Data tidak ditemukan')
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
42
|
+
|
|
43
|
+
/**
 * Full CNN Indonesia article.
 *
 * @param {string} url - article URL; the first path segment after the domain is reported as the category
 * @returns {Promise<*>} the value of `ok({ title, author, date, category, description,
 *   thumbnail, content, paragraphs })`, or of `fail(error)` when any step throws
 */
const cnnArticle = async (url) => {
  try {
    const html = await fetchHTML(url)
    const $ = cheerio.load(html)

    // Scan every JSON-LD script; the last one typed NewsArticle is kept.
    let ld = null
    $('script[type="application/ld+json"]').each((_, script) => {
      try {
        const parsed = JSON.parse($(script).html())
        if (parsed['@type'] === 'NewsArticle') ld = parsed
      } catch (_) {
        // Unparseable or unexpected JSON-LD is skipped on purpose.
      }
    })

    const section = url.match(/cnnindonesia\.com\/([^/]+)\//)
    const paragraphs = parseParagraphs($, '.detail-text p, .artikel-content p')
    const published = ld?.datePublished || html.match(/"datePublished"\s*:\s*"([^"]+)"/)?.[1] || ''

    return ok({
      title: ld?.headline || $('h1').first().text().trim(),
      // The author is always reported as this fixed string.
      author: 'CNN Indonesia',
      date: published,
      category: section?.[1] || '',
      description: ld?.description || '',
      thumbnail: parseThumbnail(ld),
      content: paragraphs.join('\n\n'),
      paragraphs,
    })
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
70
|
+
|
|
71
|
+
module.exports = { cnn, cnnArticle }
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
const { fetchHTML, cheerio, parseLdJson, parseThumbnail, parseParagraphs, ok, fail } = require('../utils')
|
|
2
|
+
|
|
3
|
+
/**
 * Latest headlines from a Detik channel index page.
 *
 * @param {object} options - { channel: 'news', page: 1, limit: 20 }
 *   channel: news | finance | hot | sport | inet | oto | health | travel | food
 *   (unknown channels fall back to the news index)
 * @returns {Promise<*>} the value of `ok(articles)`, or of `fail(reason)` when nothing
 *   could be parsed or any step throws
 */
const detik = async (options = {}) => {
  try {
    const { channel = 'news', page = 1, limit = 20 } = options

    // Every channel lives on its own subdomain with the same /indeks path.
    const CHANNELS = Object.fromEntries(
      ['news', 'finance', 'hot', 'sport', 'inet', 'oto', 'health', 'travel', 'food']
        .map((name) => [name, `https://${name}.detik.com/indeks`])
    )
    const indexUrl = CHANNELS[channel] || CHANNELS.news
    const pageHtml = await fetchHTML(`${indexUrl}?page=${page}`)
    const $ = cheerio.load(pageHtml)
    const collected = []

    $('article.list-content__item').each((_, node) => {
      if (collected.length >= limit) return false

      const item = $(node)
      const anchor = item.find('a.media__link').first()
      const headline = anchor.find('div').first().text().trim() || anchor.text().trim()
      // The i-link attribute on the <article> wins over the anchor's href.
      const link = item.attr('i-link') || anchor.attr('href') || ''
      if (!headline || !link || link === '#') return

      // Image URL = i-img base plus the i-img-qs suffix, when a base is present.
      const imgBase = item.attr('i-img') || null
      const imgSuffix = item.attr('i-img-qs') || ''
      const stamp = item.find('.media__date span').first()

      collected.push({
        title: headline,
        url: link,
        image: imgBase ? `${imgBase}${imgSuffix}` : null,
        category: item.find('.media__category').text().trim() || channel,
        date: stamp.attr('title') || stamp.text().trim() || null,
        source: 'detik',
      })
    })

    return collected.length ? ok(collected) : fail('Data tidak ditemukan')
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
48
|
+
|
|
49
|
+
/**
 * Search Detik articles, newest first.
 *
 * @param {string} query - search text (URL-encoded before it is sent)
 * @param {number} limit - maximum number of results to return (default 10)
 * @returns {Promise<*>} the value of `ok(articles)`, or of `fail(reason)` when nothing
 *   could be parsed or any step throws
 */
const detikSearch = async (query, limit = 10) => {
  try {
    const url = `https://www.detik.com/search/searchall?query=${encodeURIComponent(query)}&sortby=time`
    const html = await fetchHTML(url)
    const $ = cheerio.load(html)
    const articles = []

    $('article.list-content__item').each((_, el) => {
      // Returning false stops the iteration once enough items are gathered.
      if (articles.length >= limit) return false

      const $el = $(el)
      const linkEl = $el.find('a.media__link').first()
      const title = linkEl.find('div').first().text().trim() || linkEl.text().trim()
      // The i-link attribute on the <article> wins over the anchor's href.
      const href = $el.attr('i-link') || linkEl.attr('href') || ''
      // Skip placeholder "#" links as well, matching the filter used by the index listing.
      if (!title || !href || href === '#') return

      // Image URL = i-img base plus the i-img-qs suffix, when a base is present.
      const imgBase = $el.attr('i-img') || null
      const imgQs = $el.attr('i-img-qs') || ''
      const dateEl = $el.find('.media__date span').first()

      articles.push({
        title,
        url: href,
        image: imgBase ? `${imgBase}${imgQs}` : null,
        date: dateEl.attr('title') || dateEl.text().trim() || null,
        source: 'detik',
      })
    })

    if (!articles.length) return fail('Data tidak ditemukan')
    return ok(articles)
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
83
|
+
|
|
84
|
+
/**
 * Full Detik article.
 *
 * @param {string} url - article URL
 * @returns {Promise<*>} the value of `ok({ title, author, date, category, description,
 *   thumbnail, content, paragraphs })`, or of `fail(error)` when any step throws
 */
const detikArticle = async (url) => {
  try {
    const html = await fetchHTML(url)
    const $ = cheerio.load(html)
    const ld = parseLdJson($, html)
    const paragraphs = parseParagraphs($, '.detail__body p, .itp_bodycontent p')

    // JSON-LD values are preferred, except for the category where the on-page label wins.
    const title = ld?.headline || $('h1.detail__title').first().text().trim()
    const author = ld?.author?.name || $('.detail__author').first().text().trim()
    const date = ld?.datePublished || html.match(/"datePublished"\s*:\s*"([^"]+)"/)?.[1] || ''
    const category = $('.detail__label').first().text().trim() || ld?.articleSection || ''

    return ok({
      title,
      author,
      date,
      category,
      description: ld?.description || '',
      thumbnail: parseThumbnail(ld),
      content: paragraphs.join('\n\n'),
      paragraphs,
    })
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
107
|
+
|
|
108
|
+
module.exports = { detik, detikSearch, detikArticle }
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
const { fetchHTML, cheerio, parseLdJson, parseThumbnail, ok, fail } = require('../utils')
|
|
2
|
+
|
|
3
|
+
/**
 * Latest headlines from the Kompas index.
 *
 * @param {object} options - { channel: 'news', page: 1, limit: 20, date: '11/05/2026' }
 *   channel: news | regional | megapolitan | money | sport | tekno | sains | travel | food | health
 *   date: optional; appended as the `date` query parameter only when truthy
 * @returns {Promise<*>} the value of `ok(articles)`, or of `fail(reason)` when nothing
 *   could be parsed or any step throws
 */
const kompas = async (options = {}) => {
  try {
    const { channel = 'news', page = 1, limit = 20, date = null } = options

    // Encode each caller-supplied value so "&", "#", spaces etc. cannot break or
    // inject into the query string.
    let url = `https://indeks.kompas.com/?site=${encodeURIComponent(channel)}&page=${encodeURIComponent(page)}`
    if (date) url += `&date=${encodeURIComponent(date)}`

    const html = await fetchHTML(url, { Referer: 'https://www.kompas.com/' })
    const $ = cheerio.load(html)
    const articles = []

    $('div.hlItem').each((_, el) => {
      // Returning false stops the iteration once enough items are gathered.
      if (articles.length >= limit) return false

      const $el = $(el)
      const linkEl = $el.find('a.hlItem-link').first()
      const title = $el.find('.hlTitle').first().text().trim() || linkEl.attr('title') || ''
      const href = linkEl.attr('href') || ''
      if (!title || !href) return

      const imgEl = $el.find('img').first()
      const dateEl = $el.find('.hlTime').first()
      articles.push({
        title,
        url: href,
        image: imgEl.attr('data-src') || imgEl.attr('src') || null,
        category: $el.find('.hlChannel').text().trim() || channel,
        date: dateEl.attr('datetime') || dateEl.text().trim() || null,
        source: 'kompas',
      })
    })

    if (!articles.length) return fail('Data tidak ditemukan')
    return ok(articles)
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
39
|
+
|
|
40
|
+
/**
 * Full Kompas article.
 *
 * Paragraph text has "baca juga: …" fragments removed; paragraphs of 30 characters or
 * fewer, or starting with a related-links/advertisement label, are dropped.
 *
 * @param {string} url - article URL
 * @returns {Promise<*>} the value of `ok({ title, author, date, category, description,
 *   thumbnail, content, paragraphs })`, or of `fail(error)` when any step throws
 */
const kompasArticle = async (url) => {
  // Paragraphs that begin with one of these labels are not article text.
  const SKIP = /^(baca juga|simak juga|artikel terkait|advertisement|iklan)/i
  // Inline "baca juga: …" fragment, up to the end of its line.
  const STRIP = /baca juga\s*:.*?(?=\n|$)/gi

  try {
    const html = await fetchHTML(url, { Referer: 'https://indeks.kompas.com/' })
    const $ = cheerio.load(html)
    const ld = parseLdJson($, html)

    const paragraphs = []
    $('.read__content p, .article__body p').each((_, node) => {
      const cleaned = $(node).text().trim().replace(STRIP, '').trim()
      if (cleaned.length <= 30) return
      if (SKIP.test(cleaned)) return
      paragraphs.push(cleaned)
    })

    const published = ld?.datePublished || html.match(/"datePublished"\s*:\s*"([^"]+)"/)?.[1] || ''
    return ok({
      title: ld?.headline || $('h1.read__title').first().text().trim(),
      author: ld?.author?.name || '',
      date: published,
      category: ld?.articleSection || '',
      description: ld?.description || '',
      thumbnail: parseThumbnail(ld),
      content: paragraphs.join('\n\n'),
      paragraphs,
    })
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
69
|
+
|
|
70
|
+
module.exports = { kompas, kompasArticle }
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
const { fetchHTML, cheerio, parseLdJson, parseThumbnail, parseParagraphs, ok, fail } = require('../utils')
|
|
2
|
+
|
|
3
|
+
/**
 * Pick an absolute image URL for an <img> element.
 *
 * Tries the lazy-load attributes and `src` in order and returns the first value that
 * starts with "http"; otherwise falls back to the first "http" entry of
 * `data-srcset` / `srcset`.
 *
 * @param {Function} $ - selector function; `$(el).attr(name)` is used to read attributes
 * @param {*} el - element (or selection) to inspect
 * @returns {string|null} the chosen URL, or null when none qualifies
 */
const pickImg = ($, el) => {
  const isHttp = (value) => value.startsWith('http')
  const node = $(el)

  // A value starting with "http" can never be a data: URI, so one prefix test suffices.
  let direct = null
  const found = ['data-src', 'data-original', 'data-lazy', 'data-image', 'src'].some((name) => {
    const value = node.attr(name) || ''
    if (!isHttp(value)) return false
    direct = value
    return true
  })
  if (found) return direct

  const srcset = node.attr('data-srcset') || node.attr('srcset') || ''
  if (!srcset) return null

  // Entries are split on whitespace and commas, so width/density descriptors become
  // separate tokens that fail the prefix test.
  const candidate = srcset.trim().split(/[\s,]+/).find(isHttp)
  return candidate || null
}
|
|
13
|
+
|
|
14
|
+
/**
 * Latest headlines from a Liputan6 channel index page.
 *
 * @param {object} options - { channel: 'news', page: 1, limit: 20 }
 * @returns {Promise<*>} the value of `ok(articles)`, or of `fail(reason)` when nothing
 *   could be parsed or any step throws
 */
const liputan6 = async (options = {}) => {
  const SITE = 'https://www.liputan6.com'
  try {
    const { channel = 'news', page = 1, limit = 20 } = options
    const pageHtml = await fetchHTML(`${SITE}/${channel}/indeks?page=${page}`, { Referer: SITE })
    const $ = cheerio.load(pageHtml)
    const collected = []

    $('.article-snippet').each((_, node) => {
      // Returning false stops the iteration once enough items are gathered.
      if (collected.length >= limit) return false

      const snippet = $(node)
      const link = snippet.find('.article-snippet__title-link').attr('href') || ''
      const headline = snippet.find('.article-snippet__title-text').text().trim()
      if (!(headline && link)) return

      const picture = snippet.find('.article-snippet--media-figure__picture-img').first()
      const published = snippet.find('.article-snippet__date').text().trim() || null

      collected.push({
        title: headline,
        // Hrefs not starting with "http" are prefixed with the site origin.
        url: link.startsWith('http') ? link : `${SITE}${link}`,
        image: pickImg($, picture),
        category: channel,
        date: published,
        source: 'liputan6',
      })
    })

    return collected.length ? ok(collected) : fail('Data tidak ditemukan')
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
44
|
+
|
|
45
|
+
/**
 * Full Liputan6 article.
 *
 * On-page elements are preferred for each field; the parsed JSON-LD is the fallback
 * (the date is read only from the first <time datetime> element).
 *
 * @param {string} url - article URL
 * @returns {Promise<*>} the value of `ok({ title, author, date, category, description,
 *   thumbnail, content, paragraphs })`, or of `fail(error)` when any step throws
 */
const liputan6Article = async (url) => {
  try {
    const html = await fetchHTML(url, { Referer: 'https://www.liputan6.com' })
    const $ = cheerio.load(html)
    const ld = parseLdJson($, html)

    // Trimmed text of the first element matching a selector.
    const firstText = (selector) => $(selector).first().text().trim()

    const heroImage = $('.articles-content__image-container img').first()
    const paragraphs = parseParagraphs($, '.article-content-body__item p, .article-content-body p')

    return ok({
      title: firstText('.articles-content__title') || ld?.headline || '',
      author: firstText('.editorial-articles__name') || ld?.author?.name || '',
      date: $('time[datetime]').first().attr('datetime') || '',
      category: $('[data-channel]').first().attr('data-channel') || ld?.articleSection || '',
      description: firstText('.articles-content__sinopsis') || ld?.description || '',
      thumbnail: pickImg($, heroImage) || parseThumbnail(ld),
      content: paragraphs.join('\n\n'),
      paragraphs,
    })
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
64
|
+
|
|
65
|
+
module.exports = { liputan6, liputan6Article }
|