indo-scraper 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,117 @@
1
+ const { axios, fetchHTML, ok, fail } = require('../utils')
2
+
3
/*
 * Request headers shared by the TradingView scanner calls in this module.
 * Origin and Referer both point at the Indonesian TradingView site.
 */
const TV_HEADERS = (() => {
  const site = 'https://id.tradingview.com'
  return {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 13) AppleWebKit/537.36 Chrome/124.0 Mobile Safari/537.36',
    'Content-Type': 'application/x-www-form-urlencoded',
    Origin: site,
    Referer: `${site}/`,
  }
})()
9
+
10
/*
 * Indonesian stock quote from TradingView.
 *
 * `ihsg` (case-insensitive) is scraped from the IDX Composite symbol page;
 * any other code is upper-cased and looked up as `IDX:<CODE>` through the
 * scanner endpoint.
 *
 * @param {string} kode - ihsg | bbca | bbri | bmri | tlkm | or any other IDX ticker
 * @returns {Promise<*>} `ok({...quote})` on success, `fail(...)` when the symbol
 *   is not found or when anything in the lookup throws
 */
const saham = async (kode = 'ihsg') => {
  // Round to two decimals. The type is checked instead of truthiness so that
  // a genuine 0 (unchanged price, flat performance) stays 0 instead of null.
  const round2 = (v) => (typeof v === 'number' && Number.isFinite(v) ? +v.toFixed(2) : null)

  try {
    if (kode.toLowerCase() === 'ihsg') {
      const html = await fetchHTML('https://id.tradingview.com/symbols/IDX-COMPOSITE/', {
        Referer: 'https://id.tradingview.com/',
      })
      // The quote is embedded in the page as a JSON fragment; capture its fields in place.
      const match = html.match(/\{"close":"([\d.]+)","data_update_time":"([\d.]+)","high":"([\d.]+)","low":"([\d.]+)","open":"([\d.]+)","time":"(\d+)"[^}]*"volume":"([\d.]+)"/)
      if (!match) return fail('Data IHSG tidak ditemukan')

      // Capture group 6 ("time") is matched but not used.
      const [, close, updatedAt, high, low, open, , volume] = match
      return ok({
        kode: 'IHSG',
        symbol: 'IDX:COMPOSITE',
        open: parseFloat(open),
        high: parseFloat(high),
        low: parseFloat(low),
        close: parseFloat(close),
        volume: parseFloat(volume),
        // data_update_time is multiplied by 1000 to turn seconds into milliseconds for Date
        update: new Date(Number.parseInt(updatedAt, 10) * 1000).toISOString(),
        sumber: 'TradingView',
      })
    }

    const ticker = kode.toUpperCase()
    const res = await axios.post(
      'https://scanner.tradingview.com/indonesia/scan',
      JSON.stringify({
        symbols: { tickers: [`IDX:${ticker}`] },
        columns: [
          'name', 'description', 'close', 'open', 'high', 'low', 'volume',
          'change', 'change_abs',
          'Perf.W', 'Perf.1M', 'Perf.3M', 'Perf.6M', 'Perf.Y', 'Perf.YTD',
        ],
      }),
      { headers: TV_HEADERS, timeout: 10000 }
    )

    if (!res.data?.data?.length) return fail(`Saham ${ticker} tidak ditemukan`)

    // `d` holds the values in the same order as the requested `columns`.
    const d = res.data.data[0].d
    return ok({
      kode: d[0],
      nama: d[1],
      close: d[2],
      open: d[3],
      high: d[4],
      low: d[5],
      volume: d[6],
      perubahan_pct: round2(d[7]),
      perubahan_abs: round2(d[8]),
      performa: {
        '1W': round2(d[9]),
        '1M': round2(d[10]),
        '3M': round2(d[11]),
        '6M': round2(d[12]),
        '1Y': round2(d[13]),
        'YTD': round2(d[14]),
      },
      sumber: 'TradingView',
    })
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
74
+
75
/*
 * List of Indonesian stocks from the TradingView scanner, sorted by market
 * capitalisation in descending order.
 *
 * @param {number} limit - number of stocks to request (max 906, default 50)
 * @returns {Promise<*>} `ok({ total, tampil, data })` on success, `fail(...)` when
 *   the response is empty or when the request throws
 */
const sahamList = async (limit = 50) => {
  // Round to two decimals. The type is checked instead of truthiness so that
  // a genuine 0 (unchanged price, flat performance) stays 0 instead of null.
  const round2 = (v) => (typeof v === 'number' && Number.isFinite(v) ? +v.toFixed(2) : null)

  try {
    const res = await axios.post(
      'https://scanner.tradingview.com/indonesia/scan',
      JSON.stringify({
        columns: [
          'name', 'description', 'close', 'change', 'change_abs',
          'volume', 'market_cap_basic',
          'Perf.W', 'Perf.1M', 'Perf.Y', 'Perf.YTD',
        ],
        sort: { sortBy: 'market_cap_basic', sortOrder: 'desc' },
        range: [0, limit],
      }),
      { headers: TV_HEADERS, timeout: 10000 }
    )

    if (!res.data?.data?.length) return fail('Data tidak ditemukan')

    // Each row's `d` array follows the order of the requested `columns`.
    const data = res.data.data.map(({ d }) => ({
      kode: d[0],
      nama: d[1],
      close: d[2],
      perubahan_pct: round2(d[3]),
      perubahan_abs: round2(d[4]),
      volume: d[5],
      market_cap: d[6],
      performa: {
        '1W': round2(d[7]),
        '1M': round2(d[8]),
        '1Y': round2(d[9]),
        'YTD': round2(d[10]),
      },
    }))
    return ok({ total: res.data.totalCount, tampil: data.length, data })
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
116
+
117
+ module.exports = { saham, sahamList }
@@ -0,0 +1,39 @@
1
+ const { ok, fail } = require('../utils')
2
+
3
/*
 * Look up the mobile operator of an Indonesian phone number from its
 * four-digit prefix.
 *
 * Spaces, dashes, dots and parentheses are stripped, and a leading `+62` / `62`
 * (or a missing leading zero) is normalised to the local `0…` form before the
 * prefix table is consulted.
 *
 * @param {string} nomor - e.g. 08123456789 or +628123456789
 * @returns {Promise<*>} `ok({ nomor_asli, nomor, prefix, provider, panjang })`, or
 *   `fail(...)` when the number is not 10-13 digits or the prefix is unknown
 */
const cekNomor = async (nomor) => {
  const PREFIX = {
    '0811': 'Telkomsel', '0812': 'Telkomsel', '0813': 'Telkomsel',
    '0821': 'Telkomsel', '0822': 'Telkomsel', '0823': 'Telkomsel',
    '0851': 'Telkomsel', '0852': 'Telkomsel', '0853': 'Telkomsel',
    '0814': 'Indosat', '0815': 'Indosat', '0816': 'Indosat',
    '0855': 'Indosat', '0856': 'Indosat', '0857': 'Indosat',
    '0858': 'Indosat', '0828': 'Indosat',
    '0817': 'XL', '0818': 'XL', '0819': 'XL',
    '0859': 'XL', '0877': 'XL', '0878': 'XL',
    '0831': 'AXIS', '0832': 'AXIS', '0833': 'AXIS', '0838': 'AXIS',
    '0881': 'Smartfren', '0882': 'Smartfren', '0883': 'Smartfren',
    '0884': 'Smartfren', '0885': 'Smartfren', '0886': 'Smartfren',
    '0887': 'Smartfren', '0888': 'Smartfren', '0889': 'Smartfren',
    '0895': 'Three', '0896': 'Three', '0897': 'Three',
    '0898': 'Three', '0899': 'Three',
  }

  try {
    // Coerce first so numeric or missing input is validated instead of throwing.
    let no = String(nomor ?? '').replace(/[\s\-.()]/g, '')
    if (no.startsWith('+62')) no = `0${no.slice(3)}`
    else if (no.startsWith('62')) no = `0${no.slice(2)}`
    else if (!no.startsWith('0')) no = `0${no}`

    // Length alone is not enough: anything that is not purely digits is invalid too.
    if (!/^\d{10,13}$/.test(no)) return fail('Nomor tidak valid (10-13 digit)')

    const prefix = no.slice(0, 4)
    const provider = PREFIX[prefix]
    if (!provider) return fail(`Prefix ${prefix} tidak dikenali`)

    return ok({ nomor_asli: nomor, nomor: no, prefix, provider, panjang: no.length })
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
38
+
39
+ module.exports = { cekNomor }
@@ -0,0 +1,82 @@
1
+ const cloudscraper = require('cloudscraper')
2
+ const { cheerio, ok, fail } = require('../utils')
3
+
4
/*
 * Maps the courier names accepted from callers to the codes used in the
 * cek-resi.net URL. Most names map to themselves; `j&t` and `id` are aliases.
 */
const KURIR_MAP = (() => {
  const same = (names) => Object.fromEntries(names.map((name) => [name, name]))
  return {
    ...same(['jne', 'jnt']),
    'j&t': 'jnt',
    ...same(['sicepat', 'anteraja', 'pos', 'wahana', 'tiki', 'ninja', 'lion', 'sap']),
    id: 'id-express',
  }
})()
18
+
19
/*
 * Track a shipment through cek-resi.net (requests go through cloudscraper to
 * get past Cloudflare).
 *
 * The courier page is fetched first to pick up its CSRF token, then the
 * tracking form is posted to the same URL and the result rows are parsed.
 *
 * @param {string} kurir - jne | jnt | sicepat | anteraja | pos | wahana | tiki | etc.
 * @param {string} noResi - tracking (waybill) number
 * @returns {Promise<*>} `ok({ kurir, noResi, status, history })`, or `fail(...)` when
 *   no tracking rows are found or when a request throws
 */
const cekResi = async (kurir, noResi) => {
  try {
    const key = kurir.trim().toLowerCase()
    // Own-property check: a plain `KURIR_MAP[key]` would also return inherited
    // members such as `constructor` and treat them as courier aliases.
    const code = Object.prototype.hasOwnProperty.call(KURIR_MAP, key) ? KURIR_MAP[key] : key
    // The code becomes a URL path segment; encode it so unknown courier names
    // cannot change the path or add a query string.
    const url = `https://cek-resi.net/kurir/${encodeURIComponent(code)}`

    const pageHtml = await cloudscraper.get(url)
    const $page = cheerio.load(pageHtml)

    // CSRF token: hidden form input first, then the meta tag, else empty.
    const token = $page('input[name="_token"]').val()
      || $page('meta[name="csrf-token"]').attr('content')
      || ''

    // Submit the tracking form.
    const formHtml = await cloudscraper({
      method: 'POST',
      uri: url,
      form: { resi: noResi, _token: token },
      headers: {
        'Referer': url,
        'Origin': 'https://cek-resi.net',
      },
    })

    const $ = cheerio.load(formHtml)
    const history = []

    // Each usable row needs at least a date and a description; location is optional.
    $('table tbody tr, .tracking-result tr, .result-tracking tr').each((_, el) => {
      const cols = $(el).find('td')
      if (cols.length < 2) return
      const tanggal = $(cols[0]).text().trim()
      const keterangan = $(cols[1]).text().trim()
      const lokasi = cols.length > 2 ? $(cols[2]).text().trim() : ''
      if (tanggal && keterangan) history.push({ tanggal, keterangan, lokasi })
    })

    if (!history.length) {
      // Include the start of the page text in the failure to help diagnose layout changes.
      const preview = $('body').text().replace(/\s+/g, ' ').trim().slice(0, 300)
      return fail(`Resi tidak ditemukan. Preview: ${preview}`)
    }

    return ok({
      kurir: code,
      noResi,
      // The first parsed row is reported as the current status.
      status: history[0].keterangan,
      history,
    })
  } catch (e) {
    console.log('[cekResi]', e.message)
    return fail(e)
  }
}
81
+
82
+ module.exports = { cekResi, KURIR_MAP }
@@ -0,0 +1,66 @@
1
+ const { fetchHTML, cheerio, parseLdJson, parseThumbnail, parseParagraphs, ok, fail } = require('../utils')
2
+
3
/*
 * Latest headlines from Antara News.
 * @param {object} options - { channel: 'nasional', page: 1, limit: 20 }
 *   channel: nasional | hukum | ekonomi | olahraga | hiburan | internasional | tekno | otomotif
 * @returns {Promise<*>} `ok(articles)` or `fail(...)` when nothing is found or the fetch throws
 */
const antara = async (options = {}) => {
  const SITE = 'https://www.antaranews.com'
  try {
    const { channel = 'nasional', page = 1, limit = 20 } = options
    const html = await fetchHTML(`${SITE}/${channel}?page=${page}`, { Referer: SITE })
    const $ = cheerio.load(html)
    const articles = []

    $('.title-card').each((_, card) => {
      // Returning false stops the cheerio iteration once enough items are collected.
      if (articles.length >= limit) return false

      const $card = $(card)
      const anchor = $card.find('a').first()
      const href = anchor.attr('href') || ''
      const title = anchor.text().trim()
      if (!title || !href) return

      // Image and date are searched within the closest `div, article, li` match.
      const wrapper = $card.closest('div, article, li')
      const img = wrapper.find('img').first()
      const stamp = wrapper.find('time, .timeago, .date').first()

      articles.push({
        title,
        // Relative links are made absolute against the site root.
        url: href.startsWith('http') ? href : `${SITE}${href}`,
        image: img.attr('data-src') || img.attr('src') || null,
        category: channel,
        date: stamp.attr('datetime') || stamp.text().trim() || null,
        source: 'antaranews',
      })
    })

    if (!articles.length) return fail('Data tidak ditemukan')
    return ok(articles)
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
40
+
41
/*
 * Full article from Antara News.
 *
 * Fields come from the page's JSON-LD when available, with DOM selectors or a
 * raw-HTML regex as fallback.
 *
 * @param {string} url - article URL
 * @returns {Promise<*>} `ok({ title, author, date, category, description,
 *   thumbnail, content, paragraphs })`, or `fail(...)` when fetching or parsing throws
 */
const antaraArticle = async (url) => {
  try {
    const html = await fetchHTML(url, { Referer: 'https://www.antaranews.com' })
    const $ = cheerio.load(html)
    // No debug logging of the parsed JSON-LD here: a library call should not write to stdout.
    const ld = parseLdJson($, html)
    const paragraphs = parseParagraphs($, 'div.post-content p, .article-body p')

    return ok({
      title: ld?.headline || $('h1.post-title').first().text().trim(),
      author: ld?.author?.name || $('.reporter-name, .author').first().text().trim(),
      date: ld?.datePublished || html.match(/"datePublished"\s*:\s*"([^"]+)"/)?.[1] || '',
      category: ld?.articleSection || '',
      description: ld?.description || html.match(/"description"\s*:\s*"([^"]+)"/)?.[1] || '',
      thumbnail: parseThumbnail(ld),
      content: paragraphs.join('\n\n'),
      paragraphs,
    })
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
65
+
66
+ module.exports = { antara, antaraArticle }
@@ -0,0 +1,71 @@
1
+ const { fetchHTML, cheerio, parseParagraphs, parseThumbnail, ok, fail } = require('../utils')
2
+
3
/*
 * Latest headlines from CNN Indonesia.
 * @param {object} options - { category: 'nasional', limit: 20 }
 *   category: nasional | internasional | ekonomi | olahraga | teknologi | hiburan | gaya_hidup
 * @returns {Promise<*>} `ok(articles)` or `fail(...)` when nothing is found or the fetch throws
 */
const cnn = async (options = {}) => {
  // Category name -> URL path segment; unknown categories are used verbatim.
  const SLUGS = {
    nasional: 'nasional',
    internasional: 'internasional',
    ekonomi: 'ekonomi',
    olahraga: 'olahraga',
    teknologi: 'teknologi',
    hiburan: 'hiburan',
    gaya_hidup: 'gaya-hidup',
  }

  try {
    const { category = 'nasional', limit = 20 } = options
    const slug = SLUGS[category] || category
    const html = await fetchHTML(`https://www.cnnindonesia.com/${slug}`)
    const $ = cheerio.load(html)
    const articles = []

    $('article').each((_, node) => {
      // Returning false stops the cheerio iteration once enough items are collected.
      if (articles.length >= limit) return false

      const $node = $(node)
      const anchor = $node.find('a[href*="cnnindonesia.com"]').first()
      const href = anchor.attr('href') || ''
      // Only links whose path contains the timestamp-and-id segment are kept.
      if (!href.match(/\/\d{8}\d+-\d+-\d+\//)) return

      const title = anchor.text().replace(/\s+/g, ' ').trim()
      if (!title || title.length < 10) return

      // The first 14 digits of that segment are read as YYYYMMDDhhmmss.
      const stamp = href.match(/\/(\d{4})(\d{2})(\d{2})(\d{2})(\d{2})(\d{2})-/)
      let date = null
      if (stamp) {
        const [, year, month, day, hour, minute, second] = stamp
        date = `${year}-${month}-${day}T${hour}:${minute}:${second}+07:00`
      }

      articles.push({
        title,
        url: href,
        image: $node.find('img').first().attr('src') || null,
        category,
        date,
        source: 'cnnindonesia',
      })
    })

    if (!articles.length) return fail('Data tidak ditemukan')
    return ok(articles)
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
42
+
43
/*
 * Full article from CNN Indonesia.
 * @param {string} url - article URL
 * @returns {Promise<*>} `ok({ title, author, date, category, description,
 *   thumbnail, content, paragraphs })`, or `fail(...)` when fetching or parsing throws
 */
const cnnArticle = async (url) => {
  try {
    const html = await fetchHTML(url)
    const $ = cheerio.load(html)

    // Scan every JSON-LD script; the last one typed NewsArticle wins.
    let ld = null
    $('script[type="application/ld+json"]').each((_, script) => {
      try {
        const parsed = JSON.parse($(script).html())
        if (parsed['@type'] === 'NewsArticle') ld = parsed
      } catch (_err) {
        // Unparseable JSON-LD blocks are skipped on purpose.
      }
    })

    // The category is the first path segment of the article URL.
    const section = url.match(/cnnindonesia\.com\/([^/]+)\//)
    const paragraphs = parseParagraphs($, '.detail-text p, .artikel-content p')
    const publishedInHtml = html.match(/"datePublished"\s*:\s*"([^"]+)"/)

    return ok({
      title: ld?.headline || $('h1').first().text().trim(),
      author: 'CNN Indonesia',
      date: ld?.datePublished || publishedInHtml?.[1] || '',
      category: section?.[1] || '',
      description: ld?.description || '',
      thumbnail: parseThumbnail(ld),
      content: paragraphs.join('\n\n'),
      paragraphs,
    })
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
70
+
71
+ module.exports = { cnn, cnnArticle }
@@ -0,0 +1,108 @@
1
+ const { fetchHTML, cheerio, parseLdJson, parseThumbnail, parseParagraphs, ok, fail } = require('../utils')
2
+
3
/*
 * Latest headlines from Detik.
 * @param {object} options - { channel: 'news', page: 1, limit: 20 }
 *   channel: news | finance | hot | sport | inet | oto | health | travel | food
 * @returns {Promise<*>} `ok(articles)` or `fail(...)` when nothing is found or the fetch throws
 */
const detik = async (options = {}) => {
  // Index page per channel; unknown channels fall back to the news index.
  const CHANNELS = {
    news: 'https://news.detik.com/indeks',
    finance: 'https://finance.detik.com/indeks',
    hot: 'https://hot.detik.com/indeks',
    sport: 'https://sport.detik.com/indeks',
    inet: 'https://inet.detik.com/indeks',
    oto: 'https://oto.detik.com/indeks',
    health: 'https://health.detik.com/indeks',
    travel: 'https://travel.detik.com/indeks',
    food: 'https://food.detik.com/indeks',
  }

  try {
    const { channel = 'news', page = 1, limit = 20 } = options
    const indexUrl = CHANNELS[channel] || CHANNELS.news
    const html = await fetchHTML(`${indexUrl}?page=${page}`)
    const $ = cheerio.load(html)
    const articles = []

    $('article.list-content__item').each((_, item) => {
      // Returning false stops the cheerio iteration once enough items are collected.
      if (articles.length >= limit) return false

      const $item = $(item)
      const anchor = $item.find('a.media__link').first()
      const title = anchor.find('div').first().text().trim() || anchor.text().trim()
      // The `i-link` attribute on the item is preferred over the anchor's href.
      const href = $item.attr('i-link') || anchor.attr('href') || ''
      if (!title || !href || href === '#') return

      // Image URL = `i-img` base plus the optional `i-img-qs` query string.
      const imgBase = $item.attr('i-img') || null
      const imgQuery = $item.attr('i-img-qs') || ''
      const stamp = $item.find('.media__date span').first()

      articles.push({
        title,
        url: href,
        image: imgBase ? `${imgBase}${imgQuery}` : null,
        category: $item.find('.media__category').text().trim() || channel,
        date: stamp.attr('title') || stamp.text().trim() || null,
        source: 'detik',
      })
    })

    if (!articles.length) return fail('Data tidak ditemukan')
    return ok(articles)
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
48
+
49
/*
 * Search Detik articles, newest first.
 * @param {string} query - search keywords (URL-encoded before being sent)
 * @param {number} limit - maximum number of results to return (default 10)
 * @returns {Promise<*>} `ok(articles)` or `fail(...)` when nothing is found or the fetch throws
 */
const detikSearch = async (query, limit = 10) => {
  try {
    const url = `https://www.detik.com/search/searchall?query=${encodeURIComponent(query)}&sortby=time`
    const html = await fetchHTML(url)
    const $ = cheerio.load(html)
    const articles = []

    $('article.list-content__item').each((_, el) => {
      // Returning false stops the cheerio iteration once enough items are collected.
      if (articles.length >= limit) return false

      const $el = $(el)
      const linkEl = $el.find('a.media__link').first()
      const title = linkEl.find('div').first().text().trim() || linkEl.text().trim()
      // The `i-link` attribute on the item is preferred over the anchor's href.
      const href = $el.attr('i-link') || linkEl.attr('href') || ''
      // Skip placeholder `#` links, as the channel listing `detik` does: they
      // would produce results without a usable URL.
      if (!title || !href || href === '#') return

      // Image URL = `i-img` base plus the optional `i-img-qs` query string.
      const imgBase = $el.attr('i-img') || null
      const imgQs = $el.attr('i-img-qs') || ''
      const dateEl = $el.find('.media__date span').first()

      articles.push({
        title,
        url: href,
        image: imgBase ? `${imgBase}${imgQs}` : null,
        date: dateEl.attr('title') || dateEl.text().trim() || null,
        source: 'detik',
      })
    })

    if (!articles.length) return fail('Data tidak ditemukan')
    return ok(articles)
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
83
+
84
/*
 * Full article from Detik.
 * @param {string} url - article URL
 * @returns {Promise<*>} `ok({ title, author, date, category, description,
 *   thumbnail, content, paragraphs })`, or `fail(...)` when fetching or parsing throws
 */
const detikArticle = async (url) => {
  try {
    const html = await fetchHTML(url)
    const $ = cheerio.load(html)
    const ld = parseLdJson($, html)
    const paragraphs = parseParagraphs($, '.detail__body p, .itp_bodycontent p')
    const publishedInHtml = html.match(/"datePublished"\s*:\s*"([^"]+)"/)

    return ok({
      // JSON-LD values are preferred; DOM selectors fill in when they are missing.
      title: ld?.headline || $('h1.detail__title').first().text().trim(),
      author: ld?.author?.name || $('.detail__author').first().text().trim(),
      date: ld?.datePublished || publishedInHtml?.[1] || '',
      // For the category the page label takes precedence over JSON-LD.
      category: $('.detail__label').first().text().trim() || ld?.articleSection || '',
      description: ld?.description || '',
      thumbnail: parseThumbnail(ld),
      content: paragraphs.join('\n\n'),
      paragraphs,
    })
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
107
+
108
+ module.exports = { detik, detikSearch, detikArticle }
@@ -0,0 +1,70 @@
1
+ const { fetchHTML, cheerio, parseLdJson, parseThumbnail, ok, fail } = require('../utils')
2
+
3
/*
 * Latest headlines from the Kompas index.
 * @param {object} options - { channel: 'news', page: 1, limit: 20, date: '11/05/2026' }
 *   channel: news | regional | megapolitan | money | sport | tekno | sains | travel | food | health
 * @returns {Promise<*>} `ok(articles)` or `fail(...)` when nothing is found or the fetch throws
 */
const kompas = async (options = {}) => {
  try {
    const { channel = 'news', page = 1, limit = 20, date = null } = options
    // The date filter is only appended when a date was supplied.
    const dateQuery = date ? `&date=${date}` : ''
    const indexUrl = `https://indeks.kompas.com/?site=${channel}&page=${page}${dateQuery}`
    const html = await fetchHTML(indexUrl, { Referer: 'https://www.kompas.com/' })
    const $ = cheerio.load(html)
    const articles = []

    $('div.hlItem').each((_, item) => {
      // Returning false stops the cheerio iteration once enough items are collected.
      if (articles.length >= limit) return false

      const $item = $(item)
      const anchor = $item.find('a.hlItem-link').first()
      const title = $item.find('.hlTitle').first().text().trim() || anchor.attr('title') || ''
      const href = anchor.attr('href') || ''
      if (!title || !href) return

      const img = $item.find('img').first()
      const stamp = $item.find('.hlTime').first()

      articles.push({
        title,
        url: href,
        image: img.attr('data-src') || img.attr('src') || null,
        category: $item.find('.hlChannel').text().trim() || channel,
        date: stamp.attr('datetime') || stamp.text().trim() || null,
        source: 'kompas',
      })
    })

    if (!articles.length) return fail('Data tidak ditemukan')
    return ok(articles)
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
39
+
40
/*
 * Full article from Kompas.
 * @param {string} url - article URL
 * @returns {Promise<*>} `ok({ title, author, date, category, description,
 *   thumbnail, content, paragraphs })`, or `fail(...)` when fetching or parsing throws
 */
const kompasArticle = async (url) => {
  // Paragraphs that start with one of these phrases are dropped entirely.
  const SKIP = /^(baca juga|simak juga|artikel terkait|advertisement|iklan)/i
  // An inline "baca juga: ..." is cut out up to the end of its line.
  const STRIP = /baca juga\s*:.*?(?=\n|$)/gi

  try {
    const html = await fetchHTML(url, { Referer: 'https://indeks.kompas.com/' })
    const $ = cheerio.load(html)
    const ld = parseLdJson($, html)

    const body = []
    $('.read__content p, .article__body p').each((_, node) => {
      const cleaned = $(node).text().trim().replace(STRIP, '').trim()
      // Only paragraphs longer than 30 characters are kept.
      if (cleaned.length <= 30) return
      if (SKIP.test(cleaned)) return
      body.push(cleaned)
    })

    const publishedInHtml = html.match(/"datePublished"\s*:\s*"([^"]+)"/)

    return ok({
      title: ld?.headline || $('h1.read__title').first().text().trim(),
      author: ld?.author?.name || '',
      date: ld?.datePublished || publishedInHtml?.[1] || '',
      category: ld?.articleSection || '',
      description: ld?.description || '',
      thumbnail: parseThumbnail(ld),
      content: body.join('\n\n'),
      paragraphs: body,
    })
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
69
+
70
+ module.exports = { kompas, kompasArticle }
@@ -0,0 +1,65 @@
1
+ const { fetchHTML, cheerio, parseLdJson, parseThumbnail, parseParagraphs, ok, fail } = require('../utils')
2
+
3
/*
 * Pick the first absolute (`http...`) image URL from an <img> element.
 *
 * The lazy-load attributes are tried in order before `src`; if none holds an
 * absolute URL, the first absolute entry of `data-srcset` / `srcset` is used.
 *
 * @param {Function} $ - cheerio instance used to wrap `el`
 * @param {*} el - the image element (or selection) to inspect
 * @returns {string|null} the URL, or null when no absolute URL is present
 */
const pickImg = ($, el) => {
  const SOURCES = ['data-src', 'data-original', 'data-lazy', 'data-image', 'src']
  const node = $(el)

  // A value starting with "http" is necessarily non-empty and not a data: URI.
  const winner = SOURCES.find((name) => (node.attr(name) || '').startsWith('http'))
  if (winner) return node.attr(winner)

  const srcset = node.attr('data-srcset') || node.attr('srcset') || ''
  if (!srcset) return null

  // srcset entries are separated by commas/whitespace; descriptors such as "2x" never match.
  const candidate = srcset.trim().split(/[\s,]+/).find((part) => part.startsWith('http'))
  return candidate || null
}
13
+
14
/*
 * Latest headlines from a Liputan6 channel index.
 * @param {object} options - { channel: 'news', page: 1, limit: 20 }
 * @returns {Promise<*>} `ok(articles)` or `fail(...)` when nothing is found or the fetch throws
 */
const liputan6 = async (options = {}) => {
  const SITE = 'https://www.liputan6.com'
  try {
    const { channel = 'news', page = 1, limit = 20 } = options
    const html = await fetchHTML(`${SITE}/${channel}/indeks?page=${page}`, { Referer: SITE })
    const $ = cheerio.load(html)
    const articles = []

    $('.article-snippet').each((_, snippet) => {
      // Returning false stops the cheerio iteration once enough items are collected.
      if (articles.length >= limit) return false

      const $snippet = $(snippet)
      const href = $snippet.find('.article-snippet__title-link').attr('href') || ''
      const title = $snippet.find('.article-snippet__title-text').text().trim()
      if (!title || !href) return

      const img = $snippet.find('.article-snippet--media-figure__picture-img').first()

      articles.push({
        title,
        // Relative links are made absolute against the site root.
        url: href.startsWith('http') ? href : `${SITE}${href}`,
        image: pickImg($, img),
        category: channel,
        date: $snippet.find('.article-snippet__date').text().trim() || null,
        source: 'liputan6',
      })
    })

    if (!articles.length) return fail('Data tidak ditemukan')
    return ok(articles)
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
44
+
45
/*
 * Full article from Liputan6.
 *
 * Page markup is preferred for every field; JSON-LD is the fallback where one
 * is available.
 *
 * @param {string} url - article URL
 * @returns {Promise<*>} `ok({ title, author, date, category, description,
 *   thumbnail, content, paragraphs })`, or `fail(...)` when fetching or parsing throws
 */
const liputan6Article = async (url) => {
  try {
    const html = await fetchHTML(url, { Referer: 'https://www.liputan6.com' })
    const $ = cheerio.load(html)
    const ld = parseLdJson($, html)

    // Trimmed text of the first element matching `selector`.
    const firstText = (selector) => $(selector).first().text().trim()

    const heroImg = $('.articles-content__image-container img').first()
    const paragraphs = parseParagraphs($, '.article-content-body__item p, .article-content-body p')

    return ok({
      title: firstText('.articles-content__title') || ld?.headline || '',
      author: firstText('.editorial-articles__name') || ld?.author?.name || '',
      date: $('time[datetime]').first().attr('datetime') || '',
      category: $('[data-channel]').first().attr('data-channel') || ld?.articleSection || '',
      description: firstText('.articles-content__sinopsis') || ld?.description || '',
      thumbnail: pickImg($, heroImg) || parseThumbnail(ld),
      content: paragraphs.join('\n\n'),
      paragraphs,
    })
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
64
+
65
+ module.exports = { liputan6, liputan6Article }