indo-scraper 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +646 -0
- package/index.js +38 -0
- package/indo-scraper.zip +0 -0
- package/package.json +26 -0
- package/src/bmkg/cuaca.js +34 -0
- package/src/bmkg/gempa.js +56 -0
- package/src/downloader/facebook.js +94 -0
- package/src/downloader/gdrive.js +38 -0
- package/src/downloader/instagram.js +62 -0
- package/src/downloader/mediafire.js +30 -0
- package/src/downloader/spotify.js +262 -0
- package/src/downloader/tiktok.js +472 -0
- package/src/finance/bbm.js +51 -0
- package/src/finance/emas.js +46 -0
- package/src/finance/kurs.js +64 -0
- package/src/finance/saham.js +117 -0
- package/src/info/cekno.js +39 -0
- package/src/info/resi.js +82 -0
- package/src/news/antara.js +66 -0
- package/src/news/cnn.js +71 -0
- package/src/news/detik.js +108 -0
- package/src/news/kompas.js +70 -0
- package/src/news/liputan6.js +65 -0
- package/src/news/okezone.js +72 -0
- package/src/news/republika.js +73 -0
- package/src/news/tribun.js +95 -0
- package/src/tools/simsimi.js +69 -0
- package/src/tools/ssweb.js +35 -0
- package/src/utils.js +79 -0
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
const { fetchHTML, cheerio, parseLdJson, parseThumbnail, parseParagraphs, ok, fail } = require('../utils')
|
|
2
|
+
|
|
3
|
+
/**
 * Latest headlines from an Okezone channel index page.
 *
 * Loads `https://<channel>.okezone.com/indeks?page=<page>` and collects one
 * entry per `div.box-text` that has a titled link. Each entry carries
 * `title`, `url`, `image`, `category`, `date` and `source: 'okezone'`.
 *
 * @param {object} [options]
 * @param {string} [options.channel='nasional'] - nasional | economy | sports | techno |
 *   celebrity | lifestyle | otomotif | health. Any other value falls back to 'nasional'.
 * @param {number} [options.page=1] - Index page number.
 * @param {number} [options.limit=20] - Maximum number of articles to collect.
 * @returns {Promise<object>} `ok(articles)` when at least one article was found,
 *   otherwise `fail(...)`. Errors are logged and reported through `fail`, not thrown.
 */
const okezone = async (options = {}) => {
  try {
    const { channel = 'nasional', page = 1, limit = 20 } = options
    const CHANNELS = ['nasional', 'economy', 'sports', 'techno', 'celebrity', 'lifestyle', 'otomotif', 'health']
    // Membership test rather than an object lookup, so inherited keys such as
    // 'constructor' can never be mistaken for a channel name.
    const sub = CHANNELS.includes(channel) ? channel : 'nasional'
    const url = `https://${sub}.okezone.com/indeks?page=${encodeURIComponent(page)}`
    const html = await fetchHTML(url, { Referer: 'https://www.okezone.com' })
    const $ = cheerio.load(html)
    const articles = []

    // Each headline lives in a .box-text div; its image sits in the parent container.
    $('div.box-text').each((i, el) => {
      // Returning false stops the cheerio iteration once the limit is reached.
      if (articles.length >= limit) return false
      const $el = $(el)
      const linkEl = $el.find('a.title').first()
      const title = linkEl.text().trim()
      const href = linkEl.attr('href') || ''
      if (!title || !href) return
      const container = $el.parent()
      const imgEl = container.find('img').first()
      articles.push({
        title,
        url: href.startsWith('http') ? href : `https://${sub}.okezone.com${href}`,
        // Prefer the lazy-load attribute; `src` may only hold a placeholder.
        image: imgEl.attr('data-src') || imgEl.attr('src') || null,
        category: $el.find('a.kanal').text().trim() || channel,
        date: $el.find('div.timego').text().trim() || null,
        source: 'okezone',
      })
    })

    if (!articles.length) return fail('Data tidak ditemukan')
    return ok(articles)
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
47
|
+
|
|
48
|
+
/**
 * Full Okezone article.
 *
 * Fetches the page, reads its JSON-LD metadata through `parseLdJson`, and
 * falls back to DOM selectors or raw-HTML regex matches for missing fields.
 *
 * @param {string} url - Article URL.
 * @returns {Promise<object>} `ok({ title, author, date, category, description,
 *   thumbnail, content, paragraphs })`, or `fail(...)` on error. Errors are
 *   logged and reported through `fail`, not thrown.
 */
const okenewsArticle = async (url) => {
  try {
    const html = await fetchHTML(url, { Referer: 'https://www.okezone.com' })
    const $ = cheerio.load(html)
    const ld = parseLdJson($, html)
    // JSON-LD `author` may be a single object or a list; use the first entry of a list.
    const ldAuthor = Array.isArray(ld?.author) ? ld.author[0] : ld?.author
    const paragraphs = parseParagraphs($, '.description.read p')
    return ok({
      title: ld?.headline || $('h1.title-content').first().text().trim(),
      author: ldAuthor?.name || $('.author, .nm-reporter').first().text().trim(),
      date: ld?.datePublished || html.match(/"datePublished"\s*:\s*"([^"]+)"/)?.[1] || '',
      category: $('.category').first().text().trim() || ld?.articleSection || '',
      description: ld?.description || html.match(/"description"\s*:\s*"([^"]+)"/)?.[1] || '',
      thumbnail: parseThumbnail(ld),
      content: paragraphs.join('\n\n'),
      paragraphs,
    })
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
71
|
+
|
|
72
|
+
module.exports = { okezone, okenewsArticle }
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
const { fetchHTML, cheerio, parseLdJson, parseThumbnail, parseParagraphs, ok, fail } = require('../utils')
|
|
2
|
+
|
|
3
|
+
/**
 * Latest headlines from a Republika channel listing.
 *
 * Loads `https://republika.co.id/berita/<channel>?page=<page>` and collects one
 * entry per `.link-mobile-headline` anchor that has both a title and an href.
 * Each entry carries `title`, `url`, `image`, `category`, `date` and
 * `source: 'republika'`.
 *
 * @param {object} [options]
 * @param {string} [options.channel='nasional'] - nasional | internasional | ekonomi |
 *   olahraga | hiburan | tekno | gaya_hidup ('gaya_hidup' is requested as 'gaya-hidup').
 * @param {number} [options.page=1] - Listing page number.
 * @param {number} [options.limit=20] - Maximum number of articles to collect.
 * @returns {Promise<object>} `ok(articles)` when at least one article was found,
 *   otherwise `fail(...)`. Errors are logged and reported through `fail`, not thrown.
 */
const republika = async (options = {}) => {
  try {
    const { channel = 'nasional', page = 1, limit = 20 } = options
    // The site spells this one channel with a hyphen in its URL.
    const ch = channel === 'gaya_hidup' ? 'gaya-hidup' : channel
    const url = `https://republika.co.id/berita/${ch}?page=${encodeURIComponent(page)}`
    const html = await fetchHTML(url, { Referer: 'https://republika.co.id' })
    const $ = cheerio.load(html)
    const articles = []

    $('.link-mobile-headline').each((i, el) => {
      // Returning false stops the cheerio iteration once the limit is reached.
      if (articles.length >= limit) return false
      const $el = $(el)
      const href = $el.attr('href') || ''
      const title = $el.find('h1.card-text, h2.card-text, .card-text').first().text().trim()
      if (!title || !href) return
      const image = $el.find('img.lazy').first().attr('data-original') || null
      // The <small> tags hold the metadata: the .text-primary one is the
      // category, the first of the others is the date (prefixed with "- ").
      const smalls = $el.find('small')
      const category = smalls.filter('.text-primary').first().text().trim() || channel
      const date = smalls.not('.text-primary').first().text().replace(/^-\s*/, '').trim() || null
      articles.push({
        title,
        url: href.startsWith('http') ? href : `https://republika.co.id${href}`,
        image,
        category,
        date,
        source: 'republika',
      })
    })

    if (!articles.length) return fail('Data tidak ditemukan')
    return ok(articles)
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
39
|
+
|
|
40
|
+
/**
 * Full Republika article.
 *
 * Fetches the page, reads its JSON-LD metadata through `parseLdJson`, and
 * falls back to DOM selectors or raw-HTML regex matches for missing fields.
 * The category is taken from the URL subdomain when there is one
 * (e.g. khazanah.republika.co.id -> 'khazanah'), otherwise from JSON-LD.
 *
 * @param {string} url - Article URL.
 * @returns {Promise<object>} `ok({ title, author, date, category, description,
 *   thumbnail, content, paragraphs })`, or `fail(...)` on error. Errors are
 *   logged and reported through `fail`, not thrown.
 */
const republikaArticle = async (url) => {
  try {
    const html = await fetchHTML(url, { Referer: 'https://republika.co.id' })
    const $ = cheerio.load(html)
    const ld = parseLdJson($, html)

    // Derive the category from the first hostname label, unless that label is
    // the bare domain or 'www'.
    let category = ''
    try {
      const sub = new URL(url).hostname.split('.')[0]
      if (sub !== 'republika' && sub !== 'www') category = sub
    } catch (_) {
      // Best effort only: an unparsable URL just leaves the category to JSON-LD.
    }
    category = category || ld?.articleSection || ''

    // JSON-LD `author` may be a single object or a list; use the first entry of a list.
    const ldAuthor = Array.isArray(ld?.author) ? ld.author[0] : ld?.author
    const paragraphs = parseParagraphs($, 'p.paragraphx')
    return ok({
      title: ld?.headline || $('h1').first().text().trim(),
      author: ldAuthor?.name || $('.write-by').first().text().replace(/^Oleh:\s*/i, '').trim(),
      date: ld?.datePublished || html.match(/"datePublished"\s*:\s*"([^"]+)"/)?.[1] || '',
      category,
      description: ld?.description || html.match(/"description"\s*:\s*"([^"]+)"/)?.[1] || '',
      thumbnail: parseThumbnail(ld),
      content: paragraphs.join('\n\n'),
      paragraphs,
    })
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
72
|
+
|
|
73
|
+
module.exports = { republika, republikaArticle }
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
const { fetchHTML, cheerio, parseLdJson, parseThumbnail, parseParagraphs, ok, fail } = require('../utils')
|
|
2
|
+
|
|
3
|
+
/**
 * Pick an absolute image URL from an <img> element, trying the usual
 * lazy-load attributes before `src`, then the first absolute srcset candidate.
 *
 * @param {Function} $ - Cheerio instance used to wrap `el`.
 * @param {*} el - The image element (anything `$()` accepts).
 * @returns {string|null} The first value starting with "http", or null.
 */
const pickImg = ($, el) => {
  const node = $(el)

  // Only values starting with "http" qualify, which also rules out data: URIs
  // and relative paths.
  const direct = ['data-src', 'data-original', 'data-lazy', 'data-image', 'src']
    .map((name) => node.attr(name) || '')
    .find((value) => value.startsWith('http'))
  if (direct) return direct

  const srcset = node.attr('data-srcset') || node.attr('srcset') || ''
  if (!srcset) return null

  // A srcset mixes URLs and descriptors; split on whitespace and commas and
  // keep the first absolute URL.
  const candidate = srcset.trim().split(/[\s,]+/).find((part) => part.startsWith('http'))
  return candidate || null
}
|
|
14
|
+
|
|
15
|
+
/**
 * Latest headlines from a Tribunnews channel page.
 *
 * Loads `https://www.tribunnews.com/<channel>?page=<page>` and collects one
 * entry per `h3 a` link pointing at tribunnews.com. Each entry carries
 * `title`, `url`, `image`, `category`, `date` and `source: 'tribunnews'`.
 *
 * @param {object} [options]
 * @param {string} [options.channel='nasional'] - nasional | regional | internasional |
 *   sport | bisnis | seleb | lifestyle | techno | otomotif
 * @param {number} [options.page=1] - Listing page number.
 * @param {number} [options.limit=20] - Maximum number of articles to collect.
 * @returns {Promise<object>} `ok(articles)` when at least one article was found,
 *   otherwise `fail(...)`. Errors are logged and reported through `fail`, not thrown.
 */
const tribun = async (options = {}) => {
  try {
    const { channel = 'nasional', page = 1, limit = 20 } = options
    const url = `https://www.tribunnews.com/${channel}?page=${page}`
    const html = await fetchHTML(url, { Referer: 'https://www.tribunnews.com' })
    const $ = cheerio.load(html)
    const articles = []

    $("h3 a[href*='tribunnews.com']").each((i, el) => {
      // Returning false stops the cheerio iteration once the limit is reached.
      if (articles.length >= limit) return false
      const $el = $(el)
      // Trim the title attribute as well, so a whitespace-only attribute falls
      // back to the link text instead of yielding a blank title.
      const title = ($el.attr('title') || '').trim() || $el.text().trim()
      const href = $el.attr('href') || ''
      if (!title || !href) return

      // Climb to the enclosing item: try li/article first, otherwise use the
      // parent of the nearest div.
      let container = $el.closest('li, article')
      if (!container.length) container = $el.closest('div').parent()

      // Take the first image in the container that yields a usable URL.
      let image = null
      container.find('img').each((_, img) => {
        if (image) return false
        image = pickImg($, img) || null
      })

      articles.push({
        title,
        url: href,
        image,
        category: container.find('h4 a.tsa-2').text().trim() || channel,
        date: container.find('time span').text().trim() || null,
        source: 'tribunnews',
      })
    })

    if (!articles.length) return fail('Data tidak ditemukan')
    return ok(articles)
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
58
|
+
|
|
59
|
+
/**
 * Full Tribunnews article.
 *
 * Fetches the page, reads its JSON-LD metadata through `parseLdJson`, and
 * falls back to DOM selectors or raw-HTML regex matches for missing fields.
 *
 * @param {string} url - Article URL.
 * @returns {Promise<object>} `ok({ title, author, date, category, description,
 *   thumbnail, content, paragraphs })`, or `fail(...)` on error. Errors are
 *   logged and reported through `fail`, not thrown.
 */
const tribunArticle = async (url) => {
  try {
    const html = await fetchHTML(url, { Referer: 'https://www.tribunnews.com' })
    const $ = cheerio.load(html)
    const ld = parseLdJson($, html)

    // Author: JSON-LD first (single object or list), then the inline
    // 'penulis': '...' value embedded in the page source.
    let author = ''
    if (ld?.author) {
      const ldAuthor = Array.isArray(ld.author) ? ld.author[0] : ld.author
      author = ldAuthor?.name || ''
    }
    if (!author) author = html.match(/'penulis'\s*:\s*'([^']+)'/)?.[1] || ''

    // Thumbnail: 1) JSON-LD, 2) og:image (server-rendered, not lazy-loaded),
    // 3) first matching image in the DOM.
    let thumbnail = parseThumbnail(ld)
    if (!thumbnail) thumbnail = $('meta[property="og:image"]').attr('content') || null
    if (!thumbnail) {
      const imgEl = $('.img-holder img, .wrap_img img, figure img, #article-body img, .photo img').first()
      if (imgEl.length) thumbnail = pickImg($, imgEl) || null
    }

    const paragraphs = parseParagraphs($, 'div#article-body p, div.txt-article p')
    return ok({
      title: ld?.headline || $('h1').first().text().trim(),
      author,
      date: ld?.datePublished || html.match(/"datePublished"\s*:\s*"([^"]+)"/)?.[1] || '',
      category: ld?.articleSection || $('.breadcrumb a').eq(1).text().trim(),
      description: ld?.description || html.match(/"description"\s*:\s*"([^"]+)"/)?.[1] || '',
      thumbnail,
      content: paragraphs.join('\n\n'),
      paragraphs,
    })
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
94
|
+
|
|
95
|
+
module.exports = { tribun, tribunArticle }
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
const { axios, ok, fail } = require('../utils')
|
|
2
|
+
|
|
3
|
+
/**
 * SimSimi / AI chat.
 * Source: widipe.com
 *
 * Sends `text` to https://widipe.com/simi and returns the first non-empty of
 * the response body's `result`, `message` or `response` fields as the answer.
 *
 * @param {string} text - Message to send; must be non-empty.
 * @returns {Promise<object>} `ok({ question, answer })`, or `fail(...)` when the
 *   text is empty, the reply has no answer, or the request fails. Errors are
 *   logged and reported through `fail`, not thrown.
 */
const simsimi = async (text) => {
  try {
    if (!text) return fail('Text tidak boleh kosong')

    const res = await axios.get('https://widipe.com/simi', {
      params: { text },
      headers: {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 13; Pixel 7)',
        'Accept': 'application/json',
      },
      timeout: 15000,
    })

    // Optional chaining so an empty body reads as "no answer" instead of throwing.
    const data = res.data
    const answer = data?.result || data?.message || data?.response
    if (!answer) return fail('Tidak ada respon')

    return ok({ question: text, answer })
  } catch (e) {
    console.log(e)

    // Prefer the server's error payload, then the HTTP status, then the message.
    const detail = e?.response?.data || e?.response?.status || e?.message
    // An object payload would otherwise be stringified as "[object Object]".
    const readable = detail !== null && typeof detail === 'object' ? JSON.stringify(detail) : detail
    return fail(readable)
  }
}
|
|
68
|
+
|
|
69
|
+
module.exports = { simsimi }
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
const { ok, fail } = require('../utils')
|
|
2
|
+
|
|
3
|
+
// Viewport presets keyed by device name. Each row is
// [name, width, height, mobile] and expands to { width, height, mobile }.
const DEVICES = Object.fromEntries(
  [
    ['mobile', 390, 844, true],
    ['tablet', 768, 1024, true],
    ['desktop', 1920, 1080, false],
    ['hd', 2560, 1440, false],
    ['4k', 3840, 2160, false],
  ].map(([name, width, height, mobile]) => [name, { width, height, mobile }]),
)
|
|
10
|
+
|
|
11
|
+
/**
 * Website screenshot via the Microlink API.
 *
 * Builds the Microlink request URL for a screenshot of `url` at the chosen
 * device preset. No HTTP request is made here; the caller fetches the
 * returned `screenshot` link.
 *
 * @param {string} url - Page to capture; "https://" is prepended when no
 *   http(s) scheme is present.
 * @param {string} [device='desktop'] - mobile | tablet | desktop | hd | 4k
 *   (case-insensitive). Unknown values use the desktop preset.
 * @returns {Promise<object>} `ok({ url, device, resolution, screenshot })`, or
 *   `fail(...)` when `url` is empty or an error occurs.
 */
const ssweb = async (url, device = 'desktop') => {
  try {
    if (!url) return fail('URL tidak boleh kosong')

    // Match the scheme case-insensitively so "HTTP://host" is not given a second scheme.
    const target = /^https?:\/\//i.test(url) ? url : 'https://' + url

    // Own-property check so inherited keys (e.g. 'constructor') fall back to
    // the desktop preset instead of producing an "undefinedxundefined" viewport.
    const key = String(device).toLowerCase()
    const preset = Object.prototype.hasOwnProperty.call(DEVICES, key) ? DEVICES[key] : DEVICES.desktop
    const { width, height, mobile } = preset

    // NOTE(review): Microlink's docs appear to spell viewport options as
    // viewport.width / viewport.height — confirm these parameter names are honoured.
    const screenshot = 'https://api.microlink.io/?' + new URLSearchParams({
      url: target,
      screenshot: 'true',
      meta: 'false',
      embed: 'screenshot.url',
      viewport: `${width}x${height}`,
      deviceScaleFactor: '1',
      mobile: String(mobile),
      fullPage: 'true',
    })

    return ok({ url: target, device, resolution: `${width}x${height}`, screenshot })
  } catch (e) {
    console.log(e)
    return fail(e)
  }
}
|
|
34
|
+
|
|
35
|
+
module.exports = { ssweb }
|
package/src/utils.js
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
const axios = require('axios')
|
|
2
|
+
const cheerio = require('cheerio')
|
|
3
|
+
|
|
4
|
+
// ── Headers ───────────────────────────────────────────────────────────────────
|
|
5
|
+
|
|
6
|
+
// Default request headers imitating Chrome 124 on an Android phone.
const HEADERS = {
  // Client identity and content negotiation.
  'User-Agent': 'Mozilla/5.0 (Linux; Android 13; Pixel 7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.6367.82 Mobile Safari/537.36',
  Accept: 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
  'Accept-Language': 'id-ID,id;q=0.9,en-US;q=0.8',
  // NOTE(review): 'br' is advertised here — confirm the installed axios
  // version can decode Brotli responses.
  'Accept-Encoding': 'gzip, deflate, br',

  // Fetch-metadata headers describing a user-initiated top-level navigation.
  'Sec-Fetch-Dest': 'document',
  'Sec-Fetch-Mode': 'navigate',
  'Sec-Fetch-Site': 'none',
  'Sec-Fetch-User': '?1',
  'Upgrade-Insecure-Requests': '1',
}
|
|
17
|
+
|
|
18
|
+
/**
 * GET a page as text, retrying failed requests.
 *
 * Makes up to three attempts with a 10 s timeout each, pausing 500 ms and then
 * 1000 ms between attempts. Any error triggers a retry, HTTP error statuses
 * included.
 *
 * @param {string} url - Page to fetch.
 * @param {object} [headers={}] - Extra headers, merged over the default HEADERS.
 * @returns {Promise<string>} The response body.
 * @throws {Error} "Gagal fetch <url>: <reason>" once every attempt has failed;
 *   the last underlying error is attached as `cause`.
 */
async function fetchHTML(url, headers = {}) {
  const MAX_ATTEMPTS = 3
  let last
  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
    try {
      const res = await axios.get(url, { headers: { ...HEADERS, ...headers }, timeout: 10000, responseType: 'text' })
      return res.data
    } catch (e) {
      last = e
      // Back off only when another attempt follows; sleeping after the final
      // failure would just delay the error.
      if (attempt < MAX_ATTEMPTS) await new Promise((r) => setTimeout(r, 500 * attempt))
    }
  }
  throw new Error(`Gagal fetch ${url}: ${last?.message ?? last}`, { cause: last })
}
|
|
31
|
+
|
|
32
|
+
/**
 * GET a URL with JSON-oriented headers and return the response body.
 *
 * Single attempt with a 10 s timeout. The default Referer and Origin point at
 * bmkg.go.id; caller-supplied headers override any default.
 *
 * @param {string} url - Endpoint to request.
 * @param {object} [headers={}] - Extra headers, merged over the defaults.
 * @returns {Promise<*>} `data` from the axios response.
 */
async function fetchJSON(url, headers = {}) {
  const defaults = {
    'User-Agent': 'Mozilla/5.0 (Linux; Android 13) AppleWebKit/537.36 Chrome/124.0 Mobile Safari/537.36',
    'Accept': 'application/json, text/plain, */*',
    'Accept-Language': 'id-ID,id;q=0.9',
    'Referer': 'https://www.bmkg.go.id/',
    'Origin': 'https://www.bmkg.go.id',
  }
  const config = {
    headers: { ...defaults, ...headers },
    timeout: 10000,
  }
  const { data } = await axios.get(url, config)
  return data
}
|
|
46
|
+
|
|
47
|
+
/**
 * Find the article's JSON-LD object on a page.
 *
 * Scans every `<script type="application/ld+json">` block and returns the last
 * node that has a `datePublished` field. A node may be the script's top-level
 * object, an entry of a top-level array, or an entry of an `@graph` list.
 * Scripts with malformed JSON are skipped.
 *
 * @param {Function} $ - Cheerio instance for the page.
 * @param {string} html - Raw page HTML (unused; kept for call-site compatibility).
 * @returns {object|null} The matching JSON-LD node, or null when none qualifies.
 */
function parseLdJson($, html) {
  let ld = null
  $('script[type="application/ld+json"]').each((_, el) => {
    let parsed
    try {
      parsed = JSON.parse($(el).html())
    } catch {
      return // malformed JSON-LD: skip this script
    }

    // Sites may wrap the article in a top-level array or an @graph list. The
    // top-level object is checked last so it still wins when it qualifies.
    let candidates
    if (Array.isArray(parsed)) candidates = parsed
    else if (Array.isArray(parsed?.['@graph'])) candidates = [...parsed['@graph'], parsed]
    else candidates = [parsed]

    for (const node of candidates) {
      if (node?.datePublished) ld = node
    }
  })
  return ld
}
|
|
54
|
+
|
|
55
|
+
/**
 * Extract a thumbnail URL from a JSON-LD object's `image` field.
 *
 * Accepts the shapes handled here: a URL string, an object with a `url`
 * property, or an array whose first entry is either of those.
 *
 * @param {object|null} ld - JSON-LD object (may be null or undefined).
 * @returns {string|null} The image URL, or null when none can be read. The
 *   result is always a string or null, never a raw JSON-LD object.
 */
function parseThumbnail(ld) {
  const image = ld?.image
  if (!image) return null

  // For a list, only the first entry is considered.
  const first = Array.isArray(image) ? image[0] : image
  if (typeof first === 'string') return first || null

  const url = first?.url
  return typeof url === 'string' && url ? url : null
}
|
|
62
|
+
|
|
63
|
+
/**
 * Collect the readable body paragraphs matched by a selector.
 *
 * Inline "baca juga: ..." fragments are removed from each paragraph. A
 * paragraph is kept only when the remaining text is longer than 30 characters
 * and does not start with a related-links or advertisement marker.
 *
 * @param {Function} $ - Cheerio instance for the page.
 * @param {string} selector - Selector matching the paragraph elements.
 * @returns {string[]} The cleaned paragraphs, in document order.
 */
function parseParagraphs($, selector) {
  const boilerplateStart = /^(baca juga|simak juga|artikel terkait|lihat juga|advertisement|iklan)/i
  const inlineReadAlso = /baca juga\s*:.*?(?=\n|$)/gi
  const kept = []

  $(selector).each((_, node) => {
    const cleaned = $(node).text().trim().replace(inlineReadAlso, '').trim()
    if (cleaned.length <= 30) return
    if (boilerplateStart.test(cleaned)) return
    kept.push(cleaned)
  })

  return kept
}
|
|
73
|
+
|
|
74
|
+
// ── Responses ─────────────────────────────────────────────────────────────────
|
|
75
|
+
|
|
76
|
+
// Success envelope: the payload tagged with the global creator name and status true.
const ok = (data) => {
  return { creator: global.creator, status: true, data }
}
|
|
77
|
+
// Failure envelope: status false plus a message taken from an Error's
// `message`, or from String(e) for any other value.
const fail = (e) => {
  const msg = e instanceof Error ? e.message : String(e)
  return { creator: global.creator, status: false, msg }
}
|
|
78
|
+
|
|
79
|
+
module.exports = { axios, cheerio, HEADERS, fetchHTML, fetchJSON, parseLdJson, parseThumbnail, parseParagraphs, ok, fail }
|