ada-derana-news-scraper 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +85 -0
- package/package.json +33 -0
- package/readme.md +177 -0
package/index.js
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
const express = require('express');
|
|
2
|
+
const axios = require('axios');
|
|
3
|
+
const cheerio = require('cheerio');
|
|
4
|
+
|
|
5
|
+
// Origin of the Ada Derana Sinhala site; relative links found while scraping
// are joined onto this value.
const BASE_URL = 'https://sinhala.adaderana.lk';

// Listing page downloaded by scrapeHotNews().
const HOT_NEWS_URL = `${BASE_URL}/sinhala-hot-news.php`;

// Per-request timeout handed to axios (axios interprets this as milliseconds).
const REQUEST_TIMEOUT_MS = 10000;

// Headers sent with every request: a desktop-Chrome style User-Agent and a
// language preference that ranks Sinhala (si-LK, si) ahead of English.
const REQUEST_HEADERS = {
  'User-Agent':
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120 Safari/537.36',
  'Accept-Language': 'si-LK,si;q=0.9,en;q=0.8'
};

// Shared HTTP client used by the scraper.
const axiosInstance = axios.create({
  timeout: REQUEST_TIMEOUT_MS,
  headers: REQUEST_HEADERS
});
|
|
16
|
+
|
|
17
|
+
/* ---------------- SCRAPER ---------------- */
|
|
18
|
+
/**
 * Scrapes the Ada Derana Sinhala "hot news" listing page.
 *
 * Downloads HOT_NEWS_URL with the shared axios instance, parses the HTML with
 * cheerio and builds one record for every `div.news-story` element found.
 *
 * This function is deliberately best-effort and never rejects: any failure
 * (network error, timeout, parsing problem) is logged through console.error
 * and an empty array is returned instead.
 *
 * @returns {Promise<Array<{
 *   title: string,
 *   image: (string|null),
 *   summary: string,
 *   url: (string|null),
 *   comments_url: (string|null),
 *   time: string
 * }>>} The scraped news items, or an empty array if scraping fails.
 */
async function scrapeHotNews() {
  // Resolve an href against the site root. Using URL resolution instead of
  // plain string concatenation keeps root-relative ("/news.php") and already
  // absolute ("https://...") hrefs well-formed. Returns null when the href is
  // missing or cannot be parsed, so one bad link never discards the whole list.
  const toAbsoluteUrl = (href) => {
    if (!href) {
      return null;
    }
    try {
      return new URL(href, `${BASE_URL}/`).href;
    } catch (err) {
      return null;
    }
  };

  try {
    const response = await axiosInstance.get(HOT_NEWS_URL);
    const $ = cheerio.load(response.data);
    const newsData = [];

    $('div.news-story').each((_, el) => {
      const story = $(el);
      const titleEl = story.find('h2 a');
      const imageEl = story.find('.thumb-image img');
      const commentsEl = story.find('.comments a');

      newsData.push({
        title: titleEl.text().trim(),
        image: imageEl.attr('src') || null,
        summary: story.find('.story-text > p').text().trim(),
        url: toAbsoluteUrl(titleEl.attr('href')),
        comments_url: toAbsoluteUrl(commentsEl.attr('href')),
        // The span text carries a "|" separator; drop the first one and trim.
        time: story.find('.comments span').text().replace('|', '').trim()
      });
    });

    return newsData;
  } catch (err) {
    console.error('[Scraper ERROR]', err.message);
    return []; // Best-effort contract: callers get an empty list on failure
  }
}
|
|
46
|
+
|
|
47
|
+
/* ---------------- API ---------------- */
|
|
48
|
+
/**
 * Builds an Express application that exposes the hot-news scraper over HTTP.
 *
 * Registered routes:
 *   GET /hotNews - runs scrapeHotNews() and replies with a JSON envelope
 *                  ({ success, code, creator, count, timestamp, data }).
 *                  If anything inside the handler throws, the error message is
 *                  logged and a 500 JSON envelope is sent instead.
 *
 * The app is returned without being started; the caller decides when and
 * where to call listen().
 *
 * @returns {object} The configured Express application.
 */
function createAdaDeranaNewsAPI() {
  // Attribution block embedded in every successful response.
  const creator = {
    name: 'H.A. Diluka Hetti Arachchi',
    github: 'https://github.com/DillaCodeX'
  };

  // Route handler for GET /hotNews.
  async function handleHotNews(req, res) {
    try {
      const data = await scrapeHotNews();
      const payload = {
        success: true,
        code: 200,
        creator,
        count: data.length,
        timestamp: new Date().toISOString(),
        data
      };

      res.json(payload);
    } catch (error) {
      console.error('[Ada Derana ERROR]', error.message);

      const failure = {
        success: false,
        code: 500,
        error: 'Failed to fetch AdaDerana hot news',
        timestamp: new Date().toISOString()
      };

      res.status(500).json(failure);
    }
  }

  const app = express();
  app.get('/hotNews', handleHotNews);
  return app;
}
|
|
80
|
+
|
|
81
|
+
/* ---------------- EXPORTS ---------------- */
// Public surface of the package: the Express app factory and the standalone
// scraper function.
module.exports = { createAdaDeranaNewsAPI, scrapeHotNews };
|
package/package.json
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "ada-derana-news-scraper",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"description": "A web scraper and API for fetching the latest news from Ada Derana, a popular Sri Lankan news website.",
|
|
5
|
+
"homepage": "https://github.com/DillaCodeX/ada-derana-news-scraper",
|
|
6
|
+
"bugs": {
|
|
7
|
+
"url": "https://github.com/DillaCodeX/ada-derana-news-scraper/issues"
|
|
8
|
+
},
|
|
9
|
+
"repository": {
|
|
10
|
+
"type": "git",
|
|
11
|
+
"url": "git+https://github.com/DillaCodeX/ada-derana-news-scraper.git"
|
|
12
|
+
},
|
|
13
|
+
"license": "MIT",
|
|
14
|
+
"author": "H.A. Diluka Hetti Arachchi (DillaCodeX)",
|
|
15
|
+
"type": "commonjs",
|
|
16
|
+
"main": "index.js",
|
|
17
|
+
"scripts": {
|
|
18
|
+
"test": "echo \"Error: no test specified\" && exit 1"
|
|
19
|
+
},
|
|
20
|
+
"keywords": [
|
|
21
|
+
"adaderana",
|
|
22
|
+
"sinhala-news",
|
|
23
|
+
"news-scraper",
|
|
24
|
+
"scraper",
|
|
25
|
+
"api",
|
|
26
|
+
"sri-lanka"
|
|
27
|
+
],
|
|
28
|
+
"dependencies": {
|
|
29
|
+
"axios": "^1.13.2",
|
|
30
|
+
"cheerio": "^1.1.2",
|
|
31
|
+
"express": "^5.2.1"
|
|
32
|
+
}
|
|
33
|
+
}
|
package/readme.md
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# 📰 Ada Derana News Scraper (v1.0.0)
|
|
2
|
+
|
|
3
|
+
A lightweight API and scraper for Ada Derana Sinhala news content. This package allows you to easily access hot news headlines, summaries, and links from the Ada Derana Sinhala news website.
|
|
4
|
+
|
|
5
|
+
## 🚀 Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
npm install ada-derana-news-scraper
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## ✨ Features
|
|
12
|
+
|
|
13
|
+
- 🌐 Express.js API endpoint for hot news
|
|
14
|
+
- 🔍 Standalone scraper function
|
|
15
|
+
- 🔌 Easy integration with existing Node.js applications
|
|
16
|
+
- 🖼️ Returns news images, summary, and links
|
|
17
|
+
|
|
18
|
+
## 📘 Usage
|
|
19
|
+
|
|
20
|
+
### 🖥️ Using a Custom Server (server.js)
|
|
21
|
+
|
|
22
|
+
You can create a simple server to run the API:
|
|
23
|
+
|
|
24
|
+
```javascript
|
|
25
|
+
// server.js
|
|
26
|
+
const express = require('express');
|
|
27
|
+
const adaDerana = require('ada-derana-news-scraper');
|
|
28
|
+
require('dotenv').config(); // Load environment variables
|
|
29
|
+
|
|
30
|
+
const server = adaDerana.createAdaDeranaNewsAPI();
|
|
31
|
+
const PORT = process.env.PORT || 3000;
|
|
32
|
+
|
|
33
|
+
server.listen(PORT, () => {
|
|
34
|
+
console.log(`Server is running on http://localhost:${PORT}/hotNews`);
|
|
35
|
+
});
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### 🛠️ As an Express API
|
|
39
|
+
|
|
40
|
+
```javascript
|
|
41
|
+
const { createAdaDeranaNewsAPI } = require('ada-derana-news-scraper');
|
|
42
|
+
require('dotenv').config(); // Load environment variables
|
|
43
|
+
|
|
44
|
+
const app = createAdaDeranaNewsAPI();
|
|
45
|
+
|
|
46
|
+
// Optional: Add more routes or middleware here
|
|
47
|
+
|
|
48
|
+
const PORT = process.env.PORT || 3000;
|
|
49
|
+
app.listen(PORT, () => {
|
|
50
|
+
console.log(`Ada Derana API server running on port ${PORT}`);
|
|
51
|
+
});
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
### 🧰 As a Scraper
|
|
55
|
+
|
|
56
|
+
```javascript
|
|
57
|
+
const { scrapeHotNews } = require('ada-derana-news-scraper');
|
|
58
|
+
|
|
59
|
+
async function getNews() {
|
|
60
|
+
try {
|
|
61
|
+
const news = await scrapeHotNews();
|
|
62
|
+
console.log(news);
|
|
63
|
+
} catch (error) {
|
|
64
|
+
console.error('Error scraping news:', error);
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
getNews();
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## 🔧 Environment Configuration
|
|
72
|
+
|
|
73
|
+
You can configure the application using environment variables:
|
|
74
|
+
|
|
75
|
+
### Available Environment Variables:
|
|
76
|
+
- `PORT`: The port number for the server (default: `3000`)
|
|
77
|
+
|
|
78
|
+
### Setting Environment Variables:
|
|
79
|
+
|
|
80
|
+
#### Using .env file (recommended for development):
|
|
81
|
+
Create a `.env` file in your project root:
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
PORT=8080
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Then install the dotenv package:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
npm install dotenv
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
And import it in your server file as shown in the usage examples above.
|
|
94
|
+
|
|
95
|
+
#### Using command line (for production):
|
|
96
|
+
|
|
97
|
+
```bash
|
|
98
|
+
# Linux/Mac
|
|
99
|
+
PORT=8080 node server.js
|
|
100
|
+
|
|
101
|
+
# Windows Command Prompt
|
|
102
|
+
set PORT=8080 && node server.js
|
|
103
|
+
|
|
104
|
+
# Windows PowerShell
|
|
105
|
+
$env:PORT=8080; node server.js
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## 📚 API Documentation
|
|
109
|
+
|
|
110
|
+
### 🔄 API Endpoints
|
|
111
|
+
|
|
112
|
+
#### 📋 GET `/hotNews`
|
|
113
|
+
|
|
114
|
+
Returns the latest hot news headlines from Ada Derana Sinhala.
|
|
115
|
+
|
|
116
|
+
**Response Format:**
|
|
117
|
+
```json
|
|
118
|
+
{
|
|
119
|
+
"success": true,
|
|
120
|
+
"code": 200,
|
|
121
|
+
"creator": {
|
|
122
|
+
"name": "H.A. Diluka Hetti Arachchi",
|
|
123
|
+
"github": "https://github.com/DillaCodeX"
|
|
124
|
+
},
|
|
125
|
+
"count": 5,
"timestamp": "2025-01-01T00:00:00.000Z",
|
|
126
|
+
"data": [
|
|
127
|
+
{
|
|
128
|
+
"title": "News headline",
|
|
129
|
+
"summary": "News summary",
|
|
130
|
+
"time": "Published time",
|
|
131
|
+
"url": "https://sinhala.adaderana.lk/news-url",
|
|
132
|
+
"image": "https://sinhala.adaderana.lk/image.jpg",
|
|
133
|
+
"comments_url": "https://sinhala.adaderana.lk/news-url#disqus_thread"
|
|
134
|
+
}
|
|
135
|
+
// More news items...
|
|
136
|
+
]
|
|
137
|
+
}
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
**Error Response Format:**
|
|
141
|
+
```json
|
|
142
|
+
{
|
|
143
|
+
"success": false,
|
|
144
|
+
"code": 500,
|
|
145
|
+
"error": "Failed to fetch AdaDerana hot news",
"timestamp": "2025-01-01T00:00:00.000Z"
|
|
146
|
+
}
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
### ⚙️ Functions
|
|
150
|
+
|
|
151
|
+
#### 🔧 `createAdaDeranaNewsAPI()`
|
|
152
|
+
|
|
153
|
+
Creates an Express application with the AdaDerana News API routes configured.
|
|
154
|
+
|
|
155
|
+
**Returns:** Express application instance with the following endpoints:
|
|
156
|
+
- `GET /hotNews` - Returns the latest hot news headlines
|
|
157
|
+
|
|
158
|
+
#### 🔍 `scrapeHotNews()`
|
|
159
|
+
|
|
160
|
+
Scrapes hot news headlines directly from the Ada Derana Sinhala website.
|
|
161
|
+
|
|
162
|
+
**Returns:** Promise that resolves to an array of news objects (an empty array if scraping fails; the error is logged rather than thrown). Each news object has the following properties:
|
|
163
|
+
|
|
164
|
+
- `title`: The headline of the news article
|
|
165
|
+
- `image`: News image URL
|
|
166
|
+
- `summary`: A snippet of the news article
|
|
167
|
+
- `url`: The full URL to the news article
|
|
168
|
+
- `comments_url`: Direct link to comments
|
|
169
|
+
- `time`: The published time of the article
|
|
170
|
+
|
|
171
|
+
## 📝 License
|
|
172
|
+
|
|
173
|
+
MIT
|
|
174
|
+
|
|
175
|
+
## 👨‍💻 Author
|
|
176
|
+
|
|
177
|
+
- H.A. Diluka Hetti Arachchi ([@DillaCodeX](https://github.com/DillaCodeX))
|