@aikeytake/social-automation 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +39 -0
- package/CLAUDE.md +256 -0
- package/CURRENT_CAPABILITIES.md +493 -0
- package/DATA_ORGANIZATION.md +416 -0
- package/IMPLEMENTATION_SUMMARY.md +287 -0
- package/INSTRUCTIONS.md +316 -0
- package/MASTER_PLAN.md +1096 -0
- package/README.md +280 -0
- package/config/sources.json +296 -0
- package/package.json +37 -0
- package/src/cli.js +197 -0
- package/src/fetchers/api.js +232 -0
- package/src/fetchers/hackernews.js +86 -0
- package/src/fetchers/linkedin.js +400 -0
- package/src/fetchers/linkedin_browser.js +167 -0
- package/src/fetchers/reddit.js +77 -0
- package/src/fetchers/rss.js +50 -0
- package/src/fetchers/twitter.js +194 -0
- package/src/index.js +346 -0
- package/src/query.js +316 -0
- package/src/utils/logger.js +74 -0
- package/src/utils/storage.js +134 -0
- package/src/writing-agents/QUICK-REFERENCE.md +111 -0
- package/src/writing-agents/WRITING-SKILLS-IMPROVEMENTS.md +273 -0
- package/src/writing-agents/utils/prompt-templates-improved.js +665 -0
package/src/cli.js
ADDED
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
import dotenv from 'dotenv';
|
|
2
|
+
import createLogger from './utils/logger.js';
|
|
3
|
+
import storage from './utils/storage.js';
|
|
4
|
+
import fs from 'fs';
|
|
5
|
+
import path from 'path';
|
|
6
|
+
import { fileURLToPath } from 'url';
|
|
7
|
+
|
|
8
|
+
dotenv.config();
|
|
9
|
+
|
|
10
|
+
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
11
|
+
const logger = createLogger('CLI');
|
|
12
|
+
|
|
13
|
+
// Print the current review queue, or a placeholder message when empty.
async function showQueue() {
  const items = storage.getQueueItems();

  if (items.length === 0) {
    console.log('\nš Queue is empty\n');
    return;
  }

  console.log(`\nš„ Queue (${items.length} items):\n`);

  let position = 1;
  for (const item of items) {
    console.log(`${position}. ${item.title}`);
    console.log(` Source: ${item.sourceName} | Score: ${item.processing?.relevanceScore || 0}`);
    console.log(` Status: ${item.status}`);
    console.log(` Added: ${new Date(item.queuedAt).toLocaleString()}`);
    console.log();
    position += 1;
  }
}
|
|
31
|
+
|
|
32
|
+
// List every stored draft with its source, tags, summary excerpt and age.
async function showDrafts() {
  const drafts = storage.readAll('drafts');

  if (drafts.length === 0) {
    console.log('\nš No drafts available\n');
    return;
  }

  console.log(`\nš Drafts (${drafts.length} items):\n`);

  for (const [i, draft] of drafts.entries()) {
    console.log(`${i + 1}. ${draft.title}`);
    console.log(` Source: ${draft.sourceName} | Tags: ${draft.tags?.join(', ') || 'N/A'}`);
    console.log(` Summary: ${draft.summary?.substring(0, 80)}...`);
    console.log(` Created: ${new Date(draft.processedAt).toLocaleString()}`);
    console.log();
  }
}
|
|
50
|
+
|
|
51
|
+
// Print the full detail view of one draft: metadata header, summary,
// suggested titles, the rewritten content, and a truncated excerpt of
// the original content. No-op (with a message) when the id is unknown.
async function showDraft(id) {
  const draft = storage.read('drafts', id);

  if (!draft) {
    console.log(`\nā Draft ${id} not found\n`);
    return;
  }

  // Banner + metadata header.
  console.log('\n' + '='.repeat(80));
  console.log(`š Draft: ${draft.title}`);
  console.log('='.repeat(80));
  console.log(`\nš Source: ${draft.sourceName}`);
  console.log(`š Link: ${draft.link}`);
  console.log(`š Published: ${new Date(draft.pubDate).toLocaleString()}`);
  console.log(`š·ļø Tags: ${draft.tags?.join(', ') || 'N/A'}`);

  console.log(`\nš” Summary:`);
  console.log(` ${draft.summary}`);

  // suggestedTitles may be absent; optional chaining skips the loop then.
  console.log(`\nš Suggested Titles:`);
  draft.suggestedTitles?.forEach((title, i) => {
    console.log(` ${i + 1}. ${title}`);
  });

  console.log(`\nāļø Rewritten Content:`);
  console.log('ā'.repeat(80));
  console.log(draft.rewrittenContent);
  console.log('ā'.repeat(80));

  // Original content is capped at 500 chars to keep terminal output short.
  console.log(`\nš Original Content:`);
  console.log('ā'.repeat(80));
  console.log(draft.content?.substring(0, 500) + '...');
  console.log('ā'.repeat(80));

  console.log();
}
|
|
87
|
+
|
|
88
|
+
// Approve a draft for publishing: stamp status/approvedAt, copy the
// record into the 'published' store, then remove it from 'drafts'.
// NOTE(review): write-then-delete is not atomic — a crash between the
// two calls leaves the item in both stores; confirm storage semantics.
async function approveDraft(id) {
  const draft = storage.read('drafts', id);

  if (!draft) {
    console.log(`\nā Draft ${id} not found\n`);
    return;
  }

  // Move to published
  draft.status = 'approved';
  draft.approvedAt = new Date().toISOString();
  storage.write('published', id, draft);
  storage.delete('drafts', id);

  console.log(`\nā Draft "${draft.title.substring(0, 50)}..." approved for publishing\n`);
}
|
|
104
|
+
|
|
105
|
+
// Remove one stored item from the given collection (queue/drafts/published),
// reporting whether anything was actually deleted.
async function deleteItem(type, id) {
  if (!storage.exists(type, id)) {
    console.log(`\nā Item ${id} not found in ${type}\n`);
    return;
  }
  storage.delete(type, id);
  console.log(`\nšļø Item ${id} deleted from ${type}\n`);
}
|
|
113
|
+
|
|
114
|
+
// List every approved/published item with its publish time and status.
async function showPublished() {
  const items = storage.readAll('published');

  if (items.length === 0) {
    console.log('\nā No published items yet\n');
    return;
  }

  console.log(`\nā Published (${items.length} items):\n`);

  for (const [index, item] of items.entries()) {
    console.log(`${index + 1}. ${item.title}`);
    console.log(` Published: ${new Date(item.approvedAt).toLocaleString()}`);
    console.log(` Status: ${item.status}`);
    console.log();
  }
}
|
|
131
|
+
|
|
132
|
+
/**
 * CLI entry point: dispatches on process.argv[2].
 * Commands: queue, drafts, draft <id>, approve <id>,
 * delete <type> <id>, published. Anything else prints usage.
 * Exits with code 1 when a required argument is missing.
 */
async function main() {
  const command = process.argv[2];
  const arg = process.argv[3];

  switch (command) {
    case 'queue':
      await showQueue();
      break;

    case 'drafts':
      await showDrafts();
      break;

    case 'draft':
      if (!arg) {
        console.log('\nā Please provide a draft ID\n');
        process.exit(1);
      }
      await showDraft(arg);
      break;

    case 'approve':
      if (!arg) {
        console.log('\nā Please provide a draft ID to approve\n');
        process.exit(1);
      }
      await approveDraft(arg);
      break;

    case 'delete': {
      // BUG FIX: previously only the first argument was checked, so
      // `delete drafts` (no ID) fell through to deleteItem(type, undefined).
      // Braces also scope these consts to this case only.
      const type = process.argv[3];
      const id = process.argv[4];
      if (!type || !id) {
        console.log('\nā Please provide item type (queue/drafts/published) and ID\n');
        process.exit(1);
      }
      await deleteItem(type, id);
      break;
    }

    case 'published':
      await showPublished();
      break;

    default:
      console.log(`
Usage: node src/cli.js [command] [arguments]

Commands:
  queue              - Show all queued items
  drafts             - Show all drafts
  draft <id>         - Show full draft content
  approve <id>       - Approve draft for publishing
  delete <type> <id> - Delete item (queue/drafts/published)
  published          - Show all published items

Examples:
  node src/cli.js queue
  node src/cli.js drafts
  node src/cli.js draft 1234567890-abc
  node src/cli.js approve 1234567890-abc
  node src/cli.js delete drafts 1234567890-abc
`);
  }
}

// Top-level launch; surface unexpected rejections instead of crashing silently.
main().catch(console.error);
|
|
@@ -0,0 +1,232 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import crypto from 'crypto';
|
|
3
|
+
import createLogger from '../utils/logger.js';
|
|
4
|
+
|
|
5
|
+
const logger = createLogger('APIFetcher');
|
|
6
|
+
|
|
7
|
+
// --- Auth ---
|
|
8
|
+
|
|
9
|
+
// Build HTTP auth headers for a source's configured auth scheme.
// Supported specs:
//   { type: 'bearer', tokenEnv }
//   { type: 'oauth2_client_credentials', clientIdEnv, clientSecretEnv, tokenUrl }
// Returns {} when auth is absent, unsupported, or required env vars are unset
// (with a warning). The OAuth2 path performs a client_credentials token exchange.
async function resolveAuthHeaders(auth) {
  if (!auth) return {};

  switch (auth.type) {
    case 'bearer': {
      const token = process.env[auth.tokenEnv];
      if (!token) {
        logger.warn(`Env var ${auth.tokenEnv} not set`);
        return {};
      }
      return { Authorization: `Bearer ${token}` };
    }

    case 'oauth2_client_credentials': {
      const clientId = process.env[auth.clientIdEnv];
      const clientSecret = process.env[auth.clientSecretEnv];
      if (!clientId || !clientSecret) {
        logger.warn(`Env vars ${auth.clientIdEnv} or ${auth.clientSecretEnv} not set`);
        return {};
      }
      // Exchange client credentials for a short-lived access token.
      const res = await axios.post(auth.tokenUrl, {
        client_id: clientId,
        client_secret: clientSecret,
        grant_type: 'client_credentials',
      });
      return { Authorization: `Bearer ${res.data.access_token}` };
    }

    default:
      return {};
  }
}
|
|
35
|
+
|
|
36
|
+
// --- Computed variables (runtime values injected into request params/variables) ---
|
|
37
|
+
// Supported types:
|
|
38
|
+
// { "type": "daysAgo", "days": 7 } ā ISO timestamp N days in the past
|
|
39
|
+
|
|
40
|
+
// Resolve config-declared runtime variables into concrete values.
// Currently supports { type: 'daysAgo', days: N } -> ISO timestamp N days
// in the past; unrecognised spec types are silently skipped.
function resolveComputedVariables(computedVars) {
  if (!computedVars) return {};

  const out = {};
  for (const key of Object.keys(computedVars)) {
    const spec = computedVars[key];
    if (spec.type !== 'daysAgo') continue;
    const millisAgo = spec.days * 86400000; // days -> ms
    out[key] = new Date(Date.now() - millisAgo).toISOString();
  }
  return out;
}
|
|
50
|
+
|
|
51
|
+
// --- Value resolution ---
|
|
52
|
+
// Mapping spec types:
|
|
53
|
+
// "fieldName" ā direct field lookup
|
|
54
|
+
// "https://example.com/{field}" ā template string
|
|
55
|
+
// ["field1", "field2"] ā join with \n\n
|
|
56
|
+
// { "path": "a.b.c" } ā deep dot-notation path
|
|
57
|
+
// { "path": "arr", "map": "node.name" } ā map over array, extract subpath
|
|
58
|
+
// { "path": "arr", "map": "node.name", "index": 0 } ā take one element
|
|
59
|
+
// { "field": "x", "split": ",", "index": 0 } ā split string, take element
|
|
60
|
+
// { "field": "x", "split": "," } ā split string into array
|
|
61
|
+
// null ā skip (keep default)
|
|
62
|
+
|
|
63
|
+
// Walk a dot-separated path into a nested object; yields undefined
// as soon as any segment is missing or nullish.
function getPath(obj, dotPath) {
  let cursor = obj;
  for (const segment of dotPath.split('.')) {
    cursor = cursor?.[segment];
  }
  return cursor;
}
|
|
66
|
+
|
|
67
|
+
// Resolve one mapping spec against a raw API item (spec grammar documented
// above). Returns undefined for null specs, unmatched lookups, or
// unrecognised spec shapes, so callers can keep their defaults.
function resolveValue(item, spec) {
  if (spec == null) return undefined;

  // String spec: template ("...{field}...") or plain field name.
  if (typeof spec === 'string') {
    return spec.includes('{')
      ? spec.replace(/\{(\w+)\}/g, (_, key) => item[key] ?? '')
      : item[spec];
  }

  // Array spec: gather non-empty fields, join with blank lines.
  if (Array.isArray(spec)) {
    const parts = [];
    for (const field of spec) {
      const v = item[field];
      if (v !== null && v !== undefined && v !== '') parts.push(v);
    }
    return parts.length > 0 ? parts.join('\n\n') : undefined;
  }

  if (typeof spec === 'object') {
    // Deep path lookup, optionally mapped over an array of sub-objects.
    if (spec.path !== undefined) {
      const val = getPath(item, spec.path);
      if (spec.map === undefined) return val;
      if (!Array.isArray(val)) return undefined;
      const mapped = [];
      for (const entry of val) {
        const extracted = getPath(entry, spec.map);
        if (extracted) mapped.push(extracted);
      }
      return spec.index !== undefined ? mapped[spec.index] : mapped;
    }

    // Field + split: turn a delimited string into an array (or one element).
    if (spec.field !== undefined) {
      const val = item[spec.field];
      if (val == null) return spec.index !== undefined ? undefined : [];
      const parts = String(val)
        .split(spec.split || ',')
        .map((s) => s.trim())
        .filter(Boolean);
      return spec.index !== undefined ? parts[spec.index] : parts;
    }
  }

  return undefined;
}
|
|
104
|
+
|
|
105
|
+
// Assign `value` at a dot-separated path inside `obj`, creating any
// missing intermediate objects along the way. Mutates obj in place.
function setPath(obj, dotPath, value) {
  const segments = dotPath.split('.');
  const last = segments.pop();
  let target = obj;
  for (const segment of segments) {
    if (!target[segment]) target[segment] = {};
    target = target[segment];
  }
  target[last] = value;
}
|
|
114
|
+
|
|
115
|
+
// --- Map one raw API item to the standard pipeline shape ---
|
|
116
|
+
|
|
117
|
+
// Convert one raw API item into the standard pipeline item shape by
// applying the source's declarative field mapping, then derive the
// link/url mirror, a stable md5 id, an ISO pubDate, and a weighted score.
function mapItem(raw, source) {
  // Baseline shape; mapping rules overwrite these defaults.
  const item = {
    source: source.id,
    sourceName: source.name,
    scraped_at: new Date().toISOString(),
    age_hours: 0,
    tags: [],
    engagement: { upvotes: 0, comments: 0 },
    metadata: { score: 0 },
  };

  for (const [outputPath, spec] of Object.entries(source.mapping)) {
    const resolved = resolveValue(raw, spec);
    if (resolved !== undefined) setPath(item, outputPath, resolved);
  }

  // Keep link and url in sync (whichever one the mapping produced).
  if (item.link && !item.url) item.url = item.link;
  if (item.url && !item.link) item.link = item.url;

  // Stable ID derived from the link (falls back to title, then random).
  const idSeed = item.link || item.title || String(Math.random());
  item.id = crypto.createHash('md5').update(idSeed).digest('hex');

  // Normalise pubDate to ISO; default to "now" when the source has none.
  if (item.pubDate && !String(item.pubDate).includes('T')) {
    item.pubDate = new Date(item.pubDate).toISOString();
  }
  if (!item.pubDate) item.pubDate = new Date().toISOString();

  // Source weight for trending score normalisation (default 1).
  item.metadata.weight = source.weight ?? 1;

  // Derive a weighted display score unless the mapping already set one.
  if (!item.metadata.score) {
    item.metadata.score = Math.round((item.engagement.upvotes || 0) * item.metadata.weight);
  }

  return item;
}
|
|
158
|
+
|
|
159
|
+
// --- Main entry point ---
|
|
160
|
+
|
|
161
|
+
// Fetch items from a declaratively-configured HTTP/GraphQL API source.
// `source` provides: request (url/method/params/body/headers/graphql/
// computedVariables), response (itemsPath/itemUnwrap), filter
// ({field,min,max}), auth, and mapping (used downstream by mapItem).
// Returns an array of mapped pipeline items; returns [] on auth or
// request failure (errors are logged, never thrown).
export default async function apiFetch(source) {
  const { request, response, filter, auth } = source;

  // Resolve auth first; a failed token exchange aborts this source only.
  let authHeaders;
  try {
    authHeaders = await resolveAuthHeaders(auth);
  } catch (err) {
    logger.error(`[${source.id}] Auth failed: ${err.message}`);
    return [];
  }

  // Precedence: defaults < auth headers < per-source header overrides.
  const headers = {
    'User-Agent': 'social-automation-scraper/1.0',
    ...authHeaders,
    ...(request.headers || {}),
  };

  // Runtime-computed values (e.g. daysAgo timestamps) merged into
  // params/variables below; computed values win over static config.
  const computedVars = resolveComputedVariables(request.computedVariables);

  let rawItems;
  try {
    let data;

    if (request.graphql) {
      // GraphQL: POST { query, variables }; longer 30s timeout.
      const variables = { ...request.graphql.variables, ...computedVars };
      const res = await axios.post(
        request.url,
        { query: request.graphql.query, variables },
        { headers, timeout: 30000 }
      );
      data = res.data;
    } else if (request.method === 'POST') {
      // Plain POST: body from config, computed vars go in the query string.
      const res = await axios.post(request.url, request.body || {}, {
        headers,
        params: { ...request.params, ...computedVars },
        timeout: 15000,
      });
      data = res.data;
    } else {
      // Default: GET with merged query params.
      const res = await axios.get(request.url, {
        headers,
        params: { ...request.params, ...computedVars },
        timeout: 15000,
      });
      data = res.data;
    }

    // Locate the item array in the payload via dot-path; otherwise
    // assume the payload itself is the array. Non-arrays become [].
    rawItems = response?.itemsPath ? getPath(data, response.itemsPath) : data;
    if (!Array.isArray(rawItems)) rawItems = [];

    // Optionally unwrap one level per item (e.g. GraphQL edge -> node),
    // dropping entries where the wrapper key is missing/falsy.
    if (response?.itemUnwrap) {
      rawItems = rawItems.map(i => i[response.itemUnwrap]).filter(Boolean);
    }
  } catch (err) {
    logger.error(`[${source.id}] Request failed: ${err.message}`);
    return [];
  }

  // Apply optional numeric range filter on a dot-path field
  // (missing field values are treated as 0).
  let filtered = rawItems;
  if (filter) {
    filtered = rawItems.filter(item => {
      const val = getPath(item, filter.field) ?? 0;
      return (filter.min === undefined || val >= filter.min) &&
        (filter.max === undefined || val <= filter.max);
    });
  }

  const items = filtered.map(raw => mapItem(raw, source));
  logger.success(`[${source.id}] Fetched ${items.length} items`);
  return items;
}
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
import axios from 'axios';
|
|
2
|
+
import createLogger from '../utils/logger.js';
|
|
3
|
+
|
|
4
|
+
const logger = createLogger('HackerNewsFetcher');
|
|
5
|
+
|
|
6
|
+
/**
 * Fetch recent, high-scoring, keyword-relevant top stories from the
 * Hacker News Firebase API.
 *
 * Reads config.trendingSources.hackernews:
 *   { enabled, keywords, minPoints, maxAgeHours, limit }
 * Returns [] when the source is disabled or the top-stories request fails;
 * individual story failures are logged at debug level and skipped.
 */
export default async function hnFetch(config) {
  const hnConfig = config.trendingSources?.hackernews;
  if (!hnConfig?.enabled) {
    return [];
  }

  const keywords = hnConfig.keywords || [];
  const minPoints = hnConfig.minPoints || 50;
  // Generalized: was hard-coded to 48; now configurable with the same default.
  const maxAge = hnConfig.maxAgeHours || 48; // hours
  const limit = hnConfig.limit || 30;
  const cutoff = new Date(Date.now() - maxAge * 60 * 60 * 1000);

  logger.info('Fetching top stories from Hacker News...');

  const allItems = [];

  try {
    // Get top story IDs
    const { data: topIds } = await axios.get(
      'https://hacker-news.firebaseio.com/v0/topstories.json'
    );

    // Over-fetch (2x limit) since many stories are filtered out below.
    for (const id of topIds.slice(0, limit * 2)) {
      try {
        const { data } = await axios.get(
          `https://hacker-news.firebaseio.com/v0/item/${id}.json`
        );

        const created = new Date(data.time * 1000);

        // Skip old stories
        if (created < cutoff) continue;

        // Skip low-score stories
        if ((data.score || 0) < minPoints) continue;

        // BUG FIX: an empty keyword list previously rejected every story
        // ([].some() is always false); treat "no keywords" as match-all.
        const searchText = (data.title + ' ' + (data.text || '')).toLowerCase();
        const hasKeyword =
          keywords.length === 0 ||
          keywords.some(kw => searchText.includes(kw.toLowerCase()));

        if (!hasKeyword) continue;

        allItems.push({
          id: `hn_${id}`,
          source: 'hackernews',
          sourceName: 'Hacker News',
          title: data.title,
          content: data.text || '',
          summary: (data.text || '').substring(0, 200),
          // Ask/Show HN posts have no external URL; link to the HN thread.
          url: data.url || `https://news.ycombinator.com/item?id=${id}`,
          hn_url: `https://news.ycombinator.com/item?id=${id}`,
          author: data.by,
          posted_at: new Date(data.time * 1000).toISOString(),
          scraped_at: new Date().toISOString(),
          age_hours: Math.floor((Date.now() - created.getTime()) / (1000 * 60 * 60)),
          engagement: {
            points: data.score || 0,
            comments: data.descendants || 0
          },
          metadata: {
            score: data.score || 0,
            type: data.type
          }
        });
      } catch (error) {
        // Best-effort: a single bad/deleted story never aborts the run.
        logger.debug(`Failed to fetch story ${id}`);
      }

      if (allItems.length >= limit) break;
    }

    logger.success(`Fetched ${allItems.length} stories from Hacker News`);
  } catch (error) {
    logger.error(`Error fetching HN stories: ${error.message}`);
  }

  return allItems;
}
|