alif-digest 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. package/.github/workflows/publish.yml +33 -0
  2. package/.husky/pre-commit +1 -0
  3. package/.prettierrc +7 -0
  4. package/LICENSE +21 -0
  5. package/README.md +131 -0
  6. package/dist/cli/commands/init.d.ts +1 -0
  7. package/dist/cli/commands/init.js +88 -0
  8. package/dist/cli/commands/init.js.map +1 -0
  9. package/dist/cli/commands/run.d.ts +4 -0
  10. package/dist/cli/commands/run.js +46 -0
  11. package/dist/cli/commands/run.js.map +1 -0
  12. package/dist/cli/commands/schedule.d.ts +1 -0
  13. package/dist/cli/commands/schedule.js +94 -0
  14. package/dist/cli/commands/schedule.js.map +1 -0
  15. package/dist/cli/index.d.ts +2 -0
  16. package/dist/cli/index.js +29 -0
  17. package/dist/cli/index.js.map +1 -0
  18. package/dist/core/config-manager.d.ts +14 -0
  19. package/dist/core/config-manager.js +65 -0
  20. package/dist/core/config-manager.js.map +1 -0
  21. package/dist/core/config-schema.d.ts +40 -0
  22. package/dist/core/config-schema.js +24 -0
  23. package/dist/core/config-schema.js.map +1 -0
  24. package/dist/core/default-keywords.d.ts +1 -0
  25. package/dist/core/default-keywords.js +10 -0
  26. package/dist/core/default-keywords.js.map +1 -0
  27. package/dist/core/filters/deduplicator.d.ts +10 -0
  28. package/dist/core/filters/deduplicator.js +34 -0
  29. package/dist/core/filters/deduplicator.js.map +1 -0
  30. package/dist/core/filters/keywords.d.ts +6 -0
  31. package/dist/core/filters/keywords.js +17 -0
  32. package/dist/core/filters/keywords.js.map +1 -0
  33. package/dist/core/orchestrator.d.ts +6 -0
  34. package/dist/core/orchestrator.js +44 -0
  35. package/dist/core/orchestrator.js.map +1 -0
  36. package/dist/core/pipeline.d.ts +15 -0
  37. package/dist/core/pipeline.js +140 -0
  38. package/dist/core/pipeline.js.map +1 -0
  39. package/dist/core/scheduler.d.ts +9 -0
  40. package/dist/core/scheduler.js +64 -0
  41. package/dist/core/scheduler.js.map +1 -0
  42. package/dist/core/scraper-types.d.ts +27 -0
  43. package/dist/core/scraper-types.js +3 -0
  44. package/dist/core/scraper-types.js.map +1 -0
  45. package/dist/core/scrapers/api-scraper.d.ts +4 -0
  46. package/dist/core/scrapers/api-scraper.js +46 -0
  47. package/dist/core/scrapers/api-scraper.js.map +1 -0
  48. package/dist/core/scrapers/arxiv-scraper.d.ts +4 -0
  49. package/dist/core/scrapers/arxiv-scraper.js +34 -0
  50. package/dist/core/scrapers/arxiv-scraper.js.map +1 -0
  51. package/dist/core/scrapers/json-scraper.d.ts +4 -0
  52. package/dist/core/scrapers/json-scraper.js +56 -0
  53. package/dist/core/scrapers/json-scraper.js.map +1 -0
  54. package/dist/core/scrapers/rss-scraper.d.ts +6 -0
  55. package/dist/core/scrapers/rss-scraper.js +32 -0
  56. package/dist/core/scrapers/rss-scraper.js.map +1 -0
  57. package/dist/core/scrapers/scrape-scraper.d.ts +4 -0
  58. package/dist/core/scrapers/scrape-scraper.js +49 -0
  59. package/dist/core/scrapers/scrape-scraper.js.map +1 -0
  60. package/dist/db/article-store.d.ts +22 -0
  61. package/dist/db/article-store.js +43 -0
  62. package/dist/db/article-store.js.map +1 -0
  63. package/dist/db/connection.d.ts +2 -0
  64. package/dist/db/connection.js +15 -0
  65. package/dist/db/connection.js.map +1 -0
  66. package/dist/db/migrate.d.ts +2 -0
  67. package/dist/db/migrate.js +60 -0
  68. package/dist/db/migrate.js.map +1 -0
  69. package/dist/db/schedule-store.d.ts +17 -0
  70. package/dist/db/schedule-store.js +23 -0
  71. package/dist/db/schedule-store.js.map +1 -0
  72. package/dist/db/source-health-store.d.ts +16 -0
  73. package/dist/db/source-health-store.js +31 -0
  74. package/dist/db/source-health-store.js.map +1 -0
  75. package/dist/providers/delivery/index.d.ts +18 -0
  76. package/dist/providers/delivery/index.js +2 -0
  77. package/dist/providers/delivery/index.js.map +1 -0
  78. package/dist/providers/delivery/slack.d.ts +6 -0
  79. package/dist/providers/delivery/slack.js +52 -0
  80. package/dist/providers/delivery/slack.js.map +1 -0
  81. package/dist/providers/delivery/webhook.d.ts +6 -0
  82. package/dist/providers/delivery/webhook.js +16 -0
  83. package/dist/providers/delivery/webhook.js.map +1 -0
  84. package/dist/providers/factory.d.ts +7 -0
  85. package/dist/providers/factory.js +33 -0
  86. package/dist/providers/factory.js.map +1 -0
  87. package/dist/providers/llm/anthropic.d.ts +12 -0
  88. package/dist/providers/llm/anthropic.js +43 -0
  89. package/dist/providers/llm/anthropic.js.map +1 -0
  90. package/dist/providers/llm/index.d.ts +10 -0
  91. package/dist/providers/llm/index.js +2 -0
  92. package/dist/providers/llm/index.js.map +1 -0
  93. package/dist/providers/llm/ollama.d.ts +12 -0
  94. package/dist/providers/llm/ollama.js +42 -0
  95. package/dist/providers/llm/ollama.js.map +1 -0
  96. package/dist/providers/llm/openrouter.d.ts +13 -0
  97. package/dist/providers/llm/openrouter.js +53 -0
  98. package/dist/providers/llm/openrouter.js.map +1 -0
  99. package/dist/providers/llm/utils.d.ts +6 -0
  100. package/dist/providers/llm/utils.js +45 -0
  101. package/dist/providers/llm/utils.js.map +1 -0
  102. package/dist/resources/default-feeds.json +650 -0
  103. package/dist/resources/index.d.ts +2 -0
  104. package/dist/resources/index.js +3 -0
  105. package/dist/resources/index.js.map +1 -0
  106. package/eslint.config.mjs +29 -0
  107. package/package.json +66 -0
  108. package/src/cli/commands/init.ts +94 -0
  109. package/src/cli/commands/run.ts +52 -0
  110. package/src/cli/commands/schedule.ts +99 -0
  111. package/src/cli/index.ts +34 -0
  112. package/src/core/config-manager.ts +72 -0
  113. package/src/core/config-schema.ts +31 -0
  114. package/src/core/default-keywords.ts +9 -0
  115. package/src/core/filters/deduplicator.ts +39 -0
  116. package/src/core/filters/keywords.ts +18 -0
  117. package/src/core/orchestrator.ts +47 -0
  118. package/src/core/pipeline.ts +171 -0
  119. package/src/core/scheduler.ts +74 -0
  120. package/src/core/scraper-types.ts +30 -0
  121. package/src/core/scrapers/api-scraper.ts +45 -0
  122. package/src/core/scrapers/arxiv-scraper.ts +35 -0
  123. package/src/core/scrapers/json-scraper.ts +54 -0
  124. package/src/core/scrapers/rss-scraper.ts +34 -0
  125. package/src/core/scrapers/scrape-scraper.ts +50 -0
  126. package/src/db/article-store.ts +75 -0
  127. package/src/db/connection.ts +17 -0
  128. package/src/db/migrate.ts +68 -0
  129. package/src/db/schedule-store.ts +41 -0
  130. package/src/db/source-health-store.ts +42 -0
  131. package/src/providers/delivery/index.ts +19 -0
  132. package/src/providers/delivery/slack.ts +55 -0
  133. package/src/providers/delivery/webhook.ts +16 -0
  134. package/src/providers/factory.ts +37 -0
  135. package/src/providers/llm/anthropic.ts +48 -0
  136. package/src/providers/llm/index.ts +8 -0
  137. package/src/providers/llm/ollama.ts +44 -0
  138. package/src/providers/llm/openrouter.ts +56 -0
  139. package/src/providers/llm/utils.ts +54 -0
  140. package/src/resources/default-feeds.json +650 -0
  141. package/src/resources/index.ts +3 -0
  142. package/tests/config-manager.test.ts +70 -0
  143. package/tests/db-integration.test.ts +72 -0
  144. package/tests/filters.test.ts +53 -0
  145. package/tests/llm-provider.test.ts +115 -0
  146. package/tsconfig.json +18 -0
  147. package/vitest.config.ts +13 -0
package/package.json ADDED
@@ -0,0 +1,66 @@
1
+ {
2
+ "name": "alif-digest",
3
+ "version": "1.0.1",
4
+ "description": "Autonomous AI Signal Digest CLI for tracking high-signal AI breakthroughs.",
5
+ "type": "module",
6
+ "bin": {
7
+ "alif": "./dist/cli/index.js"
8
+ },
9
+ "scripts": {
10
+ "build": "tsc",
11
+ "start": "node dist/cli/index.js",
12
+ "test": "vitest run",
13
+ "test:watch": "vitest",
14
+ "lint": "eslint .",
15
+ "format": "prettier --write .",
16
+ "dev": "tsx src/cli/index.ts",
17
+ "prepare": "husky"
18
+ },
19
+ "lint-staged": {
20
+ "*.{ts,js,mjs,json,md}": [
21
+ "prettier --write",
22
+ "eslint --fix"
23
+ ]
24
+ },
25
+ "keywords": [
26
+ "ai",
27
+ "cli",
28
+ "digest",
29
+ "newsletter",
30
+ "automation",
31
+ "ollama",
32
+ "anthropic",
33
+ "signal"
34
+ ],
35
+ "author": "qaribhaider",
36
+ "license": "MIT",
37
+ "dependencies": {
38
+ "axios": "^1.13.6",
39
+ "better-sqlite3": "^12.6.2",
40
+ "cheerio": "^1.2.0",
41
+ "commander": "^14.0.3",
42
+ "dotenv": "^17.3.1",
43
+ "openai": "^6.26.0",
44
+ "prompts": "^2.4.2",
45
+ "rss-parser": "^3.13.0",
46
+ "zod": "^4.3.6"
47
+ },
48
+ "devDependencies": {
49
+ "@eslint/js": "^10.0.1",
50
+ "@types/better-sqlite3": "^7.6.13",
51
+ "@types/node": "^25.3.3",
52
+ "@types/prompts": "^2.4.9",
53
+ "@typescript-eslint/eslint-plugin": "^8.56.1",
54
+ "@typescript-eslint/parser": "^8.56.1",
55
+ "eslint": "^10.0.2",
56
+ "globals": "^17.4.0",
57
+ "husky": "^9.1.7",
58
+ "lint-staged": "^16.3.2",
59
+ "prettier": "^3.8.1",
60
+ "ts-node": "^10.9.2",
61
+ "tsx": "^4.21.0",
62
+ "typescript": "^5.9.3",
63
+ "typescript-eslint": "^8.56.1",
64
+ "vitest": "^4.0.18"
65
+ }
66
+ }
@@ -0,0 +1,94 @@
1
+ import prompts from 'prompts';
2
+ import path from 'path';
3
+ import { ConfigManager } from '../../core/config-manager.js';
4
+ import { Config } from '../../core/config-schema.js';
5
+
6
/**
 * Interactive `alif init` wizard.
 *
 * Asks for the LLM provider (plus API key / model / Ollama base URL as applicable) and the
 * delivery targets, then writes the resulting configuration through `ConfigManager.save`.
 * The database and feeds paths are placed inside the configuration directory.
 *
 * Aborting any question (Ctrl+C / Esc) cancels the whole wizard without writing anything.
 */
export async function initCommand() {
  const configManager = ConfigManager.getInstance();
  const configDir = configManager.getConfigDir();

  console.log('--- Alif Initialization ---');

  const response = await prompts([
    {
      type: 'select',
      name: 'llmProvider',
      message: 'Which LLM provider would you like to use?',
      choices: [
        { title: 'Ollama (Local)', value: 'ollama' },
        { title: 'Anthropic', value: 'anthropic' },
        { title: 'OpenRouter', value: 'openrouter' },
      ],
    },
    {
      // Ollama runs locally and needs no key, so the question is skipped for it.
      type: (prev) => (prev !== 'ollama' ? 'text' : null),
      name: 'apiKey',
      message: 'Enter your API Key:',
    },
    {
      type: 'text',
      name: 'model',
      message: 'Enter the model name (e.g., llama3, claude-3-opus-20240229):',
      initial: (prev, values) => {
        if (values.llmProvider === 'ollama') return 'llama3';
        if (values.llmProvider === 'anthropic') return 'claude-3-5-sonnet-20240620';
        return 'meta-llama/llama-3-70b-instruct';
      },
    },
    {
      type: (prev, values) => (values.llmProvider === 'ollama' ? 'text' : null),
      name: 'baseUrl',
      message: 'Enter Ollama base URL:',
      initial: 'http://localhost:11434',
    },
    {
      type: 'multiselect',
      name: 'deliveryProviders',
      message: 'Where should we deliver the digest?',
      choices: [
        { title: 'Slack', value: 'slack' },
        { title: 'Generic Webhook', value: 'webhook' },
      ],
      min: 1,
    },
  ]);

  // When the user aborts, prompts resolves with only the answers collected so far.
  // `deliveryProviders` is the last question, so it is missing after an abort at ANY step;
  // checking only the first answer would let a later abort crash the loop below.
  if (!response.llmProvider || !Array.isArray(response.deliveryProviders)) {
    console.log('Initialization cancelled.');
    return;
  }

  const deliveryConfigs: Config['delivery'] = [];
  for (const provider of response.deliveryProviders) {
    const { webhookUrl } = await prompts({
      type: 'text',
      name: 'webhookUrl',
      message: `Enter ${provider} Webhook URL:`,
    });

    // An aborted URL prompt yields no answer; treat it as cancelling the wizard rather
    // than letting schema validation fail on an undefined URL.
    if (webhookUrl === undefined) {
      console.log('Initialization cancelled.');
      return;
    }

    deliveryConfigs.push({ type: provider, webhookUrl });
  }

  const config: Config = {
    llm: {
      provider: response.llmProvider,
      apiKey: response.apiKey,
      model: response.model,
      baseUrl: response.baseUrl,
    },
    delivery: deliveryConfigs,
    preferences: {
      signalThreshold: 60,
      maxItemsPerCategory: 5,
      sourceCooldownMinutes: 5,
      customKeywords: {},
    },
    dbPath: path.join(configDir, 'alif.db'),
    feedsPath: path.join(configDir, 'feeds.json'),
  };

  configManager.save(config);

  console.log(`\nConfiguration saved to ${configManager.getConfigFile()}`);
  console.log(`Database will be located at ${config.dbPath}`);
  console.log('\nAlif is ready! Run "alif run" to start (after adding feeds).');
}
@@ -0,0 +1,52 @@
1
+ import fs from 'fs';
2
+ import { ConfigManager } from '../../core/config-manager.js';
3
+ import { createDatabase } from '../../db/connection.js';
4
+ import { runMigrations } from '../../db/migrate.js';
5
+ import { Pipeline } from '../../core/pipeline.js';
6
+
7
/**
 * Executes the digest pipeline once.
 *
 * The feed list is read from `config.feedsPath`. If that file does not exist yet, it is
 * first created from the bundled default feeds and then read back like any other feeds file.
 *
 * @param config - Loaded configuration; `feedsPath` is read here, the rest is handed to `Pipeline`.
 * @param db - Open database handle, passed to `Pipeline` unchanged.
 * @param force - Forwarded to `Pipeline.run` (the CLI describes it as bypassing source cooldown).
 */
export async function runPipeline(config: any, db: any, force = false) {
  const pipeline = new Pipeline(config, db);
  const feedsFile = config.feedsPath;

  // Seed the feeds file on first use.
  const feedsFileMissing = !fs.existsSync(feedsFile);
  if (feedsFileMissing) {
    console.log(
      `[Alif] Feeds file not found at ${feedsFile}. Initializing with default sources...`,
    );

    const resources = await import('../../resources/index.js');
    const seedFeeds = resources.defaultFeeds;
    const serialized = JSON.stringify(seedFeeds, null, 2);
    fs.writeFileSync(feedsFile, serialized);

    console.log(
      `[Alif] Created default feeds.json at ${feedsFile} with ${seedFeeds.length} sources.`,
    );
  }

  const rawFeeds = fs.readFileSync(feedsFile, 'utf-8');
  await pipeline.run(JSON.parse(rawFeeds), force);
}
25
+
26
/**
 * Entry point for `alif run`.
 *
 * Loads the configuration, opens the database, applies migrations and runs the pipeline once.
 * The process always terminates explicitly: exit code 0 on success, 1 when Alif is not
 * initialized or when any step fails (the failure message is printed to stderr).
 *
 * @param options.force - Forwarded to `runPipeline`.
 */
export async function runCommand(options: { force?: boolean } = {}) {
  const configManager = ConfigManager.getInstance();

  if (!configManager.exists()) {
    console.error('Alif is not initialized. Run "alif init" first.');
    process.exit(1);
  }

  let db;
  let exitCode = 0;
  try {
    const config = configManager.load();
    db = createDatabase(config.dbPath);
    runMigrations(db);

    await runPipeline(config, db, options.force);
  } catch (error) {
    // Only record the failure here. Calling process.exit() inside the catch would end
    // the process before `finally` runs and leave the database handle unclosed.
    exitCode = 1;
    if (error instanceof Error) {
      console.error(`Error: ${error.message}`);
    } else {
      console.error('An unknown error occurred.');
    }
  } finally {
    if (db) db.close();
  }

  process.exit(exitCode);
}
@@ -0,0 +1,99 @@
1
+ import prompts from 'prompts';
2
+ import { ConfigManager } from '../../core/config-manager.js';
3
+ import { createDatabase } from '../../db/connection.js';
4
+ import { runMigrations } from '../../db/migrate.js';
5
+ import { Scheduler } from '../../core/scheduler.js';
6
+ import { runPipeline } from './run.js';
7
+
8
/**
 * Entry point for `alif schedule <action>`.
 *
 * Supported actions:
 * - `add`    – interactively create a schedule (name, frequency and, for `daily`, a time).
 * - `list`   – print all schedules as a table.
 * - `delete` – pick a schedule interactively and remove it.
 * - `check`  – let the scheduler run the pipeline for whatever is due.
 *
 * The database is always closed before the process terminates. The exit code is 0 on
 * success and 1 when Alif is not initialized, the action is unknown, or any step throws
 * (the error message is printed to stderr instead of being silently discarded).
 *
 * @param action - One of `add`, `list`, `delete`, `check`.
 */
export async function scheduleCommand(action: string) {
  const configManager = ConfigManager.getInstance();
  if (!configManager.exists()) {
    console.error('Alif is not initialized. Run "alif init" first.');
    // Same failure status as `alif run` reports for this situation.
    process.exitCode = 1;
    return;
  }

  let db: ReturnType<typeof createDatabase> | undefined;
  let exitCode = 0;

  try {
    const config = configManager.load();
    const database = createDatabase(config.dbPath);
    db = database;

    runMigrations(database);
    const scheduler = new Scheduler(database);

    if (action === 'add') {
      const response = await prompts([
        {
          type: 'text',
          name: 'name',
          message: 'Name for this schedule:',
          initial: 'Daily Digest',
        },
        {
          type: 'text',
          name: 'cron',
          message: 'Enter frequency (e.g. daily, hourly):',
          initial: 'daily',
        },
        {
          // A time of day is only asked for when the frequency entered above is "daily".
          type: (prev) => (prev === 'daily' ? 'text' : null),
          name: 'time',
          message: 'At what time? (HH:mm format, 24h):',
          initial: '09:00',
          validate: (val) =>
            /^([0-1]?[0-9]|2[0-3]):[0-5][0-9]$/.test(val) ? true : 'Please enter valid HH:mm time',
        },
      ]);

      if (response.name && response.cron) {
        const id = await scheduler.add(response.name, response.cron, response.time);
        console.log(
          `Schedule added! ID: ${id} (Runs ${response.cron}${response.time ? ` at ${response.time}` : ''})`,
        );
      }
    } else if (action === 'list') {
      const schedules = scheduler.list();
      if (schedules.length === 0) {
        console.log('No schedules found.');
      } else {
        console.table(
          schedules.map((s) => ({
            ID: s.id,
            Name: s.name,
            Frequency: s.cron,
            Time: s.scheduled_time || '-',
            Active: s.active ? 'Yes' : 'No',
            'Last Run': s.last_run || 'Never',
          })),
        );
      }
    } else if (action === 'delete') {
      const schedules = scheduler.list();
      if (schedules.length === 0) {
        console.log('No schedules to delete.');
      } else {
        const { id } = await prompts({
          type: 'select',
          name: 'id',
          message: 'Select schedule to delete:',
          choices: schedules.map((s) => ({ title: s.name, value: s.id })),
        });

        // `id` is undefined when the selection prompt is aborted.
        if (id) {
          scheduler.remove(id);
          console.log(`Schedule ${id} deleted.`);
        }
      }
    } else if (action === 'check') {
      console.log('[Scheduler] Checking for due tasks...');
      await scheduler.checkAndRun(async () => {
        await runPipeline(config, database);
      });
      console.log('[Scheduler] Check complete.');
    } else {
      console.error(`Unknown action: ${action}. Available: add, list, delete, check`);
      exitCode = 1;
    }
  } catch (error) {
    // Report the failure; exiting from `finally` with status 0 would hide it completely.
    exitCode = 1;
    if (error instanceof Error) {
      console.error(`Error: ${error.message}`);
    } else {
      console.error('An unknown error occurred.');
    }
  } finally {
    db?.close();
  }

  process.exit(exitCode);
}
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/env node
2
+ import { Command } from 'commander';
3
+ import { initCommand } from './commands/init.js';
4
+ import { runCommand } from './commands/run.js';
5
+ import { scheduleCommand } from './commands/schedule.js';
6
+
7
// Command-line entry point: registers the `init`, `run` and `schedule` sub-commands.
const program = new Command();

// The version string must match the "version" field in package.json (1.0.1 for this release).
program.name('alif').description('Alif - Daily AI Signal Digest CLI').version('1.0.1');

program
  .command('init')
  .description('Initialize Alif configuration')
  .action(async () => {
    await initCommand();
  });

program
  .command('run')
  .description('Run the AI Signal Digest pipeline')
  .option('-f, --force', 'Bypass source cooldown')
  .action(async (options) => {
    await runCommand(options);
  });

program
  .command('schedule <action>')
  .description('Manage digest schedules')
  .addHelpText('after', '\nActions: add, list, delete, check')
  .action(async (action) => {
    await scheduleCommand(action);
  });

// All action handlers are async, so parseAsync() is used: it returns a promise that settles
// with the handler, letting a rejection be reported here instead of surfacing as an
// unhandled promise rejection.
program.parseAsync(process.argv).catch((error: unknown) => {
  console.error(`Error: ${error instanceof Error ? error.message : String(error)}`);
  process.exitCode = 1;
});
@@ -0,0 +1,72 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+ import os from 'os';
4
+ import { Config, ConfigSchema } from './config-schema.js';
5
+
6
/**
 * Process-wide singleton that reads and writes the Alif configuration file
 * (`<home>/.config/alif/config.json`).
 *
 * The configuration is validated against `ConfigSchema` on both load and save, and the
 * validated value is cached in memory after the first successful load or save.
 */
export class ConfigManager {
  private static instance: ConfigManager | undefined;
  private config: Config | null = null;
  private readonly configDir: string;
  private readonly configFile: string;

  private constructor() {
    this.configDir = path.join(os.homedir(), '.config', 'alif');
    this.configFile = path.join(this.configDir, 'config.json');
  }

  /** Returns the shared instance, creating it on first use. */
  static getInstance(): ConfigManager {
    if (!ConfigManager.instance) {
      ConfigManager.instance = new ConfigManager();
    }
    return ConfigManager.instance;
  }

  /** Directory that holds the configuration file. */
  getConfigDir(): string {
    return this.configDir;
  }

  /** Absolute path of the configuration file. */
  getConfigFile(): string {
    return this.configFile;
  }

  /**
   * Returns the configuration, reading and validating the file on the first call and
   * serving the cached value afterwards.
   *
   * @throws Error when the file is missing, is not valid JSON, or fails schema validation.
   */
  load(): Config {
    if (this.config) return this.config;

    if (!fs.existsSync(this.configFile)) {
      throw new Error(`Configuration file not found at ${this.configFile}. Run 'alif init' first.`);
    }

    try {
      const raw = fs.readFileSync(this.configFile, 'utf-8');
      const parsed = JSON.parse(raw);
      this.config = ConfigSchema.parse(parsed);
      return this.config;
    } catch (error) {
      if (error instanceof Error) {
        throw new Error(`Failed to load configuration: ${error.message}`, { cause: error });
      }
      throw error;
    }
  }

  /**
   * Validates `config` and writes it to the configuration file, creating the directory
   * if needed.
   *
   * The schema-validated value (not the raw argument) is what gets persisted and cached,
   * so the file on disk always matches what a later `load()` would produce.
   *
   * @throws Error when validation or the write fails.
   */
  save(config: Config): void {
    if (!fs.existsSync(this.configDir)) {
      fs.mkdirSync(this.configDir, { recursive: true });
    }

    try {
      const validated = ConfigSchema.parse(config);
      // The file may contain an LLM API key, so it is created readable/writable by the
      // owner only. `mode` applies when the file is created; an existing file keeps its
      // current permissions.
      fs.writeFileSync(this.configFile, JSON.stringify(validated, null, 2), {
        encoding: 'utf-8',
        mode: 0o600,
      });
      this.config = validated;
    } catch (error) {
      if (error instanceof Error) {
        throw new Error(`Failed to save configuration: ${error.message}`, { cause: error });
      }
      throw error;
    }
  }

  /** Whether the configuration file currently exists on disk. */
  exists(): boolean {
    return fs.existsSync(this.configFile);
  }
}
@@ -0,0 +1,31 @@
1
+ import { z } from 'zod';
2
+
3
/** LLM back-ends that can be selected in the configuration. */
export const LlmProviderType = z.enum(['ollama', 'anthropic', 'openrouter']);

/** Delivery channels that can be selected in the configuration. */
export const DeliveryProviderType = z.enum(['slack', 'webhook']);

// `llm` section: provider and model are mandatory; API key and base URL are optional.
const llmSettings = z.object({
  provider: LlmProviderType,
  apiKey: z.string().optional(),
  model: z.string(),
  baseUrl: z.string().url().optional(),
});

// One entry of the `delivery` list: a channel type and the webhook URL to post to.
const deliveryTarget = z.object({
  type: DeliveryProviderType,
  webhookUrl: z.string().url(),
});

// `preferences` section; every field falls back to a default when omitted.
const preferenceSettings = z.object({
  signalThreshold: z.number().min(0).max(100).default(60),
  maxItemsPerCategory: z.number().min(1).default(5),
  sourceCooldownMinutes: z.number().min(0).default(5),
  customKeywords: z.record(z.string(), z.number()).default({}),
});

/** Schema of the complete configuration file. */
export const ConfigSchema = z.object({
  llm: llmSettings,
  delivery: z.array(deliveryTarget),
  preferences: preferenceSettings,
  dbPath: z.string(),
  feedsPath: z.string(),
});

export type Config = z.infer<typeof ConfigSchema>;
export type LlmProvider = z.infer<typeof LlmProviderType>;
export type DeliveryProvider = z.infer<typeof DeliveryProviderType>;
@@ -0,0 +1,9 @@
1
/**
 * Built-in keyword → weight table.
 *
 * NOTE(review): presumably these weights are fed to the keyword scorer together with the
 * user's `customKeywords` — confirm against the pipeline, which is not visible here.
 */
export const BASE_KEYWORDS: Record<string, number> = {
  'breakthrough': 20,
  'gpt-5': 30,
  'o1': 20,
  'deepseek': 25,
  'open source': 15,
  'agi': 15,
  'agent': 15,
};
@@ -0,0 +1,39 @@
1
+ import { ScrapedArticle } from '../scraper-types.js';
2
+
3
/** Options accepted by {@link Deduplicator}. */
export interface DeduplicatorOptions {
  /** Accepted for forward compatibility; the current URL-based implementation does not read it. */
  similarityThreshold?: number;
}

/**
 * Removes duplicate articles, where "duplicate" means "same normalized URL".
 *
 * Normalization ignores the fragment, a trailing slash on the path, well-known tracking
 * parameters and the order of the remaining query parameters. Meaningful query parameters
 * are kept, so URLs such as `…/item?id=1` and `…/item?id=2` stay distinct.
 */
export class Deduplicator {
  /** Query parameters that only carry campaign/click tracking and never identify content. */
  private static readonly TRACKING_PARAM = /^(utm_.+|fbclid|gclid|mc_cid|mc_eid)$/i;

  constructor(private readonly options: DeduplicatorOptions = {}) {}

  /**
   * Returns the articles with duplicates removed, in order of first appearance.
   * When two articles share a URL, the first one wins unless it has no content and the
   * later one does, in which case the later one replaces it (keeping the original position).
   */
  process(articles: ScrapedArticle[]): ScrapedArticle[] {
    const unique = new Map<string, ScrapedArticle>();

    for (const article of articles) {
      const urlKey = this.normalizeUrl(article.url);
      const existing = unique.get(urlKey);

      if (!existing || (!existing.content && article.content)) {
        unique.set(urlKey, article);
      }
    }

    return Array.from(unique.values());
  }

  /** Builds the comparison key for a URL; unparsable input is used verbatim. */
  private normalizeUrl(url: string): string {
    try {
      const parsed = new URL(url);

      // Copy the keys first: deleting while iterating the live collection skips entries.
      for (const name of Array.from(parsed.searchParams.keys())) {
        if (Deduplicator.TRACKING_PARAM.test(name)) {
          parsed.searchParams.delete(name);
        }
      }
      // Make `?a=1&b=2` and `?b=2&a=1` compare equal.
      parsed.searchParams.sort();

      const base = parsed.origin + parsed.pathname.replace(/\/$/, '');
      const query = parsed.searchParams.toString();
      return query ? `${base}?${query}` : base;
    } catch {
      return url;
    }
  }
}
@@ -0,0 +1,18 @@
1
+ import { ScrapedArticle } from '../scraper-types.js';
2
+
3
/**
 * Scores an article by summing the weights of all configured keywords found in it.
 *
 * Matching is a case-insensitive substring search over the title followed by the content,
 * and each keyword contributes its weight at most once per article.
 */
export class KeywordScorer {
  constructor(private keywords: Record<string, number>) {}

  /** Returns the total weight of the keywords that occur in the article's title or content. */
  score(article: ScrapedArticle): number {
    const haystack = `${article.title} ${article.content || ''}`.toLowerCase();

    return Object.entries(this.keywords).reduce(
      (total, [term, weight]) => (haystack.includes(term.toLowerCase()) ? total + weight : total),
      0,
    );
  }
}
@@ -0,0 +1,47 @@
1
+ import { ScraperSource, ScraperResult, BaseScraper } from './scraper-types.js';
2
+ import { RssScraper } from './scrapers/rss-scraper.js';
3
+ import { ArxivScraper } from './scrapers/arxiv-scraper.js';
4
+ import { ApiScraper } from './scrapers/api-scraper.js';
5
+ import { ScrapeScraper } from './scrapers/scrape-scraper.js';
6
+ import { JsonScraper } from './scrapers/json-scraper.js';
7
+
8
/**
 * Picks the right scraper for each source and runs all sources concurrently.
 *
 * Every source produces exactly one `ScraperResult`. A source that cannot be scraped,
 * because no scraper matches or because its scraper throws or rejects, yields a result
 * with `status: 'error'` instead of failing the whole batch.
 */
export class ScraperOrchestrator {
  private scrapers: Record<string, BaseScraper> = {};

  constructor() {
    this.scrapers.rss = new RssScraper();
    this.scrapers.arxiv = new ArxivScraper();
    this.scrapers.api = new ApiScraper();
    this.scrapers.scrape = new ScrapeScraper();
    this.scrapers.json = new JsonScraper();
  }

  /** Scrapes all sources in parallel; results are returned in the order of `sources`. */
  async runAll(sources: ScraperSource[]): Promise<ScraperResult[]> {
    return Promise.all(sources.map((source) => this.runOne(source)));
  }

  /** Scrapes a single source, converting any failure into an error result. */
  private async runOne(source: ScraperSource): Promise<ScraperResult> {
    try {
      const scraper = this.resolveScraper(source);
      if (!scraper) {
        return {
          source: source.id,
          status: 'error',
          items: [],
          error: `No scraper found for type: ${source.type}`,
        } as ScraperResult;
      }
      return await scraper.scrape(source);
    } catch (error) {
      // Isolate the failure so the other sources' results are not lost.
      return {
        source: source.id,
        status: 'error',
        items: [],
        error: error instanceof Error ? error.message : String(error),
      } as ScraperResult;
    }
  }

  /** Chooses the scraper for a source: well-known source ids first, then the declared type. */
  private resolveScraper(source: ScraperSource): BaseScraper | undefined {
    // Priority 1: Match by ID for specialized logic in reference
    if (source.id === 'hn') return this.scrapers.api;
    if (source.id === 'github_trending') return this.scrapers.scrape;
    if (source.id.startsWith('arxiv')) return this.scrapers.arxiv;
    if (source.id.startsWith('reddit')) return this.scrapers.json;

    // Priority 2: Use explicitly defined type. Only own keys count, so a type such as
    // "constructor" does not resolve to something inherited from Object.prototype.
    return Object.prototype.hasOwnProperty.call(this.scrapers, source.type)
      ? this.scrapers[source.type]
      : undefined;
  }
}