alif-digest 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. package/.github/workflows/publish.yml +33 -0
  2. package/.husky/pre-commit +1 -0
  3. package/.prettierrc +7 -0
  4. package/LICENSE +21 -0
  5. package/README.md +131 -0
  6. package/dist/cli/commands/init.d.ts +1 -0
  7. package/dist/cli/commands/init.js +88 -0
  8. package/dist/cli/commands/init.js.map +1 -0
  9. package/dist/cli/commands/run.d.ts +4 -0
  10. package/dist/cli/commands/run.js +46 -0
  11. package/dist/cli/commands/run.js.map +1 -0
  12. package/dist/cli/commands/schedule.d.ts +1 -0
  13. package/dist/cli/commands/schedule.js +94 -0
  14. package/dist/cli/commands/schedule.js.map +1 -0
  15. package/dist/cli/index.d.ts +2 -0
  16. package/dist/cli/index.js +29 -0
  17. package/dist/cli/index.js.map +1 -0
  18. package/dist/core/config-manager.d.ts +14 -0
  19. package/dist/core/config-manager.js +65 -0
  20. package/dist/core/config-manager.js.map +1 -0
  21. package/dist/core/config-schema.d.ts +40 -0
  22. package/dist/core/config-schema.js +24 -0
  23. package/dist/core/config-schema.js.map +1 -0
  24. package/dist/core/default-keywords.d.ts +1 -0
  25. package/dist/core/default-keywords.js +10 -0
  26. package/dist/core/default-keywords.js.map +1 -0
  27. package/dist/core/filters/deduplicator.d.ts +10 -0
  28. package/dist/core/filters/deduplicator.js +34 -0
  29. package/dist/core/filters/deduplicator.js.map +1 -0
  30. package/dist/core/filters/keywords.d.ts +6 -0
  31. package/dist/core/filters/keywords.js +17 -0
  32. package/dist/core/filters/keywords.js.map +1 -0
  33. package/dist/core/orchestrator.d.ts +6 -0
  34. package/dist/core/orchestrator.js +44 -0
  35. package/dist/core/orchestrator.js.map +1 -0
  36. package/dist/core/pipeline.d.ts +15 -0
  37. package/dist/core/pipeline.js +140 -0
  38. package/dist/core/pipeline.js.map +1 -0
  39. package/dist/core/scheduler.d.ts +9 -0
  40. package/dist/core/scheduler.js +64 -0
  41. package/dist/core/scheduler.js.map +1 -0
  42. package/dist/core/scraper-types.d.ts +27 -0
  43. package/dist/core/scraper-types.js +3 -0
  44. package/dist/core/scraper-types.js.map +1 -0
  45. package/dist/core/scrapers/api-scraper.d.ts +4 -0
  46. package/dist/core/scrapers/api-scraper.js +46 -0
  47. package/dist/core/scrapers/api-scraper.js.map +1 -0
  48. package/dist/core/scrapers/arxiv-scraper.d.ts +4 -0
  49. package/dist/core/scrapers/arxiv-scraper.js +34 -0
  50. package/dist/core/scrapers/arxiv-scraper.js.map +1 -0
  51. package/dist/core/scrapers/json-scraper.d.ts +4 -0
  52. package/dist/core/scrapers/json-scraper.js +56 -0
  53. package/dist/core/scrapers/json-scraper.js.map +1 -0
  54. package/dist/core/scrapers/rss-scraper.d.ts +6 -0
  55. package/dist/core/scrapers/rss-scraper.js +32 -0
  56. package/dist/core/scrapers/rss-scraper.js.map +1 -0
  57. package/dist/core/scrapers/scrape-scraper.d.ts +4 -0
  58. package/dist/core/scrapers/scrape-scraper.js +49 -0
  59. package/dist/core/scrapers/scrape-scraper.js.map +1 -0
  60. package/dist/db/article-store.d.ts +22 -0
  61. package/dist/db/article-store.js +43 -0
  62. package/dist/db/article-store.js.map +1 -0
  63. package/dist/db/connection.d.ts +2 -0
  64. package/dist/db/connection.js +15 -0
  65. package/dist/db/connection.js.map +1 -0
  66. package/dist/db/migrate.d.ts +2 -0
  67. package/dist/db/migrate.js +60 -0
  68. package/dist/db/migrate.js.map +1 -0
  69. package/dist/db/schedule-store.d.ts +17 -0
  70. package/dist/db/schedule-store.js +23 -0
  71. package/dist/db/schedule-store.js.map +1 -0
  72. package/dist/db/source-health-store.d.ts +16 -0
  73. package/dist/db/source-health-store.js +31 -0
  74. package/dist/db/source-health-store.js.map +1 -0
  75. package/dist/providers/delivery/index.d.ts +18 -0
  76. package/dist/providers/delivery/index.js +2 -0
  77. package/dist/providers/delivery/index.js.map +1 -0
  78. package/dist/providers/delivery/slack.d.ts +6 -0
  79. package/dist/providers/delivery/slack.js +52 -0
  80. package/dist/providers/delivery/slack.js.map +1 -0
  81. package/dist/providers/delivery/webhook.d.ts +6 -0
  82. package/dist/providers/delivery/webhook.js +16 -0
  83. package/dist/providers/delivery/webhook.js.map +1 -0
  84. package/dist/providers/factory.d.ts +7 -0
  85. package/dist/providers/factory.js +33 -0
  86. package/dist/providers/factory.js.map +1 -0
  87. package/dist/providers/llm/anthropic.d.ts +12 -0
  88. package/dist/providers/llm/anthropic.js +43 -0
  89. package/dist/providers/llm/anthropic.js.map +1 -0
  90. package/dist/providers/llm/index.d.ts +10 -0
  91. package/dist/providers/llm/index.js +2 -0
  92. package/dist/providers/llm/index.js.map +1 -0
  93. package/dist/providers/llm/ollama.d.ts +12 -0
  94. package/dist/providers/llm/ollama.js +42 -0
  95. package/dist/providers/llm/ollama.js.map +1 -0
  96. package/dist/providers/llm/openrouter.d.ts +13 -0
  97. package/dist/providers/llm/openrouter.js +53 -0
  98. package/dist/providers/llm/openrouter.js.map +1 -0
  99. package/dist/providers/llm/utils.d.ts +6 -0
  100. package/dist/providers/llm/utils.js +45 -0
  101. package/dist/providers/llm/utils.js.map +1 -0
  102. package/dist/resources/default-feeds.json +650 -0
  103. package/dist/resources/index.d.ts +2 -0
  104. package/dist/resources/index.js +3 -0
  105. package/dist/resources/index.js.map +1 -0
  106. package/eslint.config.mjs +29 -0
  107. package/package.json +66 -0
  108. package/src/cli/commands/init.ts +94 -0
  109. package/src/cli/commands/run.ts +52 -0
  110. package/src/cli/commands/schedule.ts +99 -0
  111. package/src/cli/index.ts +34 -0
  112. package/src/core/config-manager.ts +72 -0
  113. package/src/core/config-schema.ts +31 -0
  114. package/src/core/default-keywords.ts +9 -0
  115. package/src/core/filters/deduplicator.ts +39 -0
  116. package/src/core/filters/keywords.ts +18 -0
  117. package/src/core/orchestrator.ts +47 -0
  118. package/src/core/pipeline.ts +171 -0
  119. package/src/core/scheduler.ts +74 -0
  120. package/src/core/scraper-types.ts +30 -0
  121. package/src/core/scrapers/api-scraper.ts +45 -0
  122. package/src/core/scrapers/arxiv-scraper.ts +35 -0
  123. package/src/core/scrapers/json-scraper.ts +54 -0
  124. package/src/core/scrapers/rss-scraper.ts +34 -0
  125. package/src/core/scrapers/scrape-scraper.ts +50 -0
  126. package/src/db/article-store.ts +75 -0
  127. package/src/db/connection.ts +17 -0
  128. package/src/db/migrate.ts +68 -0
  129. package/src/db/schedule-store.ts +41 -0
  130. package/src/db/source-health-store.ts +42 -0
  131. package/src/providers/delivery/index.ts +19 -0
  132. package/src/providers/delivery/slack.ts +55 -0
  133. package/src/providers/delivery/webhook.ts +16 -0
  134. package/src/providers/factory.ts +37 -0
  135. package/src/providers/llm/anthropic.ts +48 -0
  136. package/src/providers/llm/index.ts +8 -0
  137. package/src/providers/llm/ollama.ts +44 -0
  138. package/src/providers/llm/openrouter.ts +56 -0
  139. package/src/providers/llm/utils.ts +54 -0
  140. package/src/resources/default-feeds.json +650 -0
  141. package/src/resources/index.ts +3 -0
  142. package/tests/config-manager.test.ts +70 -0
  143. package/tests/db-integration.test.ts +72 -0
  144. package/tests/filters.test.ts +53 -0
  145. package/tests/llm-provider.test.ts +115 -0
  146. package/tsconfig.json +18 -0
  147. package/vitest.config.ts +13 -0
@@ -0,0 +1,10 @@
1
+ export const BASE_KEYWORDS = { // Default keyword -> weight table; Pipeline spreads config.preferences.customKeywords over it and hands the result to KeywordScorer.
2
+ breakthrough: 20,
3
+ 'gpt-5': 30,
4
+ o1: 20, // NOTE(review): KeywordScorer matches with String.includes, so a short key like this also matches inside longer tokens.
5
+ deepseek: 25,
6
+ 'open source': 15,
7
+ agi: 15, // NOTE(review): as a plain substring this also matches words such as "imagine" or "magic".
8
+ agent: 15,
9
+ };
10
+ //# sourceMappingURL=default-keywords.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"default-keywords.js","sourceRoot":"","sources":["../../src/core/default-keywords.ts"],"names":[],"mappings":"AAAA,MAAM,CAAC,MAAM,aAAa,GAA2B;IACnD,YAAY,EAAE,EAAE;IAChB,OAAO,EAAE,EAAE;IACX,EAAE,EAAE,EAAE;IACN,QAAQ,EAAE,EAAE;IACZ,aAAa,EAAE,EAAE;IACjB,GAAG,EAAE,EAAE;IACP,KAAK,EAAE,EAAE;CACV,CAAC"}
@@ -0,0 +1,10 @@
1
+ import { ScrapedArticle } from '../scraper-types.js';
2
+ export interface DeduplicatorOptions { // Options bag accepted by the Deduplicator constructor.
3
+ similarityThreshold?: number; // NOTE(review): the compiled Deduplicator in this diff stores the options but never reads this value; confirm intended use.
4
+ }
5
+ export declare class Deduplicator { // Collapses articles that share the same normalized URL.
6
+ private options;
7
+ constructor(options?: DeduplicatorOptions); // The implementation defaults options to an empty object when omitted.
8
+ process(articles: ScrapedArticle[]): ScrapedArticle[]; // Returns one article per normalized URL, replacing a kept article only when it lacks content and the duplicate has some.
9
+ private normalizeUrl; // Builds the comparison key from a URL; unparseable URLs are used as-is.
10
+ }
@@ -0,0 +1,34 @@
1
+ export class Deduplicator {
2
+ options;
3
+ constructor(options = {}) {
4
+ this.options = options;
5
+ }
6
+ process(articles) {
7
+ const unique = new Map();
8
+ for (const article of articles) {
9
+ // Primary key: URL (simplified)
10
+ const urlKey = this.normalizeUrl(article.url);
11
+ if (!unique.has(urlKey)) {
12
+ unique.set(urlKey, article);
13
+ }
14
+ else {
15
+ // If we have an existing one, keep the one with content if possible
16
+ const existing = unique.get(urlKey);
17
+ if (!existing.content && article.content) {
18
+ unique.set(urlKey, article);
19
+ }
20
+ }
21
+ }
22
+ return Array.from(unique.values());
23
+ }
24
+ normalizeUrl(url) {
25
+ try {
26
+ const u = new URL(url);
27
+ return u.origin + u.pathname.replace(/\/$/, '');
28
+ }
29
+ catch {
30
+ return url;
31
+ }
32
+ }
33
+ }
34
+ //# sourceMappingURL=deduplicator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"deduplicator.js","sourceRoot":"","sources":["../../../src/core/filters/deduplicator.ts"],"names":[],"mappings":"AAMA,MAAM,OAAO,YAAY;IACH;IAApB,YAAoB,UAA+B,EAAE;QAAjC,YAAO,GAAP,OAAO,CAA0B;IAAG,CAAC;IAEzD,OAAO,CAAC,QAA0B;QAChC,MAAM,MAAM,GAAG,IAAI,GAAG,EAA0B,CAAC;QAEjD,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,gCAAgC;YAChC,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YAE9C,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,MAAM,CAAC,EAAE,CAAC;gBACxB,MAAM,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;YAC9B,CAAC;iBAAM,CAAC;gBACN,oEAAoE;gBACpE,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,MAAM,CAAE,CAAC;gBACrC,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,OAAO,CAAC,OAAO,EAAE,CAAC;oBACzC,MAAM,CAAC,GAAG,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;gBAC9B,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,CAAC,CAAC;IACrC,CAAC;IAEO,YAAY,CAAC,GAAW;QAC9B,IAAI,CAAC;YACH,MAAM,CAAC,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAC;YACvB,OAAO,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAClD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,GAAG,CAAC;QACb,CAAC;IACH,CAAC;CACF"}
@@ -0,0 +1,6 @@
1
+ import { ScrapedArticle } from '../scraper-types.js';
2
+ export declare class KeywordScorer { // Scores an article by the weighted keywords found in its text.
3
+ private keywords;
4
+ constructor(keywords: Record<string, number>); // Maps each keyword to the weight it adds when found.
5
+ score(article: ScrapedArticle): number; // Sums the weights of all keywords contained, case-insensitively, in the title plus content.
6
+ }
@@ -0,0 +1,17 @@
1
+ export class KeywordScorer {
2
+ keywords;
3
+ constructor(keywords) {
4
+ this.keywords = keywords;
5
+ }
6
+ score(article) {
7
+ let score = 0;
8
+ const text = `${article.title} ${article.content || ''}`.toLowerCase();
9
+ for (const [keyword, weight] of Object.entries(this.keywords)) {
10
+ if (text.includes(keyword.toLowerCase())) {
11
+ score += weight;
12
+ }
13
+ }
14
+ return score;
15
+ }
16
+ }
17
+ //# sourceMappingURL=keywords.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"keywords.js","sourceRoot":"","sources":["../../../src/core/filters/keywords.ts"],"names":[],"mappings":"AAEA,MAAM,OAAO,aAAa;IACJ;IAApB,YAAoB,QAAgC;QAAhC,aAAQ,GAAR,QAAQ,CAAwB;IAAG,CAAC;IAExD,KAAK,CAAC,OAAuB;QAC3B,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,MAAM,IAAI,GAAG,GAAG,OAAO,CAAC,KAAK,IAAI,OAAO,CAAC,OAAO,IAAI,EAAE,EAAE,CAAC,WAAW,EAAE,CAAC;QAEvE,KAAK,MAAM,CAAC,OAAO,EAAE,MAAM,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC9D,IAAI,IAAI,CAAC,QAAQ,CAAC,OAAO,CAAC,WAAW,EAAE,CAAC,EAAE,CAAC;gBACzC,KAAK,IAAI,MAAM,CAAC;YAClB,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;CACF"}
@@ -0,0 +1,6 @@
1
+ import { ScraperSource, ScraperResult } from './scraper-types.js';
2
+ export declare class ScraperOrchestrator { // Dispatches each source to a scraper implementation and gathers the results.
3
+ private scrapers;
4
+ constructor(); // The implementation registers the rss, arxiv, api, scrape and json scrapers.
5
+ runAll(sources: ScraperSource[]): Promise<ScraperResult[]>; // Resolves with one result per source, in input order; a source with no matching scraper yields an 'error' result.
6
+ }
@@ -0,0 +1,44 @@
1
+ import { RssScraper } from './scrapers/rss-scraper.js';
2
+ import { ArxivScraper } from './scrapers/arxiv-scraper.js';
3
+ import { ApiScraper } from './scrapers/api-scraper.js';
4
+ import { ScrapeScraper } from './scrapers/scrape-scraper.js';
5
+ import { JsonScraper } from './scrapers/json-scraper.js';
6
+ export class ScraperOrchestrator {
7
+ scrapers = {};
8
+ constructor() {
9
+ this.scrapers.rss = new RssScraper();
10
+ this.scrapers.arxiv = new ArxivScraper();
11
+ this.scrapers.api = new ApiScraper();
12
+ this.scrapers.scrape = new ScrapeScraper();
13
+ this.scrapers.json = new JsonScraper();
14
+ }
15
+ async runAll(sources) {
16
+ const tasks = sources.map((source) => {
17
+ let scraper;
18
+ // Priority 1: Match by ID for specialized logic in reference
19
+ if (source.id === 'hn')
20
+ scraper = this.scrapers.api;
21
+ else if (source.id === 'github_trending')
22
+ scraper = this.scrapers.scrape;
23
+ else if (source.id.startsWith('arxiv'))
24
+ scraper = this.scrapers.arxiv;
25
+ else if (source.id.startsWith('reddit'))
26
+ scraper = this.scrapers.json;
27
+ // Priority 2: Use explicitly defined type
28
+ if (!scraper) {
29
+ scraper = this.scrapers[source.type];
30
+ }
31
+ if (!scraper) {
32
+ return Promise.resolve({
33
+ source: source.id,
34
+ status: 'error',
35
+ items: [],
36
+ error: `No scraper found for type: ${source.type}`,
37
+ });
38
+ }
39
+ return scraper.scrape(source);
40
+ });
41
+ return Promise.all(tasks);
42
+ }
43
+ }
44
+ //# sourceMappingURL=orchestrator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"orchestrator.js","sourceRoot":"","sources":["../../src/core/orchestrator.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,UAAU,EAAE,MAAM,2BAA2B,CAAC;AACvD,OAAO,EAAE,YAAY,EAAE,MAAM,6BAA6B,CAAC;AAC3D,OAAO,EAAE,UAAU,EAAE,MAAM,2BAA2B,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,8BAA8B,CAAC;AAC7D,OAAO,EAAE,WAAW,EAAE,MAAM,4BAA4B,CAAC;AAEzD,MAAM,OAAO,mBAAmB;IACtB,QAAQ,GAAgC,EAAE,CAAC;IAEnD;QACE,IAAI,CAAC,QAAQ,CAAC,GAAG,GAAG,IAAI,UAAU,EAAE,CAAC;QACrC,IAAI,CAAC,QAAQ,CAAC,KAAK,GAAG,IAAI,YAAY,EAAE,CAAC;QACzC,IAAI,CAAC,QAAQ,CAAC,GAAG,GAAG,IAAI,UAAU,EAAE,CAAC;QACrC,IAAI,CAAC,QAAQ,CAAC,MAAM,GAAG,IAAI,aAAa,EAAE,CAAC;QAC3C,IAAI,CAAC,QAAQ,CAAC,IAAI,GAAG,IAAI,WAAW,EAAE,CAAC;IACzC,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,OAAwB;QACnC,MAAM,KAAK,GAAG,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE;YACnC,IAAI,OAAgC,CAAC;YAErC,6DAA6D;YAC7D,IAAI,MAAM,CAAC,EAAE,KAAK,IAAI;gBAAE,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC;iBAC/C,IAAI,MAAM,CAAC,EAAE,KAAK,iBAAiB;gBAAE,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC;iBACpE,IAAI,MAAM,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC;gBAAE,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC;iBACjE,IAAI,MAAM,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC;gBAAE,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC;YAEtE,0CAA0C;YAC1C,IAAI,CAAC,OAAO,EAAE,CAAC;gBACb,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;YACvC,CAAC;YAED,IAAI,CAAC,OAAO,EAAE,CAAC;gBACb,OAAO,OAAO,CAAC,OAAO,CAAC;oBACrB,MAAM,EAAE,MAAM,CAAC,EAAE;oBACjB,MAAM,EAAE,OAAO;oBACf,KAAK,EAAE,EAAE;oBACT,KAAK,EAAE,8BAA8B,MAAM,CAAC,IAAI,EAAE;iBAClC,CAAC,CAAC;YACtB,CAAC;YACD,OAAO,OAAO,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAChC,CAAC,CAAC,CAAC;QAEH,OAAO,OAAO,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;IAC5B,CAAC;CACF"}
@@ -0,0 +1,15 @@
1
+ import { Config } from './config-schema.js';
2
+ import { ScraperSource } from './scraper-types.js';
3
+ import { Database } from 'better-sqlite3';
4
+ export declare class Pipeline { // End-to-end digest run: scrape, filter, score, analyze with the LLM, persist and deliver.
5
+ private config;
6
+ private orchestrator;
7
+ private scorer;
8
+ private deduplicator;
9
+ private articleStore;
10
+ private healthStore;
11
+ private llm;
12
+ private delivery;
13
+ constructor(config: Config, db: Database); // The implementation builds the stores on `db` and the LLM/delivery providers from `config`.
14
+ run(sources: ScraperSource[], force?: boolean): Promise<any[]>; // `force` (default false) bypasses the per-source cooldown; resolves with the delivered items, or [] when nothing qualifies.
15
+ }
@@ -0,0 +1,140 @@
1
+ import { ScraperOrchestrator } from './orchestrator.js';
2
+ import { KeywordScorer } from './filters/keywords.js';
3
+ import { Deduplicator } from './filters/deduplicator.js';
4
+ import { ArticleStore } from '../db/article-store.js';
5
+ import { SourceHealthStore } from '../db/source-health-store.js';
6
+ import { ProviderFactory } from '../providers/factory.js';
7
+ import { BASE_KEYWORDS } from './default-keywords.js';
8
+ export class Pipeline {
9
+ config;
10
+ orchestrator;
11
+ scorer;
12
+ deduplicator;
13
+ articleStore;
14
+ healthStore;
15
+ llm;
16
+ delivery;
17
+ constructor(config, db) {
18
+ this.config = config;
19
+ this.orchestrator = new ScraperOrchestrator();
20
+ const mergedKeywords = {
21
+ ...BASE_KEYWORDS,
22
+ ...(config.preferences.customKeywords || {}),
23
+ };
24
+ this.scorer = new KeywordScorer(mergedKeywords);
25
+ this.deduplicator = new Deduplicator();
26
+ this.articleStore = new ArticleStore(db);
27
+ this.healthStore = new SourceHealthStore(db);
28
+ this.llm = ProviderFactory.createLLM(config);
29
+ this.delivery = ProviderFactory.createDelivery(config);
30
+ }
31
+ async run(sources, force = false) {
32
+ const cooldown = this.config.preferences.sourceCooldownMinutes;
33
+ const activeSources = sources.filter((s) => {
34
+ if (!force && this.healthStore.isThrottled(s.id, cooldown)) {
35
+ console.log(`[Pipeline] Skipping ${s.id} (last check was < ${cooldown}m ago)`);
36
+ return false;
37
+ }
38
+ return true;
39
+ });
40
+ let enrichedItems = [];
41
+ const digestDate = new Date().toISOString().split('T')[0];
42
+ let newItemsCount = 0;
43
+ if (activeSources.length > 0) {
44
+ console.log(`[Pipeline] Scraping ${activeSources.length} sources...`);
45
+ const results = await this.orchestrator.runAll(activeSources);
46
+ // Record health
47
+ for (const res of results) {
48
+ this.healthStore.record({
49
+ source: res.source,
50
+ status: res.status,
51
+ items_found: res.items.length,
52
+ error_message: res.error,
53
+ });
54
+ }
55
+ const allArticles = results.flatMap((r) => r.items);
56
+ console.log(`[Pipeline] Found ${allArticles.length} raw items.`);
57
+ // Filtering
58
+ const latestTimestamp = this.articleStore.getLatestTimestamp();
59
+ let newItems = allArticles;
60
+ if (latestTimestamp) {
61
+ const lastTime = new Date(latestTimestamp).getTime();
62
+ newItems = allArticles.filter((a) => {
63
+ if (!a.published_at)
64
+ return true;
65
+ return new Date(a.published_at).getTime() > lastTime;
66
+ });
67
+ }
68
+ newItemsCount = newItems.length;
69
+ console.log(`[Pipeline] ${newItemsCount} new items after incremental filter.`);
70
+ // Scoring & Deduplication
71
+ const unique = this.deduplicator.process(newItems);
72
+ const highSignal = unique
73
+ .map((a) => ({ ...a, score: this.scorer.score(a) }))
74
+ .filter((a) => a.score >= this.config.preferences.signalThreshold)
75
+ .sort((a, b) => b.score - a.score);
76
+ console.log(`[Pipeline] ${highSignal.length} high-signal items selected.`);
77
+ if (highSignal.length > 0) {
78
+ // LLM Analysis
79
+ console.log('[Pipeline] Analyzing high-signal items with LLM...');
80
+ const analysisResults = await this.llm.analyze(highSignal.map((a) => ({ title: a.title, content: a.content })));
81
+ enrichedItems = highSignal.map((article, idx) => ({
82
+ ...article,
83
+ summary: analysisResults[idx]?.summary || null,
84
+ category: analysisResults[idx]?.category || 'Uncategorized',
85
+ }));
86
+ // Persistence
87
+ for (const item of enrichedItems) {
88
+ this.articleStore.upsert({
89
+ ...item,
90
+ digest_date: digestDate,
91
+ delivered: 0,
92
+ });
93
+ }
94
+ }
95
+ else {
96
+ console.log('[Pipeline] No high-signal items in this batch.');
97
+ }
98
+ }
99
+ else {
100
+ console.log('[Pipeline] All sources are cooled down. Checking database for pending items...');
101
+ }
102
+ // 5. Build Final Digest (Current + Pending from last 24h)
103
+ const pendingItems = this.articleStore.getPendingHighSignal(this.config.preferences.signalThreshold, 24);
104
+ // Merge and deduplicate by ID (latest run wins)
105
+ const allToDeliverMap = new Map();
106
+ for (const item of pendingItems)
107
+ allToDeliverMap.set(item.id, item);
108
+ for (const item of enrichedItems)
109
+ allToDeliverMap.set(item.id, item);
110
+ const finalItemsToDeliver = Array.from(allToDeliverMap.values());
111
+ if (finalItemsToDeliver.length === 0) {
112
+ console.log('[Pipeline] No high-signal items to deliver.');
113
+ console.log('[Pipeline] Execution complete! 🥂');
114
+ return [];
115
+ }
116
+ // Delivery
117
+ const digest = {
118
+ items: finalItemsToDeliver.map((item) => ({
119
+ title: item.title,
120
+ url: item.url,
121
+ summary: item.summary,
122
+ category: item.category,
123
+ source: item.source,
124
+ score: item.score,
125
+ })),
126
+ metadata: {
127
+ total_new_items: newItemsCount,
128
+ total_selected: finalItemsToDeliver.length,
129
+ date: digestDate,
130
+ },
131
+ };
132
+ console.log(`[Pipeline] Delivering ${finalItemsToDeliver.length} items to ${this.delivery.length} channels...`);
133
+ await Promise.all(this.delivery.map((d) => d.send(digest)));
134
+ // Mark as delivered
135
+ this.articleStore.markAsDelivered(finalItemsToDeliver.map((a) => a.id));
136
+ console.log('[Pipeline] Execution complete! 🥂');
137
+ return finalItemsToDeliver;
138
+ }
139
+ }
140
+ //# sourceMappingURL=pipeline.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../src/core/pipeline.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,mBAAmB,EAAE,MAAM,mBAAmB,CAAC;AAExD,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AACtD,OAAO,EAAE,YAAY,EAAE,MAAM,2BAA2B,CAAC;AACzD,OAAO,EAAE,YAAY,EAAE,MAAM,wBAAwB,CAAC;AACtD,OAAO,EAAE,iBAAiB,EAAE,MAAM,8BAA8B,CAAC;AAEjE,OAAO,EAAE,eAAe,EAAE,MAAM,yBAAyB,CAAC;AAG1D,OAAO,EAAE,aAAa,EAAE,MAAM,uBAAuB,CAAC;AAEtD,MAAM,OAAO,QAAQ;IAUT;IATF,YAAY,CAAsB;IAClC,MAAM,CAAgB;IACtB,YAAY,CAAe;IAC3B,YAAY,CAAe;IAC3B,WAAW,CAAoB;IAC/B,GAAG,CAAc;IACjB,QAAQ,CAAqB;IAErC,YACU,MAAc,EACtB,EAAY;QADJ,WAAM,GAAN,MAAM,CAAQ;QAGtB,IAAI,CAAC,YAAY,GAAG,IAAI,mBAAmB,EAAE,CAAC;QAE9C,MAAM,cAAc,GAAG;YACrB,GAAG,aAAa;YAChB,GAAG,CAAC,MAAM,CAAC,WAAW,CAAC,cAAc,IAAI,EAAE,CAAC;SAC7C,CAAC;QACF,IAAI,CAAC,MAAM,GAAG,IAAI,aAAa,CAAC,cAAc,CAAC,CAAC;QAEhD,IAAI,CAAC,YAAY,GAAG,IAAI,YAAY,EAAE,CAAC;QACvC,IAAI,CAAC,YAAY,GAAG,IAAI,YAAY,CAAC,EAAE,CAAC,CAAC;QACzC,IAAI,CAAC,WAAW,GAAG,IAAI,iBAAiB,CAAC,EAAE,CAAC,CAAC;QAC7C,IAAI,CAAC,GAAG,GAAG,eAAe,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;QAC7C,IAAI,CAAC,QAAQ,GAAG,eAAe,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC;IACzD,CAAC;IAED,KAAK,CAAC,GAAG,CAAC,OAAwB,EAAE,KAAK,GAAG,KAAK;QAC/C,MAAM,QAAQ,GAAG,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,qBAAqB,CAAC;QAC/D,MAAM,aAAa,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;YACzC,IAAI,CAAC,KAAK,IAAI,IAAI,CAAC,WAAW,CAAC,WAAW,CAAC,CAAC,CAAC,EAAE,EAAE,QAAQ,CAAC,EAAE,CAAC;gBAC3D,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC,EAAE,sBAAsB,QAAQ,QAAQ,CAAC,CAAC;gBAC/E,OAAO,KAAK,CAAC;YACf,CAAC;YACD,OAAO,IAAI,CAAC;QACd,CAAC,CAAC,CAAC;QAEH,IAAI,aAAa,GAAU,EAAE,CAAC;QAC9B,MAAM,UAAU,GAAG,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1D,IAAI,aAAa,GAAG,CAAC,CAAC;QAEtB,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,OAAO,CAAC,GAAG,CAAC,uBAAuB,aAAa,CAAC,MAAM,aAAa,CAAC,CAAC;YACtE,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,aAAa,CAAC,CAAC;YAE9D,gBAAgB;YAChB,KAAK,MAAM,GAAG,IAAI,OAAO,EAAE,CAAC;gBAC1B,IAAI,CAAC,WAAW,CAAC,MAAM,CAAC;oBACtB,MAAM,EAAE,GAAG,CAAC,MAAM;oBAClB,M
AAM,EAAE,GAAG,CAAC,MAAM;oBAClB,WAAW,EAAE,GAAG,CAAC,KAAK,CAAC,MAAM;oBAC7B,aAAa,EAAE,GAAG,CAAC,KAAK;iBACzB,CAAC,CAAC;YACL,CAAC;YAED,MAAM,WAAW,GAAG,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YACpD,OAAO,CAAC,GAAG,CAAC,oBAAoB,WAAW,CAAC,MAAM,aAAa,CAAC,CAAC;YAEjE,YAAY;YACZ,MAAM,eAAe,GAAG,IAAI,CAAC,YAAY,CAAC,kBAAkB,EAAE,CAAC;YAC/D,IAAI,QAAQ,GAAG,WAAW,CAAC;YAE3B,IAAI,eAAe,EAAE,CAAC;gBACpB,MAAM,QAAQ,GAAG,IAAI,IAAI,CAAC,eAAe,CAAC,CAAC,OAAO,EAAE,CAAC;gBACrD,QAAQ,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE;oBAClC,IAAI,CAAC,CAAC,CAAC,YAAY;wBAAE,OAAO,IAAI,CAAC;oBACjC,OAAO,IAAI,IAAI,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,OAAO,EAAE,GAAG,QAAQ,CAAC;gBACvD,CAAC,CAAC,CAAC;YACL,CAAC;YACD,aAAa,GAAG,QAAQ,CAAC,MAAM,CAAC;YAChC,OAAO,CAAC,GAAG,CAAC,cAAc,aAAa,sCAAsC,CAAC,CAAC;YAE/E,0BAA0B;YAC1B,MAAM,MAAM,GAAG,IAAI,CAAC,YAAY,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;YACnD,MAAM,UAAU,GAAG,MAAM;iBACtB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;iBACnD,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,IAAI,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,eAAe,CAAC;iBACjE,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;YAErC,OAAO,CAAC,GAAG,CAAC,cAAc,UAAU,CAAC,MAAM,8BAA8B,CAAC,CAAC;YAE3E,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC1B,eAAe;gBACf,OAAO,CAAC,GAAG,CAAC,oDAAoD,CAAC,CAAC;gBAClE,MAAM,eAAe,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,OAAO,CAC5C,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,OAAO,EAAE,CAAC,CAAC,OAAO,EAAE,CAAC,CAAC,CAChE,CAAC;gBAEF,aAAa,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,GAAG,EAAE,EAAE,CAAC,CAAC;oBAChD,GAAG,OAAO;oBACV,OAAO,EAAE,eAAe,CAAC,GAAG,CAAC,EAAE,OAAO,IAAI,IAAI;oBAC9C,QAAQ,EAAE,eAAe,CAAC,GAAG,CAAC,EAAE,QAAQ,IAAI,eAAe;iBAC5D,CAAC,CAAC,CAAC;gBAEJ,cAAc;gBACd,KAAK,MAAM,IAAI,IAAI,aAAa,EAAE,CAAC;oBACjC,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC;wBACvB,GAAG,IAAI;wBACP,WAAW,EAAE,UAAU;wBACvB,SAAS,EAAE,CAAC;qBACN,CAAC,CAAC;gBACZ,CAAC;YACH,CAAC;iBAAM,CAAC;gBAC
N,OAAO,CAAC,GAAG,CAAC,gDAAgD,CAAC,CAAC;YAChE,CAAC;QACH,CAAC;aAAM,CAAC;YACN,OAAO,CAAC,GAAG,CAAC,gFAAgF,CAAC,CAAC;QAChG,CAAC;QAED,0DAA0D;QAC1D,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,CAAC,oBAAoB,CACzD,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,eAAe,EACvC,EAAE,CACH,CAAC;QAEF,gDAAgD;QAChD,MAAM,eAAe,GAAG,IAAI,GAAG,EAAE,CAAC;QAClC,KAAK,MAAM,IAAI,IAAI,YAAY;YAAE,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,CAAC;QACpE,KAAK,MAAM,IAAI,IAAI,aAAa;YAAE,eAAe,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,EAAE,IAAI,CAAC,CAAC;QAErE,MAAM,mBAAmB,GAAG,KAAK,CAAC,IAAI,CAAC,eAAe,CAAC,MAAM,EAAE,CAAC,CAAC;QAEjE,IAAI,mBAAmB,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACrC,OAAO,CAAC,GAAG,CAAC,6CAA6C,CAAC,CAAC;YAC3D,OAAO,CAAC,GAAG,CAAC,mCAAmC,CAAC,CAAC;YACjD,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,WAAW;QACX,MAAM,MAAM,GAAW;YACrB,KAAK,EAAE,mBAAmB,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;gBACxC,KAAK,EAAE,IAAI,CAAC,KAAK;gBACjB,GAAG,EAAE,IAAI,CAAC,GAAG;gBACb,OAAO,EAAE,IAAI,CAAC,OAAO;gBACrB,QAAQ,EAAE,IAAI,CAAC,QAAQ;gBACvB,MAAM,EAAE,IAAI,CAAC,MAAM;gBACnB,KAAK,EAAE,IAAI,CAAC,KAAK;aAClB,CAAC,CAAC;YACH,QAAQ,EAAE;gBACR,eAAe,EAAE,aAAa;gBAC9B,cAAc,EAAE,mBAAmB,CAAC,MAAM;gBAC1C,IAAI,EAAE,UAAU;aACjB;SACF,CAAC;QAEF,OAAO,CAAC,GAAG,CACT,yBAAyB,mBAAmB,CAAC,MAAM,aAAa,IAAI,CAAC,QAAQ,CAAC,MAAM,cAAc,CACnG,CAAC;QACF,MAAM,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;QAE5D,oBAAoB;QACpB,IAAI,CAAC,YAAY,CAAC,eAAe,CAAC,mBAAmB,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAExE,OAAO,CAAC,GAAG,CAAC,mCAAmC,CAAC,CAAC;QACjD,OAAO,mBAAmB,CAAC;IAC7B,CAAC;CACF"}
@@ -0,0 +1,9 @@
1
+ import { Database } from 'better-sqlite3';
2
+ export declare class Scheduler { // Stores named schedules and runs a job when one is due.
3
+ private store;
4
+ constructor(db: Database); // The implementation wraps `db` in a ScheduleStore.
5
+ add(name: string, cron: string, scheduledTime?: string): Promise<string>; // Persists an active schedule and resolves with its generated id.
6
+ list(): import("../db/schedule-store.js").Schedule[]; // Returns whatever the schedule store's getAll() yields.
7
+ remove(id: string): void; // Deletes the schedule with this id from the store.
8
+ checkAndRun(runner: () => Promise<void>): Promise<void>; // Awaits `runner` once per due active schedule, then records the run time.
9
+ }
@@ -0,0 +1,64 @@
1
+ import { ScheduleStore } from '../db/schedule-store.js';
2
+ export class Scheduler {
3
+ store;
4
+ constructor(db) {
5
+ this.store = new ScheduleStore(db);
6
+ }
7
+ async add(name, cron, scheduledTime) {
8
+ const id = Math.random().toString(36).substring(2, 9);
9
+ this.store.add({
10
+ id,
11
+ name,
12
+ cron,
13
+ scheduled_time: scheduledTime,
14
+ active: 1,
15
+ });
16
+ return id;
17
+ }
18
+ list() {
19
+ return this.store.getAll();
20
+ }
21
+ remove(id) {
22
+ this.store.delete(id);
23
+ }
24
+ // This would be called by a background daemon or a frequent cron
25
+ async checkAndRun(runner) {
26
+ const schedules = this.store.getAll();
27
+ const now = new Date();
28
+ const currentHHmm = `${now.getHours().toString().padStart(2, '0')}:${now.getMinutes().toString().padStart(2, '0')}`;
29
+ for (const schedule of schedules) {
30
+ if (!schedule.active)
31
+ continue;
32
+ const lastRun = schedule.last_run ? new Date(schedule.last_run) : new Date(0);
33
+ const diffHours = (now.getTime() - lastRun.getTime()) / (1000 * 60 * 60);
34
+ // Simple implementation:
35
+ // 1. If never run, run it instantly.
36
+ // 2. If it's a 'daily' schedule and has a scheduled_time, check if we've passed that time today and haven't run yet.
37
+ // 3. Otherwise fallback to the 24h cooldown.
38
+ let shouldRun = false;
39
+ if (!schedule.last_run) {
40
+ shouldRun = true;
41
+ }
42
+ else if (schedule.cron === 'daily' && schedule.scheduled_time) {
43
+ // Run if:
44
+ // - Current time >= scheduled time
45
+ // - Last run was NOT today
46
+ const lastRunDate = lastRun.toISOString().split('T')[0];
47
+ const todayDate = now.toISOString().split('T')[0];
48
+ if (currentHHmm >= schedule.scheduled_time && lastRunDate !== todayDate) {
49
+ shouldRun = true;
50
+ }
51
+ }
52
+ else if (diffHours >= 24) {
53
+ // Fallback for non-daily or simple daily without time
54
+ shouldRun = true;
55
+ }
56
+ if (shouldRun) {
57
+ console.log(`[Scheduler] Running job: ${schedule.name}`);
58
+ await runner();
59
+ this.store.updateLastRun(schedule.id, now.toISOString());
60
+ }
61
+ }
62
+ }
63
+ }
64
+ //# sourceMappingURL=scheduler.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scheduler.js","sourceRoot":"","sources":["../../src/core/scheduler.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,yBAAyB,CAAC;AAGxD,MAAM,OAAO,SAAS;IACZ,KAAK,CAAgB;IAE7B,YAAY,EAAY;QACtB,IAAI,CAAC,KAAK,GAAG,IAAI,aAAa,CAAC,EAAE,CAAC,CAAC;IACrC,CAAC;IAED,KAAK,CAAC,GAAG,CAAC,IAAY,EAAE,IAAY,EAAE,aAAsB;QAC1D,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACtD,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC;YACb,EAAE;YACF,IAAI;YACJ,IAAI;YACJ,cAAc,EAAE,aAAa;YAC7B,MAAM,EAAE,CAAC;SACV,CAAC,CAAC;QACH,OAAO,EAAE,CAAC;IACZ,CAAC;IAED,IAAI;QACF,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;IAC7B,CAAC;IAED,MAAM,CAAC,EAAU;QACf,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;IACxB,CAAC;IAED,iEAAiE;IACjE,KAAK,CAAC,WAAW,CAAC,MAA2B;QAC3C,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;QACtC,MAAM,GAAG,GAAG,IAAI,IAAI,EAAE,CAAC;QACvB,MAAM,WAAW,GAAG,GAAG,GAAG,CAAC,QAAQ,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,QAAQ,EAAE,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,EAAE,CAAC;QAEpH,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,IAAI,CAAC,QAAQ,CAAC,MAAM;gBAAE,SAAS;YAE/B,MAAM,OAAO,GAAG,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC;YAC9E,MAAM,SAAS,GAAG,CAAC,GAAG,CAAC,OAAO,EAAE,GAAG,OAAO,CAAC,OAAO,EAAE,CAAC,GAAG,CAAC,IAAI,GAAG,EAAE,GAAG,EAAE,CAAC,CAAC;YAEzE,yBAAyB;YACzB,qCAAqC;YACrC,qHAAqH;YACrH,6CAA6C;YAE7C,IAAI,SAAS,GAAG,KAAK,CAAC;YAEtB,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE,CAAC;gBACvB,SAAS,GAAG,IAAI,CAAC;YACnB,CAAC;iBAAM,IAAI,QAAQ,CAAC,IAAI,KAAK,OAAO,IAAI,QAAQ,CAAC,cAAc,EAAE,CAAC;gBAChE,UAAU;gBACV,mCAAmC;gBACnC,2BAA2B;gBAC3B,MAAM,WAAW,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;gBACxD,MAAM,SAAS,GAAG,GAAG,CAAC,WAAW,EAAE,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;gBAElD,IAAI,WAAW,IAAI,QAAQ,CAAC,cAAc,IAAI,WAAW,KAAK,SAAS,EAAE,CAAC;oBACxE,SAAS,GAAG,IAAI,CAAC;gBACnB,CAAC;YACH,CAAC;iBAAM,IAAI,SAAS,IAAI,EAAE,EAAE,CAAC;gBAC3B,sDAAsD;
gBACtD,SAAS,GAAG,IAAI,CAAC;YACnB,CAAC;YAED,IAAI,SAAS,EAAE,CAAC;gBACd,OAAO,CAAC,GAAG,CAAC,4BAA4B,QAAQ,CAAC,IAAI,EAAE,CAAC,CAAC;gBACzD,MAAM,MAAM,EAAE,CAAC;gBACf,IAAI,CAAC,KAAK,CAAC,aAAa,CAAC,QAAQ,CAAC,EAAE,EAAE,GAAG,CAAC,WAAW,EAAE,CAAC,CAAC;YAC3D,CAAC;QACH,CAAC;IACH,CAAC;CACF"}
@@ -0,0 +1,27 @@
1
+ export interface ScrapedArticle { // One normalized item emitted by a scraper, whatever the upstream format was.
2
+ id: string; // Identifier taken from the upstream record (the Api/Arxiv/Json scrapers use an API object id, an Atom <id>, or a generated fallback).
3
+ title: string; // Display title of the item.
4
+ url: string; // Link to the item.
5
+ content?: string; // Optional body/summary text; the Api/Arxiv/Json scrapers use '' when the upstream has none.
6
+ published_at?: string; // Optional timestamp string: either the upstream value as-is or an ISO-8601 string built by the scraper.
7
+ source: string; // The Api/Arxiv/Json scrapers set this to the originating ScraperSource.id.
8
+ metadata?: Record<string, unknown>; // Optional free-form extras. NOTE(review): not populated by the Api/Arxiv/Json scrapers.
9
+ }
10
+ export interface ScraperSource { // Configuration for one feed/endpoint handed to BaseScraper.scrape().
11
+ id: string; // Source identifier; also used for dispatch (ApiScraper matches 'hn' or an 'arxiv' prefix, JsonScraper a 'reddit' prefix).
12
+ name: string; // Human-readable name. NOTE(review): not read by the Api/Arxiv/Json scrapers.
13
+ type: 'rss' | 'api' | 'scrape' | 'json'; // Kind of source; presumably selects which scraper class handles it — confirm against the caller.
14
+ url: string; // Address the scrapers request with axios.get().
15
+ tier?: number; // NOTE(review): not read by the Api/Arxiv/Json scrapers; meaning is defined elsewhere.
16
+ tags?: string[]; // NOTE(review): not read by the Api/Arxiv/Json scrapers; meaning is defined elsewhere.
17
+ mapping?: Record<string, string>; // Field-name overrides read by JsonScraper (keys it looks up: items, title, url, content, published_at).
18
+ }
19
+ export interface ScraperResult { // Outcome of a single scrape() call; failures are reported here rather than thrown.
20
+ source: string; // The ScraperSource.id this result belongs to.
21
+ status: 'ok' | 'error'; // 'ok' when items were produced (possibly zero), 'error' when the scrape failed or the source is unsupported.
22
+ items: ScrapedArticle[]; // Scraped items; the Api/Arxiv/Json scrapers return [] alongside status 'error'.
23
+ error?: string; // Human-readable failure message, set together with status 'error'.
24
+ }
25
+ export declare abstract class BaseScraper { // Common contract every scraper implementation extends.
26
+ abstract scrape(source: ScraperSource): Promise<ScraperResult>; // Fetch one source and resolve with its result.
27
+ }
@@ -0,0 +1,3 @@
1
+ export class BaseScraper { // Runtime shell of the abstract base; the abstract scrape() member is type-only, so the compiled class body is empty.
2
+ }
3
+ //# sourceMappingURL=scraper-types.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"scraper-types.js","sourceRoot":"","sources":["../../src/core/scraper-types.ts"],"names":[],"mappings":"AA2BA,MAAM,OAAgB,WAAW;CAEhC"}
@@ -0,0 +1,4 @@
1
+ import { BaseScraper, ScraperSource, ScraperResult } from '../scraper-types.js';
2
+ export declare class ApiScraper extends BaseScraper { // Scraper for JSON HTTP APIs; behavior is selected by source.id.
3
+ scrape(source: ScraperSource): Promise<ScraperResult>; // Fetches source.url and maps the response into ScrapedArticle items.
4
+ }
@@ -0,0 +1,46 @@
1
+ import axios from 'axios';
2
+ import { BaseScraper } from '../scraper-types.js';
3
+ export class ApiScraper extends BaseScraper { // Scraper for JSON HTTP APIs; only the 'hn' source id is actually mapped.
4
+ async scrape(source) { // Resolves with a ScraperResult; errors thrown while fetching or mapping are caught and reported as status 'error'.
5
+ try {
6
+ const response = await axios.get(source.url); // NOTE(review): the request is made before the id dispatch, so it also runs for ids that end in an error result.
7
+ const data = response.data;
8
+ let items = [];
9
+ // Dispatch on source.id: each supported API needs its own response mapping.
10
+ if (source.id === 'hn') {
11
+ items = data.hits.map((hit) => ({ // Expects a top-level "hits" array (NOTE(review): looks like the HN Algolia search response — confirm); a missing "hits" throws into the catch below.
12
+ id: hit.objectID,
13
+ title: hit.title,
14
+ url: hit.url || `https://news.ycombinator.com/item?id=${hit.objectID}`, // Hits with no url fall back to their Hacker News item page.
15
+ content: hit.story_text || '',
16
+ published_at: hit.created_at, // Passed through unchanged from the API.
17
+ source: source.id,
18
+ }));
19
+ }
20
+ else if (source.id.startsWith('arxiv')) {
21
+ // ArXiv responds with XML/Atom, which this JSON-oriented scraper does not parse.
22
+ // Rather than guess at the payload, report an explicit error result.
23
+ // XML feeds need a dedicated parser (ArxivScraper in this package uses cheerio in XML mode).
24
+ return {
25
+ source: source.id,
26
+ status: 'error',
27
+ items: [],
28
+ error: 'ArXiv requires specialized XML parsing',
29
+ };
30
+ }
31
+ else {
32
+ return { source: source.id, status: 'error', items: [], error: 'Unsupported API source' }; // Any other id is rejected.
33
+ }
34
+ return { source: source.id, status: 'ok', items }; // Only reached from the 'hn' branch.
35
+ }
36
+ catch (error) {
37
+ return {
38
+ source: source.id,
39
+ status: 'error',
40
+ items: [],
41
+ error: error instanceof Error ? error.message : String(error), // Non-Error throwables are stringified.
42
+ };
43
+ }
44
+ }
45
+ }
46
+ //# sourceMappingURL=api-scraper.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"api-scraper.js","sourceRoot":"","sources":["../../../src/core/scrapers/api-scraper.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,WAAW,EAAgD,MAAM,qBAAqB,CAAC;AAEhG,MAAM,OAAO,UAAW,SAAQ,WAAW;IACzC,KAAK,CAAC,MAAM,CAAC,MAAqB;QAChC,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC7C,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;YAC3B,IAAI,KAAK,GAAqB,EAAE,CAAC;YAEjC,uDAAuD;YACvD,IAAI,MAAM,CAAC,EAAE,KAAK,IAAI,EAAE,CAAC;gBACvB,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAQ,EAAE,EAAE,CAAC,CAAC;oBACnC,EAAE,EAAE,GAAG,CAAC,QAAQ;oBAChB,KAAK,EAAE,GAAG,CAAC,KAAK;oBAChB,GAAG,EAAE,GAAG,CAAC,GAAG,IAAI,wCAAwC,GAAG,CAAC,QAAQ,EAAE;oBACtE,OAAO,EAAE,GAAG,CAAC,UAAU,IAAI,EAAE;oBAC7B,YAAY,EAAE,GAAG,CAAC,UAAU;oBAC5B,MAAM,EAAE,MAAM,CAAC,EAAE;iBAClB,CAAC,CAAC,CAAC;YACN,CAAC;iBAAM,IAAI,MAAM,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;gBACzC,sGAAsG;gBACtG,iEAAiE;gBACjE,uEAAuE;gBACvE,OAAO;oBACL,MAAM,EAAE,MAAM,CAAC,EAAE;oBACjB,MAAM,EAAE,OAAO;oBACf,KAAK,EAAE,EAAE;oBACT,KAAK,EAAE,wCAAwC;iBAChD,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACN,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,EAAE,EAAE,KAAK,EAAE,wBAAwB,EAAE,CAAC;YAC5F,CAAC;YAED,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;QACpD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,MAAM,EAAE,MAAM,CAAC,EAAE;gBACjB,MAAM,EAAE,OAAO;gBACf,KAAK,EAAE,EAAE;gBACT,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC;QACJ,CAAC;IACH,CAAC;CACF"}
@@ -0,0 +1,4 @@
1
+ import { BaseScraper, ScraperSource, ScraperResult } from '../scraper-types.js';
2
+ export declare class ArxivScraper extends BaseScraper { // Scraper for XML feeds made of <entry> elements (arXiv Atom output).
3
+ scrape(source: ScraperSource): Promise<ScraperResult>; // Fetches source.url and turns each <entry> into a ScrapedArticle.
4
+ }
@@ -0,0 +1,34 @@
1
+ import axios from 'axios';
2
+ import * as cheerio from 'cheerio';
3
+ import { BaseScraper } from '../scraper-types.js';
4
+ export class ArxivScraper extends BaseScraper { // Scraper that reads <entry> elements out of an XML response.
5
+ async scrape(source) { // Resolves with a ScraperResult; errors thrown while fetching or parsing are caught and reported as status 'error'.
6
+ try {
7
+ const response = await axios.get(source.url);
8
+ const $ = cheerio.load(response.data, { xmlMode: true }); // Parse the body as XML rather than HTML.
9
+ const items = [];
10
+ $('entry').each((_, el) => { // One article per <entry>; a feed without entries yields status 'ok' with zero items.
11
+ const $el = $(el);
12
+ const url = $el.find('id').text().trim(); // The entry's <id> text doubles as both the item id and its url.
13
+ items.push({
14
+ id: url,
15
+ title: $el.find('title').text().trim().replace(/\s+/g, ' '), // Collapse whitespace runs (including line breaks) to single spaces.
16
+ url,
17
+ content: $el.find('summary').text().trim().replace(/\s+/g, ' '), // Same whitespace collapsing for the <summary> text.
18
+ published_at: $el.find('published').text().trim(), // Kept as the feed's own timestamp text.
19
+ source: source.id,
20
+ });
21
+ });
22
+ return { source: source.id, status: 'ok', items };
23
+ }
24
+ catch (error) {
25
+ return {
26
+ source: source.id,
27
+ status: 'error',
28
+ items: [],
29
+ error: error instanceof Error ? error.message : String(error), // Non-Error throwables are stringified.
30
+ };
31
+ }
32
+ }
33
+ }
34
+ //# sourceMappingURL=arxiv-scraper.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"arxiv-scraper.js","sourceRoot":"","sources":["../../../src/core/scrapers/arxiv-scraper.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,KAAK,OAAO,MAAM,SAAS,CAAC;AACnC,OAAO,EAAE,WAAW,EAAgD,MAAM,qBAAqB,CAAC;AAEhG,MAAM,OAAO,YAAa,SAAQ,WAAW;IAC3C,KAAK,CAAC,MAAM,CAAC,MAAqB;QAChC,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC7C,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;YACzD,MAAM,KAAK,GAAqB,EAAE,CAAC;YAEnC,CAAC,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,EAAE,EAAE;gBACxB,MAAM,GAAG,GAAG,CAAC,CAAC,EAAE,CAAC,CAAC;gBAClB,MAAM,GAAG,GAAG,GAAG,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC;gBACzC,KAAK,CAAC,IAAI,CAAC;oBACT,EAAE,EAAE,GAAG;oBACP,KAAK,EAAE,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;oBAC3D,GAAG;oBACH,OAAO,EAAE,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;oBAC/D,YAAY,EAAE,GAAG,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC,IAAI,EAAE,CAAC,IAAI,EAAE;oBACjD,MAAM,EAAE,MAAM,CAAC,EAAE;iBAClB,CAAC,CAAC;YACL,CAAC,CAAC,CAAC;YAEH,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;QACpD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,MAAM,EAAE,MAAM,CAAC,EAAE;gBACjB,MAAM,EAAE,OAAO;gBACf,KAAK,EAAE,EAAE;gBACT,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC;QACJ,CAAC;IACH,CAAC;CACF"}
@@ -0,0 +1,4 @@
1
+ import { BaseScraper, ScraperSource, ScraperResult } from '../scraper-types.js';
2
+ export declare class JsonScraper extends BaseScraper { // Scraper for JSON endpoints, driven by source.mapping or by a 'reddit' id prefix.
3
+ scrape(source: ScraperSource): Promise<ScraperResult>; // Fetches source.url and maps the JSON payload into ScrapedArticle items.
4
+ }
@@ -0,0 +1,56 @@
1
+ import axios from 'axios';
2
+ import { BaseScraper } from '../scraper-types.js';
3
+ export class JsonScraper extends BaseScraper { // Scraper for JSON endpoints: generic field mapping first, Reddit-style listings second.
4
+ async scrape(source) { // Resolves with a ScraperResult; errors thrown while fetching or mapping are caught and reported as status 'error'.
5
+ try {
6
+ const response = await axios.get(source.url);
7
+ const data = response.data;
8
+ let items = [];
9
+ if (source.mapping) { // A mapping takes precedence over the id-based branch below.
10
+ // Generic mapping: locate an array in the payload and rename its fields.
11
+ const list = Array.isArray(data) ? data : data[source.mapping.items || 'items']; // Use the body itself if it is an array, else the key named by mapping.items (default 'items').
12
+ if (Array.isArray(list)) { // NOTE(review): when no array is found, items stays empty and the result is still status 'ok'.
13
+ items = list.map((item, index) => ({
14
+ id: item.id || `json-${source.id}-${index}`, // Position-based fallback id; the id key itself cannot be overridden via mapping.
15
+ title: item[source.mapping.title || 'title'] || 'No Title',
16
+ url: item[source.mapping.url || 'url'] || source.url, // Items without a link point back at the feed url.
17
+ content: item[source.mapping.content || 'content'] || '',
18
+ published_at: item[source.mapping.published_at || 'date'] || new Date().toISOString(), // Default key is 'date'; a missing value is stamped with the current time.
19
+ source: source.id,
20
+ }));
21
+ }
22
+ }
23
+ else if (source.id.startsWith('reddit')) {
24
+ items = data.data.children.map((child) => { // Expects data.data.children[], each wrapping a post under .data (NOTE(review): looks like Reddit's listing JSON — confirm).
25
+ const post = child.data;
26
+ return {
27
+ id: post.name,
28
+ title: post.title,
29
+ url: post.url.startsWith('/') ? `https://reddit.com${post.url}` : post.url, // A url starting with '/' is made absolute on reddit.com.
30
+ content: post.selftext || '',
31
+ published_at: new Date(post.created_utc * 1000).toISOString(), // created_utc is scaled by 1000 to get the milliseconds Date expects.
32
+ source: source.id,
33
+ };
34
+ });
35
+ }
36
+ else {
37
+ return {
38
+ source: source.id,
39
+ status: 'error',
40
+ items: [],
41
+ error: 'Unsupported JSON source or missing mapping',
42
+ };
43
+ }
44
+ return { source: source.id, status: 'ok', items };
45
+ }
46
+ catch (error) {
47
+ return {
48
+ source: source.id,
49
+ status: 'error',
50
+ items: [],
51
+ error: error instanceof Error ? error.message : String(error), // Non-Error throwables are stringified.
52
+ };
53
+ }
54
+ }
55
+ }
56
+ //# sourceMappingURL=json-scraper.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"json-scraper.js","sourceRoot":"","sources":["../../../src/core/scrapers/json-scraper.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAE,WAAW,EAAgD,MAAM,qBAAqB,CAAC;AAEhG,MAAM,OAAO,WAAY,SAAQ,WAAW;IAC1C,KAAK,CAAC,MAAM,CAAC,MAAqB;QAChC,IAAI,CAAC;YACH,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;YAC7C,MAAM,IAAI,GAAG,QAAQ,CAAC,IAAI,CAAC;YAC3B,IAAI,KAAK,GAAqB,EAAE,CAAC;YAEjC,IAAI,MAAM,CAAC,OAAO,EAAE,CAAC;gBACnB,6BAA6B;gBAC7B,MAAM,IAAI,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,CAAC,KAAK,IAAI,OAAO,CAAC,CAAC;gBAChF,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;oBACxB,KAAK,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,IAAS,EAAE,KAAa,EAAE,EAAE,CAAC,CAAC;wBAC9C,EAAE,EAAE,IAAI,CAAC,EAAE,IAAI,QAAQ,MAAM,CAAC,EAAE,IAAI,KAAK,EAAE;wBAC3C,KAAK,EAAE,IAAI,CAAC,MAAM,CAAC,OAAQ,CAAC,KAAK,IAAI,OAAO,CAAC,IAAI,UAAU;wBAC3D,GAAG,EAAE,IAAI,CAAC,MAAM,CAAC,OAAQ,CAAC,GAAG,IAAI,KAAK,CAAC,IAAI,MAAM,CAAC,GAAG;wBACrD,OAAO,EAAE,IAAI,CAAC,MAAM,CAAC,OAAQ,CAAC,OAAO,IAAI,SAAS,CAAC,IAAI,EAAE;wBACzD,YAAY,EAAE,IAAI,CAAC,MAAM,CAAC,OAAQ,CAAC,YAAY,IAAI,MAAM,CAAC,IAAI,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;wBACtF,MAAM,EAAE,MAAM,CAAC,EAAE;qBAClB,CAAC,CAAC,CAAC;gBACN,CAAC;YACH,CAAC;iBAAM,IAAI,MAAM,CAAC,EAAE,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;gBAC1C,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC,KAAU,EAAE,EAAE;oBAC5C,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC;oBACxB,OAAO;wBACL,EAAE,EAAE,IAAI,CAAC,IAAI;wBACb,KAAK,EAAE,IAAI,CAAC,KAAK;wBACjB,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,qBAAqB,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG;wBAC1E,OAAO,EAAE,IAAI,CAAC,QAAQ,IAAI,EAAE;wBAC5B,YAAY,EAAE,IAAI,IAAI,CAAC,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC,CAAC,WAAW,EAAE;wBAC7D,MAAM,EAAE,MAAM,CAAC,EAAE;qBAClB,CAAC;gBACJ,CAAC,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,OAAO;oBACL,MAAM,EAAE,MAAM,CAAC,EAAE;oBACjB,MAAM,EAAE,OAAO;oBACf,KAAK,EAAE,EAAE;oBACT,KAAK,EAAE,4CAA4C;iBACpD,CAAC;YACJ,CAAC;YACD,OAAO,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE
,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC;QACpD,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO;gBACL,MAAM,EAAE,MAAM,CAAC,EAAE;gBACjB,MAAM,EAAE,OAAO;gBACf,KAAK,EAAE,EAAE;gBACT,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC;QACJ,CAAC;IACH,CAAC;CACF"}
@@ -0,0 +1,6 @@
1
+ import { BaseScraper, ScraperSource, ScraperResult } from '../scraper-types.js';
2
+ export declare class RssScraper extends BaseScraper { // Scraper for sources handled through an internal feed parser.
3
+ private parser; // Private member; its type is omitted from the emitted declaration.
4
+ constructor(); // Takes no arguments.
5
+ scrape(source: ScraperSource): Promise<ScraperResult>; // Same contract as BaseScraper.scrape().
6
+ }