@lobu/cli 6.0.1 → 6.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (217) hide show
  1. package/README.md +20 -27
  2. package/dist/bundled-skills/lobu/SKILL.md +11 -11
  3. package/dist/commands/_lib/apply/apply-cmd.d.ts +2 -0
  4. package/dist/commands/_lib/apply/apply-cmd.d.ts.map +1 -1
  5. package/dist/commands/_lib/apply/apply-cmd.js +26 -0
  6. package/dist/commands/_lib/apply/apply-cmd.js.map +1 -1
  7. package/dist/commands/_lib/apply/client.d.ts +1 -1
  8. package/dist/commands/_lib/apply/client.d.ts.map +1 -1
  9. package/dist/commands/_lib/apply/desired-state.js +4 -4
  10. package/dist/commands/_lib/apply/desired-state.js.map +1 -1
  11. package/dist/commands/agent.d.ts +7 -0
  12. package/dist/commands/agent.d.ts.map +1 -1
  13. package/dist/commands/agent.js +65 -1
  14. package/dist/commands/agent.js.map +1 -1
  15. package/dist/commands/chat.d.ts +12 -9
  16. package/dist/commands/chat.d.ts.map +1 -1
  17. package/dist/commands/chat.js +117 -56
  18. package/dist/commands/chat.js.map +1 -1
  19. package/dist/commands/dev.d.ts +15 -7
  20. package/dist/commands/dev.d.ts.map +1 -1
  21. package/dist/commands/dev.js +79 -44
  22. package/dist/commands/dev.js.map +1 -1
  23. package/dist/commands/doctor.d.ts +1 -0
  24. package/dist/commands/doctor.d.ts.map +1 -1
  25. package/dist/commands/doctor.js +136 -0
  26. package/dist/commands/doctor.js.map +1 -1
  27. package/dist/commands/eval.d.ts +8 -0
  28. package/dist/commands/eval.d.ts.map +1 -1
  29. package/dist/commands/eval.js +56 -1
  30. package/dist/commands/eval.js.map +1 -1
  31. package/dist/commands/init.d.ts +20 -5
  32. package/dist/commands/init.d.ts.map +1 -1
  33. package/dist/commands/init.js +332 -183
  34. package/dist/commands/init.js.map +1 -1
  35. package/dist/commands/link.d.ts +11 -0
  36. package/dist/commands/link.d.ts.map +1 -0
  37. package/dist/commands/link.js +28 -0
  38. package/dist/commands/link.js.map +1 -0
  39. package/dist/commands/login.d.ts.map +1 -1
  40. package/dist/commands/login.js +14 -2
  41. package/dist/commands/login.js.map +1 -1
  42. package/dist/commands/memory/_lib/browser-auth-cmd.d.ts.map +1 -1
  43. package/dist/commands/memory/_lib/browser-auth-cmd.js +3 -3
  44. package/dist/commands/memory/_lib/browser-auth-cmd.js.map +1 -1
  45. package/dist/commands/memory/_lib/mcp.d.ts +2 -2
  46. package/dist/commands/memory/_lib/mcp.d.ts.map +1 -1
  47. package/dist/commands/memory/_lib/mcp.js +24 -12
  48. package/dist/commands/memory/_lib/mcp.js.map +1 -1
  49. package/dist/commands/memory/_lib/openclaw-auth.d.ts +1 -0
  50. package/dist/commands/memory/_lib/openclaw-auth.d.ts.map +1 -1
  51. package/dist/commands/memory/_lib/openclaw-auth.js +14 -3
  52. package/dist/commands/memory/_lib/openclaw-auth.js.map +1 -1
  53. package/dist/commands/memory/_lib/openclaw-cmd.js +1 -1
  54. package/dist/commands/memory/_lib/openclaw-cmd.js.map +1 -1
  55. package/dist/commands/memory/_lib/schema.d.ts +1 -1
  56. package/dist/commands/memory/_lib/schema.js +1 -1
  57. package/dist/commands/memory/_lib/seed-cmd.d.ts.map +1 -1
  58. package/dist/commands/memory/_lib/seed-cmd.js +5 -6
  59. package/dist/commands/memory/_lib/seed-cmd.js.map +1 -1
  60. package/dist/commands/memory/run.d.ts.map +1 -1
  61. package/dist/commands/memory/run.js +2 -2
  62. package/dist/commands/memory/run.js.map +1 -1
  63. package/dist/commands/platforms/platform-prompts.d.ts +0 -1
  64. package/dist/commands/platforms/platform-prompts.d.ts.map +1 -1
  65. package/dist/commands/platforms/platform-prompts.js +54 -8
  66. package/dist/commands/platforms/platform-prompts.js.map +1 -1
  67. package/dist/commands/telemetry.d.ts +10 -0
  68. package/dist/commands/telemetry.d.ts.map +1 -0
  69. package/dist/commands/telemetry.js +68 -0
  70. package/dist/commands/telemetry.js.map +1 -0
  71. package/dist/commands/whoami.d.ts.map +1 -1
  72. package/dist/commands/whoami.js +1 -1
  73. package/dist/commands/whoami.js.map +1 -1
  74. package/dist/connectors/README.md +534 -0
  75. package/dist/connectors/__tests__/browser-scraper-utils.test.ts +186 -0
  76. package/dist/connectors/browser-scraper-utils.ts +214 -0
  77. package/dist/connectors/capterra.ts +273 -0
  78. package/dist/connectors/g2.ts +286 -0
  79. package/dist/connectors/github.ts +1553 -0
  80. package/dist/connectors/glassdoor.ts +291 -0
  81. package/dist/connectors/gmaps.ts +197 -0
  82. package/dist/connectors/google_calendar.ts +631 -0
  83. package/dist/connectors/google_gmail.ts +751 -0
  84. package/dist/connectors/google_photos.ts +776 -0
  85. package/dist/connectors/google_play.ts +342 -0
  86. package/dist/connectors/hackernews.ts +471 -0
  87. package/dist/connectors/index.ts +23 -0
  88. package/dist/connectors/ios_appstore.ts +226 -0
  89. package/dist/connectors/linkedin.ts +471 -0
  90. package/dist/connectors/microsoft_outlook.ts +410 -0
  91. package/dist/connectors/producthunt.ts +471 -0
  92. package/dist/connectors/reddit.ts +600 -0
  93. package/dist/connectors/rss.ts +448 -0
  94. package/dist/connectors/spotify.ts +590 -0
  95. package/dist/connectors/trustpilot.ts +199 -0
  96. package/dist/connectors/website.ts +629 -0
  97. package/dist/connectors/whatsapp.ts +1073 -0
  98. package/dist/connectors/x.ts +526 -0
  99. package/dist/connectors/youtube.ts +666 -0
  100. package/dist/db/migrations/00000000000000_baseline.sql +4867 -0
  101. package/dist/db/migrations/20260405193000_add_mcp_sessions.sql +33 -0
  102. package/dist/db/migrations/20260408120000_remove_system_connectors.sql +48 -0
  103. package/dist/db/migrations/20260408120001_optional_compiled_code.sql +6 -0
  104. package/dist/db/migrations/20260409110000_add_active_watcher_run_index.sql +9 -0
  105. package/dist/db/migrations/20260409130000_connector_default_config.sql +5 -0
  106. package/dist/db/migrations/20260410120000_add_agent_secrets.sql +25 -0
  107. package/dist/db/migrations/20260413170000_add_watcher_group_id.sql +67 -0
  108. package/dist/db/migrations/20260416120000_add_entity_wa_jid_index.sql +14 -0
  109. package/dist/db/migrations/20260417100000_add_entity_identities.sql +77 -0
  110. package/dist/db/migrations/20260418100000_add_auth_runs.sql +83 -0
  111. package/dist/db/migrations/20260418110000_add_runs_created_by_user.sql +18 -0
  112. package/dist/db/migrations/20260419120000_add_event_identity_indexes.sql +56 -0
  113. package/dist/db/migrations/20260420120000_extend_reserved_org_slugs.sql +56 -0
  114. package/dist/db/migrations/20260424030000_add_watcher_run_correlation.sql +52 -0
  115. package/dist/db/migrations/20260424130000_relax_events_client_id_fk.sql +47 -0
  116. package/dist/db/migrations/20260425100000_normalize_watcher_feedback.sql +91 -0
  117. package/dist/db/migrations/20260425120000_add_run_diagnostics.sql +20 -0
  118. package/dist/db/migrations/20260425130000_add_repair_agent_plumbing.sql +46 -0
  119. package/dist/db/migrations/20260426120000_entities_entity_type_fk.sql +101 -0
  120. package/dist/db/migrations/20260426130000_db_integrity_cleanup.sql +104 -0
  121. package/dist/db/migrations/20260426130001_db_integrity_cleanup_concurrent.sql +187 -0
  122. package/dist/db/migrations/20260427133000_events_created_by_nullable.sql +74 -0
  123. package/dist/db/migrations/20260427140000_identity_engine_indexes.sql +140 -0
  124. package/dist/db/migrations/20260427150000_drop_events_source_id.sql +177 -0
  125. package/dist/db/migrations/20260427160000_drop_dead_schema.sql +76 -0
  126. package/dist/db/migrations/20260427170000_market_founder_to_member.sql +364 -0
  127. package/dist/db/migrations/20260428040000_cascade_events_watchers_org_fk.sql +66 -0
  128. package/dist/db/migrations/20260428050000_add_runs_approved_input.sql +9 -0
  129. package/dist/db/migrations/20260429010000_auth_profile_tenant_scoped_fk.sql +79 -0
  130. package/dist/db/migrations/20260429060000_extend_runs_for_lobu_queue.sql +108 -0
  131. package/dist/db/migrations/20260429120000_agent_changed_notify.sql +97 -0
  132. package/dist/db/migrations/20260429120100_user_auth_profiles_and_model_prefs.sql +36 -0
  133. package/dist/db/migrations/20260429120200_fix_notify_old_keys.sql +130 -0
  134. package/dist/db/migrations/20260429130000_oauth_states_cli_sessions_rate_limits.sql +83 -0
  135. package/dist/db/migrations/20260429140000_phase8_grants_chat_connections_mcp_sessions.sql +84 -0
  136. package/dist/db/migrations/20260429140100_runs_priority_expires_at_retry_delay.sql +44 -0
  137. package/dist/db/migrations/20260429180000_drop_invalidatable_cache_triggers.sql +25 -0
  138. package/dist/db/migrations/20260430005614_agents_apply_fields.sql +21 -0
  139. package/dist/db/migrations/20260430022231_fix_connection_config_encryption.sql +69 -0
  140. package/dist/db/migrations/20260430151215_add_task_run_type.sql +77 -0
  141. package/dist/db/migrations/20260501000000_drop_cli_sessions.sql +27 -0
  142. package/dist/db/migrations/20260501133000_lobu_memory_mcp_id.sql +117 -0
  143. package/dist/db/migrations/20260502000000_drop_chat_connections.sql +60 -0
  144. package/dist/db/migrations/20260503000000_agent_secrets_org_scope.sql +56 -0
  145. package/dist/db/migrations/20260504000000_flatten_agents_drop_sandbox_model.sql +48 -0
  146. package/dist/index.d.ts.map +1 -1
  147. package/dist/index.js +147 -23
  148. package/dist/index.js.map +1 -1
  149. package/dist/internal/api-client.d.ts +4 -8
  150. package/dist/internal/api-client.d.ts.map +1 -1
  151. package/dist/internal/api-client.js +1 -1
  152. package/dist/internal/api-client.js.map +1 -1
  153. package/dist/internal/context.js +2 -2
  154. package/dist/internal/context.js.map +1 -1
  155. package/dist/internal/credentials.d.ts.map +1 -1
  156. package/dist/internal/credentials.js +6 -1
  157. package/dist/internal/credentials.js.map +1 -1
  158. package/dist/internal/index.d.ts +2 -3
  159. package/dist/internal/index.d.ts.map +1 -1
  160. package/dist/internal/index.js +2 -2
  161. package/dist/internal/index.js.map +1 -1
  162. package/dist/internal/oauth.d.ts +6 -5
  163. package/dist/internal/oauth.d.ts.map +1 -1
  164. package/dist/internal/oauth.js +2 -2
  165. package/dist/internal/project-link.d.ts +10 -0
  166. package/dist/internal/project-link.d.ts.map +1 -0
  167. package/dist/internal/project-link.js +48 -0
  168. package/dist/internal/project-link.js.map +1 -0
  169. package/dist/providers.json +2 -2
  170. package/dist/server.bundle.mjs +3090 -4321
  171. package/dist/start-local.bundle.mjs +71481 -0
  172. package/dist/templates/README.md.tmpl +10 -11
  173. package/package.json +14 -12
  174. package/dist/__tests__/chat.integration.test.d.ts +0 -2
  175. package/dist/__tests__/chat.integration.test.d.ts.map +0 -1
  176. package/dist/__tests__/chat.integration.test.js +0 -337
  177. package/dist/__tests__/chat.integration.test.js.map +0 -1
  178. package/dist/__tests__/dev.test.d.ts +0 -2
  179. package/dist/__tests__/dev.test.d.ts.map +0 -1
  180. package/dist/__tests__/dev.test.js +0 -25
  181. package/dist/__tests__/dev.test.js.map +0 -1
  182. package/dist/__tests__/init-memory.test.d.ts +0 -2
  183. package/dist/__tests__/init-memory.test.d.ts.map +0 -1
  184. package/dist/__tests__/init-memory.test.js +0 -45
  185. package/dist/__tests__/init-memory.test.js.map +0 -1
  186. package/dist/__tests__/token.test.d.ts +0 -2
  187. package/dist/__tests__/token.test.d.ts.map +0 -1
  188. package/dist/__tests__/token.test.js +0 -52
  189. package/dist/__tests__/token.test.js.map +0 -1
  190. package/dist/commands/_lib/apply/__tests__/client.test.d.ts +0 -2
  191. package/dist/commands/_lib/apply/__tests__/client.test.d.ts.map +0 -1
  192. package/dist/commands/_lib/apply/__tests__/client.test.js +0 -23
  193. package/dist/commands/_lib/apply/__tests__/client.test.js.map +0 -1
  194. package/dist/commands/_lib/apply/__tests__/desired-state.test.d.ts +0 -2
  195. package/dist/commands/_lib/apply/__tests__/desired-state.test.d.ts.map +0 -1
  196. package/dist/commands/_lib/apply/__tests__/desired-state.test.js +0 -140
  197. package/dist/commands/_lib/apply/__tests__/desired-state.test.js.map +0 -1
  198. package/dist/commands/_lib/apply/__tests__/diff.test.d.ts +0 -2
  199. package/dist/commands/_lib/apply/__tests__/diff.test.d.ts.map +0 -1
  200. package/dist/commands/_lib/apply/__tests__/diff.test.js +0 -378
  201. package/dist/commands/_lib/apply/__tests__/diff.test.js.map +0 -1
  202. package/dist/commands/apply.d.ts +0 -3
  203. package/dist/commands/apply.d.ts.map +0 -1
  204. package/dist/commands/apply.js +0 -5
  205. package/dist/commands/apply.js.map +0 -1
  206. package/dist/commands/memory/_lib/openclaw-auth.test.d.ts +0 -2
  207. package/dist/commands/memory/_lib/openclaw-auth.test.d.ts.map +0 -1
  208. package/dist/commands/memory/_lib/openclaw-auth.test.js +0 -9
  209. package/dist/commands/memory/_lib/openclaw-auth.test.js.map +0 -1
  210. package/dist/internal/__tests__/api-client.test.d.ts +0 -2
  211. package/dist/internal/__tests__/api-client.test.d.ts.map +0 -1
  212. package/dist/internal/__tests__/api-client.test.js +0 -95
  213. package/dist/internal/__tests__/api-client.test.js.map +0 -1
  214. package/dist/internal/__tests__/context.test.d.ts +0 -2
  215. package/dist/internal/__tests__/context.test.d.ts.map +0 -1
  216. package/dist/internal/__tests__/context.test.js +0 -77
  217. package/dist/internal/__tests__/context.test.js.map +0 -1
@@ -0,0 +1,448 @@
1
+ /**
2
+ * RSS / Atom Connector (V1 runtime)
3
+ *
4
+ * Fetches and parses RSS 2.0 and Atom feeds. Supports multiple feed URLs,
5
+ * deduplication via checkpoint, and HTML entity decoding.
6
+ * No external XML parsing dependencies — uses regex-based parsing.
7
+ */
8
+
9
+ import {
10
+ type ActionContext,
11
+ type ActionResult,
12
+ type ConnectorDefinition,
13
+ ConnectorRuntime,
14
+ type EventEnvelope,
15
+ type SyncContext,
16
+ type SyncResult,
17
+ } from '@lobu/connector-sdk';
18
+
19
+ // ---------------------------------------------------------------------------
20
+ // Types
21
+ // ---------------------------------------------------------------------------
22
+
23
/** One normalized entry parsed out of an RSS `<item>` or an Atom `<entry>`. */
type RSSFeedItem = {
  /** Feed-provided GUID / Atom id, or a hash fallback when the feed has none. */
  id: string;
  /** Plain-text title (markup stripped, entities decoded). */
  title: string;
  /** Link to the article; empty string when the feed supplies none. */
  link: string;
  /** Plain-text body. */
  content: string;
  /** Author display name; empty string when unknown. */
  author: string;
  /** Publication time of the entry. */
  publishedAt: Date;
  /** URL of the feed this entry was read from. */
  feedUrl: string;
};

/** State persisted between syncs so already-emitted entries can be skipped. */
type RSSCheckpoint = {
  /** IDs of entries that have already been emitted. */
  last_item_ids: string[];
  /** ISO-8601 timestamp recorded by the most recent sync that emitted events. */
  last_published_at?: string;
};

/** User-supplied configuration for the `articles` feed. */
type RSSConfig = {
  /** One or more RSS/Atom feed URLs to poll. */
  feed_urls: string[];
  /** Upper bound on entries parsed from each feed in a single sync. */
  max_items_per_feed?: number;
};
42
+
43
+ // ---------------------------------------------------------------------------
44
+ // Connector
45
+ // ---------------------------------------------------------------------------
46
+
47
/**
 * RSS / Atom connector.
 *
 * Polls one or more feed URLs, parses RSS `<item>` and Atom `<entry>` elements
 * with regular expressions (no XML library), and emits one `article` event per
 * entry whose ID is not already recorded in the checkpoint.
 */
export default class RSSConnector extends ConnectorRuntime {
  /** Static connector manifest: identity, auth requirements and config schemas. */
  readonly definition: ConnectorDefinition = {
    key: 'rss',
    name: 'RSS / Atom',
    description: 'Fetches and parses RSS 2.0 and Atom feeds to collect articles.',
    version: '1.0.0',
    faviconDomain: 'rss.com',
    authSchema: {
      methods: [{ type: 'none' }],
    },
    feeds: {
      articles: {
        key: 'articles',
        name: 'Feed Articles',
        description: 'Articles from RSS/Atom feeds.',
        configSchema: {
          type: 'object',
          required: ['feed_urls'],
          properties: {
            feed_urls: {
              type: 'array',
              items: { type: 'string', format: 'uri' },
              minItems: 1,
              description: 'One or more RSS/Atom feed URLs.',
            },
            max_items_per_feed: {
              type: 'integer',
              minimum: 1,
              maximum: 1000,
              default: 100,
              description: 'Maximum items to collect per feed per sync.',
            },
          },
        },
        eventKinds: {
          article: {
            description: 'A blog post or article from an RSS/Atom feed',
            metadataSchema: {
              type: 'object',
              properties: {
                feed_url: {
                  type: 'string',
                  format: 'uri',
                  description: 'The feed URL this article came from',
                },
              },
            },
          },
        },
      },
    },
    optionsSchema: {
      type: 'object',
      required: ['feed_urls'],
      properties: {
        feed_urls: {
          type: 'array',
          items: { type: 'string', format: 'uri' },
          minItems: 1,
          description: 'One or more RSS/Atom feed URLs.',
        },
        max_items_per_feed: {
          type: 'integer',
          minimum: 1,
          maximum: 1000,
          default: 100,
          description: 'Maximum items to collect per feed per sync.',
        },
      },
    },
  };

  /** Minimum number of item IDs retained in the checkpoint for deduplication. */
  private readonly MAX_DEDUP_IDS = 500;
  /** Budget for one feed request, covering both headers and body download. */
  private readonly FETCH_TIMEOUT_MS = 15000;
  /** User-Agent header sent with every feed request. */
  private readonly USER_AGENT = 'Lobu-RSS-Connector/1.0.0';

  /**
   * Named entities understood by `decodeEntities`. A Map is used instead of an
   * object literal so names such as `constructor` cannot resolve to inherited
   * prototype members.
   */
  private readonly NAMED_ENTITIES: ReadonlyMap<string, string> = new Map<string, string>([
    ['amp', '&'],
    ['lt', '<'],
    ['gt', '>'],
    ['quot', '"'],
    ['apos', "'"],
    ['nbsp', '\u00A0'],
    ['ndash', '\u2013'],
    ['mdash', '\u2014'],
    ['hellip', '\u2026'],
    ['lsquo', '\u2018'],
    ['rsquo', '\u2019'],
    ['ldquo', '\u201C'],
    ['rdquo', '\u201D'],
    ['copy', '\u00A9'],
    ['reg', '\u00AE'],
    ['trade', '\u2122'],
  ]);

  // -------------------------------------------------------------------------
  // sync
  // -------------------------------------------------------------------------

  /**
   * Fetch every configured feed and emit events for entries not seen before.
   *
   * A feed that fails to download or parse is logged and skipped so the
   * remaining feeds still sync.
   *
   * @param ctx Sync context; `ctx.config` is read as an `RSSConfig` and
   *   `ctx.checkpoint` as an `RSSCheckpoint` (or null on the first run).
   * @returns New events (newest first), the updated checkpoint, and counters:
   *   `items_found`, `feeds_fetched` (successful feeds) and `feeds_failed`.
   * @throws Error when `feed_urls` is missing or empty.
   */
  async sync(ctx: SyncContext): Promise<SyncResult> {
    const config = ctx.config as unknown as RSSConfig;
    const feedUrls = config.feed_urls;
    if (!feedUrls || !Array.isArray(feedUrls) || feedUrls.length === 0) {
      throw new Error('feed_urls is required and must be a non-empty array.');
    }

    const maxItemsPerFeed = config.max_items_per_feed ?? 100;
    const checkpoint = (ctx.checkpoint as RSSCheckpoint | null) ?? {
      last_item_ids: [],
    };
    const previousIds = checkpoint.last_item_ids ?? [];
    const seenIds = new Set<string>(previousIds);

    const allItems: RSSFeedItem[] = [];
    let feedsFetched = 0;
    let feedsFailed = 0;

    for (const feedUrl of feedUrls) {
      try {
        const items = await this.fetchAndParseFeed(feedUrl, maxItemsPerFeed);
        allItems.push(...items);
        feedsFetched += 1;
      } catch (err) {
        feedsFailed += 1;
        const message = err instanceof Error ? err.message : String(err);
        console.warn(`Failed to fetch feed ${feedUrl}: ${message}`);
      }
    }

    // Newest first, so events are emitted in descending publication order.
    allItems.sort((a, b) => b.publishedAt.getTime() - a.publishedAt.getTime());

    // Deduplicate against the checkpoint and against repeats within this sync.
    const events: EventEnvelope[] = [];
    let newestNewItem: RSSFeedItem | undefined;

    for (const item of allItems) {
      if (seenIds.has(item.id)) continue;
      seenIds.add(item.id);
      if (newestNewItem === undefined) newestNewItem = item;

      events.push({
        origin_id: item.id,
        title: item.title,
        payload_text: item.content,
        author_name: item.author || undefined,
        source_url: item.link || undefined,
        occurred_at: item.publishedAt,
        origin_type: 'article',
        metadata: {
          feed_url: item.feedUrl,
        },
      });
    }

    // Rebuild the dedup window ordered oldest -> newest so trimming from the
    // front discards the stalest IDs. Every ID still present in a feed must be
    // remembered, otherwise it would be re-emitted on the next sync.
    const currentIds: string[] = [];
    const currentIdSet = new Set<string>();
    for (const item of [...allItems].reverse()) {
      if (currentIdSet.has(item.id)) continue;
      currentIdSet.add(item.id);
      currentIds.push(item.id);
    }
    const carriedOverIds = previousIds.filter((id) => !currentIdSet.has(id));
    // When a feed failed, its IDs live only in the previous checkpoint; do not
    // shrink the window in that case or an outage would cause re-emission.
    const windowSize = Math.max(
      this.MAX_DEDUP_IDS,
      currentIds.length,
      feedsFailed > 0 ? previousIds.length : 0
    );
    const trimmedIds = [...carriedOverIds, ...currentIds].slice(-windowSize);

    const latestPublishedAt =
      newestNewItem !== undefined
        ? newestNewItem.publishedAt.toISOString()
        : checkpoint.last_published_at;

    const newCheckpoint: RSSCheckpoint = {
      last_item_ids: trimmedIds,
      last_published_at: latestPublishedAt,
    };

    return {
      events,
      checkpoint: newCheckpoint as unknown as Record<string, unknown>,
      metadata: {
        items_found: events.length,
        feeds_fetched: feedsFetched,
        feeds_failed: feedsFailed,
      },
    };
  }

  // -------------------------------------------------------------------------
  // execute
  // -------------------------------------------------------------------------

  /** This connector is read-only; every action request is rejected. */
  async execute(_ctx: ActionContext): Promise<ActionResult> {
    return { success: false, error: 'Actions not supported' };
  }

  // -------------------------------------------------------------------------
  // Feed fetching & parsing
  // -------------------------------------------------------------------------

  /**
   * Download one feed and parse it into items.
   *
   * @param feedUrl URL of the RSS/Atom document.
   * @param maxItems Maximum number of entries to parse from the document.
   * @throws Error on a non-2xx response, a network failure, or when the
   *   request (headers plus body) exceeds `FETCH_TIMEOUT_MS`.
   */
  private async fetchAndParseFeed(feedUrl: string, maxItems: number): Promise<RSSFeedItem[]> {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.FETCH_TIMEOUT_MS);

    try {
      const response = await fetch(feedUrl, {
        signal: controller.signal,
        headers: {
          'User-Agent': this.USER_AGENT,
          Accept: 'application/rss+xml, application/atom+xml, application/xml, text/xml, */*',
        },
      });

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
      }

      // The timer stays armed while the body streams in, so a stalled
      // download is aborted as well.
      const xml = await response.text();
      return this.parseXml(xml, feedUrl, maxItems);
    } catch (err) {
      if (controller.signal.aborted) {
        throw new Error(`Request timed out after ${this.FETCH_TIMEOUT_MS}ms`);
      }
      throw err;
    } finally {
      clearTimeout(timeoutId);
    }
  }

  /** Dispatch to the Atom or RSS parser depending on the document's root element. */
  private parseXml(xml: string, feedUrl: string, maxItems: number): RSSFeedItem[] {
    if (this.isAtomFeed(xml)) {
      return this.parseAtom(xml, feedUrl, maxItems);
    }
    return this.parseRSS(xml, feedUrl, maxItems);
  }

  /** True when the document contains a `<feed>` element and no `<rss>` element. */
  private isAtomFeed(xml: string): boolean {
    return /<feed[\s>]/.test(xml) && !/<rss[\s>]/.test(xml);
  }

  // -------------------------------------------------------------------------
  // RSS 2.0 parser
  // -------------------------------------------------------------------------

  /**
   * Parse up to `maxItems` `<item>` elements.
   *
   * The body comes from `content:encoded`, falling back to `description`; the
   * author from `author`, falling back to `dc:creator`; the date from
   * `pubDate`, falling back to `dc:date`. Entries without a `guid` get an ID
   * hashed from title and link; entries without a usable date get the current
   * time.
   */
  private parseRSS(xml: string, feedUrl: string, maxItems: number): RSSFeedItem[] {
    const items: RSSFeedItem[] = [];
    const itemRegex = /<item[\s>]([\s\S]*?)<\/item>/gi;
    let match: RegExpExecArray | null;

    // Check the limit first so no extra regex scan runs once it is reached.
    while (items.length < maxItems && (match = itemRegex.exec(xml)) !== null) {
      const block = match[1] ?? '';

      const title = this.extractTag(block, 'title');
      const link = this.extractTag(block, 'link');
      const content =
        this.extractBodyText(block, 'content:encoded') ??
        this.extractBodyText(block, 'description') ??
        '';
      const pubDate = this.extractTag(block, 'pubDate') || this.extractTag(block, 'dc:date');
      const guid = this.extractTag(block, 'guid');
      // `||` rather than `??`: an empty <author/> must not mask dc:creator.
      const author = this.extractTag(block, 'author') || this.extractTag(block, 'dc:creator') || '';

      const id = guid || this.hashString(`${title ?? ''}|${link ?? ''}`);
      const publishedAt = pubDate ? this.parseDate(pubDate) : new Date();

      items.push({
        id,
        title: this.decodeEntities(this.stripHtml(title ?? '')),
        link: this.decodeEntities(link ?? ''),
        content,
        author: this.decodeEntities(this.stripHtml(author)),
        publishedAt,
        feedUrl,
      });
    }

    return items;
  }

  // -------------------------------------------------------------------------
  // Atom parser
  // -------------------------------------------------------------------------

  /**
   * Parse up to `maxItems` `<entry>` elements.
   *
   * The body comes from `content`, falling back to `summary`; the date from
   * `published`, falling back to `updated`. Entries without an `id` get an ID
   * hashed from title and link; entries without a usable date get the current
   * time.
   */
  private parseAtom(xml: string, feedUrl: string, maxItems: number): RSSFeedItem[] {
    const items: RSSFeedItem[] = [];
    const entryRegex = /<entry[\s>]([\s\S]*?)<\/entry>/gi;
    let match: RegExpExecArray | null;

    while (items.length < maxItems && (match = entryRegex.exec(xml)) !== null) {
      const block = match[1] ?? '';

      const title = this.extractTag(block, 'title');
      const link = this.extractAtomLink(block);
      const content =
        this.extractBodyText(block, 'content') ?? this.extractBodyText(block, 'summary') ?? '';
      const published =
        this.extractTag(block, 'published') || this.extractTag(block, 'updated') || '';
      const id = this.extractTag(block, 'id');
      const author = this.extractAtomAuthor(block);

      const externalId = id || this.hashString(`${title ?? ''}|${link ?? ''}`);
      const publishedAt = published ? this.parseDate(published) : new Date();

      items.push({
        id: externalId,
        title: this.decodeEntities(this.stripHtml(title ?? '')),
        link: this.decodeEntities(link ?? ''),
        content,
        author: this.decodeEntities(this.stripHtml(author)),
        publishedAt,
        feedUrl,
      });
    }

    return items;
  }

  // -------------------------------------------------------------------------
  // XML extraction helpers
  // -------------------------------------------------------------------------

  /**
   * Return the trimmed inner text of the first `<tagName>…</tagName>` element
   * in `block`, or null when there is none. A body that is a single CDATA
   * section is unwrapped. Tags with attributes are matched; self-closing tags
   * (`<tag/>`) are not.
   */
  private extractTag(block: string, tagName: string): string | null {
    const cdataResult = this.extractCDataTag(block, tagName);
    if (cdataResult !== null) return cdataResult;

    const escaped = tagName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const regex = new RegExp(`<${escaped}(?:\\s[^>]*)?>([\\s\\S]*?)<\\/${escaped}>`, 'i');
    const match = regex.exec(block);
    return match ? (match[1] ?? '').trim() : null;
  }

  /**
   * Return the trimmed contents of a tag whose entire body is one CDATA
   * section, or null when the tag is absent or not CDATA-wrapped.
   */
  private extractCDataTag(block: string, tagName: string): string | null {
    const escaped = tagName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const regex = new RegExp(
      `<${escaped}(?:\\s[^>]*)?>\\s*<!\\[CDATA\\[([\\s\\S]*?)\\]\\]>\\s*<\\/${escaped}>`,
      'i'
    );
    const match = regex.exec(block);
    return match ? (match[1] ?? '').trim() : null;
  }

  /**
   * Extract an article body from `tagName` as plain text.
   *
   * Feeds carry HTML bodies in two encodings:
   *  - raw markup (CDATA section or inline XHTML): strip tags, then decode
   *    entities once;
   *  - XML-escaped markup (`&lt;p&gt;…`): undo the XML escaping first, strip
   *    the revealed tags, then decode the HTML-level entities that remain.
   *
   * @returns The plain text, or null when the tag is missing or empty so the
   *   caller can fall back to another element.
   */
  private extractBodyText(block: string, tagName: string): string | null {
    const cdata = this.extractCDataTag(block, tagName);
    const raw = cdata ?? this.extractTag(block, tagName);
    if (!raw) return null;

    if (cdata !== null || raw.includes('<')) {
      return this.decodeEntities(this.stripHtml(raw));
    }

    const markup = this.decodeEntities(raw);
    // Only strip things that look like real tags or comments, so decoded plain
    // text such as "5 < 6 and 7 > 3" is left intact.
    const text = markup
      .replace(/<!--[\s\S]*?-->/g, ' ')
      .replace(/<\/?[a-zA-Z][^>]*>/g, ' ')
      .replace(/\s+/g, ' ')
      .trim();
    return this.decodeEntities(text);
  }

  /**
   * Pick the article URL from an Atom entry: a `rel="alternate"` link if
   * present (attributes in either order), otherwise the first `<link href>`
   * that is not `rel="self"` or `rel="enclosure"`. Returns null if none match.
   */
  private extractAtomLink(block: string): string | null {
    const relThenHref =
      /<link[^>]*rel\s*=\s*["']alternate["'][^>]*href\s*=\s*["']([^"']+)["'][^>]*\/?>/i.exec(block);
    if (relThenHref) return relThenHref[1] ?? null;

    const hrefThenRel =
      /<link[^>]*href\s*=\s*["']([^"']+)["'][^>]*rel\s*=\s*["']alternate["'][^>]*\/?>/i.exec(block);
    if (hrefThenRel) return hrefThenRel[1] ?? null;

    const linkRegex = /<link[^>]*href\s*=\s*["']([^"']+)["'][^>]*\/?>/gi;
    let match: RegExpExecArray | null;
    while ((match = linkRegex.exec(block)) !== null) {
      const full = match[0];
      if (/rel\s*=\s*["']self["']/i.test(full)) continue;
      if (/rel\s*=\s*["']enclosure["']/i.test(full)) continue;
      return match[1] ?? null;
    }

    return null;
  }

  /** Extract the author name from Atom `<author><name>…</name></author>`, or ''. */
  private extractAtomAuthor(block: string): string {
    const authorMatch = /<author[\s>]([\s\S]*?)<\/author>/i.exec(block);
    if (!authorMatch) return '';
    const nameMatch = /<name>([\s\S]*?)<\/name>/i.exec(authorMatch[1] ?? '');
    return nameMatch ? (nameMatch[1] ?? '').trim() : '';
  }

  // -------------------------------------------------------------------------
  // String helpers
  // -------------------------------------------------------------------------

  /**
   * Decode named and numeric character references in a single pass, so a
   * chained reference such as `&amp;lt;` becomes `&lt;` rather than `<`.
   *
   * Numeric references are decoded as full Unicode code points (so astral
   * characters like emoji survive). Unknown names and invalid code points
   * (zero, surrogates, out-of-range values) are left untouched.
   */
  private decodeEntities(text: string): string {
    return text.replace(
      /&(#[xX][0-9a-fA-F]+|#\d+|[a-zA-Z][a-zA-Z0-9]*);/g,
      (match: string, body: string) => {
        if (body.charAt(0) !== '#') {
          return this.NAMED_ENTITIES.get(body) ?? match;
        }

        const isHex = body.charAt(1) === 'x' || body.charAt(1) === 'X';
        const codePoint = isHex ? parseInt(body.slice(2), 16) : parseInt(body.slice(1), 10);
        const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
        // String.fromCodePoint throws RangeError above 0x10FFFF, so validate first.
        if (!(codePoint > 0 && codePoint <= 0x10ffff) || isSurrogate) return match;
        return String.fromCodePoint(codePoint);
      }
    );
  }

  /** Replace every `<…>` tag with a space, collapse whitespace, and trim. */
  private stripHtml(text: string): string {
    return text
      .replace(/<[^>]+>/g, ' ')
      .replace(/\s+/g, ' ')
      .trim();
  }

  /** Parse a date string, falling back to the current time when it is invalid. */
  private parseDate(dateStr: string): Date {
    const parsed = new Date(dateStr);
    return Number.isNaN(parsed.getTime()) ? new Date() : parsed;
  }

  /**
   * 32-bit rolling hash (hash * 31 + char code) rendered as `rss_<hex>`.
   *
   * Used as a fallback ID when an entry has no guid/id. The algorithm is kept
   * exactly as is: IDs already stored in checkpoints were produced by it, and
   * changing it would make those entries look new.
   */
  private hashString(input: string): string {
    let hash = 0;
    for (let i = 0; i < input.length; i++) {
      const char = input.charCodeAt(i);
      hash = (hash << 5) - hash + char;
      hash |= 0; // Convert to 32-bit integer
    }
    return `rss_${Math.abs(hash).toString(16)}`;
  }
}