@lobu/cli 6.0.0 → 6.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +20 -27
- package/dist/bundled-skills/lobu/SKILL.md +12 -12
- package/dist/commands/_lib/apply/apply-cmd.d.ts +2 -0
- package/dist/commands/_lib/apply/apply-cmd.d.ts.map +1 -1
- package/dist/commands/_lib/apply/apply-cmd.js +26 -0
- package/dist/commands/_lib/apply/apply-cmd.js.map +1 -1
- package/dist/commands/_lib/apply/client.d.ts +1 -1
- package/dist/commands/_lib/apply/client.d.ts.map +1 -1
- package/dist/commands/_lib/apply/desired-state.js +6 -6
- package/dist/commands/_lib/apply/desired-state.js.map +1 -1
- package/dist/commands/agent.d.ts +7 -0
- package/dist/commands/agent.d.ts.map +1 -1
- package/dist/commands/agent.js +65 -1
- package/dist/commands/agent.js.map +1 -1
- package/dist/commands/chat.d.ts +12 -9
- package/dist/commands/chat.d.ts.map +1 -1
- package/dist/commands/chat.js +117 -56
- package/dist/commands/chat.js.map +1 -1
- package/dist/commands/dev.d.ts +15 -7
- package/dist/commands/dev.d.ts.map +1 -1
- package/dist/commands/dev.js +79 -44
- package/dist/commands/dev.js.map +1 -1
- package/dist/commands/doctor.d.ts +1 -0
- package/dist/commands/doctor.d.ts.map +1 -1
- package/dist/commands/doctor.js +136 -0
- package/dist/commands/doctor.js.map +1 -1
- package/dist/commands/eval.d.ts +8 -0
- package/dist/commands/eval.d.ts.map +1 -1
- package/dist/commands/eval.js +56 -1
- package/dist/commands/eval.js.map +1 -1
- package/dist/commands/init.d.ts +20 -5
- package/dist/commands/init.d.ts.map +1 -1
- package/dist/commands/init.js +332 -183
- package/dist/commands/init.js.map +1 -1
- package/dist/commands/link.d.ts +11 -0
- package/dist/commands/link.d.ts.map +1 -0
- package/dist/commands/link.js +28 -0
- package/dist/commands/link.js.map +1 -0
- package/dist/commands/login.d.ts.map +1 -1
- package/dist/commands/login.js +14 -2
- package/dist/commands/login.js.map +1 -1
- package/dist/commands/memory/_lib/browser-auth-cmd.d.ts.map +1 -1
- package/dist/commands/memory/_lib/browser-auth-cmd.js +4 -4
- package/dist/commands/memory/_lib/browser-auth-cmd.js.map +1 -1
- package/dist/commands/memory/_lib/install-targets.d.ts.map +1 -1
- package/dist/commands/memory/_lib/install-targets.js +1 -5
- package/dist/commands/memory/_lib/install-targets.js.map +1 -1
- package/dist/commands/memory/_lib/mcp.d.ts +2 -2
- package/dist/commands/memory/_lib/mcp.d.ts.map +1 -1
- package/dist/commands/memory/_lib/mcp.js +24 -12
- package/dist/commands/memory/_lib/mcp.js.map +1 -1
- package/dist/commands/memory/_lib/openclaw-auth.d.ts +1 -0
- package/dist/commands/memory/_lib/openclaw-auth.d.ts.map +1 -1
- package/dist/commands/memory/_lib/openclaw-auth.js +14 -3
- package/dist/commands/memory/_lib/openclaw-auth.js.map +1 -1
- package/dist/commands/memory/_lib/openclaw-cmd.js +1 -1
- package/dist/commands/memory/_lib/openclaw-cmd.js.map +1 -1
- package/dist/commands/memory/_lib/schema.d.ts +2 -2
- package/dist/commands/memory/_lib/schema.d.ts.map +1 -1
- package/dist/commands/memory/_lib/schema.js +3 -3
- package/dist/commands/memory/_lib/schema.js.map +1 -1
- package/dist/commands/memory/_lib/seed-cmd.d.ts.map +1 -1
- package/dist/commands/memory/_lib/seed-cmd.js +5 -6
- package/dist/commands/memory/_lib/seed-cmd.js.map +1 -1
- package/dist/commands/memory/run.d.ts.map +1 -1
- package/dist/commands/memory/run.js +2 -2
- package/dist/commands/memory/run.js.map +1 -1
- package/dist/commands/platforms/platform-prompts.d.ts +0 -1
- package/dist/commands/platforms/platform-prompts.d.ts.map +1 -1
- package/dist/commands/platforms/platform-prompts.js +54 -8
- package/dist/commands/platforms/platform-prompts.js.map +1 -1
- package/dist/commands/telemetry.d.ts +10 -0
- package/dist/commands/telemetry.d.ts.map +1 -0
- package/dist/commands/telemetry.js +68 -0
- package/dist/commands/telemetry.js.map +1 -0
- package/dist/commands/whoami.d.ts.map +1 -1
- package/dist/commands/whoami.js +1 -1
- package/dist/commands/whoami.js.map +1 -1
- package/dist/connectors/README.md +534 -0
- package/dist/connectors/__tests__/browser-scraper-utils.test.ts +186 -0
- package/dist/connectors/browser-scraper-utils.ts +214 -0
- package/dist/connectors/capterra.ts +273 -0
- package/dist/connectors/g2.ts +286 -0
- package/dist/connectors/github.ts +1553 -0
- package/dist/connectors/glassdoor.ts +291 -0
- package/dist/connectors/gmaps.ts +197 -0
- package/dist/connectors/google_calendar.ts +631 -0
- package/dist/connectors/google_gmail.ts +751 -0
- package/dist/connectors/google_photos.ts +776 -0
- package/dist/connectors/google_play.ts +342 -0
- package/dist/connectors/hackernews.ts +471 -0
- package/dist/connectors/index.ts +23 -0
- package/dist/connectors/ios_appstore.ts +226 -0
- package/dist/connectors/linkedin.ts +471 -0
- package/dist/connectors/microsoft_outlook.ts +410 -0
- package/dist/connectors/producthunt.ts +471 -0
- package/dist/connectors/reddit.ts +600 -0
- package/dist/connectors/rss.ts +448 -0
- package/dist/connectors/spotify.ts +590 -0
- package/dist/connectors/trustpilot.ts +199 -0
- package/dist/connectors/website.ts +629 -0
- package/dist/connectors/whatsapp.ts +1073 -0
- package/dist/connectors/x.ts +526 -0
- package/dist/connectors/youtube.ts +666 -0
- package/dist/db/migrations/00000000000000_baseline.sql +4867 -0
- package/dist/db/migrations/20260405193000_add_mcp_sessions.sql +33 -0
- package/dist/db/migrations/20260408120000_remove_system_connectors.sql +48 -0
- package/dist/db/migrations/20260408120001_optional_compiled_code.sql +6 -0
- package/dist/db/migrations/20260409110000_add_active_watcher_run_index.sql +9 -0
- package/dist/db/migrations/20260409130000_connector_default_config.sql +5 -0
- package/dist/db/migrations/20260410120000_add_agent_secrets.sql +25 -0
- package/dist/db/migrations/20260413170000_add_watcher_group_id.sql +67 -0
- package/dist/db/migrations/20260416120000_add_entity_wa_jid_index.sql +14 -0
- package/dist/db/migrations/20260417100000_add_entity_identities.sql +77 -0
- package/dist/db/migrations/20260418100000_add_auth_runs.sql +83 -0
- package/dist/db/migrations/20260418110000_add_runs_created_by_user.sql +18 -0
- package/dist/db/migrations/20260419120000_add_event_identity_indexes.sql +56 -0
- package/dist/db/migrations/20260420120000_extend_reserved_org_slugs.sql +56 -0
- package/dist/db/migrations/20260424030000_add_watcher_run_correlation.sql +52 -0
- package/dist/db/migrations/20260424130000_relax_events_client_id_fk.sql +47 -0
- package/dist/db/migrations/20260425100000_normalize_watcher_feedback.sql +91 -0
- package/dist/db/migrations/20260425120000_add_run_diagnostics.sql +20 -0
- package/dist/db/migrations/20260425130000_add_repair_agent_plumbing.sql +46 -0
- package/dist/db/migrations/20260426120000_entities_entity_type_fk.sql +101 -0
- package/dist/db/migrations/20260426130000_db_integrity_cleanup.sql +104 -0
- package/dist/db/migrations/20260426130001_db_integrity_cleanup_concurrent.sql +187 -0
- package/dist/db/migrations/20260427133000_events_created_by_nullable.sql +74 -0
- package/dist/db/migrations/20260427140000_identity_engine_indexes.sql +140 -0
- package/dist/db/migrations/20260427150000_drop_events_source_id.sql +177 -0
- package/dist/db/migrations/20260427160000_drop_dead_schema.sql +76 -0
- package/dist/db/migrations/20260427170000_market_founder_to_member.sql +364 -0
- package/dist/db/migrations/20260428040000_cascade_events_watchers_org_fk.sql +66 -0
- package/dist/db/migrations/20260428050000_add_runs_approved_input.sql +9 -0
- package/dist/db/migrations/20260429010000_auth_profile_tenant_scoped_fk.sql +79 -0
- package/dist/db/migrations/20260429060000_extend_runs_for_lobu_queue.sql +108 -0
- package/dist/db/migrations/20260429120000_agent_changed_notify.sql +97 -0
- package/dist/db/migrations/20260429120100_user_auth_profiles_and_model_prefs.sql +36 -0
- package/dist/db/migrations/20260429120200_fix_notify_old_keys.sql +130 -0
- package/dist/db/migrations/20260429130000_oauth_states_cli_sessions_rate_limits.sql +83 -0
- package/dist/db/migrations/20260429140000_phase8_grants_chat_connections_mcp_sessions.sql +84 -0
- package/dist/db/migrations/20260429140100_runs_priority_expires_at_retry_delay.sql +44 -0
- package/dist/db/migrations/20260429180000_drop_invalidatable_cache_triggers.sql +25 -0
- package/dist/db/migrations/20260430005614_agents_apply_fields.sql +21 -0
- package/dist/db/migrations/20260430022231_fix_connection_config_encryption.sql +69 -0
- package/dist/db/migrations/20260430151215_add_task_run_type.sql +77 -0
- package/dist/db/migrations/20260501000000_drop_cli_sessions.sql +27 -0
- package/dist/db/migrations/20260501133000_lobu_memory_mcp_id.sql +117 -0
- package/dist/db/migrations/20260502000000_drop_chat_connections.sql +60 -0
- package/dist/db/migrations/20260503000000_agent_secrets_org_scope.sql +56 -0
- package/dist/db/migrations/20260504000000_flatten_agents_drop_sandbox_model.sql +48 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +147 -23
- package/dist/index.js.map +1 -1
- package/dist/internal/api-client.d.ts +4 -8
- package/dist/internal/api-client.d.ts.map +1 -1
- package/dist/internal/api-client.js +1 -1
- package/dist/internal/api-client.js.map +1 -1
- package/dist/internal/context.js +2 -2
- package/dist/internal/context.js.map +1 -1
- package/dist/internal/credentials.d.ts.map +1 -1
- package/dist/internal/credentials.js +6 -1
- package/dist/internal/credentials.js.map +1 -1
- package/dist/internal/index.d.ts +2 -3
- package/dist/internal/index.d.ts.map +1 -1
- package/dist/internal/index.js +2 -2
- package/dist/internal/index.js.map +1 -1
- package/dist/internal/oauth.d.ts +7 -6
- package/dist/internal/oauth.d.ts.map +1 -1
- package/dist/internal/oauth.js +3 -3
- package/dist/internal/project-link.d.ts +10 -0
- package/dist/internal/project-link.d.ts.map +1 -0
- package/dist/internal/project-link.js +48 -0
- package/dist/internal/project-link.js.map +1 -0
- package/dist/providers.json +2 -2
- package/dist/server.bundle.mjs +3173 -4404
- package/dist/start-local.bundle.mjs +71481 -0
- package/dist/templates/README.md.tmpl +10 -11
- package/package.json +14 -12
- package/dist/__tests__/chat.integration.test.d.ts +0 -2
- package/dist/__tests__/chat.integration.test.d.ts.map +0 -1
- package/dist/__tests__/chat.integration.test.js +0 -337
- package/dist/__tests__/chat.integration.test.js.map +0 -1
- package/dist/__tests__/dev.test.d.ts +0 -2
- package/dist/__tests__/dev.test.d.ts.map +0 -1
- package/dist/__tests__/dev.test.js +0 -25
- package/dist/__tests__/dev.test.js.map +0 -1
- package/dist/__tests__/init-memory.test.d.ts +0 -2
- package/dist/__tests__/init-memory.test.d.ts.map +0 -1
- package/dist/__tests__/init-memory.test.js +0 -45
- package/dist/__tests__/init-memory.test.js.map +0 -1
- package/dist/__tests__/token.test.d.ts +0 -2
- package/dist/__tests__/token.test.d.ts.map +0 -1
- package/dist/__tests__/token.test.js +0 -52
- package/dist/__tests__/token.test.js.map +0 -1
- package/dist/commands/_lib/apply/__tests__/client.test.d.ts +0 -2
- package/dist/commands/_lib/apply/__tests__/client.test.d.ts.map +0 -1
- package/dist/commands/_lib/apply/__tests__/client.test.js +0 -23
- package/dist/commands/_lib/apply/__tests__/client.test.js.map +0 -1
- package/dist/commands/_lib/apply/__tests__/desired-state.test.d.ts +0 -2
- package/dist/commands/_lib/apply/__tests__/desired-state.test.d.ts.map +0 -1
- package/dist/commands/_lib/apply/__tests__/desired-state.test.js +0 -140
- package/dist/commands/_lib/apply/__tests__/desired-state.test.js.map +0 -1
- package/dist/commands/_lib/apply/__tests__/diff.test.d.ts +0 -2
- package/dist/commands/_lib/apply/__tests__/diff.test.d.ts.map +0 -1
- package/dist/commands/_lib/apply/__tests__/diff.test.js +0 -378
- package/dist/commands/_lib/apply/__tests__/diff.test.js.map +0 -1
- package/dist/commands/apply.d.ts +0 -3
- package/dist/commands/apply.d.ts.map +0 -1
- package/dist/commands/apply.js +0 -5
- package/dist/commands/apply.js.map +0 -1
- package/dist/commands/memory/_lib/openclaw-auth.test.d.ts +0 -2
- package/dist/commands/memory/_lib/openclaw-auth.test.d.ts.map +0 -1
- package/dist/commands/memory/_lib/openclaw-auth.test.js +0 -9
- package/dist/commands/memory/_lib/openclaw-auth.test.js.map +0 -1
- package/dist/internal/__tests__/api-client.test.d.ts +0 -2
- package/dist/internal/__tests__/api-client.test.d.ts.map +0 -1
- package/dist/internal/__tests__/api-client.test.js +0 -95
- package/dist/internal/__tests__/api-client.test.js.map +0 -1
- package/dist/internal/__tests__/context.test.d.ts +0 -2
- package/dist/internal/__tests__/context.test.d.ts.map +0 -1
- package/dist/internal/__tests__/context.test.js +0 -77
- package/dist/internal/__tests__/context.test.js.map +0 -1
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* RSS / Atom Connector (V1 runtime)
|
|
3
|
+
*
|
|
4
|
+
* Fetches and parses RSS 2.0 and Atom feeds. Supports multiple feed URLs,
|
|
5
|
+
* deduplication via checkpoint, and HTML entity decoding.
|
|
6
|
+
* No external XML parsing dependencies — uses regex-based parsing.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import {
|
|
10
|
+
type ActionContext,
|
|
11
|
+
type ActionResult,
|
|
12
|
+
type ConnectorDefinition,
|
|
13
|
+
ConnectorRuntime,
|
|
14
|
+
type EventEnvelope,
|
|
15
|
+
type SyncContext,
|
|
16
|
+
type SyncResult,
|
|
17
|
+
} from '@lobu/connector-sdk';
|
|
18
|
+
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// Types
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
|
|
23
|
+
/**
 * One article parsed out of a feed, normalised so RSS 2.0 `<item>` elements
 * and Atom `<entry>` elements share a single shape.
 */
interface RSSFeedItem {
  /** Feed-supplied identifier (RSS `guid` / Atom `id`), or a hash of title and link when the feed provides none. */
  id: string;
  /** Title with markup stripped and entities decoded. */
  title: string;
  /** Article URL; empty string when the feed provides none. */
  link: string;
  /** Body text with markup stripped and entities decoded. */
  content: string;
  /** Author name; empty string when the feed provides none. */
  author: string;
  /** Publication time; the parsers substitute the current time when the feed date is missing or unparseable. */
  publishedAt: Date;
  /** URL of the feed this item was read from. */
  feedUrl: string;
}
|
|
32
|
+
|
|
33
|
+
/** Sync state persisted between runs so already-emitted articles are skipped. */
interface RSSCheckpoint {
  /** IDs of items that earlier syncs have already seen; used for de-duplication. */
  last_item_ids: string[];
  /** ISO-8601 timestamp of the newest article emitted by the last sync that produced events. */
  last_published_at?: string;
}
|
|
37
|
+
|
|
38
|
+
/** User-supplied configuration for the `articles` feed. */
interface RSSConfig {
  /** RSS/Atom feed URLs to poll; `sync` rejects a missing or empty list. */
  feed_urls: string[];
  /** Upper bound on items parsed from each feed per sync; `sync` uses 100 when omitted. */
  max_items_per_feed?: number;
}
|
|
42
|
+
|
|
43
|
+
// ---------------------------------------------------------------------------
|
|
44
|
+
// Connector
|
|
45
|
+
// ---------------------------------------------------------------------------
|
|
46
|
+
|
|
47
|
+
/**
 * Connector that polls RSS 2.0 / Atom feeds and emits one `article` event per
 * previously unseen item.
 *
 * Parsing is regex-based (no XML library). De-duplication relies on the item
 * IDs stored in the checkpoint returned from each `sync` call.
 */
export default class RSSConnector extends ConnectorRuntime {
  /** Static connector manifest: identity, auth (none), feed config schema and event kinds. */
  readonly definition: ConnectorDefinition = {
    key: 'rss',
    name: 'RSS / Atom',
    description: 'Fetches and parses RSS 2.0 and Atom feeds to collect articles.',
    version: '1.0.0',
    faviconDomain: 'rss.com',
    authSchema: {
      methods: [{ type: 'none' }],
    },
    feeds: {
      articles: {
        key: 'articles',
        name: 'Feed Articles',
        description: 'Articles from RSS/Atom feeds.',
        configSchema: {
          type: 'object',
          required: ['feed_urls'],
          properties: {
            feed_urls: {
              type: 'array',
              items: { type: 'string', format: 'uri' },
              minItems: 1,
              description: 'One or more RSS/Atom feed URLs.',
            },
            max_items_per_feed: {
              type: 'integer',
              minimum: 1,
              maximum: 1000,
              default: 100,
              description: 'Maximum items to collect per feed per sync.',
            },
          },
        },
        eventKinds: {
          article: {
            description: 'A blog post or article from an RSS/Atom feed',
            metadataSchema: {
              type: 'object',
              properties: {
                feed_url: {
                  type: 'string',
                  format: 'uri',
                  description: 'The feed URL this article came from',
                },
              },
            },
          },
        },
      },
    },
    optionsSchema: {
      type: 'object',
      required: ['feed_urls'],
      properties: {
        feed_urls: {
          type: 'array',
          items: { type: 'string', format: 'uri' },
          minItems: 1,
          description: 'One or more RSS/Atom feed URLs.',
        },
        max_items_per_feed: {
          type: 'integer',
          minimum: 1,
          maximum: 1000,
          default: 100,
          description: 'Maximum items to collect per feed per sync.',
        },
      },
    },
  };

  /** Number of IDs no longer present in any fetched feed that the checkpoint still remembers. */
  private readonly MAX_DEDUP_IDS = 500;
  /** Per-feed budget, in milliseconds, covering both the request and the body download. */
  private readonly FETCH_TIMEOUT_MS = 15000;
  /** User-Agent header sent with every feed request. */
  private readonly USER_AGENT = 'Lobu-RSS-Connector/1.0.0';

  // -------------------------------------------------------------------------
  // sync
  // -------------------------------------------------------------------------

  /**
   * Fetch every configured feed and return events for items not seen before.
   *
   * A feed that fails to download or parse is logged with `console.warn` and
   * skipped; the remaining feeds are still processed.
   *
   * @param ctx Sync context; `ctx.config` is read as {@link RSSConfig} and
   *   `ctx.checkpoint` as {@link RSSCheckpoint} (or null on the first run).
   * @returns New events (newest first), the updated checkpoint and run counters.
   * @throws Error when `feed_urls` is missing, not an array, or empty.
   */
  async sync(ctx: SyncContext): Promise<SyncResult> {
    const config = ctx.config as unknown as RSSConfig;
    const feedUrls = config.feed_urls;
    if (!feedUrls || !Array.isArray(feedUrls) || feedUrls.length === 0) {
      throw new Error('feed_urls is required and must be a non-empty array.');
    }

    const maxItemsPerFeed = config.max_items_per_feed ?? 100;
    const checkpoint = (ctx.checkpoint as RSSCheckpoint | null) ?? {
      last_item_ids: [],
    };
    const previousIds = checkpoint.last_item_ids ?? [];
    const seenIds = new Set<string>(previousIds);

    const allItems: RSSFeedItem[] = [];

    for (const feedUrl of feedUrls) {
      try {
        const items = await this.fetchAndParseFeed(feedUrl, maxItemsPerFeed);
        allItems.push(...items);
      } catch (err) {
        // Best effort: one broken feed must not block the others.
        const message = err instanceof Error ? err.message : String(err);
        console.warn(`Failed to fetch feed ${feedUrl}: ${message}`);
      }
    }

    // Newest first, so events[0] is the most recent new article.
    allItems.sort((a, b) => b.publishedAt.getTime() - a.publishedAt.getTime());

    const events: EventEnvelope[] = [];
    // Every ID present in the feeds right now (newest first), whether new or already known.
    const fetchedIds = new Set<string>();
    let newestNewPublishedAt: Date | undefined;

    for (const item of allItems) {
      fetchedIds.add(item.id);
      if (seenIds.has(item.id)) continue;
      seenIds.add(item.id);

      if (newestNewPublishedAt === undefined) {
        newestNewPublishedAt = item.publishedAt;
      }

      events.push({
        origin_id: item.id,
        title: item.title,
        payload_text: item.content,
        author_name: item.author || undefined,
        source_url: item.link || undefined,
        occurred_at: item.publishedAt,
        origin_type: 'article',
        metadata: {
          feed_url: item.feedUrl,
        },
      });
    }

    // Build the checkpoint. Every ID currently visible in a feed MUST be kept:
    // it will be served again on the next poll, and forgetting it would
    // re-emit the article as a duplicate. On top of those, remember up to
    // MAX_DEDUP_IDS of the most recently seen IDs that have dropped out of the
    // feeds (or whose feed failed this round). IDs are stored oldest first so
    // trimming from the front discards the least recently seen ones.
    const currentIds = [...fetchedIds].reverse();
    const staleIds = previousIds.filter((id) => !fetchedIds.has(id));
    const retainedIds = [...staleIds, ...currentIds].slice(
      -(currentIds.length + this.MAX_DEDUP_IDS)
    );

    const newCheckpoint: RSSCheckpoint = {
      last_item_ids: retainedIds,
      last_published_at:
        newestNewPublishedAt !== undefined
          ? newestNewPublishedAt.toISOString()
          : checkpoint.last_published_at,
    };

    return {
      events,
      checkpoint: newCheckpoint as unknown as Record<string, unknown>,
      metadata: {
        items_found: events.length,
        feeds_fetched: feedUrls.length,
      },
    };
  }

  // -------------------------------------------------------------------------
  // execute
  // -------------------------------------------------------------------------

  /** This connector is read-only; every action request is rejected. */
  async execute(_ctx: ActionContext): Promise<ActionResult> {
    return { success: false, error: 'Actions not supported' };
  }

  // -------------------------------------------------------------------------
  // Feed fetching & parsing
  // -------------------------------------------------------------------------

  /**
   * Download one feed and parse it into items.
   *
   * @param feedUrl URL of the feed document.
   * @param maxItems Maximum number of items to parse from the document.
   * @throws Error on a non-2xx response, or whatever `fetch` / `text()` reject
   *   with (including the abort raised after FETCH_TIMEOUT_MS).
   */
  private async fetchAndParseFeed(feedUrl: string, maxItems: number): Promise<RSSFeedItem[]> {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.FETCH_TIMEOUT_MS);

    try {
      const response = await fetch(feedUrl, {
        signal: controller.signal,
        headers: {
          'User-Agent': this.USER_AGENT,
          Accept: 'application/rss+xml, application/atom+xml, application/xml, text/xml, */*',
        },
      });

      if (!response.ok) {
        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
      }

      // The timer stays armed while the body streams in, so a server that
      // sends headers and then stalls is aborted instead of hanging the sync.
      const xml = await response.text();
      return this.parseXml(xml, feedUrl, maxItems);
    } finally {
      clearTimeout(timeoutId);
    }
  }

  /** Dispatch to the Atom or RSS parser depending on the document's root element. */
  private parseXml(xml: string, feedUrl: string, maxItems: number): RSSFeedItem[] {
    if (this.isAtomFeed(xml)) {
      return this.parseAtom(xml, feedUrl, maxItems);
    }
    return this.parseRSS(xml, feedUrl, maxItems);
  }

  /** True when the document contains a `<feed>` element and no `<rss>` element. */
  private isAtomFeed(xml: string): boolean {
    return /<feed[\s>]/.test(xml) && !/<rss[\s>]/.test(xml);
  }

  // -------------------------------------------------------------------------
  // RSS 2.0 parser
  // -------------------------------------------------------------------------

  /**
   * Parse up to `maxItems` `<item>` elements from an RSS 2.0 document.
   *
   * NOTE(review): text fields are tag-stripped first and entity-decoded
   * second, so markup that arrives entity-escaped (e.g. `&lt;p&gt;`) ends up
   * as literal tags in the output — confirm whether that is intended.
   */
  private parseRSS(xml: string, feedUrl: string, maxItems: number): RSSFeedItem[] {
    const items: RSSFeedItem[] = [];
    const itemRegex = /<item[\s>]([\s\S]*?)<\/item>/gi;
    let match: RegExpExecArray | null;

    while ((match = itemRegex.exec(xml)) !== null && items.length < maxItems) {
      const block = match[1] ?? '';

      const title = this.extractTag(block, 'title');
      const link = this.extractTag(block, 'link');
      const description = this.extractTag(block, 'description');
      // extractTag already tries the CDATA form before the plain form.
      const contentEncoded = this.extractTag(block, 'content:encoded');
      const pubDate = this.extractTag(block, 'pubDate');
      const guid = this.extractTag(block, 'guid');
      const author = this.extractTag(block, 'author') ?? this.extractTag(block, 'dc:creator') ?? '';

      // Without a guid, derive a stable ID from title + link.
      const id = guid || this.hashString(`${title ?? ''}|${link ?? ''}`);
      const content = contentEncoded || description || '';
      const publishedAt = pubDate ? this.parseDate(pubDate) : new Date();

      items.push({
        id,
        title: this.decodeEntities(this.stripHtml(title ?? '')),
        link: this.decodeEntities(link ?? ''),
        content: this.decodeEntities(this.stripHtml(content)),
        author: this.decodeEntities(this.stripHtml(author)),
        publishedAt,
        feedUrl,
      });
    }

    return items;
  }

  // -------------------------------------------------------------------------
  // Atom parser
  // -------------------------------------------------------------------------

  /** Parse up to `maxItems` `<entry>` elements from an Atom document. */
  private parseAtom(xml: string, feedUrl: string, maxItems: number): RSSFeedItem[] {
    const items: RSSFeedItem[] = [];
    const entryRegex = /<entry[\s>]([\s\S]*?)<\/entry>/gi;
    let match: RegExpExecArray | null;

    while ((match = entryRegex.exec(xml)) !== null && items.length < maxItems) {
      const block = match[1] ?? '';

      const title = this.extractTag(block, 'title');
      const link = this.extractAtomLink(block);
      const content = this.extractTag(block, 'content') ?? this.extractTag(block, 'summary') ?? '';
      const published =
        this.extractTag(block, 'published') ?? this.extractTag(block, 'updated') ?? '';
      const id = this.extractTag(block, 'id');
      const author = this.extractAtomAuthor(block);

      // Without an <id>, derive a stable ID from title + link.
      const externalId = id || this.hashString(`${title ?? ''}|${link ?? ''}`);
      const publishedAt = published ? this.parseDate(published) : new Date();

      items.push({
        id: externalId,
        title: this.decodeEntities(this.stripHtml(title ?? '')),
        link: this.decodeEntities(link ?? ''),
        content: this.decodeEntities(this.stripHtml(content)),
        author: this.decodeEntities(this.stripHtml(author)),
        publishedAt,
        feedUrl,
      });
    }

    return items;
  }

  // -------------------------------------------------------------------------
  // XML extraction helpers
  // -------------------------------------------------------------------------

  /**
   * Return the trimmed inner text of the first `<tagName>` element in `block`,
   * preferring a CDATA section when the element consists of one.
   *
   * @returns The inner text, or null when no matching open/close pair exists.
   */
  private extractTag(block: string, tagName: string): string | null {
    const cdataResult = this.extractCDataTag(block, tagName);
    if (cdataResult !== null) return cdataResult;

    // Escape regex metacharacters so the tag name is matched literally.
    const escaped = tagName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const regex = new RegExp(`<${escaped}(?:\\s[^>]*)?>([\\s\\S]*?)<\\/${escaped}>`, 'i');
    const match = regex.exec(block);
    return match ? (match[1] ?? '').trim() : null;
  }

  /**
   * Return the trimmed content of a `<tagName>` element whose whole body is a
   * single CDATA section, or null when there is no such element.
   */
  private extractCDataTag(block: string, tagName: string): string | null {
    const escaped = tagName.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const regex = new RegExp(
      `<${escaped}(?:\\s[^>]*)?>\\s*<!\\[CDATA\\[([\\s\\S]*?)\\]\\]>\\s*<\\/${escaped}>`,
      'i'
    );
    const match = regex.exec(block);
    return match ? (match[1] ?? '').trim() : null;
  }

  /**
   * Pick the article URL from an Atom entry's `<link>` elements.
   *
   * Preference order: a link with rel="alternate" (either attribute order),
   * then the first link that is neither rel="self" nor rel="enclosure".
   */
  private extractAtomLink(block: string): string | null {
    // rel="alternate" written before href
    const alternateMatch =
      /<link[^>]*rel\s*=\s*["']alternate["'][^>]*href\s*=\s*["']([^"']+)["'][^>]*\/?>/i.exec(block);
    if (alternateMatch) return alternateMatch[1] ?? null;

    // href written before rel="alternate"
    const alternateMatch2 =
      /<link[^>]*href\s*=\s*["']([^"']+)["'][^>]*rel\s*=\s*["']alternate["'][^>]*\/?>/i.exec(block);
    if (alternateMatch2) return alternateMatch2[1] ?? null;

    // Otherwise take the first link that is not rel="self" or rel="enclosure".
    const linkRegex = /<link[^>]*href\s*=\s*["']([^"']+)["'][^>]*\/?>/gi;
    let match: RegExpExecArray | null;
    while ((match = linkRegex.exec(block)) !== null) {
      const full = match[0];
      if (/rel\s*=\s*["']self["']/i.test(full)) continue;
      if (/rel\s*=\s*["']enclosure["']/i.test(full)) continue;
      return match[1] ?? null;
    }

    return null;
  }

  /** Extract the author name from Atom `<author><name>...</name></author>`; '' when absent. */
  private extractAtomAuthor(block: string): string {
    const authorMatch = /<author[\s>]([\s\S]*?)<\/author>/i.exec(block);
    if (!authorMatch) return '';
    const nameMatch = /<name>([\s\S]*?)<\/name>/i.exec(authorMatch[1] ?? '');
    return nameMatch ? (nameMatch[1] ?? '').trim() : '';
  }

  // -------------------------------------------------------------------------
  // String helpers
  // -------------------------------------------------------------------------

  /**
   * Decode the five predefined XML entities plus numeric character references
   * in a single pass, so chained entities like '&amp;lt;' become '&lt;' and
   * are not double-unescaped into '<'.
   *
   * Numeric references are decoded with `String.fromCodePoint`, so characters
   * outside the Basic Multilingual Plane (e.g. `&#128512;`) survive intact.
   * References above U+10FFFF are not valid code points and are left as-is.
   */
  private decodeEntities(text: string): string {
    return text.replace(
      /&(amp|lt|gt|quot|apos|#39|#x([0-9a-fA-F]+)|#(\d+));/g,
      (entity: string, name: string, hex?: string, decimal?: string) => {
        switch (name) {
          case 'amp':
            return '&';
          case 'lt':
            return '<';
          case 'gt':
            return '>';
          case 'quot':
            return '"';
          case 'apos':
          case '#39':
            return "'";
          default: {
            const codePoint = hex ? parseInt(hex, 16) : decimal ? parseInt(decimal, 10) : NaN;
            if (!Number.isInteger(codePoint) || codePoint > 0x10ffff) return entity;
            return String.fromCodePoint(codePoint);
          }
        }
      }
    );
  }

  /** Replace every tag with a space, collapse whitespace runs, and trim. */
  private stripHtml(text: string): string {
    return text
      .replace(/<[^>]+>/g, ' ')
      .replace(/\s+/g, ' ')
      .trim();
  }

  /** Parse a date string, falling back to the current time when it is invalid. */
  private parseDate(dateStr: string): Date {
    const parsed = new Date(dateStr);
    return Number.isNaN(parsed.getTime()) ? new Date() : parsed;
  }

  /**
   * 32-bit rolling hash (hash * 31 + char code) rendered as `rss_<hex>`.
   * Used only to synthesise an ID for items that carry none.
   */
  private hashString(input: string): string {
    let hash = 0;
    for (let i = 0; i < input.length; i++) {
      const char = input.charCodeAt(i);
      hash = (hash << 5) - hash + char;
      hash |= 0; // Convert to 32-bit integer
    }
    return `rss_${Math.abs(hash).toString(16)}`;
  }
}
|