@lobu/cli 6.0.0 → 6.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (222)
  1. package/README.md +20 -27
  2. package/dist/bundled-skills/lobu/SKILL.md +12 -12
  3. package/dist/commands/_lib/apply/apply-cmd.d.ts +2 -0
  4. package/dist/commands/_lib/apply/apply-cmd.d.ts.map +1 -1
  5. package/dist/commands/_lib/apply/apply-cmd.js +26 -0
  6. package/dist/commands/_lib/apply/apply-cmd.js.map +1 -1
  7. package/dist/commands/_lib/apply/client.d.ts +1 -1
  8. package/dist/commands/_lib/apply/client.d.ts.map +1 -1
  9. package/dist/commands/_lib/apply/desired-state.js +6 -6
  10. package/dist/commands/_lib/apply/desired-state.js.map +1 -1
  11. package/dist/commands/agent.d.ts +7 -0
  12. package/dist/commands/agent.d.ts.map +1 -1
  13. package/dist/commands/agent.js +65 -1
  14. package/dist/commands/agent.js.map +1 -1
  15. package/dist/commands/chat.d.ts +12 -9
  16. package/dist/commands/chat.d.ts.map +1 -1
  17. package/dist/commands/chat.js +117 -56
  18. package/dist/commands/chat.js.map +1 -1
  19. package/dist/commands/dev.d.ts +15 -7
  20. package/dist/commands/dev.d.ts.map +1 -1
  21. package/dist/commands/dev.js +79 -44
  22. package/dist/commands/dev.js.map +1 -1
  23. package/dist/commands/doctor.d.ts +1 -0
  24. package/dist/commands/doctor.d.ts.map +1 -1
  25. package/dist/commands/doctor.js +136 -0
  26. package/dist/commands/doctor.js.map +1 -1
  27. package/dist/commands/eval.d.ts +8 -0
  28. package/dist/commands/eval.d.ts.map +1 -1
  29. package/dist/commands/eval.js +56 -1
  30. package/dist/commands/eval.js.map +1 -1
  31. package/dist/commands/init.d.ts +20 -5
  32. package/dist/commands/init.d.ts.map +1 -1
  33. package/dist/commands/init.js +332 -183
  34. package/dist/commands/init.js.map +1 -1
  35. package/dist/commands/link.d.ts +11 -0
  36. package/dist/commands/link.d.ts.map +1 -0
  37. package/dist/commands/link.js +28 -0
  38. package/dist/commands/link.js.map +1 -0
  39. package/dist/commands/login.d.ts.map +1 -1
  40. package/dist/commands/login.js +14 -2
  41. package/dist/commands/login.js.map +1 -1
  42. package/dist/commands/memory/_lib/browser-auth-cmd.d.ts.map +1 -1
  43. package/dist/commands/memory/_lib/browser-auth-cmd.js +4 -4
  44. package/dist/commands/memory/_lib/browser-auth-cmd.js.map +1 -1
  45. package/dist/commands/memory/_lib/install-targets.d.ts.map +1 -1
  46. package/dist/commands/memory/_lib/install-targets.js +1 -5
  47. package/dist/commands/memory/_lib/install-targets.js.map +1 -1
  48. package/dist/commands/memory/_lib/mcp.d.ts +2 -2
  49. package/dist/commands/memory/_lib/mcp.d.ts.map +1 -1
  50. package/dist/commands/memory/_lib/mcp.js +24 -12
  51. package/dist/commands/memory/_lib/mcp.js.map +1 -1
  52. package/dist/commands/memory/_lib/openclaw-auth.d.ts +1 -0
  53. package/dist/commands/memory/_lib/openclaw-auth.d.ts.map +1 -1
  54. package/dist/commands/memory/_lib/openclaw-auth.js +14 -3
  55. package/dist/commands/memory/_lib/openclaw-auth.js.map +1 -1
  56. package/dist/commands/memory/_lib/openclaw-cmd.js +1 -1
  57. package/dist/commands/memory/_lib/openclaw-cmd.js.map +1 -1
  58. package/dist/commands/memory/_lib/schema.d.ts +2 -2
  59. package/dist/commands/memory/_lib/schema.d.ts.map +1 -1
  60. package/dist/commands/memory/_lib/schema.js +3 -3
  61. package/dist/commands/memory/_lib/schema.js.map +1 -1
  62. package/dist/commands/memory/_lib/seed-cmd.d.ts.map +1 -1
  63. package/dist/commands/memory/_lib/seed-cmd.js +5 -6
  64. package/dist/commands/memory/_lib/seed-cmd.js.map +1 -1
  65. package/dist/commands/memory/run.d.ts.map +1 -1
  66. package/dist/commands/memory/run.js +2 -2
  67. package/dist/commands/memory/run.js.map +1 -1
  68. package/dist/commands/platforms/platform-prompts.d.ts +0 -1
  69. package/dist/commands/platforms/platform-prompts.d.ts.map +1 -1
  70. package/dist/commands/platforms/platform-prompts.js +54 -8
  71. package/dist/commands/platforms/platform-prompts.js.map +1 -1
  72. package/dist/commands/telemetry.d.ts +10 -0
  73. package/dist/commands/telemetry.d.ts.map +1 -0
  74. package/dist/commands/telemetry.js +68 -0
  75. package/dist/commands/telemetry.js.map +1 -0
  76. package/dist/commands/whoami.d.ts.map +1 -1
  77. package/dist/commands/whoami.js +1 -1
  78. package/dist/commands/whoami.js.map +1 -1
  79. package/dist/connectors/README.md +534 -0
  80. package/dist/connectors/__tests__/browser-scraper-utils.test.ts +186 -0
  81. package/dist/connectors/browser-scraper-utils.ts +214 -0
  82. package/dist/connectors/capterra.ts +273 -0
  83. package/dist/connectors/g2.ts +286 -0
  84. package/dist/connectors/github.ts +1553 -0
  85. package/dist/connectors/glassdoor.ts +291 -0
  86. package/dist/connectors/gmaps.ts +197 -0
  87. package/dist/connectors/google_calendar.ts +631 -0
  88. package/dist/connectors/google_gmail.ts +751 -0
  89. package/dist/connectors/google_photos.ts +776 -0
  90. package/dist/connectors/google_play.ts +342 -0
  91. package/dist/connectors/hackernews.ts +471 -0
  92. package/dist/connectors/index.ts +23 -0
  93. package/dist/connectors/ios_appstore.ts +226 -0
  94. package/dist/connectors/linkedin.ts +471 -0
  95. package/dist/connectors/microsoft_outlook.ts +410 -0
  96. package/dist/connectors/producthunt.ts +471 -0
  97. package/dist/connectors/reddit.ts +600 -0
  98. package/dist/connectors/rss.ts +448 -0
  99. package/dist/connectors/spotify.ts +590 -0
  100. package/dist/connectors/trustpilot.ts +199 -0
  101. package/dist/connectors/website.ts +629 -0
  102. package/dist/connectors/whatsapp.ts +1073 -0
  103. package/dist/connectors/x.ts +526 -0
  104. package/dist/connectors/youtube.ts +666 -0
  105. package/dist/db/migrations/00000000000000_baseline.sql +4867 -0
  106. package/dist/db/migrations/20260405193000_add_mcp_sessions.sql +33 -0
  107. package/dist/db/migrations/20260408120000_remove_system_connectors.sql +48 -0
  108. package/dist/db/migrations/20260408120001_optional_compiled_code.sql +6 -0
  109. package/dist/db/migrations/20260409110000_add_active_watcher_run_index.sql +9 -0
  110. package/dist/db/migrations/20260409130000_connector_default_config.sql +5 -0
  111. package/dist/db/migrations/20260410120000_add_agent_secrets.sql +25 -0
  112. package/dist/db/migrations/20260413170000_add_watcher_group_id.sql +67 -0
  113. package/dist/db/migrations/20260416120000_add_entity_wa_jid_index.sql +14 -0
  114. package/dist/db/migrations/20260417100000_add_entity_identities.sql +77 -0
  115. package/dist/db/migrations/20260418100000_add_auth_runs.sql +83 -0
  116. package/dist/db/migrations/20260418110000_add_runs_created_by_user.sql +18 -0
  117. package/dist/db/migrations/20260419120000_add_event_identity_indexes.sql +56 -0
  118. package/dist/db/migrations/20260420120000_extend_reserved_org_slugs.sql +56 -0
  119. package/dist/db/migrations/20260424030000_add_watcher_run_correlation.sql +52 -0
  120. package/dist/db/migrations/20260424130000_relax_events_client_id_fk.sql +47 -0
  121. package/dist/db/migrations/20260425100000_normalize_watcher_feedback.sql +91 -0
  122. package/dist/db/migrations/20260425120000_add_run_diagnostics.sql +20 -0
  123. package/dist/db/migrations/20260425130000_add_repair_agent_plumbing.sql +46 -0
  124. package/dist/db/migrations/20260426120000_entities_entity_type_fk.sql +101 -0
  125. package/dist/db/migrations/20260426130000_db_integrity_cleanup.sql +104 -0
  126. package/dist/db/migrations/20260426130001_db_integrity_cleanup_concurrent.sql +187 -0
  127. package/dist/db/migrations/20260427133000_events_created_by_nullable.sql +74 -0
  128. package/dist/db/migrations/20260427140000_identity_engine_indexes.sql +140 -0
  129. package/dist/db/migrations/20260427150000_drop_events_source_id.sql +177 -0
  130. package/dist/db/migrations/20260427160000_drop_dead_schema.sql +76 -0
  131. package/dist/db/migrations/20260427170000_market_founder_to_member.sql +364 -0
  132. package/dist/db/migrations/20260428040000_cascade_events_watchers_org_fk.sql +66 -0
  133. package/dist/db/migrations/20260428050000_add_runs_approved_input.sql +9 -0
  134. package/dist/db/migrations/20260429010000_auth_profile_tenant_scoped_fk.sql +79 -0
  135. package/dist/db/migrations/20260429060000_extend_runs_for_lobu_queue.sql +108 -0
  136. package/dist/db/migrations/20260429120000_agent_changed_notify.sql +97 -0
  137. package/dist/db/migrations/20260429120100_user_auth_profiles_and_model_prefs.sql +36 -0
  138. package/dist/db/migrations/20260429120200_fix_notify_old_keys.sql +130 -0
  139. package/dist/db/migrations/20260429130000_oauth_states_cli_sessions_rate_limits.sql +83 -0
  140. package/dist/db/migrations/20260429140000_phase8_grants_chat_connections_mcp_sessions.sql +84 -0
  141. package/dist/db/migrations/20260429140100_runs_priority_expires_at_retry_delay.sql +44 -0
  142. package/dist/db/migrations/20260429180000_drop_invalidatable_cache_triggers.sql +25 -0
  143. package/dist/db/migrations/20260430005614_agents_apply_fields.sql +21 -0
  144. package/dist/db/migrations/20260430022231_fix_connection_config_encryption.sql +69 -0
  145. package/dist/db/migrations/20260430151215_add_task_run_type.sql +77 -0
  146. package/dist/db/migrations/20260501000000_drop_cli_sessions.sql +27 -0
  147. package/dist/db/migrations/20260501133000_lobu_memory_mcp_id.sql +117 -0
  148. package/dist/db/migrations/20260502000000_drop_chat_connections.sql +60 -0
  149. package/dist/db/migrations/20260503000000_agent_secrets_org_scope.sql +56 -0
  150. package/dist/db/migrations/20260504000000_flatten_agents_drop_sandbox_model.sql +48 -0
  151. package/dist/index.d.ts.map +1 -1
  152. package/dist/index.js +147 -23
  153. package/dist/index.js.map +1 -1
  154. package/dist/internal/api-client.d.ts +4 -8
  155. package/dist/internal/api-client.d.ts.map +1 -1
  156. package/dist/internal/api-client.js +1 -1
  157. package/dist/internal/api-client.js.map +1 -1
  158. package/dist/internal/context.js +2 -2
  159. package/dist/internal/context.js.map +1 -1
  160. package/dist/internal/credentials.d.ts.map +1 -1
  161. package/dist/internal/credentials.js +6 -1
  162. package/dist/internal/credentials.js.map +1 -1
  163. package/dist/internal/index.d.ts +2 -3
  164. package/dist/internal/index.d.ts.map +1 -1
  165. package/dist/internal/index.js +2 -2
  166. package/dist/internal/index.js.map +1 -1
  167. package/dist/internal/oauth.d.ts +7 -6
  168. package/dist/internal/oauth.d.ts.map +1 -1
  169. package/dist/internal/oauth.js +3 -3
  170. package/dist/internal/project-link.d.ts +10 -0
  171. package/dist/internal/project-link.d.ts.map +1 -0
  172. package/dist/internal/project-link.js +48 -0
  173. package/dist/internal/project-link.js.map +1 -0
  174. package/dist/providers.json +2 -2
  175. package/dist/server.bundle.mjs +3173 -4404
  176. package/dist/start-local.bundle.mjs +71481 -0
  177. package/dist/templates/README.md.tmpl +10 -11
  178. package/package.json +14 -12
  179. package/dist/__tests__/chat.integration.test.d.ts +0 -2
  180. package/dist/__tests__/chat.integration.test.d.ts.map +0 -1
  181. package/dist/__tests__/chat.integration.test.js +0 -337
  182. package/dist/__tests__/chat.integration.test.js.map +0 -1
  183. package/dist/__tests__/dev.test.d.ts +0 -2
  184. package/dist/__tests__/dev.test.d.ts.map +0 -1
  185. package/dist/__tests__/dev.test.js +0 -25
  186. package/dist/__tests__/dev.test.js.map +0 -1
  187. package/dist/__tests__/init-memory.test.d.ts +0 -2
  188. package/dist/__tests__/init-memory.test.d.ts.map +0 -1
  189. package/dist/__tests__/init-memory.test.js +0 -45
  190. package/dist/__tests__/init-memory.test.js.map +0 -1
  191. package/dist/__tests__/token.test.d.ts +0 -2
  192. package/dist/__tests__/token.test.d.ts.map +0 -1
  193. package/dist/__tests__/token.test.js +0 -52
  194. package/dist/__tests__/token.test.js.map +0 -1
  195. package/dist/commands/_lib/apply/__tests__/client.test.d.ts +0 -2
  196. package/dist/commands/_lib/apply/__tests__/client.test.d.ts.map +0 -1
  197. package/dist/commands/_lib/apply/__tests__/client.test.js +0 -23
  198. package/dist/commands/_lib/apply/__tests__/client.test.js.map +0 -1
  199. package/dist/commands/_lib/apply/__tests__/desired-state.test.d.ts +0 -2
  200. package/dist/commands/_lib/apply/__tests__/desired-state.test.d.ts.map +0 -1
  201. package/dist/commands/_lib/apply/__tests__/desired-state.test.js +0 -140
  202. package/dist/commands/_lib/apply/__tests__/desired-state.test.js.map +0 -1
  203. package/dist/commands/_lib/apply/__tests__/diff.test.d.ts +0 -2
  204. package/dist/commands/_lib/apply/__tests__/diff.test.d.ts.map +0 -1
  205. package/dist/commands/_lib/apply/__tests__/diff.test.js +0 -378
  206. package/dist/commands/_lib/apply/__tests__/diff.test.js.map +0 -1
  207. package/dist/commands/apply.d.ts +0 -3
  208. package/dist/commands/apply.d.ts.map +0 -1
  209. package/dist/commands/apply.js +0 -5
  210. package/dist/commands/apply.js.map +0 -1
  211. package/dist/commands/memory/_lib/openclaw-auth.test.d.ts +0 -2
  212. package/dist/commands/memory/_lib/openclaw-auth.test.d.ts.map +0 -1
  213. package/dist/commands/memory/_lib/openclaw-auth.test.js +0 -9
  214. package/dist/commands/memory/_lib/openclaw-auth.test.js.map +0 -1
  215. package/dist/internal/__tests__/api-client.test.d.ts +0 -2
  216. package/dist/internal/__tests__/api-client.test.d.ts.map +0 -1
  217. package/dist/internal/__tests__/api-client.test.js +0 -95
  218. package/dist/internal/__tests__/api-client.test.js.map +0 -1
  219. package/dist/internal/__tests__/context.test.d.ts +0 -2
  220. package/dist/internal/__tests__/context.test.d.ts.map +0 -1
  221. package/dist/internal/__tests__/context.test.js +0 -77
  222. package/dist/internal/__tests__/context.test.js.map +0 -1
@@ -0,0 +1,666 @@
1
+ /**
2
+ * YouTube Connector (V1 runtime)
3
+ *
4
+ * Fetches video metadata, comments, and transcripts from YouTube search results
5
+ * via the YouTube Data API v3. Transcripts are extracted from YouTube's embedded
6
+ * caption tracks (no third-party packages required).
7
+ */
8
+
9
+ import {
10
+ type ActionContext,
11
+ type ActionResult,
12
+ type ConnectorDefinition,
13
+ ConnectorRuntime,
14
+ calculateEngagementScore,
15
+ type EventEnvelope,
16
+ type SyncContext,
17
+ type SyncResult,
18
+ } from '@lobu/connector-sdk';
19
+
20
+ // ---------------------------------------------------------------------------
21
+ // YouTube API types
22
+ // ---------------------------------------------------------------------------
23
+
24
/**
 * One entry of a search response, as this connector types it.
 * Only `id.videoId` is read by the sync loop; the snippet fields are declared
 * for completeness of the parsed shape.
 */
interface YouTubeSearchItem {
  /** Resource identifier of the hit. */
  id: { kind: string; videoId: string };
  /** Basic descriptive fields returned with `part=snippet`. */
  snippet: {
    publishedAt: string;
    channelId: string;
    title: string;
    description: string;
    channelTitle: string;
  };
}
37
+
38
/** Parsed body of a search request: one page of hits plus paging information. */
interface YouTubeSearchResponse {
  /** Token for the following page; absent when there is no further page. */
  nextPageToken?: string;
  pageInfo: { totalResults: number; resultsPerPage: number };
  items: Array<YouTubeSearchItem>;
}
46
+
47
/**
 * One entry of a videos response requested with
 * `part=snippet,statistics,contentDetails`.
 */
interface YouTubeVideoItem {
  /** Video id; used to build event ids and watch URLs. */
  id: string;
  snippet: {
    publishedAt: string;
    channelId: string;
    title: string;
    description: string;
    channelTitle: string;
    tags?: Array<string>;
  };
  /** Counters are typed as optional strings and parsed to integers by the connector. */
  statistics: {
    viewCount?: string;
    likeCount?: string;
    commentCount?: string;
  };
  contentDetails?: {
    /** Copied into event metadata unchanged when present. */
    duration?: string;
  };
}
66
+
67
/** Parsed body of a videos request. */
interface YouTubeVideoResponse {
  items: Array<YouTubeVideoItem>;
}
70
+
71
/** Snippet of a comment thread: the top-level comment plus its reply count. */
interface YouTubeCommentSnippet {
  videoId: string;
  topLevelComment: {
    /** Comment id; used for the event id and the `lc=` deep link. */
    id: string;
    snippet: {
      textDisplay: string;
      /** The text the connector stores as the event payload. */
      textOriginal: string;
      authorDisplayName: string;
      authorChannelUrl?: string;
      likeCount: number;
      publishedAt: string;
      updatedAt: string;
    };
  };
  totalReplyCount: number;
}
87
+
88
/** One comment thread as returned by the commentThreads endpoint. */
interface YouTubeCommentThread {
  id: string;
  snippet: YouTubeCommentSnippet;
}
92
+
93
/** Parsed body of a commentThreads request: one page of threads plus paging information. */
interface YouTubeCommentThreadResponse {
  /** Token for the following page; absent when there is no further page. */
  nextPageToken?: string;
  pageInfo: { totalResults: number; resultsPerPage: number };
  items: Array<YouTubeCommentThread>;
}
101
+
102
/** A caption track extracted from the watch page's player response. */
interface CaptionTrack {
  /** URL the caption XML is downloaded from. */
  baseUrl: string;
  /** Language tag used to prefer English tracks (`en` or `en-*`). */
  languageCode: string;
}
106
+
107
+ // ---------------------------------------------------------------------------
108
+ // Checkpoint
109
+ // ---------------------------------------------------------------------------
110
+
111
/** State persisted between sync runs. */
interface YouTubeCheckpoint {
  /** ISO timestamp of the newest event emitted so far. */
  last_published_at?: string;
  /** Search page token at which the next sync resumes paging. */
  next_page_token?: string;
}
115
+
116
+ // ---------------------------------------------------------------------------
117
+ // Connector
118
+ // ---------------------------------------------------------------------------
119
+
120
export default class YouTubeConnector extends ConnectorRuntime {
  /**
   * Static description of the connector: identity, Google OAuth settings,
   * the single `videos` feed with its config schema and event kinds, and the
   * connector-level options schema (which repeats the feed's config schema).
   *
   * NOTE(review): `sync()` also reads a `YOUTUBE_API_KEY` value from the
   * config, but neither schema below declares it — confirm where that key is
   * expected to be supplied.
   */
  readonly definition: ConnectorDefinition = {
    key: 'youtube',
    name: 'YouTube',
    description: 'Fetches video metadata, comments, and transcripts from YouTube search results.',
    version: '1.0.0',
    faviconDomain: 'youtube.com',
    authSchema: {
      methods: [
        {
          type: 'oauth',
          provider: 'google',
          requiredScopes: ['https://www.googleapis.com/auth/youtube.readonly'],
          loginScopes: ['openid', 'email', 'profile'],
          clientIdKey: 'GOOGLE_CLIENT_ID',
          clientSecretKey: 'GOOGLE_CLIENT_SECRET',
          tokenUrl: 'https://oauth2.googleapis.com/token',
          tokenEndpointAuthMethod: 'client_secret_post',
          loginProvisioning: {
            autoCreateConnection: false,
          },
        },
      ],
    },
    feeds: {
      videos: {
        key: 'videos',
        name: 'Videos',
        requiredScopes: ['https://www.googleapis.com/auth/youtube.readonly'],
        description: 'Search YouTube for videos and collect metadata, comments, and transcripts.',
        configSchema: {
          type: 'object',
          required: ['search_query'],
          properties: {
            search_query: {
              type: 'string',
              minLength: 1,
              description: 'Search term to query YouTube.',
            },
            max_results: {
              type: 'integer',
              minimum: 1,
              maximum: 200,
              default: 50,
              description: 'Total videos to fetch per sync (max 200).',
            },
            include_transcripts: {
              type: 'boolean',
              default: true,
              description: 'Whether to fetch video transcripts.',
            },
            include_comments: {
              type: 'boolean',
              default: true,
              description: 'Whether to fetch video comments.',
            },
          },
        },
        eventKinds: {
          video: {
            description: 'A YouTube video with metadata and optional transcript',
            metadataSchema: {
              type: 'object',
              properties: {
                view_count: { type: 'number' },
                like_count: { type: 'number' },
                comment_count: { type: 'number' },
                channel_title: { type: 'string' },
                channel_id: { type: 'string' },
                has_transcript: { type: 'boolean' },
                duration: { type: 'string' },
                tags: {
                  type: 'array',
                  items: { type: 'string' },
                },
              },
            },
          },
          comment: {
            description: 'A comment on a YouTube video',
            metadataSchema: {
              type: 'object',
              properties: {
                video_id: { type: 'string' },
                like_count: { type: 'number' },
                reply_count: { type: 'number' },
              },
            },
          },
        },
      },
    },
    optionsSchema: {
      type: 'object',
      required: ['search_query'],
      properties: {
        search_query: {
          type: 'string',
          minLength: 1,
          description: 'Search term to query YouTube.',
        },
        max_results: {
          type: 'integer',
          minimum: 1,
          maximum: 200,
          default: 50,
          description: 'Total videos to fetch per sync (max 200).',
        },
        include_transcripts: {
          type: 'boolean',
          default: true,
          description: 'Whether to fetch video transcripts.',
        },
        include_comments: {
          type: 'boolean',
          default: true,
          description: 'Whether to fetch video comments.',
        },
      },
    },
  };

  /** Root of the YouTube Data API v3 endpoints used by this connector. */
  private readonly BASE_URL = 'https://www.googleapis.com/youtube/v3';
  /** Pause, in milliseconds, inserted between consecutive outbound requests. */
  private readonly RATE_LIMIT_MS = 200;
  /** Maximum number of comment pages (up to 100 threads each) fetched per video. */
  private readonly COMMENT_PAGE_LIMIT = 3;

  // -------------------------------------------------------------------------
  // sync
  // -------------------------------------------------------------------------

  /**
   * Runs one sync pass: pages through search results for the configured
   * query (newest first), loads details for every unseen video, optionally
   * adds a transcript and top-level comments, and returns the resulting
   * events together with an updated checkpoint.
   *
   * Paging starts from the checkpoint's `next_page_token` when one is stored.
   * `last_published_at` is written to the checkpoint but not read here.
   *
   * @param ctx - Sync context providing credentials, feed config and the previous checkpoint.
   * @returns Events sorted by `occurred_at` descending, the new checkpoint and run counters.
   * @throws Error when no credential is available, `search_query` is empty,
   *   or the search / videos endpoint answers with a non-OK status.
   */
  async sync(ctx: SyncContext): Promise<SyncResult> {
    const accessToken = ctx.credentials?.accessToken as string | undefined;
    const apiKey = (ctx.config.YOUTUBE_API_KEY as string) || undefined;
    if (!accessToken && !apiKey) {
      throw new Error('YouTube requires either OAuth (Google) or a YOUTUBE_API_KEY.');
    }

    const searchQuery = ctx.config.search_query as string;
    if (!searchQuery) {
      throw new Error('search_query is required.');
    }

    const maxResults = this.resolveMaxResults(ctx.config.max_results);
    const includeTranscripts = (ctx.config.include_transcripts as boolean) ?? true;
    const includeComments = (ctx.config.include_comments as boolean) ?? true;

    const checkpoint = (ctx.checkpoint as YouTubeCheckpoint) ?? {};
    const events: EventEnvelope[] = [];
    const seenIds = new Set<string>();
    const auth = { accessToken, apiKey };

    let pageToken: string | undefined = checkpoint.next_page_token;
    let totalCollected = 0;

    // ----- Search & collect video IDs -----
    while (totalCollected < maxResults) {
      const pageSize = Math.min(50, maxResults - totalCollected);
      const searchResponse = await this.apiGet(
        this.buildSearchUrl(searchQuery, pageSize, pageToken),
        auth
      );
      if (!searchResponse.ok) {
        throw new Error(
          `YouTube Search API error (${searchResponse.status}): ${await searchResponse.text()}`
        );
      }

      const searchData = (await searchResponse.json()) as YouTubeSearchResponse;
      // Tolerate a body without an `items` array instead of crashing on `.length`.
      const searchItems = searchData.items ?? [];
      if (searchItems.length === 0) break;

      // Collect the video IDs of this page that have not been seen in this run.
      const videoIds: string[] = [];
      for (const item of searchItems) {
        const videoId = item.id?.videoId;
        if (videoId && !seenIds.has(videoId)) {
          seenIds.add(videoId);
          videoIds.push(videoId);
        }
      }

      if (videoIds.length === 0) {
        // Nothing new on this page: move on without counting it.
        pageToken = searchData.nextPageToken;
        if (!pageToken) break;
        continue;
      }

      // ----- Fetch video details in batches of 50 -----
      const videoDetails = await this.fetchVideoDetails(auth, videoIds);

      // ----- Process each video -----
      for (const video of videoDetails) {
        try {
          const stats = this.readStats(video);

          let transcript: string | null = null;
          if (includeTranscripts) {
            // fetchTranscript never rejects (it returns null on any failure),
            // so no extra try/catch is needed around it.
            transcript = await this.fetchTranscript(video.id);
            await this.sleep(this.RATE_LIMIT_MS);
          }

          events.push(this.buildVideoEvent(video, stats, transcript));

          // ----- Fetch comments if enabled -----
          if (includeComments && stats.commentCount > 0) {
            try {
              const threads = await this.fetchComments(auth, video.id);
              for (const thread of threads) {
                events.push(this.buildCommentEvent(video.id, thread));
              }
            } catch {
              /* comment fetch is best-effort */
            }
          }
        } catch {
          /* skip individual video failures */
        }
      }

      totalCollected += videoIds.length;
      pageToken = searchData.nextPageToken;

      if (!pageToken) break;

      await this.sleep(this.RATE_LIMIT_MS);
    }

    // Sort events by occurred_at descending
    events.sort((a, b) => b.occurred_at.getTime() - a.occurred_at.getTime());

    // Update checkpoint: newest event time (or the previous value when this
    // run produced nothing) plus the token of the next unread search page.
    const latestPublishedAt = events[0]?.occurred_at.toISOString();
    const newCheckpoint: YouTubeCheckpoint = {
      last_published_at: latestPublishedAt ?? checkpoint.last_published_at,
      next_page_token: pageToken,
    };

    return {
      events,
      checkpoint: newCheckpoint as Record<string, unknown>,
      metadata: {
        items_found: events.length,
        videos_collected: seenIds.size,
      },
    };
  }

  /**
   * Normalises the configured `max_results` to a whole number in 1..200
   * (the range the config schema declares); missing or non-numeric values
   * fall back to the schema default of 50.
   */
  private resolveMaxResults(raw: unknown): number {
    const requested = raw == null ? 50 : Number(raw);
    if (!Number.isFinite(requested)) return 50;
    return Math.min(Math.max(Math.trunc(requested), 1), 200);
  }

  /** Parses a counter string to an integer; missing or unparsable values count as 0. */
  private toCount(raw: string | undefined): number {
    const parsed = parseInt(raw ?? '0', 10);
    return Number.isNaN(parsed) ? 0 : parsed;
  }

  /** Reads the numeric view / like / comment counters of a video. */
  private readStats(video: YouTubeVideoItem): {
    viewCount: number;
    likeCount: number;
    commentCount: number;
  } {
    return {
      viewCount: this.toCount(video.statistics.viewCount),
      likeCount: this.toCount(video.statistics.likeCount),
      commentCount: this.toCount(video.statistics.commentCount),
    };
  }

  /**
   * Builds the `video` event for one video. The payload is the transcript
   * when one was obtained, otherwise the trimmed video description.
   */
  private buildVideoEvent(
    video: YouTubeVideoItem,
    stats: { viewCount: number; likeCount: number; commentCount: number },
    transcript: string | null
  ): EventEnvelope {
    const transcriptText = transcript ?? '';
    const hasTranscript = transcriptText.length > 0;

    const engagementScore = calculateEngagementScore('youtube', {
      upvotes: stats.likeCount,
      reply_count: stats.commentCount,
      score: Math.round(stats.viewCount / 100),
    });

    return {
      origin_id: `yt_video_${video.id}`,
      title: video.snippet.title,
      payload_text: hasTranscript ? transcriptText : (video.snippet.description ?? '').trim(),
      author_name: video.snippet.channelTitle,
      source_url: `https://www.youtube.com/watch?v=${video.id}`,
      occurred_at: new Date(video.snippet.publishedAt),
      origin_type: 'video',
      score: engagementScore,
      metadata: {
        view_count: stats.viewCount,
        like_count: stats.likeCount,
        comment_count: stats.commentCount,
        channel_title: video.snippet.channelTitle,
        channel_id: video.snippet.channelId,
        has_transcript: hasTranscript,
        // Optional fields are only added when the API actually returned them.
        ...(video.contentDetails?.duration && {
          duration: video.contentDetails.duration,
        }),
        ...(video.snippet.tags &&
          video.snippet.tags.length > 0 && {
            tags: video.snippet.tags,
          }),
      },
    };
  }

  /** Builds the `comment` event for one top-level comment, parented to its video event. */
  private buildCommentEvent(videoId: string, thread: YouTubeCommentThread): EventEnvelope {
    const topLevel = thread.snippet.topLevelComment;
    const commentSnippet = topLevel.snippet;

    return {
      origin_id: `yt_comment_${topLevel.id}`,
      payload_text: commentSnippet.textOriginal,
      author_name: commentSnippet.authorDisplayName,
      source_url: `https://www.youtube.com/watch?v=${videoId}&lc=${topLevel.id}`,
      occurred_at: new Date(commentSnippet.publishedAt),
      origin_type: 'comment',
      origin_parent_id: `yt_video_${videoId}`,
      metadata: {
        video_id: videoId,
        like_count: commentSnippet.likeCount,
        reply_count: thread.snippet.totalReplyCount,
      },
    };
  }

  // -------------------------------------------------------------------------
  // execute
  // -------------------------------------------------------------------------

  /** This connector exposes no actions; every call reports failure. */
  async execute(_ctx: ActionContext): Promise<ActionResult> {
    return { success: false, error: 'Actions not supported' };
  }

  // -------------------------------------------------------------------------
  // YouTube API helpers
  // -------------------------------------------------------------------------

  /**
   * Builds the search URL for video results ordered by date.
   *
   * @param query - Search term.
   * @param maxResults - Page size for this request.
   * @param pageToken - Token of the page to request; omitted for the first page.
   */
  private buildSearchUrl(query: string, maxResults: number, pageToken?: string): string {
    const params = new URLSearchParams({
      part: 'snippet',
      q: query,
      type: 'video',
      order: 'date',
      maxResults: String(maxResults),
    });
    if (pageToken) {
      params.set('pageToken', pageToken);
    }
    return `${this.BASE_URL}/search?${params.toString()}`;
  }

  /**
   * Loads snippet, statistics and content details for the given video IDs,
   * requesting them in batches of 50 with a pause between batches.
   *
   * @throws Error when the videos endpoint answers with a non-OK status.
   */
  private async fetchVideoDetails(
    auth: { accessToken?: string; apiKey?: string },
    videoIds: string[]
  ): Promise<YouTubeVideoItem[]> {
    const batchSize = 50;
    const results: YouTubeVideoItem[] = [];

    for (let offset = 0; offset < videoIds.length; offset += batchSize) {
      const params = new URLSearchParams({
        part: 'snippet,statistics,contentDetails',
        id: videoIds.slice(offset, offset + batchSize).join(','),
      });

      const response = await this.apiGet(`${this.BASE_URL}/videos?${params.toString()}`, auth);
      if (!response.ok) {
        throw new Error(`YouTube Videos API error (${response.status}): ${await response.text()}`);
      }

      const data = (await response.json()) as YouTubeVideoResponse;
      results.push(...(data.items ?? []));

      // Only pause when another batch follows.
      if (offset + batchSize < videoIds.length) {
        await this.sleep(this.RATE_LIMIT_MS);
      }
    }

    return results;
  }

  /**
   * Fetches up to COMMENT_PAGE_LIMIT pages of comment threads for a video,
   * ordered by relevance.
   *
   * A 403 answer is treated as "no comments available" and ends paging. If a
   * later page fails for any other reason, the pages already fetched are
   * returned rather than discarded.
   *
   * @throws Error when the very first page fails with a status other than 403.
   */
  private async fetchComments(
    auth: { accessToken?: string; apiKey?: string },
    videoId: string
  ): Promise<YouTubeCommentThread[]> {
    const allComments: YouTubeCommentThread[] = [];
    let pageToken: string | undefined;

    for (let page = 0; page < this.COMMENT_PAGE_LIMIT; page++) {
      const params = new URLSearchParams({
        part: 'snippet',
        videoId,
        maxResults: '100',
        order: 'relevance',
      });
      if (pageToken) {
        params.set('pageToken', pageToken);
      }

      const response = await this.apiGet(
        `${this.BASE_URL}/commentThreads?${params.toString()}`,
        auth
      );

      if (!response.ok) {
        // Comments may be disabled — not a fatal error. Likewise keep what
        // was already collected when only a follow-up page fails.
        if (response.status === 403 || allComments.length > 0) break;
        throw new Error(
          `YouTube Comments API error (${response.status}): ${await response.text()}`
        );
      }

      const data = (await response.json()) as YouTubeCommentThreadResponse;
      allComments.push(...(data.items ?? []));

      pageToken = data.nextPageToken;
      if (!pageToken) break;

      await this.sleep(this.RATE_LIMIT_MS);
    }

    return allComments;
  }

  // -------------------------------------------------------------------------
  // Transcript fetching (no external packages)
  // -------------------------------------------------------------------------

  /**
   * Best-effort transcript download: loads the watch page, reads the caption
   * track list from the embedded player response, prefers an English track
   * (falling back to the first one) and returns its text.
   *
   * @returns The transcript text, or null when it cannot be obtained for any
   *   reason. This method never rejects.
   */
  private async fetchTranscript(videoId: string): Promise<string | null> {
    try {
      const watchUrl = `https://www.youtube.com/watch?v=${videoId}`;
      const response = await fetch(watchUrl, {
        headers: {
          'User-Agent':
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
          'Accept-Language': 'en-US,en;q=0.9',
        },
      });
      if (!response.ok) return null;

      const captionTracks = this.extractCaptionTracks(await response.text());
      if (!captionTracks || captionTracks.length === 0) return null;

      // Prefer English, fall back to first available
      const track =
        captionTracks.find((t) => t.languageCode === 'en' || t.languageCode.startsWith('en-')) ??
        captionTracks[0];
      if (!track) return null;

      // Fetch the timedtext XML
      const captionResponse = await fetch(track.baseUrl);
      if (!captionResponse.ok) return null;

      return this.parseTimedTextXml(await captionResponse.text());
    } catch {
      return null;
    }
  }

  /**
   * Extracts the caption track list from the `ytInitialPlayerResponse` object
   * embedded in the watch page HTML.
   *
   * The object is located with a brace-balanced, string-aware scan: a lazy
   * regex ending at the first `};` would truncate the JSON whenever that
   * sequence occurs earlier inside the payload, making every parse fail.
   *
   * @returns The usable tracks, or null when none can be found.
   */
  private extractCaptionTracks(html: string): CaptionTrack[] | null {
    const assignment = /ytInitialPlayerResponse\s*=\s*(?=\{)/g;
    let found: RegExpExecArray | null;

    while ((found = assignment.exec(html)) !== null) {
      const json = this.sliceBalancedObject(html, found.index + found[0].length);
      if (json === null) continue;

      let playerResponse: {
        captions?: {
          playerCaptionsTracklistRenderer?: {
            captionTracks?: Array<{ baseUrl?: unknown; languageCode?: unknown }>;
          };
        };
      };
      try {
        playerResponse = JSON.parse(json);
      } catch {
        // Not valid JSON at this occurrence; try the next assignment, if any.
        continue;
      }

      const rawTracks = playerResponse?.captions?.playerCaptionsTracklistRenderer?.captionTracks;
      if (!Array.isArray(rawTracks)) return null;

      // Keep only entries that really carry the two string fields used later.
      const tracks: CaptionTrack[] = [];
      for (const candidate of rawTracks) {
        if (
          candidate &&
          typeof candidate.baseUrl === 'string' &&
          typeof candidate.languageCode === 'string'
        ) {
          tracks.push({ baseUrl: candidate.baseUrl, languageCode: candidate.languageCode });
        }
      }
      return tracks.length > 0 ? tracks : null;
    }

    return null;
  }

  /**
   * Returns the substring of `text` holding the `{ ... }` object that opens
   * at `start`, matching braces while skipping over double-quoted strings and
   * their escapes; null when `start` is not `{` or the object never closes.
   */
  private sliceBalancedObject(text: string, start: number): string | null {
    if (text[start] !== '{') return null;

    let depth = 0;
    let inString = false;
    let escaped = false;

    for (let i = start; i < text.length; i++) {
      const ch = text[i];
      if (inString) {
        if (escaped) escaped = false;
        else if (ch === '\\') escaped = true;
        else if (ch === '"') inString = false;
        continue;
      }
      if (ch === '"') {
        inString = true;
      } else if (ch === '{') {
        depth++;
      } else if (ch === '}') {
        depth--;
        if (depth === 0) return text.slice(start, i + 1);
      }
    }

    return null;
  }

  /**
   * Converts timedtext XML into plain text.
   * Format: <text start="0.0" dur="2.0">caption text here</text>
   *
   * Each `<text>` body is entity-decoded in a single pass (so '&amp;lt;' does
   * not become '<'), stripped of any markup, trimmed, and the non-empty
   * segments are joined with single spaces.
   *
   * @returns The joined caption text, or null when no segment has content.
   */
  private parseTimedTextXml(xml: string): string | null {
    const namedEntities: Record<string, string> = {
      amp: '&',
      lt: '<',
      gt: '>',
      quot: '"',
      apos: "'",
    };
    const entityPattern = /&(?:(amp|lt|gt|quot|apos)|#(\d+)|#[xX]([0-9a-fA-F]+));/g;
    const tagPattern = /<[^>]*>/g;
    const textSegments: string[] = [];

    for (const match of xml.matchAll(/<text[^>]*>([\s\S]*?)<\/text>/g)) {
      let text = (match[1] ?? '').replace(
        entityPattern,
        (entity: string, name?: string, decimal?: string, hex?: string) => {
          if (name !== undefined) return namedEntities[name] ?? entity;
          const codePoint = decimal !== undefined ? parseInt(decimal, 10) : parseInt(hex ?? '', 16);
          // fromCodePoint handles characters beyond U+FFFF (fromCharCode would
          // truncate them) but throws above U+10FFFF, so substitute U+FFFD there.
          return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : '\uFFFD';
        }
      );

      // Strip any remaining HTML tags (loop to handle nested/broken markup
      // like '<<script>script>' that a single pass would leave behind).
      let previous: string;
      do {
        previous = text;
        text = text.replace(tagPattern, '');
      } while (text !== previous);

      const trimmed = text.trim();
      if (trimmed) {
        textSegments.push(trimmed);
      }
    }

    return textSegments.length > 0 ? textSegments.join(' ') : null;
  }

  // -------------------------------------------------------------------------
  // Utilities
  // -------------------------------------------------------------------------

  /**
   * Fetches a YouTube API URL with auth. An OAuth access token is sent as a
   * Bearer header; the API key is appended as the `key` query parameter only
   * when no access token is available.
   */
  private async apiGet(
    url: string,
    auth: { accessToken?: string; apiKey?: string }
  ): Promise<Response> {
    const parsedUrl = new URL(url);
    const headers: Record<string, string> = {};

    if (auth.accessToken) {
      headers.Authorization = `Bearer ${auth.accessToken}`;
    } else if (auth.apiKey) {
      parsedUrl.searchParams.set('key', auth.apiKey);
    }

    return fetch(parsedUrl.toString(), { headers });
  }

  /** Resolves after `ms` milliseconds. */
  private sleep(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }
}