mulmoclaude 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +44 -0
- package/bin/mulmoclaude.js +202 -0
- package/bin/prepare-dist.js +93 -0
- package/client/assets/chunk-vKJrgz-R-C_I3GbVV.js +1 -0
- package/client/assets/html2canvas-Cx501zZr-BF5dYYkY.js +5 -0
- package/client/assets/index-D8rhwXLq.js +4906 -0
- package/client/assets/index-KNLBjwuh.css +1 -0
- package/client/assets/index.es-D4YyL_Dg-BfRHLTZV.js +5 -0
- package/client/assets/material-icons-Dr0goTwe.woff +0 -0
- package/client/assets/material-icons-kAwBdRge.woff2 +0 -0
- package/client/assets/material-icons-outlined-BpWbwl2n.woff +0 -0
- package/client/assets/material-icons-outlined-DZhiGvEA.woff2 +0 -0
- package/client/assets/material-icons-round-BDlwx-sv.woff +0 -0
- package/client/assets/material-icons-round-DrirKXBx.woff2 +0 -0
- package/client/assets/material-icons-sharp-CH1KkVu7.woff +0 -0
- package/client/assets/material-icons-sharp-gidztirS.woff2 +0 -0
- package/client/assets/material-icons-two-tone-B7wz7mED.woff +0 -0
- package/client/assets/material-icons-two-tone-DuNIpaEj.woff2 +0 -0
- package/client/assets/mulmo_bw-ERmkSv0a.png +0 -0
- package/client/assets/purify.es-Fx1Nqyry-PeS5RUhs.js +2 -0
- package/client/assets/typeof-DBp4T-Ny-BC0P-2DM.js +1 -0
- package/client/index.html +28 -0
- package/package.json +66 -0
- package/server/agent/attachmentConverter.ts +270 -0
- package/server/agent/config.ts +414 -0
- package/server/agent/index.ts +260 -0
- package/server/agent/mcp-server.ts +412 -0
- package/server/agent/mcp-tools/index.ts +63 -0
- package/server/agent/mcp-tools/x.ts +188 -0
- package/server/agent/plugin-names.ts +75 -0
- package/server/agent/prompt.ts +349 -0
- package/server/agent/resumeFailover.ts +129 -0
- package/server/agent/sandboxMounts.ts +329 -0
- package/server/agent/stream.ts +194 -0
- package/server/api/auth/bearerAuth.ts +61 -0
- package/server/api/auth/token.ts +98 -0
- package/server/api/csrfGuard.ts +85 -0
- package/server/api/routes/agent.ts +478 -0
- package/server/api/routes/chart.ts +98 -0
- package/server/api/routes/chat-index.ts +46 -0
- package/server/api/routes/config.ts +258 -0
- package/server/api/routes/dispatchResponse.ts +79 -0
- package/server/api/routes/files.ts +812 -0
- package/server/api/routes/html.ts +101 -0
- package/server/api/routes/image.ts +169 -0
- package/server/api/routes/mulmo-script.ts +712 -0
- package/server/api/routes/mulmoScriptValidate.ts +101 -0
- package/server/api/routes/notifications.ts +69 -0
- package/server/api/routes/pdf.ts +163 -0
- package/server/api/routes/plugins.ts +276 -0
- package/server/api/routes/presentHtml.ts +48 -0
- package/server/api/routes/roles.ts +125 -0
- package/server/api/routes/scheduler.ts +153 -0
- package/server/api/routes/schedulerHandlers.ts +151 -0
- package/server/api/routes/schedulerTasks.ts +163 -0
- package/server/api/routes/sessions.ts +294 -0
- package/server/api/routes/sessionsCursor.ts +59 -0
- package/server/api/routes/skills.ts +195 -0
- package/server/api/routes/sources.ts +540 -0
- package/server/api/routes/todos.ts +263 -0
- package/server/api/routes/todosColumnsHandlers.ts +347 -0
- package/server/api/routes/todosHandlers.ts +274 -0
- package/server/api/routes/todosItemsHandlers.ts +386 -0
- package/server/api/routes/wiki/pageIndex.ts +53 -0
- package/server/api/routes/wiki.ts +363 -0
- package/server/api/sandboxStatus.ts +64 -0
- package/server/events/notifications.ts +160 -0
- package/server/events/pub-sub/index.ts +45 -0
- package/server/events/relay-client.ts +288 -0
- package/server/events/scheduler-adapter.ts +302 -0
- package/server/events/session-store/index.ts +492 -0
- package/server/events/task-manager/index.ts +181 -0
- package/server/index.ts +572 -0
- package/server/system/config.ts +243 -0
- package/server/system/credentials.ts +220 -0
- package/server/system/docker.ts +97 -0
- package/server/system/env.ts +109 -0
- package/server/system/logger/config.ts +112 -0
- package/server/system/logger/formatters.ts +40 -0
- package/server/system/logger/index.ts +53 -0
- package/server/system/logger/rotation.ts +37 -0
- package/server/system/logger/sinks.ts +101 -0
- package/server/system/logger/types.ts +29 -0
- package/server/utils/date.ts +57 -0
- package/server/utils/errors.ts +7 -0
- package/server/utils/fetch.ts +27 -0
- package/server/utils/files/atomic.ts +125 -0
- package/server/utils/files/html-io.ts +20 -0
- package/server/utils/files/image-store.ts +66 -0
- package/server/utils/files/index.ts +45 -0
- package/server/utils/files/journal-io.ts +213 -0
- package/server/utils/files/json.ts +69 -0
- package/server/utils/files/markdown-store.ts +33 -0
- package/server/utils/files/naming.ts +50 -0
- package/server/utils/files/reference-dirs-io.ts +45 -0
- package/server/utils/files/roles-io.ts +45 -0
- package/server/utils/files/safe.ts +106 -0
- package/server/utils/files/scheduler-io.ts +20 -0
- package/server/utils/files/scheduler-overrides-io.ts +64 -0
- package/server/utils/files/session-io.ts +136 -0
- package/server/utils/files/spreadsheet-store.ts +63 -0
- package/server/utils/files/todos-io.ts +29 -0
- package/server/utils/files/user-tasks-io.ts +25 -0
- package/server/utils/files/workspace-io.ts +221 -0
- package/server/utils/gemini.ts +59 -0
- package/server/utils/gitignore.ts +69 -0
- package/server/utils/http.ts +15 -0
- package/server/utils/httpError.ts +61 -0
- package/server/utils/id.ts +16 -0
- package/server/utils/json.ts +83 -0
- package/server/utils/logBackgroundError.ts +22 -0
- package/server/utils/markdown.ts +82 -0
- package/server/utils/request.ts +29 -0
- package/server/utils/slug.ts +50 -0
- package/server/utils/spawn.ts +62 -0
- package/server/utils/time.ts +34 -0
- package/server/utils/types.ts +47 -0
- package/server/workspace/chat-index/index.ts +153 -0
- package/server/workspace/chat-index/indexer.ts +209 -0
- package/server/workspace/chat-index/paths.ts +34 -0
- package/server/workspace/chat-index/summarizer.ts +247 -0
- package/server/workspace/chat-index/types.ts +38 -0
- package/server/workspace/custom-dirs.ts +220 -0
- package/server/workspace/helps/business.md +104 -0
- package/server/workspace/helps/github.md +23 -0
- package/server/workspace/helps/index.md +60 -0
- package/server/workspace/helps/mulmoscript.md +249 -0
- package/server/workspace/helps/sandbox.md +90 -0
- package/server/workspace/helps/spreadsheet.md +43 -0
- package/server/workspace/helps/telegram.md +135 -0
- package/server/workspace/helps/wiki.md +131 -0
- package/server/workspace/journal/archivist.ts +386 -0
- package/server/workspace/journal/dailyPass.ts +743 -0
- package/server/workspace/journal/diff.ts +71 -0
- package/server/workspace/journal/index.ts +185 -0
- package/server/workspace/journal/indexFile.ts +136 -0
- package/server/workspace/journal/linkRewrite.ts +4 -0
- package/server/workspace/journal/memoryExtractor.ts +130 -0
- package/server/workspace/journal/optimizationPass.ts +160 -0
- package/server/workspace/journal/paths.ts +76 -0
- package/server/workspace/journal/state.ts +125 -0
- package/server/workspace/paths.ts +158 -0
- package/server/workspace/reference-dirs.ts +252 -0
- package/server/workspace/roles.ts +37 -0
- package/server/workspace/skills/discovery.ts +125 -0
- package/server/workspace/skills/index.ts +10 -0
- package/server/workspace/skills/parser.ts +144 -0
- package/server/workspace/skills/paths.ts +41 -0
- package/server/workspace/skills/scheduler.ts +149 -0
- package/server/workspace/skills/types.ts +30 -0
- package/server/workspace/skills/user-tasks.ts +257 -0
- package/server/workspace/skills/writer.ts +189 -0
- package/server/workspace/sources/arxivDiscovery.ts +182 -0
- package/server/workspace/sources/classifier.ts +268 -0
- package/server/workspace/sources/fetchers/arxiv.ts +170 -0
- package/server/workspace/sources/fetchers/github.ts +106 -0
- package/server/workspace/sources/fetchers/githubIssues.ts +208 -0
- package/server/workspace/sources/fetchers/githubReleases.ts +186 -0
- package/server/workspace/sources/fetchers/index.ts +71 -0
- package/server/workspace/sources/fetchers/registerAll.ts +15 -0
- package/server/workspace/sources/fetchers/rss.ts +141 -0
- package/server/workspace/sources/fetchers/rssParser.ts +295 -0
- package/server/workspace/sources/httpFetcher.ts +230 -0
- package/server/workspace/sources/interests.ts +120 -0
- package/server/workspace/sources/paths.ts +110 -0
- package/server/workspace/sources/pipeline/dedup.ts +60 -0
- package/server/workspace/sources/pipeline/fetch.ts +136 -0
- package/server/workspace/sources/pipeline/index.ts +249 -0
- package/server/workspace/sources/pipeline/notify.ts +72 -0
- package/server/workspace/sources/pipeline/plan.ts +66 -0
- package/server/workspace/sources/pipeline/summarize.ts +189 -0
- package/server/workspace/sources/pipeline/write.ts +185 -0
- package/server/workspace/sources/rateLimiter.ts +148 -0
- package/server/workspace/sources/registry.ts +326 -0
- package/server/workspace/sources/robots.ts +271 -0
- package/server/workspace/sources/sourceState.ts +135 -0
- package/server/workspace/sources/taxonomy.ts +74 -0
- package/server/workspace/sources/types.ts +144 -0
- package/server/workspace/sources/urls.ts +112 -0
- package/server/workspace/tool-trace/classify.ts +114 -0
- package/server/workspace/tool-trace/index.ts +250 -0
- package/server/workspace/tool-trace/writeSearch.ts +98 -0
- package/server/workspace/wiki-backlinks/index.ts +107 -0
- package/server/workspace/wiki-backlinks/sessionBacklinks.ts +144 -0
- package/server/workspace/workspace.ts +66 -0
- package/src/App.vue +720 -0
- package/src/assets/mulmo_bw.png +0 -0
- package/src/components/CanvasViewToggle.vue +27 -0
- package/src/components/ChatAttachmentPreview.vue +45 -0
- package/src/components/ChatImagePreview.vue +17 -0
- package/src/components/ChatInput.vue +208 -0
- package/src/components/FileContentHeader.vue +49 -0
- package/src/components/FileContentRenderer.vue +162 -0
- package/src/components/FileTree.vue +115 -0
- package/src/components/FileTreePane.vue +85 -0
- package/src/components/FilesView.vue +206 -0
- package/src/components/LockStatusPopup.vue +111 -0
- package/src/components/NotificationBell.vue +131 -0
- package/src/components/NotificationToast.vue +72 -0
- package/src/components/PluginLauncher.vue +138 -0
- package/src/components/RightSidebar.vue +113 -0
- package/src/components/RoleSelector.vue +64 -0
- package/src/components/SessionHistoryPanel.vue +176 -0
- package/src/components/SessionTabBar.vue +81 -0
- package/src/components/SettingsMcpTab.vue +350 -0
- package/src/components/SettingsModal.vue +275 -0
- package/src/components/SettingsReferenceDirsTab.vue +173 -0
- package/src/components/SettingsWorkspaceDirsTab.vue +174 -0
- package/src/components/SidebarHeader.vue +69 -0
- package/src/components/StackView.vue +360 -0
- package/src/components/SuggestionsPanel.vue +65 -0
- package/src/components/TodoExplorer.vue +358 -0
- package/src/components/ToolResultsPanel.vue +77 -0
- package/src/components/todo/TodoAddDialog.vue +131 -0
- package/src/components/todo/TodoEditDialog.vue +47 -0
- package/src/components/todo/TodoEditPanel.vue +113 -0
- package/src/components/todo/TodoKanbanView.vue +249 -0
- package/src/components/todo/TodoListView.vue +79 -0
- package/src/components/todo/TodoTableView.vue +177 -0
- package/src/composables/useActiveSession.ts +40 -0
- package/src/composables/useAppApi.ts +45 -0
- package/src/composables/useCanvasViewMode.ts +121 -0
- package/src/composables/useChatScroll.ts +47 -0
- package/src/composables/useClickOutside.ts +26 -0
- package/src/composables/useClipboardCopy.ts +44 -0
- package/src/composables/useContentDisplay.ts +52 -0
- package/src/composables/useDebugBeat.ts +23 -0
- package/src/composables/useDynamicFavicon.ts +115 -0
- package/src/composables/useEventListeners.ts +42 -0
- package/src/composables/useExpandedDirs.ts +64 -0
- package/src/composables/useFaviconState.ts +30 -0
- package/src/composables/useFileSelection.ts +115 -0
- package/src/composables/useFileSortMode.ts +24 -0
- package/src/composables/useFileTree.ts +85 -0
- package/src/composables/useFreshPluginData.ts +89 -0
- package/src/composables/useHealth.ts +38 -0
- package/src/composables/useImeAwareEnter.ts +57 -0
- package/src/composables/useKeyNavigation.ts +60 -0
- package/src/composables/useMarkdownLinkHandler.ts +46 -0
- package/src/composables/useMarkdownMode.ts +17 -0
- package/src/composables/useMcpTools.ts +71 -0
- package/src/composables/useMergedSessions.ts +27 -0
- package/src/composables/useNotifications.ts +90 -0
- package/src/composables/usePdfDownload.ts +60 -0
- package/src/composables/usePendingCalls.ts +77 -0
- package/src/composables/usePubSub.ts +85 -0
- package/src/composables/useRightSidebar.ts +23 -0
- package/src/composables/useRoles.ts +34 -0
- package/src/composables/useSandboxStatus.ts +67 -0
- package/src/composables/useSelectedResult.ts +49 -0
- package/src/composables/useSessionDerived.ts +51 -0
- package/src/composables/useSessionHistory.ts +81 -0
- package/src/composables/useSessionSync.ts +57 -0
- package/src/composables/useViewLayout.ts +55 -0
- package/src/config/apiRoutes.ts +173 -0
- package/src/config/pubsubChannels.ts +45 -0
- package/src/config/roles.ts +335 -0
- package/src/config/schedulerActions.ts +25 -0
- package/src/config/toolNames.ts +71 -0
- package/src/config/workspacePaths.ts +24 -0
- package/src/index.css +107 -0
- package/src/main.ts +25 -0
- package/src/plugins/canvas/Preview.vue +13 -0
- package/src/plugins/canvas/View.vue +333 -0
- package/src/plugins/canvas/definition.ts +38 -0
- package/src/plugins/canvas/index.ts +36 -0
- package/src/plugins/chart/Preview.vue +49 -0
- package/src/plugins/chart/View.vue +143 -0
- package/src/plugins/chart/definition.ts +58 -0
- package/src/plugins/chart/index.ts +52 -0
- package/src/plugins/editImage/Preview.vue +13 -0
- package/src/plugins/editImage/View.vue +13 -0
- package/src/plugins/editImage/definition.ts +27 -0
- package/src/plugins/editImage/index.ts +36 -0
- package/src/plugins/generateImage/Preview.vue +13 -0
- package/src/plugins/generateImage/View.vue +33 -0
- package/src/plugins/generateImage/definition.ts +32 -0
- package/src/plugins/generateImage/index.ts +56 -0
- package/src/plugins/manageRoles/Preview.vue +49 -0
- package/src/plugins/manageRoles/View.vue +525 -0
- package/src/plugins/manageRoles/definition.ts +43 -0
- package/src/plugins/manageRoles/index.ts +47 -0
- package/src/plugins/manageSkills/Preview.vue +21 -0
- package/src/plugins/manageSkills/View.vue +321 -0
- package/src/plugins/manageSkills/definition.ts +49 -0
- package/src/plugins/manageSkills/index.ts +49 -0
- package/src/plugins/manageSource/Preview.vue +33 -0
- package/src/plugins/manageSource/View.vue +697 -0
- package/src/plugins/manageSource/definition.ts +63 -0
- package/src/plugins/manageSource/index.ts +66 -0
- package/src/plugins/markdown/Preview.vue +77 -0
- package/src/plugins/markdown/View.vue +476 -0
- package/src/plugins/markdown/definition.ts +50 -0
- package/src/plugins/markdown/index.ts +36 -0
- package/src/plugins/presentHtml/Preview.vue +25 -0
- package/src/plugins/presentHtml/View.vue +52 -0
- package/src/plugins/presentHtml/definition.ts +27 -0
- package/src/plugins/presentHtml/helpers.ts +72 -0
- package/src/plugins/presentHtml/index.ts +41 -0
- package/src/plugins/presentMulmoScript/Preview.vue +23 -0
- package/src/plugins/presentMulmoScript/View.vue +1166 -0
- package/src/plugins/presentMulmoScript/definition.ts +95 -0
- package/src/plugins/presentMulmoScript/helpers.ts +162 -0
- package/src/plugins/presentMulmoScript/index.ts +40 -0
- package/src/plugins/scheduler/Preview.vue +67 -0
- package/src/plugins/scheduler/TasksTab.vue +205 -0
- package/src/plugins/scheduler/View.vue +565 -0
- package/src/plugins/scheduler/definition.ts +57 -0
- package/src/plugins/scheduler/index.ts +45 -0
- package/src/plugins/scheduler/viewModes.ts +26 -0
- package/src/plugins/spreadsheet/Preview.vue +29 -0
- package/src/plugins/spreadsheet/View.vue +997 -0
- package/src/plugins/spreadsheet/cellHighlights.ts +79 -0
- package/src/plugins/spreadsheet/definition.ts +121 -0
- package/src/plugins/spreadsheet/engine/calculator.ts +459 -0
- package/src/plugins/spreadsheet/engine/cellBuilder.ts +81 -0
- package/src/plugins/spreadsheet/engine/date-parser.ts +220 -0
- package/src/plugins/spreadsheet/engine/date-utils.ts +56 -0
- package/src/plugins/spreadsheet/engine/engine.ts +176 -0
- package/src/plugins/spreadsheet/engine/evaluator.ts +390 -0
- package/src/plugins/spreadsheet/engine/formatter.ts +172 -0
- package/src/plugins/spreadsheet/engine/formulaRefs.ts +101 -0
- package/src/plugins/spreadsheet/engine/functions/date.ts +299 -0
- package/src/plugins/spreadsheet/engine/functions/financial.ts +387 -0
- package/src/plugins/spreadsheet/engine/functions/index.ts +16 -0
- package/src/plugins/spreadsheet/engine/functions/logical.ts +262 -0
- package/src/plugins/spreadsheet/engine/functions/lookup.ts +400 -0
- package/src/plugins/spreadsheet/engine/functions/mathematical.ts +297 -0
- package/src/plugins/spreadsheet/engine/functions/statistical.ts +338 -0
- package/src/plugins/spreadsheet/engine/functions/text.ts +389 -0
- package/src/plugins/spreadsheet/engine/index.ts +27 -0
- package/src/plugins/spreadsheet/engine/jsonCellLocator.ts +111 -0
- package/src/plugins/spreadsheet/engine/parser.ts +143 -0
- package/src/plugins/spreadsheet/engine/registry.ts +150 -0
- package/src/plugins/spreadsheet/engine/responseDecoder.ts +67 -0
- package/src/plugins/spreadsheet/engine/types.ts +64 -0
- package/src/plugins/spreadsheet/index.ts +36 -0
- package/src/plugins/textResponse/Preview.vue +94 -0
- package/src/plugins/textResponse/View.vue +503 -0
- package/src/plugins/textResponse/definition.ts +34 -0
- package/src/plugins/textResponse/index.ts +27 -0
- package/src/plugins/textResponse/plugin.ts +29 -0
- package/src/plugins/textResponse/samples.ts +97 -0
- package/src/plugins/textResponse/types.ts +11 -0
- package/src/plugins/todo/Preview.vue +63 -0
- package/src/plugins/todo/View.vue +364 -0
- package/src/plugins/todo/composables/useTodos.ts +177 -0
- package/src/plugins/todo/definition.ts +45 -0
- package/src/plugins/todo/index.ts +61 -0
- package/src/plugins/todo/labels.ts +163 -0
- package/src/plugins/todo/priority.ts +98 -0
- package/src/plugins/todo/viewModes.ts +19 -0
- package/src/plugins/ui-image/ImagePreview.vue +23 -0
- package/src/plugins/ui-image/ImageView.vue +34 -0
- package/src/plugins/ui-image/index.ts +3 -0
- package/src/plugins/ui-image/types.ts +4 -0
- package/src/plugins/wiki/Preview.vue +65 -0
- package/src/plugins/wiki/View.vue +342 -0
- package/src/plugins/wiki/definition.ts +25 -0
- package/src/plugins/wiki/helpers.ts +59 -0
- package/src/plugins/wiki/index.ts +52 -0
- package/src/router/guards.ts +61 -0
- package/src/router/index.ts +50 -0
- package/src/tools/index.ts +52 -0
- package/src/tools/types.ts +27 -0
- package/src/types/events.ts +16 -0
- package/src/types/fileTree.ts +13 -0
- package/src/types/notification.ts +67 -0
- package/src/types/session.ts +116 -0
- package/src/types/sse.ts +90 -0
- package/src/types/toolCallHistory.ts +13 -0
- package/src/utils/agent/eventDispatch.ts +74 -0
- package/src/utils/agent/request.ts +55 -0
- package/src/utils/agent/toolCalls.ts +62 -0
- package/src/utils/api.ts +218 -0
- package/src/utils/canvas/viewMode.ts +46 -0
- package/src/utils/dom/authTokenMeta.ts +20 -0
- package/src/utils/dom/clickOutside.ts +11 -0
- package/src/utils/dom/externalLink.ts +57 -0
- package/src/utils/dom/scrollable.ts +24 -0
- package/src/utils/errors.ts +11 -0
- package/src/utils/files/expandedDirs.ts +25 -0
- package/src/utils/files/filename.ts +12 -0
- package/src/utils/files/sortChildren.ts +20 -0
- package/src/utils/filesPreview/schedulerPreview.ts +38 -0
- package/src/utils/filesPreview/todoPreview.ts +40 -0
- package/src/utils/format/date.ts +85 -0
- package/src/utils/format/frontmatter.ts +80 -0
- package/src/utils/format/jsonSyntax.ts +109 -0
- package/src/utils/html/previewCsp.ts +65 -0
- package/src/utils/image/resolve.ts +8 -0
- package/src/utils/image/rewriteMarkdownImageRefs.ts +182 -0
- package/src/utils/markdown/extractFirstH1.ts +39 -0
- package/src/utils/notification/dispatch.ts +22 -0
- package/src/utils/path/relativeLink.ts +130 -0
- package/src/utils/role/icon.ts +20 -0
- package/src/utils/role/merge.ts +10 -0
- package/src/utils/role/plugins.ts +12 -0
- package/src/utils/session/mergeSessions.ts +103 -0
- package/src/utils/session/seedRoleDefault.ts +35 -0
- package/src/utils/session/sessionEntries.ts +121 -0
- package/src/utils/session/sessionFactory.ts +22 -0
- package/src/utils/session/sessionHelpers.ts +99 -0
- package/src/utils/tools/dedup.ts +17 -0
- package/src/utils/tools/mcp.ts +33 -0
- package/src/utils/tools/pendingCalls.ts +16 -0
- package/src/utils/tools/result.ts +40 -0
- package/src/utils/types.ts +44 -0
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
// Auto-categorizer for newly-registered sources.
|
|
2
|
+
//
|
|
3
|
+
// When a user registers a new source, the UI / CLI calls
|
|
4
|
+
// `classifySource({ title, url, sampleTitles, sampleSummaries })`.
|
|
5
|
+
// The classifier spawns `claude` with a strict JSON schema that
|
|
6
|
+
// limits output to the fixed 25-slug taxonomy (see
|
|
7
|
+
// server/workspace/sources/taxonomy.ts), so the model can't invent
|
|
8
|
+
// `artificial-intelligence` when we already have `ai` — the whole
|
|
9
|
+
// point of the closed enum.
|
|
10
|
+
//
|
|
11
|
+
// Shape of the spawn layer mirrors `server/workspace/chat-index/summarizer.ts`
|
|
12
|
+
// so we reuse the "errors on stdout not stderr", "budget
|
|
13
|
+
// exhaustion surfaces cleanly" behaviour that we already got
|
|
14
|
+
// right once.
|
|
15
|
+
//
|
|
16
|
+
// Injection-friendly: production `classifySource` goes through
|
|
17
|
+
// `defaultClassify`. Tests pass their own `ClassifyFn` that
|
|
18
|
+
// skips the CLI entirely.
|
|
19
|
+
|
|
20
|
+
import { spawn } from "node:child_process";
|
|
21
|
+
import { tmpdir } from "node:os";
|
|
22
|
+
import { ClaudeCliNotFoundError } from "../journal/archivist.js";
|
|
23
|
+
import { formatSpawnFailure } from "../../utils/spawn.js";
|
|
24
|
+
import { ONE_MINUTE_MS } from "../../utils/time.js";
|
|
25
|
+
import { CATEGORY_SLUGS, normalizeCategories, type CategorySlug } from "./taxonomy.js";
|
|
26
|
+
import { errorMessage } from "../../utils/errors.js";
|
|
27
|
+
import { isRecord } from "../../utils/types.js";
|
|
28
|
+
|
|
29
|
+
// Structured input passed to the classifier. Kept small (not the
// full source content) so the prompt stays cheap — a couple of
// sample titles is enough for the model to tell "tech-news" from
// "culture".
export interface ClassifyInput {
  // Title of the source; emitted as the `TITLE:` line of the prompt.
  title: string;
  // URL of the source; emitted as the `URL:` line of the prompt.
  url: string;
  // Optional extra context from a sample fetch — the first few
  // item titles and (optionally) their summaries give the LLM
  // enough signal to distinguish subject matter. When the
  // pre-register handshake can't fetch yet (or the source is
  // brand new), these can be empty arrays. buildClassifyPrompt
  // reads at most the first 5 titles and the first 3 summaries.
  sampleTitles?: string[];
  sampleSummaries?: string[];
  // Optional human notes from the YAML frontmatter body — users
  // sometimes describe why they registered the source. Only the
  // first 400 characters (after trimming) reach the prompt.
  notes?: string;
}
|
|
47
|
+
|
|
48
|
+
export interface ClassifyResult {
  // Slugs from CATEGORY_SLUGS, in model-output order (usually
  // "most specific → most general"). The JSON schema sent to the
  // model asks for 1-5 entries. Always validated through
  // `normalizeCategories` so a hallucinated slug never leaks into
  // the registry; validateClassifyResult throws when nothing
  // survives that filter, so this list is never empty.
  categories: CategorySlug[];
  // Model's free-text one-line explanation — stored in the source
  // file's notes body for human review. validateClassifyResult
  // truncates it to 400 characters and substitutes "" when the
  // model did not return a string.
  rationale: string;
}
|
|
58
|
+
|
|
59
|
+
// Injection point for tests: any async function that maps a
// ClassifyInput to a ClassifyResult is accepted by
// `classifySource`. Production passes `defaultClassify`, which
// spawns the claude CLI; tests can pass a stub that never does.
export type ClassifyFn = (input: ClassifyInput) => Promise<ClassifyResult>;
|
|
63
|
+
|
|
64
|
+
// Max time (in milliseconds) we let `claude` run during
// registration before the child process is killed. Registration
// is a foreground user action, so anything longer than 2 min is
// effectively broken anyway.
export const DEFAULT_TIMEOUT_MS = 2 * ONE_MINUTE_MS;
|
|
68
|
+
|
|
69
|
+
// Budget cap in US dollars, forwarded to the CLI via
// `--max-budget-usd`. Classification is one small call per source
// (once at registration, rarely re-classified) so $0.05 is fine —
// we don't pay the first-call cache-creation cost on every source
// because we warm it once and reuse.
const MAX_BUDGET_USD = 0.05;
|
|
74
|
+
|
|
75
|
+
// System prompt handed to the claude CLI via `--system-prompt`.
// Written as one sentence per array entry; the entries are joined
// with a single space to form the final prompt text.
const SYSTEM_PROMPT = [
  "You classify an information source (RSS feed, GitHub repo, web site, etc.) into a fixed taxonomy.",
  "You MUST pick between 1 and 5 categories from the provided enum — no synonyms, no new slugs, no invented labels.",
  "Choose the most specific matches first, then add broader ones only if they add signal.",
  "Output strict JSON matching the provided schema. Respond with structured output only.",
].join(" ");
|
|
80
|
+
|
|
81
|
+
// JSON schema passed to the CLI via `--json-schema`. The category
// enum is inlined so the model sees the exact list of allowed
// values; a fresh copy of CATEGORY_SLUGS is spread into every
// schema object, so callers can't mutate the taxonomy through it.
// Key order is kept stable because the object is JSON-stringified.
function classifySchema(): Record<string, unknown> {
  // Between one and five slugs, each restricted to the taxonomy.
  const categoriesSchema = {
    type: "array",
    minItems: 1,
    maxItems: 5,
    items: { type: "string", enum: [...CATEGORY_SLUGS] },
  };
  // Short free-text justification.
  const rationaleSchema = {
    type: "string",
    maxLength: 200,
  };
  return {
    type: "object",
    required: ["categories", "rationale"],
    properties: {
      categories: categoriesSchema,
      rationale: rationaleSchema,
    },
  };
}
|
|
102
|
+
|
|
103
|
+
// Build the user-prompt text from the structured input. Pure —
// exported so tests can pin the shape of what the model sees.
//
// Layout (sections separated by a blank line, optional ones are
// omitted when empty):
//   TITLE: / URL:         always present
//   USER NOTES:           trimmed notes, first 400 chars
//   RECENT ITEM TITLES:   up to 5 bullets
//   RECENT ITEM SUMMARIES: up to 3 bullets
//
// Sample titles and summaries come from fetched feeds, i.e. they
// are untrusted text. Each one is collapsed onto a single line and
// capped at 200 chars so that an item containing newlines can't
// break the bullet list (or fake a section header), and a single
// oversized item can't dominate the prompt budget.
export function buildClassifyPrompt(input: ClassifyInput): string {
  const MAX_BULLET_CHARS = 200;
  // Whitespace runs (including newlines/tabs) become one space.
  const toBullet = (text: string): string => `- ${text.replace(/\s+/g, " ").slice(0, MAX_BULLET_CHARS)}`;

  const lines: string[] = [`TITLE: ${input.title}`, `URL: ${input.url}`];

  const notes = input.notes?.trim() ?? "";
  if (notes.length > 0) {
    lines.push("", "USER NOTES:", notes.slice(0, 400));
  }

  const titles = input.sampleTitles ?? [];
  if (titles.length > 0) {
    lines.push("", "RECENT ITEM TITLES:");
    for (const title of titles.slice(0, 5)) {
      lines.push(toBullet(title));
    }
  }

  const summaries = input.sampleSummaries ?? [];
  if (summaries.length > 0) {
    lines.push("", "RECENT ITEM SUMMARIES:");
    for (const summary of summaries.slice(0, 3)) {
      lines.push(toBullet(summary));
    }
  }

  return lines.join("\n");
}
|
|
134
|
+
|
|
135
|
+
// Subset of the envelope printed by `claude --output-format json`
// that this module reads. Every field is optional because the
// envelope comes from an external process and is not trusted.
interface ClaudeJsonResult {
  // Envelope type tag; declared for completeness, not read here.
  type?: string;
  // When truthy, parseClassifyOutput throws instead of validating.
  is_error?: boolean;
  // Schema-constrained payload; handed to validateClassifyResult.
  structured_output?: unknown;
  // Free-text result; used as the detail of the error message
  // when `is_error` is set.
  result?: string;
}
|
|
141
|
+
|
|
142
|
+
// Pure: parse the claude `--output-format json` envelope and
// validate against our result shape. Exported so tests cover the
// envelope-handling + normalization paths without spawning CLI.
//
// Throws an Error (always prefixed `[sources/classifier]`) when:
//   - stdout is not valid JSON,
//   - the JSON value is not a plain object (e.g. `null`, a bare
//     string/number, or an array),
//   - the envelope has `is_error` set,
//   - `structured_output` fails validateClassifyResult.
export function parseClassifyOutput(stdout: string): ClassifyResult {
  let envelope: unknown;
  try {
    envelope = JSON.parse(stdout.trim());
  } catch (err) {
    throw new Error(`[sources/classifier] failed to parse claude json: ${errorMessage(err)}`);
  }
  // `JSON.parse("null")` succeeds and returns null; reading a
  // property off it would surface as an untagged TypeError, so
  // reject every non-object envelope up front with a clear message.
  if (typeof envelope !== "object" || envelope === null || Array.isArray(envelope)) {
    throw new Error("[sources/classifier] claude json envelope is not an object");
  }
  // Safe after the runtime check above: all fields are optional.
  const parsed = envelope as ClaudeJsonResult;
  if (parsed.is_error) {
    // Only interpolate `result` when it really is a string so a
    // malformed envelope never prints "[object Object]".
    const detail = typeof parsed.result === "string" ? parsed.result : "unknown";
    throw new Error(`[sources/classifier] claude returned error: ${detail}`);
  }
  return validateClassifyResult(parsed.structured_output);
}
|
|
157
|
+
|
|
158
|
+
// Turn the untyped `structured_output` payload into a
// ClassifyResult, or throw.
//
// Categories go through `normalizeCategories`, which filters out
// anything that isn't a taxonomy slug; the rationale is optional
// and defaults to "". Both the enum-constrained JSON schema and
// this validator guard against a bad slug — defense-in-depth.
export function validateClassifyResult(obj: unknown): ClassifyResult {
  // Anything isRecord refuses is not a usable payload. (Arrays are
  // `typeof === "object"` too; the original author relies on
  // isRecord rejecting them — confirm in utils/types.)
  if (!isRecord(obj)) {
    throw new Error("[sources/classifier] output is not an object");
  }
  const payload = obj as Record<string, unknown>;

  const categories = normalizeCategories(payload.categories);
  if (categories.length < 1) {
    // The schema demands at least one entry (`minItems: 1`), so an
    // empty list means the model ignored the schema or every slug
    // it produced was filtered out as invalid. Fail loudly so the
    // caller treats the registration as failed instead of storing
    // a source with no categories.
    throw new Error("[sources/classifier] output has no valid categories from the taxonomy");
  }

  // Non-string rationale is dropped; strings are capped at 400 chars.
  let rationale = "";
  const rawRationale = payload.rationale;
  if (typeof rawRationale === "string") {
    rationale = rawRationale.slice(0, 400);
  }
  return { categories, rationale };
}
|
|
184
|
+
|
|
185
|
+
// --- spawn layer --------------------------------------------------------
|
|
186
|
+
|
|
187
|
+
// Run the `claude` CLI once with the classification schema and
// resolve with its raw stdout.
//
//   userPrompt — text produced by buildClassifyPrompt
//   timeoutMs  — hard wall-clock limit; the child is SIGKILLed and
//                the promise rejects when it elapses
//
// Rejects with ClaudeCliNotFoundError when the binary is missing
// (spawn ENOENT), with the raw spawn error for other launch
// failures, and with a formatSpawnFailure message on a non-zero
// exit. The `settled` flag guarantees the promise is resolved or
// rejected exactly once even though timeout / error / close can
// all fire.
function spawnClaudeClassify(userPrompt: string, timeoutMs: number): Promise<string> {
  return new Promise((resolve, reject) => {
    const args = [
      "--print",
      "--no-session-persistence",
      "--output-format",
      "json",
      "--model",
      "haiku",
      "--max-budget-usd",
      String(MAX_BUDGET_USD),
      "--json-schema",
      JSON.stringify(classifySchema()),
      "--system-prompt",
      SYSTEM_PROMPT,
      "-p",
      userPrompt,
    ];
    // Run from tmpdir so claude doesn't load the project's
    // CLAUDE.md / plugins / memory and inflate the context.
    const proc = spawn("claude", args, {
      cwd: tmpdir(),
      stdio: ["ignore", "pipe", "pipe"],
    });

    // Keep the raw bytes and decode once at the end: a multi-byte
    // UTF-8 character can be split across two pipe chunks, and
    // decoding each chunk on its own would turn it into U+FFFD.
    const stdoutChunks: Buffer[] = [];
    const stderrChunks: Buffer[] = [];
    let settled = false;

    const timer = setTimeout(() => {
      if (settled) return;
      settled = true;
      proc.kill("SIGKILL");
      reject(new Error(`[sources/classifier] claude timed out after ${timeoutMs}ms`));
    }, timeoutMs);

    proc.stdout.on("data", (chunk: Buffer) => {
      stdoutChunks.push(chunk);
    });
    proc.stderr.on("data", (chunk: Buffer) => {
      stderrChunks.push(chunk);
    });
    proc.on("error", (err: Error & { code?: string }) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      if (err.code === "ENOENT") {
        reject(new ClaudeCliNotFoundError());
      } else {
        reject(err);
      }
    });
    proc.on("close", (code) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      const stdout = Buffer.concat(stdoutChunks).toString("utf8");
      if (code !== 0) {
        // claude writes structured errors (incl.
        // error_max_budget_usd) to STDOUT in JSON form — same
        // lesson we learned in chat-index/summarizer. Prefer the
        // structured message when we can parse it.
        const stderr = Buffer.concat(stderrChunks).toString("utf8");
        reject(new Error(formatSpawnFailure("[sources/classifier]", code, stdout, stderr)));
        return;
      }
      resolve(stdout);
    });
  });
}
|
|
255
|
+
|
|
256
|
+
// Production ClassifyFn — spawns the real claude CLI.
|
|
257
|
+
// Build the prompt, run the real claude CLI with the default
// timeout, and parse whatever it printed.
export const defaultClassify: ClassifyFn = async (input) =>
  parseClassifyOutput(await spawnClaudeClassify(buildClassifyPrompt(input), DEFAULT_TIMEOUT_MS));
|
|
262
|
+
|
|
263
|
+
// Public entry. Thin wrapper so tests can inject a ClassifyFn
|
|
264
|
+
// without reaching into spawn internals, and the call site in
|
|
265
|
+
// the manageSource plugin / pipeline stays a single symbol.
|
|
266
|
+
// Delegates to `classify`, which defaults to the CLI-backed
// `defaultClassify`; callers may pass their own implementation.
export async function classifySource(input: ClassifyInput, classify: ClassifyFn = defaultClassify): Promise<ClassifyResult> {
  const pending = classify(input);
  return pending;
}
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
// arXiv fetcher.
|
|
2
|
+
//
|
|
3
|
+
// arXiv's query API returns Atom 1.0 XML, which parseFeed already
|
|
4
|
+
// handles — so this fetcher is mostly "build URL + parse + filter
|
|
5
|
+
// by cursor". The arxiv-specific value-adds are:
|
|
6
|
+
//
|
|
7
|
+
// 1. Query-URL builder with sensible defaults
|
|
8
|
+
// (sort=submittedDate, descending) and a defensive cap on
|
|
9
|
+
// max_results (arXiv's own cap is 2000 but our phase-1
|
|
10
|
+
// source model caps at maxItemsPerFetch)
|
|
11
|
+
// 2. Separate cursor key so a source that migrates between
|
|
12
|
+
// fetcher kinds doesn't mishandle state
|
|
13
|
+
//
|
|
14
|
+
// Source config:
|
|
15
|
+
//
|
|
16
|
+
// fetcher_kind: arxiv
|
|
17
|
+
// arxiv_query: cat:cs.CL OR ti:"large language model"
|
|
18
|
+
// arxiv_sort: submittedDate # optional: submittedDate | lastUpdatedDate | relevance
|
|
19
|
+
// arxiv_order: descending # optional: ascending | descending
|
|
20
|
+
//
|
|
21
|
+
// The `arxiv_query` string goes straight into the API's
|
|
22
|
+
// `search_query` parameter (URL-encoded). arXiv's own query
|
|
23
|
+
// syntax supports boolean AND/OR/ANDNOT, field prefixes
|
|
24
|
+
// (ti:, au:, abs:, cat:, ...), and quoted phrases — we pass
|
|
25
|
+
// through verbatim.
|
|
26
|
+
|
|
27
|
+
import { normalizeUrl, stableItemId } from "../urls.js";
|
|
28
|
+
import type { Source, SourceItem, SourceState } from "../types.js";
|
|
29
|
+
import type { FetcherDeps, FetchResult, SourceFetcher } from "./index.js";
|
|
30
|
+
import { registerFetcher } from "./index.js";
|
|
31
|
+
import { fetchPolite } from "../httpFetcher.js";
|
|
32
|
+
import { parseFeed, type ParsedFeed } from "./rssParser.js";
|
|
33
|
+
|
|
34
|
+
// Key under which this fetcher stores its cursor timestamp.
export const ARXIV_CURSOR_KEY = "arxiv_last_published_at";

// Endpoint that every query URL is built on.
export const ARXIV_API_BASE = "https://export.arxiv.org/api/query";

// Values accepted for the `sortBy` / `sortOrder` query parameters;
// anything else is replaced with a default when the URL is built.
const ALLOWED_SORT = new Set(["submittedDate", "lastUpdatedDate", "relevance"]);
const ALLOWED_ORDER = new Set(["ascending", "descending"]);
|
|
40
|
+
|
|
41
|
+
// Error thrown by the arXiv fetcher. Carries the URL involved and
// the HTTP status, or null when no status is available.
export class ArxivFetcherError extends Error {
  // URL the failure relates to.
  readonly url: string;
  // HTTP status code, or null when the error has none.
  readonly status: number | null;

  constructor(url: string, status: number | null, message: string) {
    super(message);
    this.name = "ArxivFetcherError";
    this.url = url;
    this.status = status;
  }
}
|
|
51
|
+
|
|
52
|
+
// Build the arXiv query URL. Validates and defaults sort / order
|
|
53
|
+
// so a typo in a source file falls back to a safe configuration
|
|
54
|
+
// rather than 400-ing from the API.
|
|
55
|
+
// Build the arXiv query-API URL.
//
//   query      — value for `search_query` (URL-encoded here)
//   sort       — requested `sortBy`; unknown values become "submittedDate"
//   order      — requested `sortOrder`; unknown values become "descending"
//   maxResults — requested page size; floored and clamped to 1..200
//
// Returns the full request URL, always with `start=0`.
export function arxivUrl(query: string, sort: string, order: string, maxResults: number): string {
  const safeSort = ALLOWED_SORT.has(sort) ? sort : "submittedDate";
  const safeOrder = ALLOWED_ORDER.has(order) ? order : "descending";
  // Math.floor / Math.min / Math.max all propagate NaN, so a NaN
  // page size would otherwise be serialized as `max_results=NaN`.
  // Fall back to the lower bound in that case, in line with the
  // safe-default handling of sort / order above.
  const floored = Math.floor(maxResults);
  const requested = Number.isNaN(floored) ? 1 : floored;
  // Clamp defensively so a mistyped large value doesn't waste
  // bandwidth.
  const clamped = Math.max(1, Math.min(200, requested));
  const params = new URLSearchParams({
    search_query: query,
    start: "0",
    max_results: String(clamped),
    sortBy: safeSort,
    sortOrder: safeOrder,
  });
  return `${ARXIV_API_BASE}?${params.toString()}`;
}
|
|
70
|
+
|
|
71
|
+
// Pure: given a parsed feed and the parent source, apply the
|
|
72
|
+
// cursor filter and normalize to SourceItem[]. Capped at
|
|
73
|
+
// `source.maxItemsPerFetch`. arXiv's feed items carry ISO
|
|
74
|
+
// publishedAt so the same comparison semantics as RSS apply —
|
|
75
|
+
// items at-or-older than the cursor are dropped.
|
|
76
|
+
// Walk the parsed feed in order, convert each entry that passes
// the cursor filter, and stop once `source.maxItemsPerFetch`
// items have been collected.
export function normalizeArxivFeed(feed: ParsedFeed, source: Source, cursor: Record<string, string>): SourceItem[] {
  const cutoffTs = parseCursorTs(cursor);
  const collected: SourceItem[] = [];
  for (const entry of feed.items) {
    const reachedCap = collected.length >= source.maxItemsPerFetch;
    if (reachedCap) break;
    const converted = feedItemToSourceItem(entry, source, cutoffTs);
    if (!converted) continue;
    collected.push(converted);
  }
  return collected;
}
|
|
86
|
+
|
|
87
|
+
// Extract the cursor's ISO timestamp into a ms-since-epoch number
|
|
88
|
+
// usable for `<=` comparison. Returns null when absent or invalid
|
|
89
|
+
// (either case means "no cursor filtering, emit everything").
|
|
90
|
+
// Read the stored cursor value and convert it to epoch
// milliseconds; a missing, empty, or unparseable value yields null.
function parseCursorTs(cursor: Record<string, string>): number | null {
  const stored = cursor[ARXIV_CURSOR_KEY];
  const parsed = stored ? Date.parse(stored) : Number.NaN;
  if (Number.isFinite(parsed)) {
    return parsed;
  }
  return null;
}
|
|
96
|
+
|
|
97
|
+
// Decide whether one ParsedFeedItem produces a SourceItem given
|
|
98
|
+
// the cursor. Returns null when the item should be skipped
|
|
99
|
+
// (missing link, unparseable URL, at-or-older than cursor).
|
|
100
|
+
// Extracted so `normalizeArxivFeed` stays under the cognitive-
|
|
101
|
+
// complexity threshold.
|
|
102
|
+
// Convert one parsed feed entry into a SourceItem, or return null
// when the entry has no link, its link fails normalization, or its
// parseable publishedAt is at-or-before the cursor timestamp.
function feedItemToSourceItem(entry: ParsedFeed["items"][number], source: Source, lastSeenTs: number | null): SourceItem | null {
  const canonicalUrl = entry.link ? normalizeUrl(entry.link) : null;
  if (!canonicalUrl) return null;

  if (lastSeenTs !== null && entry.publishedAt) {
    const entryTs = Date.parse(entry.publishedAt);
    // An unparseable date never counts as stale, so the entry is kept.
    const isStale = Number.isFinite(entryTs) && entryTs <= lastSeenTs;
    if (isStale) return null;
  }

  // Entries without a publish date are stamped with the current time.
  const publishedAt = entry.publishedAt ?? new Date().toISOString();
  // summary / content are only present on the result when non-null.
  const summaryPart = entry.summary !== null ? { summary: entry.summary } : {};
  const contentPart = entry.content !== null ? { content: entry.content } : {};
  return {
    id: stableItemId(canonicalUrl),
    title: entry.title,
    url: canonicalUrl,
    publishedAt,
    ...summaryPart,
    ...contentPart,
    categories: source.categories,
    sourceSlug: source.slug,
  };
}
|
|
122
|
+
|
|
123
|
+
// Advance the cursor to the newest publishedAt across the parsed
|
|
124
|
+
// feed (not just the emitted items), same pattern as the RSS /
|
|
125
|
+
// GitHub fetchers so a quiet arXiv query doesn't keep re-emitting
|
|
126
|
+
// the same papers after a one-off republish.
|
|
127
|
+
// Return a cursor whose arXiv key holds the newest parseable
// publishedAt found in the feed. The input object is returned
// unchanged when the feed has no usable date or none newer than
// the stored one.
export function updateArxivCursor(current: Record<string, string>, feed: ParsedFeed): Record<string, string> {
  let latest: number | null = null;
  for (const { publishedAt } of feed.items) {
    const parsed = publishedAt ? Date.parse(publishedAt) : Number.NaN;
    if (!Number.isFinite(parsed)) continue;
    latest = latest === null ? parsed : Math.max(latest, parsed);
  }
  if (latest === null) return current;

  const stored = current[ARXIV_CURSOR_KEY];
  // An unparseable stored value gives NaN, which fails the `<=`
  // test below and is therefore overwritten.
  const storedTs = stored ? Date.parse(stored) : -Infinity;
  if (latest <= storedTs) return current;

  return { ...current, [ARXIV_CURSOR_KEY]: new Date(latest).toISOString() };
}
|
|
143
|
+
|
|
144
|
+
// SourceFetcher for `arxiv` sources: builds the query URL from the
// source's fetcher params, downloads and parses the feed, then
// returns the cursor-filtered items together with the advanced
// cursor. Throws ArxivFetcherError on a missing query, a non-OK
// HTTP response, or an unparseable body.
export const arxivFetcher: SourceFetcher = {
  kind: "arxiv",
  async fetch(source: Source, state: SourceState, deps: FetcherDeps): Promise<FetchResult> {
    const { fetcherParams } = source;
    const query = fetcherParams["arxiv_query"];
    if (typeof query !== "string" || query.trim().length === 0) {
      throw new ArxivFetcherError(source.url, null, "arxiv_query param is required");
    }

    const requestUrl = arxivUrl(
      query,
      fetcherParams["arxiv_sort"] ?? "submittedDate",
      fetcherParams["arxiv_order"] ?? "descending",
      source.maxItemsPerFetch,
    );
    const response = await fetchPolite(requestUrl, deps.http);
    if (!response.ok) {
      throw new ArxivFetcherError(requestUrl, response.status, `arXiv fetch failed with HTTP ${response.status}`);
    }

    const feed = parseFeed(await response.text());
    if (!feed) {
      throw new ArxivFetcherError(requestUrl, response.status, `arXiv response did not parse as Atom / RSS`);
    }

    const { cursor } = state;
    return {
      items: normalizeArxivFeed(feed, source, cursor),
      cursor: updateArxivCursor(cursor, feed),
    };
  },
};
|
|
169
|
+
|
|
170
|
+
// Runs when this module is evaluated: hands the arXiv fetcher to
// `registerFetcher` from ./index.js.
registerFetcher(arxivFetcher);
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
// Shared helpers for GitHub fetchers (releases + issues).
|
|
2
|
+
//
|
|
3
|
+
// Phase-1 scope: UNAUTHENTICATED public REST access only. GitHub
|
|
4
|
+
// grants 60 req/hour/IP without a token, which is plenty for a
|
|
5
|
+
// personal workspace with a handful of registered repos. Adding
|
|
6
|
+
// a `github-authed` fetcher with a PAT lives in phase 3.
|
|
7
|
+
//
|
|
8
|
+
// Everything in this module is pure or uses the already-injected
|
|
9
|
+
// `fetchPolite` — no direct HTTP. Tests stub HTTP at the
|
|
10
|
+
// FetcherDeps boundary.
|
|
11
|
+
|
|
12
|
+
import type { HttpFetcherDeps } from "../httpFetcher.js";
|
|
13
|
+
import { fetchPolite } from "../httpFetcher.js";
|
|
14
|
+
import { hasStringProp } from "../../../utils/types.js";
|
|
15
|
+
|
|
16
|
+
// GitHub REST API base. Factored out so tests / local dev can
|
|
17
|
+
// point at a stub server by patching this module — rare enough
|
|
18
|
+
// that we don't bother with an env var today.
|
|
19
|
+
export const GITHUB_API_BASE = "https://api.github.com";
|
|
20
|
+
|
|
21
|
+
// Owner / repo slugs on GitHub accept letters, digits, hyphen,
|
|
22
|
+
// underscore, dot. Lengths are loose but we cap at 100 each —
|
|
23
|
+
// any real repo comes in well under that.
|
|
24
|
+
//
|
|
25
|
+
// The slug doubles as a URL path segment: rejecting `..` / `/`
|
|
26
|
+
// / whitespace defends against a user-supplied (or LLM-suggested)
|
|
27
|
+
// `github_repo` that would craft a malicious request URL.
|
|
28
|
+
// Owner must start with an alphanumeric (GitHub usernames /
|
|
29
|
+
// org names can't begin with a dot).
|
|
30
|
+
const OWNER_SEGMENT_RE = /^[A-Za-z0-9][A-Za-z0-9._-]{0,99}$/;
|
|
31
|
+
// Repo can start with a dot — GitHub has the special `.github`
|
|
32
|
+
// repository used for org-wide community health files, so the
|
|
33
|
+
// repo-name regex is slightly looser than the owner one.
|
|
34
|
+
const REPO_SEGMENT_RE = /^[A-Za-z0-9.][A-Za-z0-9._-]{0,99}$/;
|
|
35
|
+
|
|
36
|
+
// The two halves of an `owner/repo` string.
export interface RepoSlug {
  // Segment before the slash.
  owner: string;
  // Segment after the slash.
  repo: string;
}
|
|
40
|
+
|
|
41
|
+
// Validate + parse an `owner/repo` string from source
|
|
42
|
+
// frontmatter. Returns null on any shape violation — callers
|
|
43
|
+
// treat null as "skip this source" rather than crashing the pass.
|
|
44
|
+
// Split a trimmed `owner/repo` string and validate both segments.
// Returns null for non-strings, anything without exactly one `/`,
// segments that fail the owner / repo patterns, a repo of `.` or
// `..`, and any segment that ends with a dot.
export function parseRepoSlug(raw: string): RepoSlug | null {
  if (typeof raw !== "string") return null;

  const segments = raw.trim().split("/");
  if (segments.length !== 2) return null;

  const [owner, repo] = segments;
  const wellFormed = OWNER_SEGMENT_RE.test(owner) && REPO_SEGMENT_RE.test(repo);
  if (!wellFormed) return null;

  // The repo pattern allows a leading dot, so `.` and `..` match
  // it and have to be rejected explicitly.
  const isDotName = repo === "." || repo === "..";
  const hasTrailingDot = owner.endsWith(".") || repo.endsWith(".");
  return isDotName || hasTrailingDot ? null : { owner, repo };
}
|
|
56
|
+
|
|
57
|
+
// Named error for any non-2xx GitHub response. Carries the status
|
|
58
|
+
// code so the pipeline can decide whether to backoff harder (403
|
|
59
|
+
// rate-limit) or surface a "source broken" warning (404 /
|
|
60
|
+
// repo-not-found).
|
|
61
|
+
// Error describing a failed GitHub request. The message embeds the
// URL and HTTP status, plus the API-provided message when there is
// a non-empty one.
export class GithubFetcherError extends Error {
  // URL that was requested.
  readonly url: string;
  // HTTP status code of the response.
  readonly status: number;
  // `message` string supplied by the caller, or null when absent.
  readonly apiMessage: string | null;

  constructor(url: string, status: number, apiMessage: string | null) {
    super(`GitHub fetch ${url} failed with HTTP ${status}${apiMessage ? ` — ${apiMessage}` : ""}`);
    this.name = "GithubFetcherError";
    this.url = url;
    this.status = status;
    this.apiMessage = apiMessage;
  }
}
|
|
74
|
+
|
|
75
|
+
// Tight helper: issue a GET against the GitHub API, decode the
|
|
76
|
+
// JSON body, and surface non-2xx responses as typed errors.
|
|
77
|
+
// Errors from fetchPolite (including `RobotsDisallowedError`) pass
|
|
78
|
+
// through untouched.
|
|
79
|
+
//
|
|
80
|
+
// GitHub API responses always include an `X-GitHub-Request-Id` and
|
|
81
|
+
// many include a `message` body field on errors. We include the
|
|
82
|
+
// body message in the thrown error for easier log reading.
|
|
83
|
+
// GET `url` through fetchPolite and return the decoded JSON body.
// A non-OK response is turned into a GithubFetcherError carrying
// the status and, when the error body is JSON with a string
// `message`, that message.
export async function githubFetchJson(url: string, http: HttpFetcherDeps): Promise<unknown> {
  const response = await fetchPolite(url, http);
  if (response.ok) {
    return response.json();
  }

  // Best-effort read of the error body: a failure to decode it must
  // not hide the HTTP status, so it is deliberately swallowed.
  let apiMessage: string | null = null;
  try {
    const payload: unknown = await response.json();
    if (hasStringProp(payload, "message")) {
      apiMessage = payload.message;
    }
  } catch {
    // Body could not be read as JSON; report the status alone.
  }
  throw new GithubFetcherError(url, response.status, apiMessage);
}
|
|
101
|
+
|
|
102
|
+
// `isRecord` is a small type-guard used by the GitHub json-shape
// parsers. It is defined in the shared utils/types module and
// re-exported here so existing callers that import it from this
// module keep working.
|
|
106
|
+
export { isRecord } from "../../../utils/types.js";
|