@mindrian_os/install 1.13.0-beta.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +21 -0
- package/.mcp.json +9 -0
- package/CHANGELOG.md +3333 -0
- package/LICENSE +123 -0
- package/README.md +673 -0
- package/agents/brain-query.md +80 -0
- package/agents/framework-runner.md +237 -0
- package/agents/grading.md +188 -0
- package/agents/investor.md +128 -0
- package/agents/larry-extended.md +135 -0
- package/agents/opportunity-scanner.md +91 -0
- package/agents/persona-analyst.md +132 -0
- package/agents/research.md +89 -0
- package/agents/reverse-salient-agent.md +27 -0
- package/bin/cli.js +142 -0
- package/bin/mindrian-mcp-server.cjs +182 -0
- package/bin/mindrian-tools.cjs +765 -0
- package/commands/act.md +439 -0
- package/commands/admin.md +404 -0
- package/commands/analyze-needs.md +42 -0
- package/commands/analyze-systems.md +39 -0
- package/commands/analyze-timing.md +42 -0
- package/commands/auto-explore.md +64 -0
- package/commands/beautiful-question.md +40 -0
- package/commands/brain-derive.md +78 -0
- package/commands/build-knowledge.md +42 -0
- package/commands/build-thesis.md +46 -0
- package/commands/causal.md +234 -0
- package/commands/challenge-assumptions.md +33 -0
- package/commands/compare-ventures.md +83 -0
- package/commands/dashboard.md +110 -0
- package/commands/deep-grade.md +82 -0
- package/commands/diagnose.md +58 -0
- package/commands/diagnostics.md +151 -0
- package/commands/doctor.md +151 -0
- package/commands/dominant-designs.md +40 -0
- package/commands/explain-decision.md +87 -0
- package/commands/explore-domains.md +42 -0
- package/commands/explore-futures.md +40 -0
- package/commands/explore-trends.md +42 -0
- package/commands/export.md +103 -0
- package/commands/file-meeting.md +724 -0
- package/commands/find-analogies.md +188 -0
- package/commands/find-bottlenecks.md +62 -0
- package/commands/find-connections.md +76 -0
- package/commands/funding.md +81 -0
- package/commands/grade.md +203 -0
- package/commands/graph.md +128 -0
- package/commands/hat-briefing.md +125 -0
- package/commands/heal.md +196 -0
- package/commands/help.md +399 -0
- package/commands/hmi-status.md +172 -0
- package/commands/jtbd.md +241 -0
- package/commands/leadership.md +73 -0
- package/commands/lean-canvas.md +40 -0
- package/commands/macro-trends.md +40 -0
- package/commands/map-unknowns.md +40 -0
- package/commands/memory.md +173 -0
- package/commands/models.md +175 -0
- package/commands/mos-reason.md +285 -0
- package/commands/mullins.md +120 -0
- package/commands/new-project.md +481 -0
- package/commands/onboard.md +434 -0
- package/commands/operator.md +149 -0
- package/commands/opportunities.md +144 -0
- package/commands/organize.md +497 -0
- package/commands/persona.md +198 -0
- package/commands/pipeline.md +112 -0
- package/commands/present.md +91 -0
- package/commands/publish.md +201 -0
- package/commands/query.md +124 -0
- package/commands/radar.md +72 -0
- package/commands/reanalyze.md +91 -0
- package/commands/research.md +196 -0
- package/commands/room.md +352 -0
- package/commands/rooms.md +598 -0
- package/commands/root-cause.md +40 -0
- package/commands/rs-experts.md +85 -0
- package/commands/rs-explain.md +100 -0
- package/commands/rs-fetch.md +94 -0
- package/commands/rs-thesis.md +85 -0
- package/commands/scenario-plan.md +40 -0
- package/commands/scheduled-tasks.md +285 -0
- package/commands/score-innovation.md +43 -0
- package/commands/scout.md +239 -0
- package/commands/setup.md +618 -0
- package/commands/snapshot.md +147 -0
- package/commands/speakers.md +84 -0
- package/commands/splash.md +28 -0
- package/commands/status.md +75 -0
- package/commands/structure-argument.md +42 -0
- package/commands/suggest-next.md +80 -0
- package/commands/systems-thinking.md +40 -0
- package/commands/think-hats.md +42 -0
- package/commands/update.md +181 -0
- package/commands/user-needs.md +40 -0
- package/commands/validate.md +40 -0
- package/commands/value-proposition.md +61 -0
- package/commands/vault.md +180 -0
- package/commands/visualize.md +52 -0
- package/commands/whitespace.md +507 -0
- package/commands/wiki.md +69 -0
- package/hooks/hooks.json +381 -0
- package/hooks/run-hook.cmd +64 -0
- package/lib/__init__.py +0 -0
- package/lib/__pycache__/__init__.cpython-312.pyc +0 -0
- package/lib/agents/auto-explore-agent.cjs +1043 -0
- package/lib/agents/reverse-salient-agent.cjs +679 -0
- package/lib/agents/tension-hook-agent.cjs +544 -0
- package/lib/brain/ROOM.md +44 -0
- package/lib/brain/chain-recommender.cjs +301 -0
- package/lib/chat/chat-context.js +185 -0
- package/lib/chat/chat-panel.js +721 -0
- package/lib/chat/fabric-chat.cjs +288 -0
- package/lib/chat/generative-tools.js +219 -0
- package/lib/conversation/ROOM.md +39 -0
- package/lib/conversation/classifier-rules.json +38 -0
- package/lib/conversation/classifier.cjs +264 -0
- package/lib/conversation/operator.cjs +287 -0
- package/lib/copy/115-spec-strings.cjs +55 -0
- package/lib/core/__init__.py +0 -0
- package/lib/core/__nav-stub.cjs +14 -0
- package/lib/core/__pycache__/__init__.cpython-312.pyc +0 -0
- package/lib/core/__pycache__/rs-math.cpython-312.pyc +0 -0
- package/lib/core/__pycache__/rs_cache.cpython-312.pyc +0 -0
- package/lib/core/__pycache__/rs_corpus.cpython-312.pyc +0 -0
- package/lib/core/__pycache__/rs_hybrid.cpython-312.pyc +0 -0
- package/lib/core/__pycache__/rs_math.cpython-312.pyc +0 -0
- package/lib/core/__pycache__/rs_rooms.cpython-312.pyc +0 -0
- package/lib/core/artifact-id.cjs +148 -0
- package/lib/core/asset-ops.cjs +151 -0
- package/lib/core/auto-commit-throttle.cjs +129 -0
- package/lib/core/bearer-token.cjs +199 -0
- package/lib/core/brain-client.cjs +865 -0
- package/lib/core/brain-derivation-prompts.cjs +326 -0
- package/lib/core/brain-derivation-queue.cjs +431 -0
- package/lib/core/brain-derivation.cjs +580 -0
- package/lib/core/brain-md-schema.cjs +528 -0
- package/lib/core/brain-md-staleness.cjs +357 -0
- package/lib/core/brain-response-sanitize.cjs +188 -0
- package/lib/core/bridge-writer.cjs +477 -0
- package/lib/core/chat-context-builder.cjs +253 -0
- package/lib/core/cross-room-aggregator.cjs +762 -0
- package/lib/core/daily-briefing.cjs +438 -0
- package/lib/core/decision-capture.cjs +618 -0
- package/lib/core/deep-links.cjs +82 -0
- package/lib/core/dispatch-optimizer.cjs +354 -0
- package/lib/core/dual-path-detector.cjs +84 -0
- package/lib/core/dual-path-detector.test.cjs +334 -0
- package/lib/core/exports-log.cjs +79 -0
- package/lib/core/feynman-minto-invariants.cjs +605 -0
- package/lib/core/folder-memory-async.cjs +338 -0
- package/lib/core/folder-memory-shared.cjs +890 -0
- package/lib/core/folder-memory.cjs +416 -0
- package/lib/core/framework-chain-composer.cjs +411 -0
- package/lib/core/frontmatter-schemas.cjs +330 -0
- package/lib/core/git-ops.cjs +141 -0
- package/lib/core/graph-ops.cjs +258 -0
- package/lib/core/hat-persistence.cjs +362 -0
- package/lib/core/index.cjs +60 -0
- package/lib/core/integration-registry.cjs +232 -0
- package/lib/core/intelligence-cascade.cjs +661 -0
- package/lib/core/lazygraph-ops.cjs +1057 -0
- package/lib/core/lru-cache.cjs +139 -0
- package/lib/core/mcp-profiles.cjs +182 -0
- package/lib/core/meeting-ops.cjs +54 -0
- package/lib/core/memory-ops.cjs +600 -0
- package/lib/core/migrations/ROOM.md +33 -0
- package/lib/core/migrations/phase-109-nodes-provenance.cjs +339 -0
- package/lib/core/migrations/phase-109-session-focus.cjs +99 -0
- package/lib/core/model-profiles.cjs +246 -0
- package/lib/core/mullins-scaffold.cjs +160 -0
- package/lib/core/nav-dial.cjs +316 -0
- package/lib/core/navigation/ROOM.md +15 -0
- package/lib/core/navigation/explanation.cjs +43 -0
- package/lib/core/navigation/focus.cjs +135 -0
- package/lib/core/navigation/ingestion.cjs +82 -0
- package/lib/core/navigation/insights.cjs +350 -0
- package/lib/core/navigation/memory-events.cjs +118 -0
- package/lib/core/navigation/neighborhood.cjs +78 -0
- package/lib/core/navigation/packet.cjs +182 -0
- package/lib/core/navigation/room-home.cjs +127 -0
- package/lib/core/navigation/transitions.cjs +82 -0
- package/lib/core/navigation-engine-shared.cjs +242 -0
- package/lib/core/navigation-engine.cjs +664 -0
- package/lib/core/navigation.cjs +60 -0
- package/lib/core/nl-graph-queries.cjs +164 -0
- package/lib/core/offer-presenter.cjs +406 -0
- package/lib/core/opportunity-extractor.cjs +183 -0
- package/lib/core/opportunity-ops.cjs +1371 -0
- package/lib/core/persona-ops.cjs +537 -0
- package/lib/core/persona-taxonomy.cjs +190 -0
- package/lib/core/platform-gates.cjs +120 -0
- package/lib/core/platform.cjs +257 -0
- package/lib/core/proactive-intelligence.cjs +528 -0
- package/lib/core/problem-type-router.cjs +315 -0
- package/lib/core/reasoning-ops.cjs +639 -0
- package/lib/core/reverse-salient-persona-suffix.cjs +115 -0
- package/lib/core/room-classifier-strict-mode.cjs +229 -0
- package/lib/core/room-db.cjs +127 -0
- package/lib/core/room-ops-async.cjs +92 -0
- package/lib/core/room-ops-shared.cjs +64 -0
- package/lib/core/room-ops-sync.cjs +70 -0
- package/lib/core/room-ops.cjs +32 -0
- package/lib/core/room-type-detector.cjs +386 -0
- package/lib/core/rs-brain-substrate-prompts.cjs +129 -0
- package/lib/core/rs-brain-substrate.cjs +570 -0
- package/lib/core/rs-breakthrough-scorer.cjs +255 -0
- package/lib/core/rs-canon-violations.cjs +82 -0
- package/lib/core/rs-chain-feeder.cjs +343 -0
- package/lib/core/rs-commercial-assessor.cjs +280 -0
- package/lib/core/rs-differential-scorer.cjs +376 -0
- package/lib/core/rs-domain-analyzer.cjs +385 -0
- package/lib/core/rs-egress-prompts.cjs +113 -0
- package/lib/core/rs-egress-telemetry.cjs +225 -0
- package/lib/core/rs-egress-violations.cjs +53 -0
- package/lib/core/rs-expert-mapper.cjs +467 -0
- package/lib/core/rs-fetcher-academic.cjs +697 -0
- package/lib/core/rs-fetcher-experts.cjs +314 -0
- package/lib/core/rs-fetcher-industry.cjs +731 -0
- package/lib/core/rs-fetcher-patents.cjs +564 -0
- package/lib/core/rs-innovation-classifier.cjs +194 -0
- package/lib/core/rs-mind-map.cjs +656 -0
- package/lib/core/rs-neo4j-writer.cjs +388 -0
- package/lib/core/rs-nl-to-query.cjs +425 -0
- package/lib/core/rs-pinecone-bridge.cjs +303 -0
- package/lib/core/rs-preprocessor.cjs +350 -0
- package/lib/core/rs-query-matrix.cjs +316 -0
- package/lib/core/rs-query-to-text.cjs +438 -0
- package/lib/core/rs-sqlite-mirror.cjs +443 -0
- package/lib/core/rs-thesis-generator.cjs +188 -0
- package/lib/core/rs_cache.py +479 -0
- package/lib/core/rs_corpus.py +468 -0
- package/lib/core/rs_hybrid.py +586 -0
- package/lib/core/rs_math.py +287 -0
- package/lib/core/rs_rooms.py +193 -0
- package/lib/core/scheduled-scanner.cjs +463 -0
- package/lib/core/scratchpad-ops.cjs +201 -0
- package/lib/core/section-8-trace-schema.cjs +138 -0
- package/lib/core/section-registry.cjs +111 -0
- package/lib/core/session-state.cjs +144 -0
- package/lib/core/shallow-doc-parser.cjs +174 -0
- package/lib/core/shallow-doc-parser.test.cjs +226 -0
- package/lib/core/skill-activation-router.cjs +284 -0
- package/lib/core/state-ops.cjs +46 -0
- package/lib/core/statusline-cache.cjs +266 -0
- package/lib/core/token-estimator.cjs +348 -0
- package/lib/core/user-archetype.cjs +239 -0
- package/lib/core/user-md-ops.cjs +524 -0
- package/lib/core/visual-ops.cjs +624 -0
- package/lib/core/write-lock.cjs +149 -0
- package/lib/graph/canvas-graph.js +467 -0
- package/lib/graph/constellation-config.cjs +299 -0
- package/lib/graph/graph-detail-panel.js +165 -0
- package/lib/hmi/ROOM.md +47 -0
- package/lib/hmi/across-session-memory.cjs +604 -0
- package/lib/hmi/cross-room-memory.cjs +575 -0
- package/lib/hmi/decoy-tier.cjs +395 -0
- package/lib/hmi/jtbd-classifier.cjs +219 -0
- package/lib/hmi/jtbd-state.cjs +199 -0
- package/lib/hmi/jtbd-taxonomy.json +392 -0
- package/lib/hmi/selector-dispatcher.cjs +546 -0
- package/lib/hmi/selector-telemetry.cjs +263 -0
- package/lib/hmi/shape-f0-renderer.cjs +139 -0
- package/lib/hmi/shape-f1-fallback.cjs +80 -0
- package/lib/hmi/shape-f1-renderer.cjs +138 -0
- package/lib/hmi/shape-f2-renderer.cjs +132 -0
- package/lib/hmi/shape-f3-renderer.cjs +66 -0
- package/lib/hmi/shape-f4-renderer.cjs +72 -0
- package/lib/hmi/shape-f5-renderer.cjs +155 -0
- package/lib/hmi/shape-f6-plan-review-renderer.cjs +312 -0
- package/lib/hmi/shape-f6-renderer.cjs +144 -0
- package/lib/hmi/shape-g-renderer.cjs +219 -0
- package/lib/hmi/shape-h-renderer.cjs +222 -0
- package/lib/hmi/tier-check.cjs +63 -0
- package/lib/import/PRECONDITIONS.md +41 -0
- package/lib/import/branding.cjs +210 -0
- package/lib/import/branding.test.cjs +235 -0
- package/lib/import/classifications-sync.cjs +104 -0
- package/lib/import/classifications-sync.test.cjs +129 -0
- package/lib/import/enricher.cjs +296 -0
- package/lib/import/enricher.test.cjs +273 -0
- package/lib/import/integration.test.cjs +376 -0
- package/lib/import/manifest.cjs +129 -0
- package/lib/import/manifest.schema.json +185 -0
- package/lib/import/manifest.test.cjs +123 -0
- package/lib/import/meeting-detector.cjs +92 -0
- package/lib/import/meeting-detector.test.cjs +100 -0
- package/lib/import/person-detector.cjs +229 -0
- package/lib/import/person-detector.test.cjs +149 -0
- package/lib/import/report.cjs +186 -0
- package/lib/import/report.test.cjs +186 -0
- package/lib/import/room-md-scaffolder.cjs +49 -0
- package/lib/import/router.cjs +224 -0
- package/lib/import/router.test.cjs +356 -0
- package/lib/import/run-all-tests.cjs +36 -0
- package/lib/import/smoke-test.cjs +213 -0
- package/lib/import/smoke-test.test.cjs +148 -0
- package/lib/import/test-fixtures/collision-vault/preexisting-room/STATE.md +8 -0
- package/lib/import/test-fixtures/collision-vault/preexisting-room/problem-definition/onboarding/onboarding.md +7 -0
- package/lib/import/test-fixtures/collision-vault/source/onboarding.md +5 -0
- package/lib/import/test-fixtures/obsidian-vault/.obsidian/workspace.json +1 -0
- package/lib/import/test-fixtures/obsidian-vault/notes/with-wikilinks.md +4 -0
- package/lib/import/test-fixtures/tiny-vault/notes/2026-01-15-team-sync.md +9 -0
- package/lib/import/test-fixtures/tiny-vault/notes/empty.md +3 -0
- package/lib/import/test-fixtures/tiny-vault/notes/onboarding.md +5 -0
- package/lib/import/test-fixtures/tiny-vault/notes/pricing.md +5 -0
- package/lib/import/test-fixtures/tiny-vault/notes/random.md +4 -0
- package/lib/import/undo.test.cjs +199 -0
- package/lib/import/vault-scanner.cjs +105 -0
- package/lib/import/vault-scanner.test.cjs +67 -0
- package/lib/mcp/app-html/dashboard.html +316 -0
- package/lib/mcp/app-html/graph.html +428 -0
- package/lib/mcp/app-html/mindrian-platform.html +1841 -0
- package/lib/mcp/app-html/wiki.html +383 -0
- package/lib/mcp/app-views.cjs +322 -0
- package/lib/mcp/brain-router.cjs +418 -0
- package/lib/mcp/capability-registry.cjs +62 -0
- package/lib/mcp/larry-context.cjs +46 -0
- package/lib/mcp/larry-server-instructions.md +114 -0
- package/lib/mcp/pipeline-state.cjs +275 -0
- package/lib/mcp/prompts.cjs +302 -0
- package/lib/mcp/resources.cjs +227 -0
- package/lib/mcp/session-catchup.cjs +327 -0
- package/lib/mcp/surface-detect.cjs +75 -0
- package/lib/mcp/tool-router.cjs +1034 -0
- package/lib/memory/aaak-compress.cjs +403 -0
- package/lib/memory/aaak-compress.test.cjs +288 -0
- package/lib/memory/async-artifact-auto-commit.test.cjs +223 -0
- package/lib/memory/bearer-token.test.cjs +315 -0
- package/lib/memory/brain-cache-lru.test.cjs +259 -0
- package/lib/memory/brain-client-query-shape.test.cjs +160 -0
- package/lib/memory/brain-derivation-graceful-degradation.test.cjs +1019 -0
- package/lib/memory/brain-derivation-queue.test.cjs +539 -0
- package/lib/memory/brain-derivation.test.cjs +634 -0
- package/lib/memory/brain-derive-command.test.cjs +534 -0
- package/lib/memory/brain-md-invariants-validator.test.cjs +704 -0
- package/lib/memory/brain-md-schema.test.cjs +467 -0
- package/lib/memory/brain-md-staleness.test.cjs +525 -0
- package/lib/memory/brain-server-resolution.test.cjs +314 -0
- package/lib/memory/chain-recommender.test.cjs +233 -0
- package/lib/memory/chat-context.test.cjs +128 -0
- package/lib/memory/command-registry.test.cjs +220 -0
- package/lib/memory/cross-room-aggregator.test.cjs +909 -0
- package/lib/memory/dashboard-server.test.cjs +256 -0
- package/lib/memory/debouncer-drain-at-prompt.test.cjs +389 -0
- package/lib/memory/decision-capture.test.cjs +632 -0
- package/lib/memory/decision-capture.worker.cjs +70 -0
- package/lib/memory/explain-decision-command.test.cjs +521 -0
- package/lib/memory/explain-decision-footer.test.cjs +316 -0
- package/lib/memory/explored-materials-store.cjs +392 -0
- package/lib/memory/feynman-minto-guardian.test.cjs +736 -0
- package/lib/memory/feynman-minto-invariants.test.cjs +511 -0
- package/lib/memory/feynman-prompts-drift.test.cjs +144 -0
- package/lib/memory/feynman-prompts.cjs +151 -0
- package/lib/memory/feynman-prompts.test.cjs +96 -0
- package/lib/memory/folder-memory-quadruple.test.cjs +548 -0
- package/lib/memory/folder-memory.test.cjs +503 -0
- package/lib/memory/framework-chain-composer.test.cjs +515 -0
- package/lib/memory/frontmatter-schema-validator.test.cjs +290 -0
- package/lib/memory/heal-command.test.cjs +604 -0
- package/lib/memory/index-artifact-transaction.test.cjs +333 -0
- package/lib/memory/lazygraph-rs-discoveries-view.test.cjs +122 -0
- package/lib/memory/mcp-input-validation.test.cjs +240 -0
- package/lib/memory/mcp-server-brain-deps.test.cjs +270 -0
- package/lib/memory/mcp-stack-fallback.test.cjs +433 -0
- package/lib/memory/minto-debouncer.test.cjs +407 -0
- package/lib/memory/minto-debouncer.worker.cjs +46 -0
- package/lib/memory/minto-migration-v88.test.cjs +265 -0
- package/lib/memory/minto-schema-v88.test.cjs +390 -0
- package/lib/memory/mos-status-renderer.test.cjs +631 -0
- package/lib/memory/narrative-schema.cjs +376 -0
- package/lib/memory/narrative-schema.test.cjs +209 -0
- package/lib/memory/nav-dial.test.cjs +414 -0
- package/lib/memory/navigation-engine-core.test.cjs +722 -0
- package/lib/memory/navigation-invariants.test.cjs +483 -0
- package/lib/memory/offer-presenter.test.cjs +554 -0
- package/lib/memory/on-stop-snapshot.test.cjs +404 -0
- package/lib/memory/pending-tension-store.cjs +373 -0
- package/lib/memory/post-compact-reinjection.test.cjs +854 -0
- package/lib/memory/post-write-triple.test.cjs +317 -0
- package/lib/memory/pre-compact-snapshot.test.cjs +495 -0
- package/lib/memory/problem-type-router.test.cjs +656 -0
- package/lib/memory/query-efficiency-telemetry.test.cjs +370 -0
- package/lib/memory/recompile-room-references.test.cjs +392 -0
- package/lib/memory/recompile-room-references.worker.cjs +42 -0
- package/lib/memory/record-decision-dual-write.test.cjs +454 -0
- package/lib/memory/room-classifier-strict-mode.test.cjs +417 -0
- package/lib/memory/room-minto-hook.test.cjs +398 -0
- package/lib/memory/rs-discovery-engine.test.cjs +323 -0
- package/lib/memory/run-feynman-tests.cjs +1247 -0
- package/lib/memory/security-trifecta.test.cjs +312 -0
- package/lib/memory/session-start-brain-staleness.test.cjs +363 -0
- package/lib/memory/session-start-triple-injection.test.cjs +514 -0
- package/lib/memory/sessionstart-banner-formatter.cjs +318 -0
- package/lib/memory/sessionstart-minto-banner.test.cjs +373 -0
- package/lib/memory/skill-activation-router.test.cjs +419 -0
- package/lib/memory/stamp-artifact-write.test.cjs +304 -0
- package/lib/memory/statusline-active-room.test.cjs +315 -0
- package/lib/memory/statusline-minto-segment.test.cjs +292 -0
- package/lib/memory/sync-async-entry-points.test.cjs +204 -0
- package/lib/memory/test-bridge-writer-enhanced.cjs +452 -0
- package/lib/memory/test-rs-brain-substrate-shape.cjs +529 -0
- package/lib/memory/test-rs-brain-substrate.cjs +636 -0
- package/lib/memory/test-rs-breakthrough-scorer.cjs +375 -0
- package/lib/memory/test-rs-canon-violations.cjs +218 -0
- package/lib/memory/test-rs-chain-feeder-core.cjs +344 -0
- package/lib/memory/test-rs-chain-feeder-skill-spawn.cjs +297 -0
- package/lib/memory/test-rs-commercial-assessor.cjs +385 -0
- package/lib/memory/test-rs-differential-scorer.cjs +480 -0
- package/lib/memory/test-rs-discovery-engine.cjs +603 -0
- package/lib/memory/test-rs-domain-analyzer.cjs +492 -0
- package/lib/memory/test-rs-egress-primitives.cjs +420 -0
- package/lib/memory/test-rs-expert-mapper.cjs +547 -0
- package/lib/memory/test-rs-explain-command.cjs +443 -0
- package/lib/memory/test-rs-fetcher-academic.cjs +848 -0
- package/lib/memory/test-rs-fetcher-experts.cjs +496 -0
- package/lib/memory/test-rs-fetcher-industry.cjs +702 -0
- package/lib/memory/test-rs-fetcher-patents.cjs +674 -0
- package/lib/memory/test-rs-innovation-classifier.cjs +301 -0
- package/lib/memory/test-rs-mind-map.cjs +646 -0
- package/lib/memory/test-rs-neo4j-writer.cjs +518 -0
- package/lib/memory/test-rs-nl-to-query.cjs +449 -0
- package/lib/memory/test-rs-pinecone-bridge.cjs +277 -0
- package/lib/memory/test-rs-preprocessor.cjs +433 -0
- package/lib/memory/test-rs-query-matrix.cjs +391 -0
- package/lib/memory/test-rs-query-to-text.cjs +551 -0
- package/lib/memory/test-rs-sqlite-mirror.cjs +649 -0
- package/lib/memory/test-rs-thesis-generator.cjs +360 -0
- package/lib/memory/triple-context-formatter.cjs +473 -0
- package/lib/memory/triple-context-formatter.test.cjs +442 -0
- package/lib/memory/user-md-persona.test.cjs +565 -0
- package/lib/memory/userpromptsubmit-integration.test.cjs +690 -0
- package/lib/memory/validators/README.md +157 -0
- package/lib/memory/validators/brain-md-invariants.cjs +475 -0
- package/lib/memory/validators/brain-substrate-invariants.cjs +285 -0
- package/lib/memory/validators/external-academic-invariants.cjs +249 -0
- package/lib/memory/validators/external-industry-invariants.cjs +271 -0
- package/lib/memory/validators/external-patents-invariants.cjs +266 -0
- package/lib/memory/validators/minto-invariants.cjs +62 -0
- package/lib/memory/validators/navigation-invariants.cjs +340 -0
- package/lib/memory/validators/queue-health.cjs +95 -0
- package/lib/memory/validators/snapshot-integrity.cjs +129 -0
- package/lib/memory/validators/stale-lifecycle.cjs +116 -0
- package/lib/memory/vault-section-minto-generator-atomic.test.cjs +556 -0
- package/lib/memory/vault-section-minto-generator-atomic.worker.cjs +73 -0
- package/lib/memory/write-lock-atomic.test.cjs +137 -0
- package/lib/memory/write-lock-atomic.worker.cjs +55 -0
- package/lib/parity/check-parity.cjs +83 -0
- package/lib/presentation/presentation-server.cjs +101 -0
- package/lib/presentation/presentation-watcher.cjs +123 -0
- package/lib/quickview/hub-server.cjs +719 -0
- package/lib/quickview/server.cjs +533 -0
- package/lib/render/JTBD-PALETTES.md +145 -0
- package/lib/render/ROOM.md +59 -0
- package/lib/render/render-v2.cjs +486 -0
- package/lib/render/render-v2.test.cjs +267 -0
- package/lib/render/render.cjs +65 -0
- package/lib/state/ROOM.md +46 -0
- package/lib/state/state-md-parser.cjs +215 -0
- package/lib/statusline/ROOM.md +38 -0
- package/lib/statusline/banner-suppression.cjs +50 -0
- package/lib/statusline/surface-detect.cjs +85 -0
- package/lib/update-bootstrap.sh.template +145 -0
- package/lib/vault/frontmatter-schema.cjs +297 -0
- package/lib/vault/room-scanner.cjs +352 -0
- package/lib/vault/wikilink-builder.cjs +231 -0
- package/lib/vault/wikilink-builder.test.cjs +182 -0
- package/lib/wiki/graph-links.cjs +281 -0
- package/lib/wiki/page-renderer.cjs +229 -0
- package/lib/wiki/wiki-chat.cjs +81 -0
- package/lib/wiki/wiki-layout.cjs +1459 -0
- package/lib/wiki/wiki-search.cjs +142 -0
- package/lib/wiki/wiki-server.cjs +678 -0
- package/lib/wiki/wiki-watcher.cjs +105 -0
- package/lib/workflow/ROOM.md +47 -0
- package/lib/workflow/command-resolver.cjs +155 -0
- package/lib/workflow/command-resolver.test.cjs +235 -0
- package/package.json +44 -0
- package/pipelines/analogy/01-decompose.md +80 -0
- package/pipelines/analogy/02-abstract.md +87 -0
- package/pipelines/analogy/03-search.md +135 -0
- package/pipelines/analogy/04-transfer.md +101 -0
- package/pipelines/analogy/05-validate.md +106 -0
- package/pipelines/analogy/CHAIN.md +56 -0
- package/pipelines/discovery/01-explore-domains.md +44 -0
- package/pipelines/discovery/02-think-hats.md +50 -0
- package/pipelines/discovery/03-analyze-needs.md +54 -0
- package/pipelines/discovery/CHAIN.md +37 -0
- package/pipelines/thesis/01-structure-argument.md +45 -0
- package/pipelines/thesis/02-challenge-assumptions.md +48 -0
- package/pipelines/thesis/03-build-thesis.md +54 -0
- package/pipelines/thesis/CHAIN.md +37 -0
- package/references/brain/causal-directives.md +91 -0
- package/references/brain/causal-enrichment.cypher +165 -0
- package/references/brain/command-triggers-schema.md +226 -0
- package/references/brain/graph-architecture.md +317 -0
- package/references/brain/query-patterns.md +460 -0
- package/references/brain/room-hierarchy-schema.md +218 -0
- package/references/brain/schema.md +76 -0
- package/references/capability-radar/capabilities-index.md +241 -0
- package/references/capability-radar/changelog-cache.md +81 -0
- package/references/causal/causal-schema.md +103 -0
- package/references/design/email-template-standard.md +155 -0
- package/references/design/graph-visualization-standard.md +178 -0
- package/references/document-generation.md +179 -0
- package/references/hsi/HSI-TOOLS-REFERENCE.md +222 -0
- package/references/import-config.md +141 -0
- package/references/integrations/detection-patterns.md +101 -0
- package/references/meeting/artifact-template.md +377 -0
- package/references/meeting/cross-meeting-intelligence.md +216 -0
- package/references/meeting/cross-relationship-patterns.md +202 -0
- package/references/meeting/live-join-interface.md +244 -0
- package/references/meeting/section-mapping.md +192 -0
- package/references/meeting/segment-classification.md +258 -0
- package/references/meeting/speaker-profile-template.md +219 -0
- package/references/meeting/summary-template.md +348 -0
- package/references/meeting/transcript-patterns.md +226 -0
- package/references/methodology/analyze-needs.md +135 -0
- package/references/methodology/analyze-systems.md +121 -0
- package/references/methodology/analyze-timing.md +149 -0
- package/references/methodology/beautiful-question.md +109 -0
- package/references/methodology/build-knowledge.md +161 -0
- package/references/methodology/build-thesis.md +237 -0
- package/references/methodology/challenge-assumptions.md +127 -0
- package/references/methodology/diagnose.md +169 -0
- package/references/methodology/dominant-designs.md +212 -0
- package/references/methodology/explore-domains.md +147 -0
- package/references/methodology/explore-futures.md +163 -0
- package/references/methodology/explore-trends.md +129 -0
- package/references/methodology/find-bottlenecks.md +131 -0
- package/references/methodology/grade.md +211 -0
- package/references/methodology/index.md +97 -0
- package/references/methodology/leadership.md +200 -0
- package/references/methodology/lean-canvas.md +116 -0
- package/references/methodology/macro-trends.md +192 -0
- package/references/methodology/map-unknowns.md +137 -0
- package/references/methodology/mullins-7-domains.md +104 -0
- package/references/methodology/problem-types.md +65 -0
- package/references/methodology/root-cause.md +178 -0
- package/references/methodology/sapphire-encoding.md +355 -0
- package/references/methodology/scenario-plan.md +178 -0
- package/references/methodology/score-innovation.md +154 -0
- package/references/methodology/structure-argument.md +158 -0
- package/references/methodology/systems-thinking.md +159 -0
- package/references/methodology/think-hats.md +147 -0
- package/references/methodology/triz-matrix.json +751 -0
- package/references/methodology/triz-principles.md +501 -0
- package/references/methodology/user-needs.md +199 -0
- package/references/methodology/validate.md +163 -0
- package/references/methodology/value-proposition.md +244 -0
- package/references/opportunities/funding-lifecycle.md +103 -0
- package/references/opportunities/grant-api-patterns.md +99 -0
- package/references/opportunities/opportunity-template.md +84 -0
- package/references/personality/assessment-philosophy.md +72 -0
- package/references/personality/lexicon.md +100 -0
- package/references/personality/persona-chains.md +56 -0
- package/references/personality/pws-lexicon-full.md +499 -0
- package/references/personality/voice-dna.md +156 -0
- package/references/personas/hat-perspectives.md +76 -0
- package/references/personas/persona-template.md +63 -0
- package/references/pipeline/act-output-contract.md +88 -0
- package/references/pipeline/chains-index.md +39 -0
- package/references/pws-profile-generation.md +79 -0
- package/references/reasoning/reasoning-schema.md +143 -0
- package/references/reasoning/reasoning-template.md +68 -0
- package/references/reasoning/run-template.md +38 -0
- package/references/research/RESEARCH_14_CLAUDE_CODE_SOURCE_ARCHITECTURE.md +209 -0
- package/references/research/RESEARCH_15_V1.8_OPTIMIZATION_JTBD.md +375 -0
- package/references/research/RESEARCH_16_NATIVE_FIRST_PLUGIN_ARCHITECTURE.md +575 -0
- package/references/research/RESEARCH_17_MCP_UI_FRAMEWORKS.md +272 -0
- package/references/taxonomy/TAXONOMY.md +192 -0
- package/references/templates/MINTO.md +36 -0
- package/references/user-research/2026-04-05-leah-lawrence-session.md +202 -0
- package/references/vault-kit/README.md +35 -0
- package/references/vault-kit/app.json +12 -0
- package/references/vault-kit/appearance.json +12 -0
- package/references/vault-kit/graph.json +35 -0
- package/references/vault-kit/snippets/mindrian-destijl.css +297 -0
- package/references/vault-kit/templates/new-artifact.md +37 -0
- package/references/vault-kit/templates/new-meeting-note.md +35 -0
- package/references/vault-kit/templates/new-team-profile.md +29 -0
- package/references/vault-kit/templates/new-xref.md +35 -0
- package/references/visual/symbol-system.md +151 -0
- package/skills/MOSDeckEngine/SKILL.md +325 -0
- package/skills/brain-connector/SKILL.md +114 -0
- package/skills/context-engine/SKILL.md +147 -0
- package/skills/conversation-mode/SKILL.md +102 -0
- package/skills/larry-personality/SKILL.md +219 -0
- package/skills/larry-personality/framework-chains.md +92 -0
- package/skills/larry-personality/mode-engine.md +185 -0
- package/skills/mullins-scaffold/SKILL.md +61 -0
- package/skills/mullins-scaffold/scaffold.json +146 -0
- package/skills/pws-methodology/SKILL.md +49 -0
- package/skills/room-passive/SKILL.md +165 -0
- package/skills/room-proactive/SKILL.md +250 -0
- package/skills/ui-system/SKILL.md +277 -0
|
@@ -0,0 +1,468 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
rs-corpus.py -- External corpus fetcher for the Reverse Salient Engine
|
|
4
|
+
======================================================================
|
|
5
|
+
|
|
6
|
+
Tiered external corpus fetcher. Plan 89-02 Mode B entry point. Follows the
|
|
7
|
+
RESEARCH Q2 decision (OpenAlex primary, arXiv secondary, Tavily fallback).
|
|
8
|
+
Skips Scopus, Semantic Scholar, USPTO direct, PubMed per Phase 89 locked
|
|
9
|
+
scope.
|
|
10
|
+
|
|
11
|
+
Tiering rationale (RESEARCH.md Q2):
|
|
12
|
+
- OpenAlex: free, 240M+ works, includes patents, stable JSON schema,
|
|
13
|
+
polite pool at 100k/day (email in User-Agent). Single API covers papers,
|
|
14
|
+
patents, dissertations.
|
|
15
|
+
- arXiv: free Atom XML, deep-tech (physics/CS/math/bio) supplement. Soft
|
|
16
|
+
rate limit ~3 req/s; no hard block.
|
|
17
|
+
- Tavily: fallback for domains neither of the above cover. Already
|
|
18
|
+
configured in the plugin via TAVILY_API_KEY. Snippet-only, not full
|
|
19
|
+
abstract.
|
|
20
|
+
|
|
21
|
+
Abstract normalization:
|
|
22
|
+
- OpenAlex returns `abstract_inverted_index` (dict of word -> [positions]).
|
|
23
|
+
invert_abstract() reconstructs plain text.
|
|
24
|
+
- arXiv <summary> is plain text in Atom feed, stripped of whitespace.
|
|
25
|
+
- Tavily `content` is snippet text as-is.
|
|
26
|
+
|
|
27
|
+
Rate limit posture:
|
|
28
|
+
- OpenAlex polite pool: email in User-Agent + sleep(0.1) between pages.
|
|
29
|
+
- arXiv: sleep(0.35) between pages (~3 req/s).
|
|
30
|
+
- Tavily: single POST per topic, no pagination in v1.
|
|
31
|
+
|
|
32
|
+
Dedup:
|
|
33
|
+
- DOI if present, else normalized lowercase title.
|
|
34
|
+
|
|
35
|
+
Env vars:
|
|
36
|
+
- OPENALEX_EMAIL: polite pool contact. Defaults to noreply@mindrian.ai.
|
|
37
|
+
- TAVILY_API_KEY: gates Tier 3 (fallback). Missing = graceful empty list.
|
|
38
|
+
|
|
39
|
+
Three-surface usage:
|
|
40
|
+
- CLI: python scripts/rs-engine.py --mode external --topic "..."
|
|
41
|
+
- Desktop: invoked through ReverseSalientAgent (Plan 89-07).
|
|
42
|
+
- Cowork: external fetch is symmetric across surfaces; results live under
|
|
43
|
+
{room}/research/{topic-slug}/ and copy through 00_Context/.
|
|
44
|
+
|
|
45
|
+
License: BSL-1.1 (see LICENSE at repo root).
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
from __future__ import annotations
|
|
49
|
+
|
|
50
|
+
import json
|
|
51
|
+
import os
|
|
52
|
+
import re
|
|
53
|
+
import sys
|
|
54
|
+
import time
|
|
55
|
+
import xml.etree.ElementTree as ET
|
|
56
|
+
from typing import Dict, List, Optional
|
|
57
|
+
from urllib.parse import quote
|
|
58
|
+
|
|
59
|
+
try:
|
|
60
|
+
import requests
|
|
61
|
+
except ImportError:
|
|
62
|
+
print(
|
|
63
|
+
"rs-corpus requires requests. Run: pip install -r requirements-hsi.txt",
|
|
64
|
+
file=sys.stderr,
|
|
65
|
+
)
|
|
66
|
+
raise
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# Contact address advertised to OpenAlex in the User-Agent. An unset, empty,
# or whitespace-only OPENALEX_EMAIL falls back to the default so the header
# never carries a bare "mailto:".
OPENALEX_EMAIL = (
    os.environ.get("OPENALEX_EMAIL", "").strip() or "noreply@mindrian.ai"
)
# NOTE(review): the version token is hard-coded here; confirm it is meant to
# track the package release.
USER_AGENT = f"MindrianOS-RS-Engine/1.9.8.1 (mailto:{OPENALEX_EMAIL})"

# Network budgets kept generous; callers set target_n.
# One endpoint per tier. All three are reached over TLS.
OPENALEX_URL = "https://api.openalex.org/works"
ARXIV_URL = "https://export.arxiv.org/api/query"
TAVILY_URL = "https://api.tavily.com/search"
# Prefix map used for ElementTree lookups in the arXiv Atom feed.
ARXIV_NS = {"atom": "http://www.w3.org/2005/Atom"}

# Hard upper bound so a misconfigured topk cannot balloon external API usage.
MAX_TARGET_N = 20000
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
# --- Abstract reconstruction ------------------------------------------------
|
|
83
|
+
|
|
84
|
+
def invert_abstract(inverted_index: Optional[Dict[str, List[int]]]) -> str:
    """Rebuild plain abstract text from an OpenAlex inverted index.

    The index maps each word to the list of positions where it occurs.
    Every (position, word) occurrence is collected, ordered by position,
    and joined with single spaces.

    Returns "" for None, an empty mapping, or any non-dict value, so the
    caller can drop abstract-less works before counting them. Entries whose
    position list is not a list, and individual positions that cannot be
    converted with int(), are ignored.
    """
    # One combined guard: falsy input and non-dict payloads both yield "".
    if not (inverted_index and isinstance(inverted_index, dict)):
        return ""

    placed = []
    for token, slots in inverted_index.items():
        if isinstance(slots, list):
            for slot in slots:
                try:
                    placed.append((int(slot), token))
                except (TypeError, ValueError):
                    # Unusable position; skip just this occurrence.
                    pass

    # Order by position only; the sort is stable, so words sharing a
    # position keep the order in which they were collected.
    ordered = sorted(placed, key=lambda item: item[0])
    return " ".join(token for _, token in ordered)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
# --- Tier 1: OpenAlex --------------------------------------------------------
|
|
110
|
+
|
|
111
|
+
def fetch_openalex(topic: str, target_n: int = 2000, per_page: int = 200) -> List[Dict]:
    """Fetch up to target_n works from OpenAlex matching `topic`.

    Pages through the works endpoint with cursor pagination: the first
    request sends ``cursor="*"`` and each following request sends the
    previous response's ``meta.next_cursor``. Works whose
    ``abstract_inverted_index`` reconstructs to an empty string are skipped,
    so the returned count reflects usable corpus, not raw hits.

    Args:
        topic: Free-text search string. A falsy topic returns [] without
            any network call.
        target_n: Maximum number of documents to return, clamped to
            MAX_TARGET_N. Values <= 0 return [].
        per_page: Requested page size, clamped to 1..200.

    Returns:
        A list of dicts with keys source, external_id, title, abstract,
        year, authors (at most five display names), url and doi. A request
        exception, non-200 status, or unparseable/unexpected body is
        reported on stderr and ends paging; whatever was collected up to
        that point is returned.
    """
    if target_n <= 0 or not topic:
        return []
    target_n = min(target_n, MAX_TARGET_N)
    per_page = max(1, min(per_page, 200))

    results: List[Dict] = []
    cursor = "*"
    headers = {"User-Agent": USER_AGENT}

    while len(results) < target_n:
        params = {
            "search": topic,
            "per-page": min(per_page, target_n - len(results)),
            "cursor": cursor,
            "select": (
                "id,title,abstract_inverted_index,publication_year,"
                "authorships,doi"
            ),
        }
        try:
            r = requests.get(OPENALEX_URL, params=params, headers=headers, timeout=30)
        except requests.RequestException as e:
            print(f"rs-corpus: OpenAlex request failed: {e}", file=sys.stderr)
            break
        if r.status_code != 200:
            print(
                f"rs-corpus: OpenAlex returned {r.status_code}; stopping tier 1",
                file=sys.stderr,
            )
            break
        try:
            data = r.json()
        except ValueError:
            print("rs-corpus: OpenAlex returned non-JSON; stopping tier 1",
                  file=sys.stderr)
            break
        # Valid JSON that is not an object (e.g. a list or null) would make
        # every .get() below raise; treat it like any other bad response.
        if not isinstance(data, dict):
            print("rs-corpus: OpenAlex returned unexpected JSON; stopping tier 1",
                  file=sys.stderr)
            break

        hits = data.get("results")
        if not hits or not isinstance(hits, list):
            break

        for work in hits:
            if not isinstance(work, dict):
                continue
            abstract = invert_abstract(work.get("abstract_inverted_index"))
            if not abstract:
                continue

            # The key may be present with a null value, in which case
            # .get(key, []) returns None and slicing it raises TypeError.
            authorships = work.get("authorships")
            if not isinstance(authorships, list):
                authorships = []
            authors = []
            for authorship in authorships[:5]:
                if not isinstance(authorship, dict):
                    continue
                author = authorship.get("author")
                if not isinstance(author, dict):
                    continue
                name = author.get("display_name")
                if name:
                    authors.append(name)

            results.append({
                "source": "openalex",
                "external_id": work.get("id") or "",
                "title": work.get("title") or "",
                "abstract": abstract,
                "year": work.get("publication_year"),
                "authors": authors,
                "url": work.get("id") or "",
                "doi": work.get("doi"),
            })
            if len(results) >= target_n:
                break

        meta = data.get("meta")
        cursor = meta.get("next_cursor") if isinstance(meta, dict) else None
        if not cursor:
            break
        # Polite pool: 10 req/s ceiling. 0.1s spacing is well under the limit
        # and keeps the engine nice on shared infra.
        time.sleep(0.1)

    return results[:target_n]
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
# --- Tier 2: arXiv -----------------------------------------------------------
|
|
196
|
+
|
|
197
|
+
def fetch_arxiv(topic: str, target_n: int = 2000) -> List[Dict]:
    """Fetch up to target_n papers from the arXiv Atom feed.

    Queries ``all:<topic>`` in pages of at most 200 entries, sleeping 0.35s
    between pages (soft rate limit ~3 req/s per RESEARCH.md Pitfall 4).
    Entries without a summary are skipped so the count reflects usable
    corpus.

    Title, summary and author names have every whitespace run collapsed to
    a single space, because the feed text can contain embedded line breaks.
    When an entry carries an ``<arxiv:doi>`` element its value is returned
    as ``doi``; otherwise ``doi`` is None.

    Args:
        topic: Free-text search string. A falsy topic returns [] without
            any network call.
        target_n: Maximum number of documents to return, clamped to
            MAX_TARGET_N. Values <= 0 return [].

    Returns:
        A list of dicts with keys source, external_id, title, abstract,
        year, authors (at most five names), url and doi. A request
        exception, non-200 status, or XML parse error is reported on stderr
        and ends paging; whatever was collected so far is returned.
    """
    if target_n <= 0 or not topic:
        return []
    target_n = min(target_n, MAX_TARGET_N)

    # arXiv extension elements live in their own namespace, separate from
    # the Atom namespace mapped by ARXIV_NS.
    doi_tag = "{http://arxiv.org/schemas/atom}doi"

    def _clean(element) -> str:
        """Element text with whitespace runs collapsed; "" if absent."""
        if element is None or not element.text:
            return ""
        return " ".join(element.text.split())

    results: List[Dict] = []
    start = 0
    headers = {"User-Agent": USER_AGENT}

    while len(results) < target_n:
        batch = min(200, target_n - len(results))
        url = (
            f"{ARXIV_URL}?search_query=all:{quote(topic)}"
            f"&start={start}&max_results={batch}"
        )
        try:
            r = requests.get(url, headers=headers, timeout=30)
        except requests.RequestException as e:
            print(f"rs-corpus: arXiv request failed: {e}", file=sys.stderr)
            break
        if r.status_code != 200:
            print(
                f"rs-corpus: arXiv returned {r.status_code}; stopping tier 2",
                file=sys.stderr,
            )
            break

        try:
            root = ET.fromstring(r.content)
        except ET.ParseError as e:
            print(f"rs-corpus: arXiv XML parse failed: {e}", file=sys.stderr)
            break

        entries = root.findall("atom:entry", ARXIV_NS)
        if not entries:
            break

        for entry in entries:
            abstract = _clean(entry.find("atom:summary", ARXIV_NS))
            if not abstract:
                continue

            id_el = entry.find("atom:id", ARXIV_NS)
            external_id = (id_el.text or "").strip() if id_el is not None else ""
            title = _clean(entry.find("atom:title", ARXIV_NS))

            # <published> is a timestamp; only the leading 4-digit year is kept.
            year = None
            published_el = entry.find("atom:published", ARXIV_NS)
            if published_el is not None and published_el.text:
                year_match = re.match(r"(\d{4})", published_el.text.strip())
                if year_match:
                    try:
                        year = int(year_match.group(1))
                    except ValueError:
                        year = None

            authors: List[str] = []
            for author_el in entry.findall("atom:author", ARXIV_NS)[:5]:
                name = _clean(author_el.find("atom:name", ARXIV_NS))
                if name:
                    authors.append(name)

            results.append({
                "source": "arxiv",
                "external_id": external_id,
                "title": title,
                "abstract": abstract,
                "year": year,
                "authors": authors,
                "url": external_id,
                "doi": _clean(entry.find(doi_tag)) or None,
            })
            if len(results) >= target_n:
                break

        # Advance by the requested window, not by the number of kept
        # entries: skipped (summary-less) entries still occupy feed slots.
        start += batch
        # RESEARCH.md Pitfall 4: arXiv soft-blocks above ~3 req/s. 0.35s is
        # the conservative slot the existing PROJECT stack already uses.
        time.sleep(0.35)

    return results[:target_n]
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
# --- Tier 3: Tavily (fallback) ----------------------------------------------
|
|
288
|
+
|
|
289
|
+
def fetch_tavily(topic: str, target_n: int = 100) -> List[Dict]:
    """Fallback web search via Tavily. Gated by TAVILY_API_KEY.

    Sends a single POST (no pagination) asking for at most 20 results.
    Results are snippet-only: the ``content`` field is used as the
    abstract. Items lacking a url or non-empty content are dropped.

    Args:
        topic: Free-text query. A falsy topic returns [] without any
            network call.
        target_n: Maximum number of documents to return. Values <= 0
            return [].

    Returns:
        A list of dicts with keys source, external_id, title, abstract,
        year (always None), authors (always []), url and doi (always None).
        Returns [] when TAVILY_API_KEY is unset or empty, and also on a
        request exception, a non-200 status, or an unparseable/unexpected
        body; those failures are reported on stderr.
    """
    if target_n <= 0 or not topic:
        return []
    api_key = os.environ.get("TAVILY_API_KEY")
    if not api_key:
        return []

    payload = {
        "api_key": api_key,
        "query": topic,
        "max_results": min(target_n, 20),  # Tavily caps at 20 per call
        "search_depth": "advanced",
    }
    try:
        r = requests.post(TAVILY_URL, json=payload, timeout=30)
    except requests.RequestException as e:
        print(f"rs-corpus: Tavily request failed: {e}", file=sys.stderr)
        return []
    if r.status_code != 200:
        print(
            f"rs-corpus: Tavily returned {r.status_code}; returning empty",
            file=sys.stderr,
        )
        return []

    try:
        data = r.json()
    except ValueError:
        # Report the failure the same way the other tiers do instead of
        # swallowing it silently.
        print("rs-corpus: Tavily returned non-JSON; returning empty",
              file=sys.stderr)
        return []
    if not isinstance(data, dict):
        print("rs-corpus: Tavily returned unexpected JSON; returning empty",
              file=sys.stderr)
        return []

    items = data.get("results")
    if not isinstance(items, list):
        return []

    results: List[Dict] = []
    for item in items:
        if not isinstance(item, dict):
            continue
        url = item.get("url") or ""
        if not url:
            continue
        content = (item.get("content") or "").strip()
        if not content:
            continue
        results.append({
            "source": "tavily",
            "external_id": url,
            "title": item.get("title") or "",
            "abstract": content,
            "year": None,
            "authors": [],
            "url": url,
            "doi": None,
        })
    return results[:target_n]
|
|
345
|
+
|
|
346
|
+
|
|
347
|
+
# --- Dedup helpers -----------------------------------------------------------
|
|
348
|
+
|
|
349
|
+
def _normalize_title(title: str) -> str:
|
|
350
|
+
"""Collapse whitespace, strip punctuation, lowercase. Keeps dedup robust
|
|
351
|
+
against "Title." vs "title" vs "Title "."""
|
|
352
|
+
if not title:
|
|
353
|
+
return ""
|
|
354
|
+
t = title.lower()
|
|
355
|
+
t = re.sub(r"[^a-z0-9\s]+", " ", t)
|
|
356
|
+
t = re.sub(r"\s+", " ", t).strip()
|
|
357
|
+
return t
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def _dedup_key(doc: Dict) -> str:
|
|
361
|
+
doi = doc.get("doi")
|
|
362
|
+
if isinstance(doi, str) and doi.strip():
|
|
363
|
+
# OpenAlex hands us https://doi.org/10.xxx; strip for a stable key.
|
|
364
|
+
norm = doi.strip().lower()
|
|
365
|
+
norm = norm.replace("https://doi.org/", "").replace("http://doi.org/", "")
|
|
366
|
+
return f"doi:{norm}"
|
|
367
|
+
title_norm = _normalize_title(doc.get("title") or "")
|
|
368
|
+
if title_norm:
|
|
369
|
+
return f"title:{title_norm}"
|
|
370
|
+
# Last resort: external_id. Avoids O(n^2) collapse on degenerate inputs.
|
|
371
|
+
return f"id:{doc.get('external_id') or ''}"
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
def dedupe(docs: List[Dict]) -> List[Dict]:
|
|
375
|
+
"""Dedup by DOI (preferred) or normalized title. Preserves first-seen
|
|
376
|
+
order so Tier 1 wins ties over Tier 2 and Tier 3."""
|
|
377
|
+
seen: set = set()
|
|
378
|
+
out: List[Dict] = []
|
|
379
|
+
for doc in docs:
|
|
380
|
+
key = _dedup_key(doc)
|
|
381
|
+
if key in seen:
|
|
382
|
+
continue
|
|
383
|
+
seen.add(key)
|
|
384
|
+
out.append(doc)
|
|
385
|
+
return out
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
# --- Orchestrator -----------------------------------------------------------
|
|
389
|
+
|
|
390
|
+
def fetch_corpus(topic: str, target_n: int = 2000) -> List[Dict]:
    """Run the tiers in order (OpenAlex, arXiv, Tavily) until target_n is met.

    Each tier is asked only for the current shortfall. Its output is
    appended to what earlier tiers produced and the combined list is
    deduplicated before deciding whether the next tier is needed. The
    result is truncated to target_n (itself clamped to MAX_TARGET_N).

    Returns [] immediately when target_n <= 0 or topic is falsy. Each
    returned document carries source, external_id, title, abstract, year,
    authors, url, plus optional doi; the `source` field records which tier
    produced it.
    """
    if target_n <= 0 or not topic:
        return []
    limit = min(target_n, MAX_TARGET_N)

    corpus: List[Dict] = []
    # Tier order matters: dedupe keeps the first occurrence, so earlier
    # tiers win over later ones.
    for fetch_tier in (fetch_openalex, fetch_arxiv, fetch_tavily):
        shortfall = limit - len(corpus)
        if shortfall <= 0:
            break
        corpus.extend(fetch_tier(topic, shortfall))
        corpus = dedupe(corpus)

    return corpus[:limit]
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
# --- Topic slug (shared with downstream writers) ----------------------------
|
|
432
|
+
|
|
433
|
+
def topic_slug(topic: str) -> str:
    """Turn a topic string into a lowercase, hyphen-separated slug.

    Every run of characters other than a-z / 0-9 (after lowercasing)
    becomes a single "-", and leading/trailing hyphens are removed. A
    falsy topic yields "". The same slug results regardless of the
    input's case or spacing.
    """
    lowered = topic.lower() if topic else ""
    hyphenated = re.sub(r"[^a-z0-9]+", "-", lowered)
    return hyphenated.strip("-")
|
|
443
|
+
|
|
444
|
+
|
|
445
|
+
# Names exported by a star-import of this module. The underscore-prefixed
# dedup helpers (_normalize_title, _dedup_key) are not listed.
__all__ = [
    "fetch_corpus",
    "fetch_openalex",
    "fetch_arxiv",
    "fetch_tavily",
    "invert_abstract",
    "dedupe",
    "topic_slug",
]
|
|
454
|
+
|
|
455
|
+
|
|
456
|
+
# --- Self-invert smoke (when run directly) ----------------------------------
|
|
457
|
+
|
|
458
|
+
if __name__ == "__main__":
    # Minimal self-check. Real usage goes through scripts/rs-engine.py.
    sample_index = {"Hello": [0], "world": [1], "from": [2], "test": [3]}
    expected = "Hello world from test"
    reconstructed = invert_abstract(sample_index)
    # Compare explicitly rather than with `assert`: assertions are removed
    # under `python -O`, which would let the report claim success without
    # the check ever running.
    invert_ok = reconstructed == expected
    print(json.dumps({
        "module": "rs_corpus",
        "invert_ok": invert_ok,
        "user_agent": USER_AGENT,
        "tavily_configured": bool(os.environ.get("TAVILY_API_KEY")),
    }, indent=2))
    if not invert_ok:
        print(
            f"rs-corpus: invert_abstract self-check failed: {reconstructed!r}",
            file=sys.stderr,
        )
        sys.exit(1)
|