ultimate-pi 0.1.7 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.agents/skills/graphify/.graphify_version +1 -0
- package/.agents/skills/graphify/SKILL.md +1204 -0
- package/.agents/skills/wiki-autoresearch/SKILL.md +225 -97
- package/.agents/skills/wiki-autoresearch/references/program.md +28 -62
- package/.agents/skills/wiki-autoresearch/references/quality-sites.md +32 -0
- package/.env.example +5 -1
- package/.gitattributes +1 -0
- package/.github/workflows/publish-github-packages.yml +1 -1
- package/.pi/SYSTEM.md +72 -18
- package/.pi/agents/harness/adversary.md +32 -0
- package/.pi/agents/harness/evaluator.md +32 -0
- package/.pi/agents/harness/executor.md +34 -0
- package/.pi/agents/harness/meta-optimizer.md +33 -0
- package/.pi/agents/harness/planner.md +33 -0
- package/.pi/agents/harness/tie-breaker.md +35 -0
- package/.pi/agents/harness/trace-librarian.md +32 -0
- package/.pi/extensions/banner.png +0 -0
- package/.pi/extensions/budget-guard.ts +265 -0
- package/.pi/extensions/custom-footer.ts +194 -22
- package/.pi/extensions/custom-header.ts +47 -9
- package/.pi/extensions/debate-orchestrator.ts +479 -0
- package/.pi/extensions/harness-live-widget.ts +438 -0
- package/.pi/extensions/policy-gate.ts +349 -0
- package/.pi/extensions/review-integrity.ts +198 -0
- package/.pi/extensions/test-diff-integrity.ts +240 -0
- package/.pi/extensions/trace-recorder.ts +315 -0
- package/.pi/harness/README.md +23 -0
- package/.pi/harness/router/README.md +35 -0
- package/.pi/harness/router/apply-router-proposal.mjs +153 -0
- package/.pi/harness/router/propose-router-tuning.mjs +149 -0
- package/.pi/harness/specs/README.md +37 -0
- package/.pi/harness/specs/adversary-report.schema.json +53 -0
- package/.pi/harness/specs/budget-exhausted-event.schema.json +93 -0
- package/.pi/harness/specs/consensus-packet.schema.json +175 -0
- package/.pi/harness/specs/eval-verdict.schema.json +59 -0
- package/.pi/harness/specs/incident-record.schema.json +84 -0
- package/.pi/harness/specs/plan-packet.schema.json +90 -0
- package/.pi/harness/specs/round-result.schema.json +126 -0
- package/.pi/harness/specs/router-tuning-proposal.schema.json +114 -0
- package/.pi/harness/specs/run-trace.schema.json +107 -0
- package/.pi/lib/harness-ui-state.ts +311 -0
- package/.pi/mcp.json +4 -0
- package/.pi/model-router.json +93 -93
- package/.pi/prompts/graphify.md +23 -0
- package/.pi/prompts/harness-abort.md +41 -0
- package/.pi/prompts/harness-auto.md +83 -0
- package/.pi/prompts/harness-critic.md +52 -0
- package/.pi/prompts/harness-eval.md +51 -0
- package/.pi/prompts/harness-incident.md +51 -0
- package/.pi/prompts/harness-plan.md +64 -0
- package/.pi/prompts/harness-review.md +52 -0
- package/.pi/prompts/harness-router-tune.md +74 -0
- package/.pi/prompts/harness-run.md +59 -0
- package/.pi/prompts/harness-setup.md +316 -216
- package/.pi/prompts/harness-trace.md +51 -0
- package/.pi/prompts/wiki-autoresearch.md +9 -7
- package/.pi/prompts/wiki-save.md +20 -0
- package/.pi/skills/agent-router/SKILL.md +2 -4
- package/.pi/skills/ast-grep/SKILL.md +354 -0
- package/.pi/sounds/project-sounds.json +18 -24
- package/AGENTS.md +30 -0
- package/CHANGELOG.md +89 -0
- package/CONTRIBUTING.md +51 -1
- package/README.md +264 -20
- package/biome.json +8 -2
- package/lefthook.yml +3 -2
- package/node_modules/@sting8k/pi-vcc/README.md +200 -0
- package/node_modules/@sting8k/pi-vcc/index.ts +14 -0
- package/node_modules/@sting8k/pi-vcc/package.json +26 -0
- package/node_modules/@sting8k/pi-vcc/scripts/audit-sessions.ts +88 -0
- package/node_modules/@sting8k/pi-vcc/scripts/benchmark-real-sessions.ts +25 -0
- package/node_modules/@sting8k/pi-vcc/scripts/compare-before-after.ts +36 -0
- package/node_modules/@sting8k/pi-vcc/scripts/dump-branch-output.ts +20 -0
- package/node_modules/@sting8k/pi-vcc/src/commands/pi-vcc.ts +36 -0
- package/node_modules/@sting8k/pi-vcc/src/commands/vcc-recall.ts +65 -0
- package/node_modules/@sting8k/pi-vcc/src/core/brief.ts +381 -0
- package/node_modules/@sting8k/pi-vcc/src/core/build-sections.ts +79 -0
- package/node_modules/@sting8k/pi-vcc/src/core/content.ts +60 -0
- package/node_modules/@sting8k/pi-vcc/src/core/filter-noise.ts +42 -0
- package/node_modules/@sting8k/pi-vcc/src/core/format-recall.ts +27 -0
- package/node_modules/@sting8k/pi-vcc/src/core/format.ts +49 -0
- package/node_modules/@sting8k/pi-vcc/src/core/lineage.ts +26 -0
- package/node_modules/@sting8k/pi-vcc/src/core/load-messages.ts +41 -0
- package/node_modules/@sting8k/pi-vcc/src/core/normalize.ts +66 -0
- package/node_modules/@sting8k/pi-vcc/src/core/recall-scope.ts +14 -0
- package/node_modules/@sting8k/pi-vcc/src/core/render-entries.ts +55 -0
- package/node_modules/@sting8k/pi-vcc/src/core/report.ts +237 -0
- package/node_modules/@sting8k/pi-vcc/src/core/sanitize.ts +5 -0
- package/node_modules/@sting8k/pi-vcc/src/core/search-entries.ts +221 -0
- package/node_modules/@sting8k/pi-vcc/src/core/settings.ts +77 -0
- package/node_modules/@sting8k/pi-vcc/src/core/skill-collapse.ts +35 -0
- package/node_modules/@sting8k/pi-vcc/src/core/summarize.ts +157 -0
- package/node_modules/@sting8k/pi-vcc/src/core/tool-args.ts +14 -0
- package/node_modules/@sting8k/pi-vcc/src/details.ts +7 -0
- package/node_modules/@sting8k/pi-vcc/src/extract/commits.ts +69 -0
- package/node_modules/@sting8k/pi-vcc/src/extract/files.ts +80 -0
- package/node_modules/@sting8k/pi-vcc/src/extract/goals.ts +79 -0
- package/node_modules/@sting8k/pi-vcc/src/extract/preferences.ts +55 -0
- package/node_modules/@sting8k/pi-vcc/src/hooks/before-compact.ts +322 -0
- package/node_modules/@sting8k/pi-vcc/src/sections.ts +12 -0
- package/node_modules/@sting8k/pi-vcc/src/tools/recall.ts +109 -0
- package/node_modules/@sting8k/pi-vcc/src/types.ts +14 -0
- package/node_modules/@sting8k/pi-vcc/tests/before-compact-hook.test.ts +181 -0
- package/node_modules/@sting8k/pi-vcc/tests/before-compact.test.ts +140 -0
- package/node_modules/@sting8k/pi-vcc/tests/brief.test.ts +206 -0
- package/node_modules/@sting8k/pi-vcc/tests/build-sections.test.ts +59 -0
- package/node_modules/@sting8k/pi-vcc/tests/compile.test.ts +80 -0
- package/node_modules/@sting8k/pi-vcc/tests/content.test.ts +31 -0
- package/node_modules/@sting8k/pi-vcc/tests/extract-goals.test.ts +86 -0
- package/node_modules/@sting8k/pi-vcc/tests/extract-preferences.test.ts +30 -0
- package/node_modules/@sting8k/pi-vcc/tests/filter-noise.test.ts +61 -0
- package/node_modules/@sting8k/pi-vcc/tests/fixtures.ts +61 -0
- package/node_modules/@sting8k/pi-vcc/tests/format-recall.test.ts +30 -0
- package/node_modules/@sting8k/pi-vcc/tests/format.test.ts +62 -0
- package/node_modules/@sting8k/pi-vcc/tests/lineage.test.ts +33 -0
- package/node_modules/@sting8k/pi-vcc/tests/load-messages.test.ts +51 -0
- package/node_modules/@sting8k/pi-vcc/tests/normalize.test.ts +97 -0
- package/node_modules/@sting8k/pi-vcc/tests/real-sessions.test.ts +38 -0
- package/node_modules/@sting8k/pi-vcc/tests/recall-expand.test.ts +15 -0
- package/node_modules/@sting8k/pi-vcc/tests/recall-scope.test.ts +32 -0
- package/node_modules/@sting8k/pi-vcc/tests/recall-tool-scope.test.ts +67 -0
- package/node_modules/@sting8k/pi-vcc/tests/render-entries.test.ts +62 -0
- package/node_modules/@sting8k/pi-vcc/tests/report.test.ts +44 -0
- package/node_modules/@sting8k/pi-vcc/tests/sanitize.test.ts +24 -0
- package/node_modules/@sting8k/pi-vcc/tests/search-entries.test.ts +144 -0
- package/node_modules/@sting8k/pi-vcc/tests/support/load-session.ts +23 -0
- package/node_modules/@sting8k/pi-vcc/tests/support/real-sessions.ts +51 -0
- package/package.json +15 -4
- package/scripts/__pycache__/merge_graphify_corpora.cpython-314.pyc +0 -0
- package/scripts/index_youtube_urls.py +376 -0
- package/scripts/merge_graphify_corpora.py +398 -0
- package/scripts/regen_graphify_html.py +46 -0
- package/.agents/skills/defuddle/SKILL.md +0 -90
- package/.agents/skills/wiki/SKILL.md +0 -215
- package/.agents/skills/wiki/references/css-snippets.md +0 -122
- package/.agents/skills/wiki/references/frontmatter.md +0 -107
- package/.agents/skills/wiki/references/git-setup.md +0 -58
- package/.agents/skills/wiki/references/mcp-setup.md +0 -149
- package/.agents/skills/wiki/references/modes.md +0 -259
- package/.agents/skills/wiki/references/plugins.md +0 -96
- package/.agents/skills/wiki/references/rest-api.md +0 -124
- package/.agents/skills/wiki-fold/SKILL.md +0 -204
- package/.agents/skills/wiki-fold/references/fold-template.md +0 -133
- package/.agents/skills/wiki-ingest/SKILL.md +0 -288
- package/.agents/skills/wiki-lint/SKILL.md +0 -183
- package/.agents/skills/wiki-query/SKILL.md +0 -176
- package/.pi/agents/rethink.md +0 -140
- package/.pi/agents/wiki-ingest.md +0 -67
- package/.pi/agents/wiki-lint.md +0 -75
- package/.pi/internal/cursor-sdk-transcript-parser.ts +0 -59
- package/.pi/prompts/save.md +0 -16
- package/.pi/prompts/wiki.md +0 -23
- package/.pi/providers/cursor-sdk-provider.test.mjs +0 -476
- package/.pi/providers/cursor-sdk-provider.ts +0 -1085
- package/vault/AGENTS.md +0 -37
- package/vault/wiki/_templates/comparison.md +0 -39
- package/vault/wiki/_templates/concept.md +0 -40
- package/vault/wiki/_templates/decision.md +0 -21
- package/vault/wiki/_templates/entity.md +0 -32
- package/vault/wiki/_templates/flow.md +0 -14
- package/vault/wiki/_templates/module.md +0 -18
- package/vault/wiki/_templates/question.md +0 -31
- package/vault/wiki/_templates/source.md +0 -39
- package/vault/wiki/concepts/AST-Aware Code Chunking.md +0 -44
- package/vault/wiki/concepts/Build-Time Prompt Compilation.md +0 -107
- package/vault/wiki/concepts/Context Engine (AI Coding).md +0 -47
- package/vault/wiki/concepts/Context-Aware System Reminders.md +0 -61
- package/vault/wiki/concepts/Contextualized Text Embedding.md +0 -42
- package/vault/wiki/concepts/Contractor vs Employee AI Model.md +0 -55
- package/vault/wiki/concepts/Dual-Model Agent Architecture.md +0 -65
- package/vault/wiki/concepts/Late Chunking vs Early Chunking.md +0 -43
- package/vault/wiki/concepts/Majority Vote Ensembling.md +0 -68
- package/vault/wiki/concepts/Meta-Harness.md +0 -16
- package/vault/wiki/concepts/Multi-Agent AI Coding Architecture.md +0 -75
- package/vault/wiki/concepts/Prompt Enhancement.md +0 -90
- package/vault/wiki/concepts/Prompt Renderer.md +0 -89
- package/vault/wiki/concepts/Semantic Codebase Indexing.md +0 -67
- package/vault/wiki/concepts/additive-config-hierarchy.md +0 -16
- package/vault/wiki/concepts/agent-artifacts-verifiable-deliverables.md +0 -71
- package/vault/wiki/concepts/agent-browser-browser-automation.md +0 -99
- package/vault/wiki/concepts/agent-codebase-interface.md +0 -43
- package/vault/wiki/concepts/agent-harness-architecture.md +0 -67
- package/vault/wiki/concepts/agent-loop-detection-patterns.md +0 -133
- package/vault/wiki/concepts/agent-search-enforcement.md +0 -126
- package/vault/wiki/concepts/agent-skills-ecosystem.md +0 -74
- package/vault/wiki/concepts/agent-skills-pattern.md +0 -68
- package/vault/wiki/concepts/agentic-harness-context-enforcement.md +0 -91
- package/vault/wiki/concepts/agentic-harness.md +0 -34
- package/vault/wiki/concepts/agentic-orchestration-pipeline.md +0 -56
- package/vault/wiki/concepts/agentic-search-no-embeddings.md +0 -18
- package/vault/wiki/concepts/anthropic-context-engineering.md +0 -13
- package/vault/wiki/concepts/antigravity-agent-first-architecture.md +0 -61
- package/vault/wiki/concepts/ast-compression.md +0 -19
- package/vault/wiki/concepts/ast-truncation.md +0 -66
- package/vault/wiki/concepts/barrel-files.md +0 -37
- package/vault/wiki/concepts/browser-harness-agent.md +0 -41
- package/vault/wiki/concepts/browser-subagent-visual-verification.md +0 -82
- package/vault/wiki/concepts/codebase-intelligence-ecosystem-comparison.md +0 -192
- package/vault/wiki/concepts/codebase-intelligence-harness-integration.md +0 -161
- package/vault/wiki/concepts/codebase-to-context-ingestion.md +0 -46
- package/vault/wiki/concepts/codex-harness-innovations.md +0 -147
- package/vault/wiki/concepts/consensus-debate-flow.md +0 -17
- package/vault/wiki/concepts/consensus-debate.md +0 -206
- package/vault/wiki/concepts/content-addressed-spec-identity.md +0 -166
- package/vault/wiki/concepts/context-anxiety.md +0 -57
- package/vault/wiki/concepts/context-compression-techniques.md +0 -19
- package/vault/wiki/concepts/context-continuity.md +0 -22
- package/vault/wiki/concepts/context-drift-in-agents.md +0 -106
- package/vault/wiki/concepts/context-engineering.md +0 -62
- package/vault/wiki/concepts/context-folding.md +0 -67
- package/vault/wiki/concepts/context-mode.md +0 -38
- package/vault/wiki/concepts/cursor-harness-innovations.md +0 -107
- package/vault/wiki/concepts/deterministic-session-compaction.md +0 -79
- package/vault/wiki/concepts/drift-detection-unified.md +0 -296
- package/vault/wiki/concepts/execution-feedback-loop.md +0 -46
- package/vault/wiki/concepts/feedforward-feedback-harness.md +0 -60
- package/vault/wiki/concepts/five-root-cause-metrics-sentrux.md +0 -40
- package/vault/wiki/concepts/fork-safe-spec-storage.md +0 -89
- package/vault/wiki/concepts/fts5-sandbox.md +0 -19
- package/vault/wiki/concepts/fuzzy-edit-matching.md +0 -71
- package/vault/wiki/concepts/gemini-cli-architecture.md +0 -104
- package/vault/wiki/concepts/generator-evaluator-architecture.md +0 -64
- package/vault/wiki/concepts/guardian-agent-pattern.md +0 -67
- package/vault/wiki/concepts/harness-configuration-layers.md +0 -89
- package/vault/wiki/concepts/harness-control-frameworks.md +0 -155
- package/vault/wiki/concepts/harness-engineering-first-principles.md +0 -90
- package/vault/wiki/concepts/harness-h-formalism.md +0 -53
- package/vault/wiki/concepts/hybrid-code-search.md +0 -61
- package/vault/wiki/concepts/inline-post-edit-validation.md +0 -112
- package/vault/wiki/concepts/legendary-engineering-patterns-harness.md +0 -110
- package/vault/wiki/concepts/lifecycle-hooks.md +0 -94
- package/vault/wiki/concepts/mcp-tool-routing.md +0 -102
- package/vault/wiki/concepts/memory-system-of-record-vs-ephemeral-cache.md +0 -47
- package/vault/wiki/concepts/meta-agent-context-pruning.md +0 -151
- package/vault/wiki/concepts/model-adaptive-harness.md +0 -122
- package/vault/wiki/concepts/model-routing-agents.md +0 -101
- package/vault/wiki/concepts/monorepo-architecture.md +0 -45
- package/vault/wiki/concepts/multi-agent-specialization.md +0 -61
- package/vault/wiki/concepts/permission-subsystem.md +0 -16
- package/vault/wiki/concepts/pi-messenger-analysis.md +0 -243
- package/vault/wiki/concepts/pi-vscode-extension-landscape.md +0 -37
- package/vault/wiki/concepts/policy-engine-pattern.md +0 -78
- package/vault/wiki/concepts/progressive-disclosure-agents.md +0 -53
- package/vault/wiki/concepts/progressive-skill-disclosure.md +0 -17
- package/vault/wiki/concepts/provider-native-prompting.md +0 -203
- package/vault/wiki/concepts/quality-signal-sentrux.md +0 -37
- package/vault/wiki/concepts/repo-map-ranking.md +0 -42
- package/vault/wiki/concepts/result-monad-error-handling.md +0 -47
- package/vault/wiki/concepts/safety-defense-in-depth.md +0 -83
- package/vault/wiki/concepts/sandbox-os-enforcement.md +0 -18
- package/vault/wiki/concepts/selective-debate-routing.md +0 -70
- package/vault/wiki/concepts/self-evolving-harness.md +0 -60
- package/vault/wiki/concepts/sentrux-mcp-integration.md +0 -36
- package/vault/wiki/concepts/sentrux-rules-engine.md +0 -49
- package/vault/wiki/concepts/shell-pattern-compression.md +0 -24
- package/vault/wiki/concepts/skill-first-architecture.md +0 -166
- package/vault/wiki/concepts/structured-compaction.md +0 -78
- package/vault/wiki/concepts/subagent-orchestration.md +0 -17
- package/vault/wiki/concepts/subagent-worktree-isolation.md +0 -68
- package/vault/wiki/concepts/superpowers-methodology.md +0 -78
- package/vault/wiki/concepts/think-in-code.md +0 -73
- package/vault/wiki/concepts/ts-execution-layer.md +0 -100
- package/vault/wiki/concepts/typescript-strict-mode.md +0 -37
- package/vault/wiki/concepts/vcc-conversation-compaction-for-pi.md +0 -53
- package/vault/wiki/concepts/verification-drift-detection.md +0 -19
- package/vault/wiki/consensus/consensus-records.md +0 -58
- package/vault/wiki/decisions/2026-04-30-pi-lean-ctx-native.md +0 -122
- package/vault/wiki/decisions/2026-05-07-replace-lean-ctx-with-context-mode.md +0 -59
- package/vault/wiki/decisions/adr-008.md +0 -40
- package/vault/wiki/decisions/adr-009.md +0 -46
- package/vault/wiki/decisions/adr-010.md +0 -55
- package/vault/wiki/decisions/adr-011.md +0 -165
- package/vault/wiki/decisions/adr-012.md +0 -102
- package/vault/wiki/decisions/adr-013.md +0 -59
- package/vault/wiki/decisions/adr-014.md +0 -73
- package/vault/wiki/decisions/adr-015.md +0 -81
- package/vault/wiki/decisions/adr-016.md +0 -91
- package/vault/wiki/decisions/adr-017.md +0 -79
- package/vault/wiki/decisions/adr-018.md +0 -100
- package/vault/wiki/decisions/adr-019.md +0 -75
- package/vault/wiki/decisions/adr-020.md +0 -106
- package/vault/wiki/decisions/adr-021.md +0 -86
- package/vault/wiki/decisions/adr-022.md +0 -113
- package/vault/wiki/decisions/adr-023.md +0 -113
- package/vault/wiki/decisions/adr-024.md +0 -73
- package/vault/wiki/decisions/adr-025.md +0 -130
- package/vault/wiki/decisions/adr-026.md +0 -56
- package/vault/wiki/decisions/adr-027.md +0 -94
- package/vault/wiki/decisions/colocate-wiki.md +0 -34
- package/vault/wiki/entities/Anders Hejlsberg.md +0 -29
- package/vault/wiki/entities/Anthropic.md +0 -17
- package/vault/wiki/entities/Augment Code.md +0 -49
- package/vault/wiki/entities/Bjarne Stroustrup.md +0 -26
- package/vault/wiki/entities/Bolt.new (StackBlitz).md +0 -39
- package/vault/wiki/entities/Boris Cherny.md +0 -11
- package/vault/wiki/entities/Claude Code.md +0 -19
- package/vault/wiki/entities/Dennis Ritchie.md +0 -26
- package/vault/wiki/entities/Emergent Labs.md +0 -32
- package/vault/wiki/entities/Google Cloud.md +0 -16
- package/vault/wiki/entities/Guido van Rossum.md +0 -28
- package/vault/wiki/entities/Ken Thompson.md +0 -28
- package/vault/wiki/entities/Lee et al.md +0 -16
- package/vault/wiki/entities/Linus Torvalds.md +0 -28
- package/vault/wiki/entities/Lovable (company).md +0 -40
- package/vault/wiki/entities/Martin Fowler.md +0 -16
- package/vault/wiki/entities/Meng et al.md +0 -16
- package/vault/wiki/entities/OpenAI.md +0 -16
- package/vault/wiki/entities/Rocket.new.md +0 -38
- package/vault/wiki/entities/VILA-Lab.md +0 -15
- package/vault/wiki/entities/autodev-codebase.md +0 -18
- package/vault/wiki/entities/ck-tool.md +0 -59
- package/vault/wiki/entities/codesearch.md +0 -18
- package/vault/wiki/entities/disler-indydevdan.md +0 -33
- package/vault/wiki/entities/gsd-get-shit-done.md +0 -56
- package/vault/wiki/entities/javascript-runtimes.md +0 -48
- package/vault/wiki/entities/jesse-vincent.md +0 -38
- package/vault/wiki/entities/lean-ctx.md +0 -32
- package/vault/wiki/entities/opendev.md +0 -41
- package/vault/wiki/entities/ops-codegraph-tool.md +0 -18
- package/vault/wiki/entities/pi-coding-agent.md +0 -53
- package/vault/wiki/entities/sentrux.md +0 -54
- package/vault/wiki/entities/vgrep-tool.md +0 -57
- package/vault/wiki/entities/vitest.md +0 -41
- package/vault/wiki/flows/harness-wiki-pipeline.md +0 -204
- package/vault/wiki/hot.md +0 -932
- package/vault/wiki/index.md +0 -437
- package/vault/wiki/log.md +0 -422
- package/vault/wiki/meta/dashboard.md +0 -30
- package/vault/wiki/meta/lint-report-2026-04-30.md +0 -86
- package/vault/wiki/meta/lint-report-2026-05-02.md +0 -251
- package/vault/wiki/meta/overview.canvas +0 -43
- package/vault/wiki/modules/adversarial-verification.md +0 -57
- package/vault/wiki/modules/automated-observability.md +0 -54
- package/vault/wiki/modules/bench.md +0 -20
- package/vault/wiki/modules/extensions.md +0 -23
- package/vault/wiki/modules/grounding-checkpoints.md +0 -62
- package/vault/wiki/modules/harness-implementation-plan.md +0 -345
- package/vault/wiki/modules/harness-wiki-skill-mapping.md +0 -135
- package/vault/wiki/modules/harness.md +0 -86
- package/vault/wiki/modules/persistent-memory.md +0 -85
- package/vault/wiki/modules/schema-orchestration.md +0 -68
- package/vault/wiki/modules/skills.md +0 -27
- package/vault/wiki/modules/spec-hardening.md +0 -58
- package/vault/wiki/modules/structured-planning.md +0 -53
- package/vault/wiki/modules/think-in-code-enforcement.md +0 -153
- package/vault/wiki/modules/wiki-query-interface.md +0 -64
- package/vault/wiki/overview.md +0 -51
- package/vault/wiki/questions/Research-pi-vs-claude-code-agentic-orchestration-pipeline.md +0 -87
- package/vault/wiki/questions/Research-sentrux-dev.md +0 -123
- package/vault/wiki/questions/Research-superpowers-skill-for-agentic-coding-agents.md +0 -164
- package/vault/wiki/questions/Research: Augment Code Context Engine.md +0 -244
- package/vault/wiki/questions/Research: Automating Software Engineering - Lovable, Bolt, Emergent, Rocket.md +0 -112
- package/vault/wiki/questions/Research: Claude Code State-of-the-Art Harness Improvements.md +0 -209
- package/vault/wiki/questions/Research: Codex State-of-the-Art Harness Improvements.md +0 -99
- package/vault/wiki/questions/Research: Engineering Workflows of Legendary Programmers and AI Harness Mapping.md +0 -107
- package/vault/wiki/questions/Research: Fallow Codebase Intelligence Harness Integration.md +0 -72
- package/vault/wiki/questions/Research: Gemini CLI SOTA Harness Integration.md +0 -166
- package/vault/wiki/questions/Research: GitHub Issues as Harness Spec Storage.md +0 -188
- package/vault/wiki/questions/Research: Google Antigravity Harness Integration.md +0 -120
- package/vault/wiki/questions/Research: Meta-Agent Context Drift Detection.md +0 -236
- package/vault/wiki/questions/Research: Model-Adaptive Agent Harness Design.md +0 -95
- package/vault/wiki/questions/Research: Model-Specific Prompting Guides.md +0 -165
- package/vault/wiki/questions/Research: Prompt Renderer for Multi-Model Agent Harness.md +0 -216
- package/vault/wiki/questions/Research: Skill-First Harness Architecture.md +0 -91
- package/vault/wiki/questions/Research: TypeScript Best Practices and Codebase Structure.md +0 -88
- package/vault/wiki/questions/Research: TypeScript Execution Layer for Agent Tool Calling.md +0 -81
- package/vault/wiki/questions/Research: claude-mem over Obsidian for Harness Layer.md +0 -71
- package/vault/wiki/questions/Research: claude-mem over obsidian wiki as the knowledge base for our agentic harness pipeline. think from first principles. does this replace or complement our current setup? no hard feelings about previous decisions. gimme accurate points.md +0 -80
- package/vault/wiki/questions/Research: context-mode vs lean-ctx.md +0 -72
- package/vault/wiki/questions/Research: cursor.sh Harness Innovations.md +0 -92
- package/vault/wiki/questions/Research: executor.sh Harness Integration.md +0 -170
- package/vault/wiki/questions/Research: how GSD fits into our coding harness setup.md +0 -97
- package/vault/wiki/questions/Research: how claude-mem fits into our workflow. and whether it should replace obsidian in the codebase. no hard feelings about previous actions, rethink from first principles always.md +0 -80
- package/vault/wiki/questions/Research: pi-vcc.md +0 -113
- package/vault/wiki/questions/Research: semantic code search tools.md +0 -69
- package/vault/wiki/questions/Research: vcc extension for pi coding agent.md +0 -73
- package/vault/wiki/questions/how-to-enable-semantic-code-search-now.md +0 -111
- package/vault/wiki/questions/mvp-implementation-blueprint.md +0 -552
- package/vault/wiki/questions/research-agent-first-codebase-exploration.md +0 -199
- package/vault/wiki/questions/research-agentic-coding-harness-latest-papers.md +0 -142
- package/vault/wiki/questions/research-gitingest-gitreverse-integration.md +0 -100
- package/vault/wiki/questions/research-wozcode-token-reduction.md +0 -67
- package/vault/wiki/questions/resolved-context-pruning-inplace-vs-restart.md +0 -95
- package/vault/wiki/questions/resolved-context-window-economics.md +0 -167
- package/vault/wiki/questions/resolved-imad-debate-gating-transfer.md +0 -126
- package/vault/wiki/questions/resolved-mcp-tool-preference.md +0 -112
- package/vault/wiki/questions/resolved-small-model-meta-agents.md +0 -107
- package/vault/wiki/questions/resolved-treesitter-dynamic-languages.md +0 -95
- package/vault/wiki/sources/Auggie Context MCP Server.md +0 -63
- package/vault/wiki/sources/Augment Code Codacy AI Giants.md +0 -61
- package/vault/wiki/sources/Augment Code MCP SiliconAngle.md +0 -49
- package/vault/wiki/sources/Augment Code WorkOS ERC 2025.md +0 -55
- package/vault/wiki/sources/Augment Context Engine Official.md +0 -71
- package/vault/wiki/sources/Augment SWE-bench Agent GitHub.md +0 -74
- package/vault/wiki/sources/Augment SWE-bench Pro Blog.md +0 -58
- package/vault/wiki/sources/Source: AgentBus Jinja2 Prompt Pipelines.md +0 -75
- package/vault/wiki/sources/Source: Arxiv — Don't Break the Cache.md +0 -85
- package/vault/wiki/sources/Source: Augment - Harness Engineering for AI Coding Agents.md +0 -58
- package/vault/wiki/sources/Source: Blake Crosley Agent Architecture Guide.md +0 -100
- package/vault/wiki/sources/Source: Bolt.new Architecture & Case Study.md +0 -75
- package/vault/wiki/sources/Source: Build-Time Prompt Compilation Architecture.md +0 -107
- package/vault/wiki/sources/Source: Claude API Agent Skills Overview.md +0 -70
- package/vault/wiki/sources/Source: Gemini CLI Changelogs.md +0 -88
- package/vault/wiki/sources/Source: Google Blog - Gemini CLI Announcement.md +0 -57
- package/vault/wiki/sources/Source: Google Gemini CLI Architecture Docs.md +0 -53
- package/vault/wiki/sources/Source: LangChain - Anatomy of Agent Harness.md +0 -65
- package/vault/wiki/sources/Source: Lovable Architecture & Clone Analysis.md +0 -83
- package/vault/wiki/sources/Source: Martin Fowler - Harness Engineering.md +0 -70
- package/vault/wiki/sources/Source: OpenAI Harness Engineering Five Principles.md +0 -58
- package/vault/wiki/sources/Source: OpenAI Harness Engineering — 0 Lines of Human Code.md +0 -101
- package/vault/wiki/sources/Source: OpenDev — Building AI Coding Agents for the Terminal.md +0 -100
- package/vault/wiki/sources/Source: Render AI Coding Agents Benchmark 2025.md +0 -53
- package/vault/wiki/sources/Source: Rocket.new — Vibe Solutioning Platform.md +0 -70
- package/vault/wiki/sources/Source: SwirlAI Agent Skills Progressive Disclosure.md +0 -71
- package/vault/wiki/sources/Source: TianPan Prompt Caching Architecture.md +0 -89
- package/vault/wiki/sources/Source: Vercel Labs agent-browser.md +0 -155
- package/vault/wiki/sources/Source: browser-harness CDP Harness.md +0 -126
- package/vault/wiki/sources/agent-drift-academic-paper.md +0 -79
- package/vault/wiki/sources/aider-repomap-tree-sitter.md +0 -42
- package/vault/wiki/sources/anthropic-compaction-api.md +0 -58
- package/vault/wiki/sources/anthropic-effective-harnesses.md +0 -42
- package/vault/wiki/sources/anthropic-prompt-best-practices.md +0 -100
- package/vault/wiki/sources/anthropic2026-harness-design.md +0 -63
- package/vault/wiki/sources/barrel-files-tkdodo.md +0 -38
- package/vault/wiki/sources/birth-of-unix-kernighan-interview.md +0 -57
- package/vault/wiki/sources/bockeler2026-harness-engineering.md +0 -69
- package/vault/wiki/sources/cast-code-chunking-paper.md +0 -50
- package/vault/wiki/sources/ck-semantic-search.md +0 -78
- package/vault/wiki/sources/claude-code-architecture-karaxai-2026.md +0 -71
- package/vault/wiki/sources/claude-code-architecture-qubytes-2026.md +0 -50
- package/vault/wiki/sources/claude-code-architecture-vila-lab-2026.md +0 -64
- package/vault/wiki/sources/claude-code-security-architecture-penligent-2026.md +0 -70
- package/vault/wiki/sources/claude-context-editing-docs.md +0 -13
- package/vault/wiki/sources/cloudflare-codemode.md +0 -63
- package/vault/wiki/sources/code-chunk-library-supermemory.md +0 -63
- package/vault/wiki/sources/codeact-apple-2024.md +0 -62
- package/vault/wiki/sources/codex-dsc-rfc-8573.md +0 -41
- package/vault/wiki/sources/codex-open-source-agent-2026.md +0 -110
- package/vault/wiki/sources/coir-code-retrieval-benchmark.md +0 -51
- package/vault/wiki/sources/colinmcnamara-context-optimization-codemode.md +0 -48
- package/vault/wiki/sources/context-folding-paper.md +0 -61
- package/vault/wiki/sources/context-mode-website.md +0 -63
- package/vault/wiki/sources/cursor-agent-best-practices-2026.md +0 -62
- package/vault/wiki/sources/cursor-fork-29b-2025.md +0 -50
- package/vault/wiki/sources/cursor-harness-april-2026.md +0 -76
- package/vault/wiki/sources/cursor-instant-apply-2024.md +0 -45
- package/vault/wiki/sources/cursor-shadow-workspace-2024.md +0 -52
- package/vault/wiki/sources/cursor-shipped-coding-agent-2026.md +0 -53
- package/vault/wiki/sources/cursor-vs-antigravity-2026.md +0 -51
- package/vault/wiki/sources/disler-pi-vs-claude-code.md +0 -69
- package/vault/wiki/sources/distill-deterministic-context-compression.md +0 -53
- package/vault/wiki/sources/embedding-models-benchmark-supermemory-2025.md +0 -48
- package/vault/wiki/sources/executor-rhyssullivan.md +0 -122
- package/vault/wiki/sources/fallow-rs-codebase-intelligence.md +0 -125
- package/vault/wiki/sources/fan2025-imad.md +0 -60
- package/vault/wiki/sources/forgecode-gpt5-agent-improvements.md +0 -63
- package/vault/wiki/sources/gemini-3-prompting-guide.md +0 -78
- package/vault/wiki/sources/gh-cli-sub-issue-rfc.md +0 -50
- package/vault/wiki/sources/gh-sub-issue-extension.md +0 -72
- package/vault/wiki/sources/github-fork-issues-discussion.md +0 -44
- package/vault/wiki/sources/github-issue-dependencies-docs.md +0 -49
- package/vault/wiki/sources/github-sub-issues-docs.md +0 -51
- package/vault/wiki/sources/gitingest.md +0 -91
- package/vault/wiki/sources/gitreverse.md +0 -63
- package/vault/wiki/sources/google-antigravity-official-blog.md +0 -47
- package/vault/wiki/sources/google-antigravity-wikipedia.md +0 -53
- package/vault/wiki/sources/gsd-codecentric-deep-dive.md +0 -57
- package/vault/wiki/sources/gsd-github-repo.md +0 -51
- package/vault/wiki/sources/gsd-hn-discussion.md +0 -59
- package/vault/wiki/sources/guido-python-design-philosophy.md +0 -56
- package/vault/wiki/sources/hejlsberg-7-learnings.md +0 -48
- package/vault/wiki/sources/ironclaw-drift-monitor.md +0 -80
- package/vault/wiki/sources/langsight-loop-detection.md +0 -80
- package/vault/wiki/sources/leanctx-website.md +0 -69
- package/vault/wiki/sources/lee2026-meta-harness.md +0 -59
- package/vault/wiki/sources/linux-kernel-coding-workflow.md +0 -50
- package/vault/wiki/sources/lou2026-autoharness.md +0 -53
- package/vault/wiki/sources/martin-fowler-harness-engineering.md +0 -73
- package/vault/wiki/sources/mcp-architecture-docs.md +0 -13
- package/vault/wiki/sources/meng2026-agent-harness-survey.md +0 -79
- package/vault/wiki/sources/mindstudio-four-agent-types.md +0 -68
- package/vault/wiki/sources/ms-chat-history-management.md +0 -13
- package/vault/wiki/sources/openai-prompt-guidance.md +0 -104
- package/vault/wiki/sources/openclaw-session-pruning.md +0 -13
- package/vault/wiki/sources/opencode-dcp.md +0 -13
- package/vault/wiki/sources/opendev-arxiv-2603.05344v1.md +0 -79
- package/vault/wiki/sources/openhands-platform.md +0 -39
- package/vault/wiki/sources/oss-guide-codebase-exploration.md +0 -53
- package/vault/wiki/sources/pi-compaction-extensions-ecosystem.md +0 -102
- package/vault/wiki/sources/pi-context-prune-github-repo.md +0 -38
- package/vault/wiki/sources/pi-mono-compaction-docs.md +0 -38
- package/vault/wiki/sources/pi-omni-compact-github-repo.md +0 -50
- package/vault/wiki/sources/pi-rtk-optimizer-github-repo.md +0 -45
- package/vault/wiki/sources/pi-vcc-github-repo.md +0 -69
- package/vault/wiki/sources/pi-vscode-marketplace.md +0 -41
- package/vault/wiki/sources/pi-vscode-model-provider-marketplace.md +0 -39
- package/vault/wiki/sources/py-tree-sitter.md +0 -13
- package/vault/wiki/sources/sentrux-dev-landing.md +0 -40
- package/vault/wiki/sources/sentrux-docs-pro-architecture.md +0 -75
- package/vault/wiki/sources/sentrux-docs-quality-signal.md +0 -46
- package/vault/wiki/sources/sentrux-docs-root-cause-metrics.md +0 -57
- package/vault/wiki/sources/sentrux-docs-rules-engine.md +0 -58
- package/vault/wiki/sources/sentrux-github-repo.md +0 -56
- package/vault/wiki/sources/superpowers-github-repo.md +0 -56
- package/vault/wiki/sources/superpowers-release-blog.md +0 -54
- package/vault/wiki/sources/superpowers-termdock-analysis.md +0 -45
- package/vault/wiki/sources/swe-agent-aci.md +0 -42
- package/vault/wiki/sources/swe-bench.md +0 -45
- package/vault/wiki/sources/swe-pruner-context-pruning.md +0 -13
- package/vault/wiki/sources/think-in-code-blog.md +0 -48
- package/vault/wiki/sources/tree-sitter-docs.md +0 -13
- package/vault/wiki/sources/ts-best-practices-2025-devto.md +0 -42
- package/vault/wiki/sources/ts-folder-structure-mingyang.md +0 -58
- package/vault/wiki/sources/ts-monorepo-koerselman.md +0 -44
- package/vault/wiki/sources/ts-result-error-handling-kkalamarski.md +0 -52
- package/vault/wiki/sources/ts-runtimes-comparison-betterstack.md +0 -42
- package/vault/wiki/sources/ts-strict-mode-rishikc.md +0 -43
- package/vault/wiki/sources/unix-philosophy.md +0 -48
- package/vault/wiki/sources/vectara-chunking-vs-embedding-naacl2025.md +0 -39
- package/vault/wiki/sources/vectara-guardian-agents.md +0 -79
- package/vault/wiki/sources/vgrep-semantic-search.md +0 -76
- package/vault/wiki/sources/vitest-official.md +0 -41
- package/vault/wiki/sources/vscode-pi-community-extension.md +0 -40
- package/vault/wiki/sources/wozcode.md +0 -79
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import { describe, it, expect } from "bun:test";
|
|
2
|
+
import { sanitize } from "../src/core/sanitize";
|
|
3
|
+
|
|
4
|
+
// Behavioural tests for `sanitize`: ANSI stripping, line-ending
// normalisation and control-character removal.
describe("sanitize", () => {
  it("strips ANSI escape codes", () => {
    expect(sanitize("\x1b[31mred\x1b[0m")).toBe("red");
  });

  it("normalizes CRLF to LF", () => {
    expect(sanitize("a\r\nb\r\n")).toBe("a\nb\n");
  });

  // A lone CR is expected to become LF (not to be removed), so the title
  // states the conversion that the assertion actually checks.
  it("converts bare CR to LF", () => {
    expect(sanitize("a\rb")).toBe("a\nb");
  });

  it("strips control characters but preserves newlines and tabs", () => {
    expect(sanitize("a\x00b\tc\nd")).toBe("ab\tc\nd");
  });

  it("passes clean text unchanged", () => {
    expect(sanitize("hello world")).toBe("hello world");
  });
});
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
import { describe, it, expect } from "bun:test";
|
|
2
|
+
import { searchEntries } from "../src/core/search-entries";
|
|
3
|
+
import type { RenderedEntry } from "../src/core/render-entries";
|
|
4
|
+
import type { Message } from "@mariozechner/pi-ai";
|
|
5
|
+
|
|
6
|
+
// Shared fixtures: rendered summaries and the raw messages they were
// rendered from, index-aligned.
const entries: RenderedEntry[] = [
  { index: 0, role: "user", summary: "Fix login bug" },
  { index: 1, role: "assistant", summary: "Reading auth.ts" },
  { index: 2, role: "tool_result", summary: "[Read] code here" },
  { index: 3, role: "assistant", summary: "Found the root cause in auth module" },
];

const messages: Message[] = [
  { role: "user", content: "Fix login bug" } as any,
  { role: "assistant", content: [{ type: "text", text: "Reading auth.ts" }] } as any,
  { role: "toolResult", content: [{ type: "text", text: "[Read] code here" }] } as any,
  { role: "assistant", content: [{ type: "text", text: "Found the root cause in auth module" }] } as any,
];

describe("searchEntries", () => {
  it("returns all for empty query", () => {
    expect(searchEntries(entries, messages)).toEqual(entries);
    expect(searchEntries(entries, messages, "")).toEqual(entries);
  });

  it("filters by single term", () => {
    const r = searchEntries(entries, messages, "login");
    expect(r).toHaveLength(1);
    expect(r[0].index).toBe(0);
  });

  it("returns empty for no match", () => {
    expect(searchEntries(entries, messages, "xyz123")).toEqual([]);
  });

  it("finds keyword beyond clip boundary in full content", () => {
    const longText = "A".repeat(400) + " hidden_keyword here";
    const longEntries: RenderedEntry[] = [
      { index: 0, role: "user", summary: "A".repeat(300) },
    ];
    const longMsgs: Message[] = [
      { role: "user", content: longText } as any,
    ];
    const r = searchEntries(longEntries, longMsgs, "hidden_keyword");
    expect(r).toHaveLength(1);
    expect(r[0].snippet).toContain("hidden_keyword");
  });

  it("returns snippet around matched term", () => {
    const r = searchEntries(entries, messages, "root");
    expect(r).toHaveLength(1);
    expect(r[0].snippet).toBeDefined();
    expect(r[0].snippet).toContain("root");
  });

  // ── regex support ──

  it("supports regex pattern: alternation", () => {
    const r = searchEntries(entries, messages, "login|auth");
    expect(r).toHaveLength(3); // "login bug", "auth.ts", "auth module"
    // Numeric comparator: the default Array.prototype.sort compares as
    // strings, which would misorder indices once they reach two digits.
    expect(r.map((h) => h.index).sort((a, b) => a - b)).toEqual([0, 1, 3]);
  });

  it("supports regex pattern: wildcard", () => {
    const r = searchEntries(entries, messages, "Read.*auth");
    expect(r).toHaveLength(1);
    expect(r[0].index).toBe(1);
  });

  it("falls back to escaped literal for invalid regex", () => {
    const extraEntries: RenderedEntry[] = [
      { index: 0, role: "user", summary: "test (foo" },
      { index: 1, role: "assistant", summary: "no match here" },
    ];
    const extraMsgs: Message[] = [
      { role: "user", content: "error with (foo pattern" } as any,
      { role: "assistant", content: [{ type: "text", text: "no match here" }] } as any,
    ];
    const r = searchEntries(extraEntries, extraMsgs, "(foo");
    expect(r).toHaveLength(1);
    expect(r[0].index).toBe(0);
  });

  it("regex is case-insensitive", () => {
    const r = searchEntries(entries, messages, "FIX|ROOT");
    expect(r).toHaveLength(2);
  });

  // ── natural language queries (OR logic + ranking) ──

  it("natural language query uses OR logic", () => {
    // "root cause auth" -- matches entries containing ANY of these terms
    const r = searchEntries(entries, messages, "root cause auth");
    expect(r.length).toBeGreaterThanOrEqual(2); // #3 has all 3, #1 has auth
    // Best match (highest BM25) should come first
    expect(r[0].index).toBe(3); // "Found the root cause in auth module" matches all 3
  });

  it("natural language ranks by BM25 score", () => {
    const r = searchEntries(entries, messages, "root cause auth");
    // Top result has more terms matched = higher BM25 score
    expect(r[0].matchCount!).toBeGreaterThanOrEqual(r[r.length - 1].matchCount!);
  });

  it("filters stopwords from queries", () => {
    // "the root cause of it" → stopwords: the, of, it → meaningful: root, cause
    const r = searchEntries(entries, messages, "the root cause of it");
    expect(r).toHaveLength(1);
    expect(r[0].index).toBe(3);
  });

  it("keeps all terms if all are stopwords", () => {
    // When all terms are stopwords, keep them (don't drop everything)
    // "the" appears in "Found the root cause" so it matches
    const r = searchEntries(entries, messages, "the");
    expect(r.length).toBeGreaterThan(0);
  });

  // ── line-based snippet ──

  it("snippet shows context lines around match", () => {
    const multiline = "line 0\nline 1\nline 2 TARGET\nline 3\nline 4\nline 5";
    const e: RenderedEntry[] = [{ index: 0, role: "user", summary: "test" }];
    const m: Message[] = [{ role: "user", content: multiline } as any];
    const r = searchEntries(e, m, "TARGET");
    expect(r).toHaveLength(1);
    const snip = r[0].snippet!;
    expect(snip).toContain("line 2 TARGET");
    expect(snip).toContain("line 0");
    expect(snip).toContain("line 4");
    expect(snip).not.toContain("line 5");
  });

  it("snippet handles match at beginning", () => {
    const multiline = "TARGET here\nline 1\nline 2\nline 3";
    const e: RenderedEntry[] = [{ index: 0, role: "user", summary: "test" }];
    const m: Message[] = [{ role: "user", content: multiline } as any];
    const r = searchEntries(e, m, "TARGET");
    // Assert the hit exists first so a miss fails here with a clear message
    // instead of as a TypeError on `r[0]`.
    expect(r).toHaveLength(1);
    const snip = r[0].snippet!;
    expect(snip).toContain("TARGET here");
    expect(snip).toContain("line 2");
    expect(snip).not.toContain("line 3");
  });
});
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import { buildSessionContext, loadEntriesFromFile } from "../../node_modules/@mariozechner/pi-coding-agent/dist/core/session-manager.js";
|
|
2
|
+
import type { Message } from "@mariozechner/pi-ai";
|
|
3
|
+
|
|
4
|
+
/** Result of loading one session file. */
export interface LoadedSession {
  /** Number of messages that passed the shape check. */
  messageCount: number;
  /** Number of context messages rejected by the shape check. */
  skippedCount: number;
  /** The accepted messages, in context order. */
  messages: Message[];
}

/**
 * Load a session file, rebuild its context from the non-header entries and
 * keep only the items that look like messages (a string `role` and a
 * `content` property).
 */
export const loadSessionMessages = (file: string): LoadedSession => {
  const nonHeader = loadEntriesFromFile(file).filter(
    (entry) => entry.type !== "header",
  );
  const context = buildSessionContext(nonHeader as any);
  const candidates = context.messages as any[];

  // Minimal structural check; anything failing it is counted as skipped.
  const looksLikeMessage = (msg: any): msg is Message =>
    msg && typeof msg.role === "string" && "content" in msg;

  const messages = candidates.filter(looksLikeMessage);
  const skippedCount = context.messages.length - messages.length;
  return { messageCount: messages.length, skippedCount, messages };
};
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { mkdir, mkdtemp, copyFile, chmod, readdir, stat } from "node:fs/promises";
|
|
2
|
+
import { tmpdir } from "node:os";
|
|
3
|
+
import { join, basename } from "node:path";
|
|
4
|
+
|
|
5
|
+
const SESSION_ROOT = join(process.env.HOME ?? "", ".pi/agent/sessions");
|
|
6
|
+
|
|
7
|
+
/** One session file selected for sampling, together with its temporary copy. */
export interface SessionSample {
  /** Path of the original session file. */
  source: string;
  /** Path of the copy made for the sample. */
  copy: string;
  /** Size in bytes of `source` when the sample was taken. */
  size: number;
  /** Modification time (ms) of `source` when the sample was taken. */
  mtimeMs: number;
}
|
|
13
|
+
|
|
14
|
+
/** Recursively collect the paths of all `.jsonl` files below `dir`. */
const walk = async (dir: string): Promise<string[]> => {
  const found: string[] = [];
  for (const entry of await readdir(dir, { withFileTypes: true })) {
    const full = join(dir, entry.name);
    if (entry.isDirectory()) {
      // Descend and append the nested results in discovery order.
      found.push(...(await walk(full)));
      continue;
    }
    if (entry.isFile() && full.endsWith(".jsonl")) {
      found.push(full);
    }
  }
  return found;
};
|
|
24
|
+
|
|
25
|
+
/** Return up to `limit` session file paths under SESSION_ROOT, largest first. */
const pickLargest = async (limit: number): Promise<string[]> => {
  const candidates = await walk(SESSION_ROOT);
  const withSizes = await Promise.all(
    candidates.map(async (file) => {
      const { size } = await stat(file);
      return { file, size };
    }),
  );
  // Descending by byte size.
  withSizes.sort((left, right) => right.size - left.size);
  return withSizes.slice(0, limit).map(({ file }) => file);
};
|
|
32
|
+
|
|
33
|
+
/**
 * Copy the `limit` largest session files into a fresh temporary directory
 * and return a descriptor for each copy.
 *
 * Each copy is made read-only (mode 0o444). The recorded `size` and
 * `mtimeMs` come from a `stat` of the source taken just before copying.
 *
 * Session files live in nested directories, so two selected sources can
 * share a basename. A clashing copy gets an index prefix; without it the
 * second `copyFile` would target the first, already read-only, copy.
 */
export const prepareSessionSamples = async (limit = 2): Promise<SessionSample[]> => {
  const selected = await pickLargest(limit);
  const dir = await mkdtemp(join(tmpdir(), "pi-vcc-sessions-"));
  await mkdir(dir, { recursive: true });
  const samples: SessionSample[] = [];
  const usedNames = new Set<string>();
  for (const [position, source] of selected.entries()) {
    const srcStat = await stat(source);
    const name = basename(source);
    // Keep the plain basename whenever it is free so existing callers see
    // the same copy paths as before.
    const uniqueName = usedNames.has(name) ? `${position}-${name}` : name;
    usedNames.add(uniqueName);
    const copy = join(dir, uniqueName);
    await copyFile(source, copy);
    await chmod(copy, 0o444);
    samples.push({ source, copy, size: srcStat.size, mtimeMs: srcStat.mtimeMs });
  }
  return samples;
};
|
|
47
|
+
|
|
48
|
+
/** Re-stat the sample's source file and return its current size and mtime. */
export const readSourceStat = async (sample: SessionSample) => {
  const { size, mtimeMs } = await stat(sample.source);
  return { size, mtimeMs };
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ultimate-pi",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.2",
|
|
4
4
|
"description": "Ultimate AI coding harness for pi.dev — extensible skills, Obsidian wiki knowledge layer, compressed context, deterministic output",
|
|
5
5
|
"keywords": [
|
|
6
6
|
"pi-package",
|
|
@@ -52,8 +52,19 @@
|
|
|
52
52
|
"typescript": "^6.0.3"
|
|
53
53
|
},
|
|
54
54
|
"dependencies": {
|
|
55
|
-
"@
|
|
55
|
+
"@posthog/pi": "latest",
|
|
56
56
|
"@sting8k/pi-vcc": "^0.3.12",
|
|
57
|
-
"
|
|
58
|
-
|
|
57
|
+
"@tintinweb/pi-subagents": "latest",
|
|
58
|
+
"@yeliu84/pi-model-router": "latest",
|
|
59
|
+
"asciify-image": "^0.1.10",
|
|
60
|
+
"context-mode": "latest",
|
|
61
|
+
"jimp": "^1.6.1"
|
|
62
|
+
},
|
|
63
|
+
"bundledDependencies": [
|
|
64
|
+
"@posthog/pi",
|
|
65
|
+
"@sting8k/pi-vcc",
|
|
66
|
+
"@tintinweb/pi-subagents",
|
|
67
|
+
"@yeliu84/pi-model-router",
|
|
68
|
+
"context-mode"
|
|
69
|
+
]
|
|
59
70
|
}
|
|
Binary file
|
|
@@ -0,0 +1,376 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Index YouTube watch URLs: yt-dlp metadata + Firecrawl transcript scrape.
|
|
3
|
+
|
|
4
|
+
Writes ``<data-dir>/<channel-handle>/<YYYY-MM-DD>/<video-id>_<title-slug>.txt`` and
|
|
5
|
+
``.meta.txt``, and merges ``_index.tsv`` per channel. No channel-specific filters.
|
|
6
|
+
Default ``data-dir`` is ``<repo>/data/youtube-transcripts`` when this file lives in ``<repo>/scripts/``.
|
|
7
|
+
|
|
8
|
+
Requirements: ``yt-dlp`` and ``firecrawl`` CLI on PATH (see ``firecrawl --status``).
|
|
9
|
+
|
|
10
|
+
Examples:
|
|
11
|
+
python3 scripts/index_youtube_urls.py 'https://www.youtube.com/watch?v=VIDEO_ID'
|
|
12
|
+
python3 scripts/index_youtube_urls.py --urls-file urls.txt
|
|
13
|
+
python3 scripts/index_youtube_urls.py --data-dir ./data/youtube-transcripts --firecrawl-cwd . URL
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
import argparse
|
|
19
|
+
import os
|
|
20
|
+
import re
|
|
21
|
+
import shutil
|
|
22
|
+
import subprocess
|
|
23
|
+
import tempfile
|
|
24
|
+
import time
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from urllib.parse import parse_qs, urlparse
|
|
27
|
+
|
|
28
|
+
SLEEP_SEC = 5.0
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def slug(s: str, max_len: int = 80) -> str:
    """Reduce *s* to a hyphen-separated fragment for use in a file name.

    Everything except word characters, whitespace and hyphens is removed,
    each run of whitespace/hyphens collapses to a single ``-``, and leading
    and trailing hyphens are trimmed. If nothing is left, ``"untitled"`` is
    used. The result is then cut to ``max_len`` characters and any hyphen
    exposed at the end by the cut is dropped.
    """
    cleaned = re.sub(r"[^\w\s-]", "", s, flags=re.UNICODE)
    hyphenated = re.sub(r"[-\s]+", "-", cleaned).strip("-")
    if not hyphenated:
        hyphenated = "untitled"
    truncated = hyphenated[:max_len]
    return truncated.rstrip("-")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def ymd(upload_date: str) -> str:
    """Format an eight-digit ``YYYYMMDD`` string as ``YYYY-MM-DD``.

    Any other input yields the literal ``"unknown-date"``.
    """
    if len(upload_date) != 8 or not upload_date.isdigit():
        return "unknown-date"
    year, month, day = upload_date[:4], upload_date[4:6], upload_date[6:8]
    return f"{year}-{month}-{day}"
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def parse_firecrawl_youtube_transcript(md: str) -> str | None:
|
|
44
|
+
marker = "## Transcript"
|
|
45
|
+
i = md.find(marker)
|
|
46
|
+
if i == -1:
|
|
47
|
+
return None
|
|
48
|
+
rest = md[i + len(marker) :].lstrip("\n")
|
|
49
|
+
lines_out: list[str] = []
|
|
50
|
+
for line in rest.splitlines():
|
|
51
|
+
if line.startswith("## ") and lines_out:
|
|
52
|
+
break
|
|
53
|
+
lines_out.append(line)
|
|
54
|
+
text = "\n".join(lines_out).strip()
|
|
55
|
+
if len(text) < 30:
|
|
56
|
+
return None
|
|
57
|
+
return text
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def _firecrawl_transcript_sane(text: str) -> bool:
|
|
61
|
+
"""Reject full-page scrapes where ## Transcript captured sidebar/recommendations."""
|
|
62
|
+
head = text[:1200]
|
|
63
|
+
if "NaN / NaN" in head:
|
|
64
|
+
return False
|
|
65
|
+
if head.count("[![]") >= 2 or head.count("hqdefault.jpg") >= 2:
|
|
66
|
+
return False
|
|
67
|
+
if head.count("views •") >= 2:
|
|
68
|
+
return False
|
|
69
|
+
return True
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def fetch_transcript_firecrawl(
|
|
73
|
+
video_id: str,
|
|
74
|
+
*,
|
|
75
|
+
firecrawl_bin: str,
|
|
76
|
+
firecrawl_cwd: Path,
|
|
77
|
+
wait_ms: int = 20000,
|
|
78
|
+
attempts: int = 3,
|
|
79
|
+
scrape_timeout: int = 300,
|
|
80
|
+
) -> str | None:
|
|
81
|
+
url = f"https://www.youtube.com/watch?v={video_id}"
|
|
82
|
+
for attempt in range(attempts):
|
|
83
|
+
if attempt:
|
|
84
|
+
time.sleep(4.0)
|
|
85
|
+
fd, out = tempfile.mkstemp(suffix=".md", prefix="ytfc-")
|
|
86
|
+
os.close(fd)
|
|
87
|
+
out_path = Path(out)
|
|
88
|
+
try:
|
|
89
|
+
cmd = [
|
|
90
|
+
firecrawl_bin,
|
|
91
|
+
"scrape",
|
|
92
|
+
url,
|
|
93
|
+
"--wait-for",
|
|
94
|
+
str(wait_ms),
|
|
95
|
+
"--only-main-content",
|
|
96
|
+
"-o",
|
|
97
|
+
str(out_path),
|
|
98
|
+
]
|
|
99
|
+
r = subprocess.run(
|
|
100
|
+
cmd,
|
|
101
|
+
capture_output=True,
|
|
102
|
+
text=True,
|
|
103
|
+
timeout=scrape_timeout,
|
|
104
|
+
cwd=str(firecrawl_cwd),
|
|
105
|
+
)
|
|
106
|
+
if r.returncode != 0:
|
|
107
|
+
continue
|
|
108
|
+
md = out_path.read_text(encoding="utf-8", errors="replace")
|
|
109
|
+
text = parse_firecrawl_youtube_transcript(md)
|
|
110
|
+
if text and _firecrawl_transcript_sane(text):
|
|
111
|
+
return text
|
|
112
|
+
except (OSError, subprocess.TimeoutExpired, ValueError):
|
|
113
|
+
pass
|
|
114
|
+
finally:
|
|
115
|
+
out_path.unlink(missing_ok=True)
|
|
116
|
+
return None
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def needs_transcript(path: Path) -> bool:
    """Tell whether the transcript file at *path* still has to be fetched.

    ``True`` when the file is missing, cannot be read, or holds only the
    placeholder text (content starting with ``"(no transcript"``).
    """
    if path.exists():
        try:
            existing = path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            return True
        return existing.strip().startswith("(no transcript")
    return True
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def channel_dir_from_handle(uploader_id: str) -> str:
    """Derive the per-channel directory name from an uploader id / handle.

    Surrounding whitespace and one leading ``@`` are removed and the rest is
    lower-cased. A missing or empty value maps to ``"unknown-channel"``.
    """
    fallback = "unknown-channel"
    handle = (uploader_id or fallback).strip().removeprefix("@")
    return handle.lower() or fallback
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def video_id_from_arg(s: str) -> str:
    """Extract the 11-character YouTube video id from a CLI argument.

    Accepted forms:
      * a bare 11-character id;
      * ``youtu.be/<id>`` links;
      * any URL carrying a valid ``v=<id>`` query parameter;
      * ``/shorts/<id>``, ``/embed/<id>``, ``/live/<id>`` and ``/v/<id>``
        paths on YouTube hosts.

    The scheme is optional (``youtu.be/<id>`` works without ``https://``).

    Raises:
        SystemExit: if no valid id can be found, including when the URL is
            malformed.
    """
    s = s.strip()
    id_re = re.compile(r"[0-9A-Za-z_-]{11}")
    if id_re.fullmatch(s):
        return s

    # Without "//" urlparse puts the host into ``path`` and leaves ``netloc``
    # empty, so scheme-less links get a network-location prefix first.
    has_netloc = "://" in s or s.startswith("//")
    try:
        u = urlparse(s if has_netloc else f"//{s}")
    except ValueError:
        # urlparse rejects e.g. unbalanced IPv6 brackets; report it the same
        # way as any other unparseable argument.
        raise SystemExit(f"Could not parse YouTube video id from: {s!r}") from None

    host = (u.netloc or "").lower().removeprefix("www.")
    segments = [seg for seg in u.path.split("/") if seg]

    if host == "youtu.be" and segments and id_re.fullmatch(segments[0]):
        return segments[0]

    qs = parse_qs(u.query)
    if qs.get("v"):
        vid = qs["v"][0]
        if id_re.fullmatch(vid):
            return vid

    youtube_hosts = {
        "youtube.com",
        "m.youtube.com",
        "music.youtube.com",
        "youtube-nocookie.com",
    }
    id_path_prefixes = {"shorts", "embed", "live", "v"}
    if (
        host in youtube_hosts
        and len(segments) >= 2
        and segments[0] in id_path_prefixes
        and id_re.fullmatch(segments[1])
    ):
        return segments[1]

    raise SystemExit(f"Could not parse YouTube video id from: {s!r}")
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def yt_dlp_row(watch_url: str, *, yt_dlp_bin: str) -> tuple[str, str, str, str]:
    """Query yt-dlp for one video's metadata.

    Args:
        watch_url: URL handed to yt-dlp.
        yt_dlp_bin: yt-dlp executable name or path.

    Returns:
        ``(video_id, upload_date, title, uploader_id)``. ``upload_date`` is
        an eight-digit string. ``uploader_id`` is ``"@unknown"`` when yt-dlp
        reports none.

    Raises:
        SystemExit: if yt-dlp exits non-zero, prints a line that does not
            have the expected fields, or reports a malformed upload date.
    """
    cmd = [
        yt_dlp_bin,
        "--no-download",
        "--ignore-errors",
        "--print",
        "%(id)s|%(upload_date)s|%(title)s|%(uploader_id)s",
        watch_url,
    ]
    r = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
    if r.returncode != 0:
        raise SystemExit(f"yt-dlp failed ({r.returncode}): {watch_url}\n{r.stderr}")
    stdout = r.stdout.strip()
    line = stdout.splitlines()[-1] if stdout else ""

    # Titles often contain "|". Take id and date from the left and the
    # uploader id from the right; whatever remains in the middle is the title.
    head = line.split("|", 2)
    tail = head[2].rsplit("|", 1) if len(head) == 3 else []
    if len(tail) != 2:
        raise SystemExit(f"Unexpected yt-dlp output for {watch_url!r}: {line!r}")
    vid, udate = head[0], head[1]
    title, handle = tail

    if not udate.isdigit() or len(udate) != 8:
        raise SystemExit(f"Bad upload_date from yt-dlp: {udate!r}")
    # yt-dlp renders unavailable template fields as the placeholder "NA".
    if handle in ("", "NA"):
        handle = "@unknown"
    return vid, udate, title, handle
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def merge_index(idx: Path, rows: dict[str, tuple[str, str]]) -> None:
    """Merge *rows* into the TSV index at *idx* and rewrite the file.

    Args:
        idx: Path of the ``_index.tsv`` file; parent directories are created
            as needed.
        rows: Mapping ``video_id -> (upload_date, title)``. Entries already in
            the file are added to this mapping only when their id is not
            present, so values passed in take precedence. The mapping is
            updated in place.

    The file is written with a header line and one row per video, sorted by
    video id. Tabs and line-breaking characters in titles are replaced by
    spaces so that a title can never split or shift a row.
    """
    if idx.exists():
        for i, line in enumerate(idx.read_text(encoding="utf-8").splitlines()):
            line = line.strip()
            if not line:
                continue
            if i == 0 and line.startswith("video_id"):
                continue
            parts = line.split("\t")
            if len(parts) >= 3:
                vid, ud, tit = parts[0], parts[1], parts[2]
                rows.setdefault(vid, (ud, tit))

    # The field separator plus every character str.splitlines() treats as a
    # line boundary; the reader above uses splitlines().
    unsafe = dict.fromkeys(
        map(ord, "\t\n\r\x0b\x0c\x1c\x1d\x1e\x85\u2028\u2029"), " "
    )
    lines = ["video_id\tupload_date\ttitle"]
    for vid in sorted(rows):
        ud, tit = rows[vid]
        lines.append(f"{vid}\t{ud}\t{tit.translate(unsafe)}")
    idx.parent.mkdir(parents=True, exist_ok=True)
    idx.write_text("\n".join(lines) + "\n", encoding="utf-8")
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def collect_urls(args: argparse.Namespace) -> list[str]:
    """Gather the URLs / ids to process from the parsed command line.

    Positional ``args.url`` values come first (whitespace-trimmed), followed
    by the non-blank, non-``#`` lines of ``args.urls_file`` when one is given.
    Duplicates are dropped and the first occurrence keeps its position.

    The file is decoded as ``utf-8-sig`` so that a leading byte-order mark
    (as written by some Windows editors) does not become part of the first
    entry.
    """
    collected: list[str] = [a.strip() for a in args.url]
    if args.urls_file:
        raw = Path(args.urls_file).read_text(encoding="utf-8-sig")
        for line in raw.splitlines():
            line = line.strip()
            if line and not line.startswith("#"):
                collected.append(line)
    # dict preserves insertion order, giving an order-stable de-duplication.
    return list(dict.fromkeys(collected))
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def default_paths() -> tuple[Path, Path]:
    """Return the default ``(data_dir, firecrawl_cwd)`` pair.

    Both are derived from this file's resolved location: the second element
    is the directory two levels up (the repo root when the script lives in
    ``<repo>/scripts/``), and the first is ``data/youtube-transcripts``
    beneath it.
    """
    script_path = Path(__file__).resolve()
    repo_root = script_path.parent.parent
    transcripts_dir = repo_root / "data" / "youtube-transcripts"
    return transcripts_dir, repo_root
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def main() -> int:
    """Command-line entry point: index each requested video.

    For every URL / id: resolve metadata with yt-dlp, then (unless
    ``--dry-run``) scrape the transcript with Firecrawl when one is needed,
    write the ``.meta.txt`` and transcript files, and finally merge each
    channel's ``_index.tsv``.

    Returns:
        ``0`` on completion. Fatal problems are raised as ``SystemExit`` by
        this function or by the helpers it calls.
    """
    default_data, default_fc_cwd = default_paths()
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument(
        "url",
        nargs="*",
        help="YouTube watch URLs, youtu.be links, or 11-char video ids",
    )
    ap.add_argument(
        "--urls-file",
        metavar="PATH",
        help="Text file with one URL or id per line (# comments allowed)",
    )
    ap.add_argument(
        "--data-dir",
        type=Path,
        metavar="DIR",
        default=default_data,
        help=f"Root for channel folders (default: {default_data})",
    )
    ap.add_argument(
        "--firecrawl-cwd",
        type=Path,
        metavar="DIR",
        default=default_fc_cwd,
        help="Working directory for firecrawl subprocess (default: repo root next to scripts/)",
    )
    ap.add_argument(
        "--yt-dlp",
        metavar="BIN",
        default="yt-dlp",
        help="yt-dlp executable name or path (default: yt-dlp)",
    )
    ap.add_argument(
        "--firecrawl",
        metavar="BIN",
        default="",
        help="firecrawl executable (default: search PATH)",
    )
    ap.add_argument(
        "--wait-for",
        type=int,
        default=20000,
        metavar="MS",
        help="Firecrawl scrape --wait-for milliseconds (default 20000)",
    )
    ap.add_argument(
        "--sleep",
        type=float,
        default=SLEEP_SEC,
        metavar="SEC",
        help=f"Seconds between Firecrawl scrapes (default {SLEEP_SEC})",
    )
    ap.add_argument(
        "--dry-run",
        action="store_true",
        help="Print yt-dlp metadata only; do not scrape or write files",
    )
    ap.add_argument(
        "--force",
        action="store_true",
        help="Re-scrape even when a non-placeholder transcript already exists",
    )
    args = ap.parse_args()
    urls = collect_urls(args)
    if not urls:
        ap.error("Pass at least one url, or use --urls-file")

    # Always a str ("" when not found), so no assert is needed later to
    # narrow the type; asserts are stripped under ``python -O``.
    fc_bin: str = args.firecrawl.strip() or shutil.which("firecrawl") or ""
    if not fc_bin and not args.dry_run:
        raise SystemExit(
            "firecrawl CLI not found on PATH. Install it and run `firecrawl --status`, "
            "or pass --firecrawl /path/to/firecrawl."
        )

    data_dir: Path = args.data_dir
    fc_cwd: Path = args.firecrawl_cwd

    # channel slug -> {video_id: (upload_date, title)}; written out at the end.
    index_rows: dict[str, dict[str, tuple[str, str]]] = {}
    first_scrape = True

    for raw in urls:
        vid_guess = video_id_from_arg(raw)
        watch = f"https://www.youtube.com/watch?v={vid_guess}"
        vid, udate, title, uploader_id = yt_dlp_row(watch, yt_dlp_bin=args.yt_dlp)
        ch_slug = channel_dir_from_handle(uploader_id)
        day = ymd(udate)
        day_dir = data_dir / ch_slug / day
        base = f"{vid}_{slug(title)}"
        path = day_dir / f"{base}.txt"
        meta_path = day_dir / f"{base}.meta.txt"

        if args.dry_run:
            # Dry run only reports metadata; no index is written, so nothing
            # is recorded in index_rows.
            print(f"{ch_slug}\t{day}\t{vid}\t{udate}\t{title}", flush=True)
            continue

        day_dir.mkdir(parents=True, exist_ok=True)
        need = args.force or needs_transcript(path)
        text: str | None
        if need:
            # Pause between scrapes, but not before the first one.
            if not first_scrape:
                time.sleep(max(0.0, args.sleep))
            first_scrape = False
            print(f"scrape {ch_slug} {day} {vid} …", flush=True)
            text = fetch_transcript_firecrawl(
                vid,
                firecrawl_bin=fc_bin,
                firecrawl_cwd=fc_cwd,
                wait_ms=args.wait_for,
            )
        else:
            print(f"skip {ch_slug} {day} {vid} (existing transcript)", flush=True)
            text = None

        ch_meta = uploader_id if uploader_id.startswith("@") else f"@{uploader_id}"
        meta = (
            f"video_id: {vid}\n"
            f"upload_date: {udate}\n"
            f"title: {title}\n"
            f"url: https://www.youtube.com/watch?v={vid}\n"
            f"transcript_source: firecrawl\n"
            f"channel: {ch_meta}\n"
        )
        meta_path.write_text(meta, encoding="utf-8")
        if need:
            if text is None:
                # Placeholder recognised by needs_transcript() on later runs.
                path.write_text(
                    "(no transcript yet: Firecrawl scrape had no ## Transcript section or empty body. "
                    "Retry later or open the watch URL in a browser.)\n",
                    encoding="utf-8",
                )
                print(" -> no transcript", flush=True)
            else:
                path.write_text(text, encoding="utf-8")
                print(f" -> ok ({len(text)} chars)", flush=True)

        bucket = index_rows.setdefault(ch_slug, {})
        bucket[vid] = (udate, title)

    if not args.dry_run:
        for ch_slug, rows in index_rows.items():
            idx = data_dir / ch_slug / "_index.tsv"
            # merge_index mutates its mapping, so hand it a copy.
            merge_index(idx, dict(rows))
            print(f"wrote {idx}", flush=True)

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|