npm - mdcontext - Versions diffs - 0.0.1 → 0.1.0 - Mend

mdcontext 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (140) hide show

package/.changeset/README.md +28 -0
package/.changeset/config.json +11 -0
package/.github/workflows/ci.yml +83 -0
package/.github/workflows/release.yml +113 -0
package/.tldrignore +112 -0
package/AGENTS.md +46 -0
package/BACKLOG.md +338 -0
package/README.md +231 -11
package/biome.json +36 -0
package/cspell.config.yaml +14 -0
package/dist/chunk-KRYIFLQR.js +92 -0
package/dist/chunk-S7E6TFX6.js +742 -0
package/dist/chunk-VVTGZNBT.js +1519 -0
package/dist/cli/main.d.ts +1 -0
package/dist/cli/main.js +2015 -0
package/dist/index.d.ts +266 -0
package/dist/index.js +86 -0
package/dist/mcp/server.d.ts +1 -0
package/dist/mcp/server.js +376 -0
package/docs/019-USAGE.md +586 -0
package/docs/020-current-implementation.md +364 -0
package/docs/021-DOGFOODING-FINDINGS.md +175 -0
package/docs/BACKLOG.md +80 -0
package/docs/DESIGN.md +439 -0
package/docs/PROJECT.md +88 -0
package/docs/ROADMAP.md +407 -0
package/docs/test-links.md +9 -0
package/package.json +69 -10
package/pnpm-workspace.yaml +5 -0
package/research/config-analysis/01-current-implementation.md +470 -0
package/research/config-analysis/02-strategy-recommendation.md +428 -0
package/research/config-analysis/03-task-candidates.md +715 -0
package/research/config-analysis/033-research-configuration-management.md +828 -0
package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
package/research/dogfood/consolidated-tool-evaluation.md +373 -0
package/research/dogfood/strategy-a/a-synthesis.md +184 -0
package/research/dogfood/strategy-a/a1-docs.md +226 -0
package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
package/research/dogfood/strategy-a/a3-llm.md +164 -0
package/research/dogfood/strategy-b/b-synthesis.md +228 -0
package/research/dogfood/strategy-b/b1-architecture.md +207 -0
package/research/dogfood/strategy-b/b2-gaps.md +258 -0
package/research/dogfood/strategy-b/b3-workflows.md +250 -0
package/research/dogfood/strategy-c/c-synthesis.md +451 -0
package/research/dogfood/strategy-c/c1-explorer.md +192 -0
package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
package/research/effect-cli-error-handling.md +845 -0
package/research/effect-errors-as-values.md +943 -0
package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
package/research/errors-task-analysis/embeddings-analysis.md +709 -0
package/research/errors-task-analysis/index-search-analysis.md +812 -0
package/research/mdcontext-error-analysis.md +521 -0
package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
package/research/semantic-search/002-research-embedding-models.md +490 -0
package/research/semantic-search/003-research-rag-alternatives.md +523 -0
package/research/semantic-search/004-research-vector-search.md +841 -0
package/research/semantic-search/032-research-semantic-search.md +427 -0
package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
package/research/task-management-2026/03-lightweight-file-based.md +567 -0
package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
package/research/task-management-2026/linear/02-api-integrations.md +930 -0
package/research/task-management-2026/linear/03-ai-features.md +368 -0
package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
package/scripts/rebuild-hnswlib.js +63 -0
package/src/cli/argv-preprocessor.test.ts +210 -0
package/src/cli/argv-preprocessor.ts +202 -0
package/src/cli/cli.test.ts +430 -0
package/src/cli/commands/backlinks.ts +54 -0
package/src/cli/commands/context.ts +197 -0
package/src/cli/commands/index-cmd.ts +300 -0
package/src/cli/commands/index.ts +13 -0
package/src/cli/commands/links.ts +52 -0
package/src/cli/commands/search.ts +451 -0
package/src/cli/commands/stats.ts +146 -0
package/src/cli/commands/tree.ts +107 -0
package/src/cli/flag-schemas.ts +275 -0
package/src/cli/help.ts +386 -0
package/src/cli/index.ts +9 -0
package/src/cli/main.ts +145 -0
package/src/cli/options.ts +31 -0
package/src/cli/typo-suggester.test.ts +105 -0
package/src/cli/typo-suggester.ts +130 -0
package/src/cli/utils.ts +126 -0
package/src/core/index.ts +1 -0
package/src/core/types.ts +140 -0
package/src/embeddings/index.ts +8 -0
package/src/embeddings/openai-provider.ts +165 -0
package/src/embeddings/semantic-search.ts +583 -0
package/src/embeddings/types.ts +82 -0
package/src/embeddings/vector-store.ts +299 -0
package/src/index/index.ts +4 -0
package/src/index/indexer.ts +446 -0
package/src/index/storage.ts +196 -0
package/src/index/types.ts +109 -0
package/src/index/watcher.ts +131 -0
package/src/index.ts +8 -0
package/src/mcp/server.ts +483 -0
package/src/parser/index.ts +1 -0
package/src/parser/parser.test.ts +291 -0
package/src/parser/parser.ts +395 -0
package/src/parser/section-filter.ts +270 -0
package/src/search/query-parser.test.ts +260 -0
package/src/search/query-parser.ts +319 -0
package/src/search/searcher.test.ts +182 -0
package/src/search/searcher.ts +602 -0
package/src/summarize/budget-bugs.test.ts +620 -0
package/src/summarize/formatters.ts +419 -0
package/src/summarize/index.ts +20 -0
package/src/summarize/summarizer.test.ts +275 -0
package/src/summarize/summarizer.ts +528 -0
package/src/summarize/verify-bugs.test.ts +238 -0
package/src/utils/index.ts +1 -0
package/src/utils/tokens.test.ts +142 -0
package/src/utils/tokens.ts +186 -0
package/tests/fixtures/cli/.mdcontext/config.json +8 -0
package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
package/tests/fixtures/cli/.mdcontext/indexes/sections.json +233 -0
package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
package/tests/fixtures/cli/.mdcontext/vectors.meta.json +1264 -0
package/tests/fixtures/cli/README.md +9 -0
package/tests/fixtures/cli/api-reference.md +11 -0
package/tests/fixtures/cli/getting-started.md +11 -0
package/tsconfig.json +26 -0
package/vitest.config.ts +21 -0
package/vitest.setup.ts +12 -0

package/docs/ROADMAP.md ADDED Viewed

@@ -0,0 +1,407 @@
+# Roadmap: @hw/mdcontext
+## Overview
+Build a token-efficient markdown analysis tool for LLM consumption. Each phase delivers testable functionality, building toward a complete system with parsing, semantic search, summarization, and analytics.
+## Phases
+- [ ] **Phase 1: Core Parsing** — Markdown AST extraction and structure
+- [ ] **Phase 2: Index & Storage** — Persistent indexes, file watching, caching
+- [ ] **Phase 3: Semantic Layer** — Embeddings, vector search
+- [ ] **Phase 4: Summarization** — Hierarchical compression, token optimization
+- [ ] **Phase 5: Analytics** — Performance metrics, query tracking
+- [ ] **Phase 6: Integration** — CLI, MCP server, HumanWork skills
+---
+## Phase 1: Core Parsing
+**Goal:** Extract structured data from markdown files.
+### 1.1: Project Setup
+- Initialize `packages/hw_mdcontext` in monorepo
+- TypeScript + Effect setup
+- Test infrastructure (vitest)
+- Basic CI integration
+**Deliverables:**
+- Package scaffolding
+- Build working
+- First test passing
+### 1.2: Markdown Parser
+- Integrate remark/unified
+- Parse to mdast (Markdown AST)
+- Handle frontmatter (YAML)
+- Handle GFM extensions (tables, task lists)
+**Deliverables:**
+- `parse(content: string): MdastRoot`
+- Frontmatter extraction
+- Unit tests for various markdown features
+### 1.3: Structure Extraction
+- Extract heading hierarchy
+- Identify sections (content between headings)
+- Extract code blocks with language tags
+- Extract links (internal, external, images)
+- Extract lists and tables
+**Deliverables:**
+- `extractStructure(ast): DocumentStructure`
+- Section tree with content
+- Link graph per document
+- Code block inventory
+### 1.4: Document Model
+- Define document schema (Effect Schema)
+- Section schema with metadata
+- Serialize/deserialize to JSON
+**Deliverables:**
+- `Document` type with full structure
+- `Section` type with bounds, content, metadata
+- JSON round-trip tests
+---
+## Phase 2: Index & Storage
+**Goal:** Persist parsed data, enable fast lookups, handle updates.
+### 2.1: Storage Interface
+- Define `MdStore` interface
+- In-memory implementation for testing
+- File-based implementation for persistence
+**Deliverables:**
+- `MdStore` interface (save, load, query)
+- `MemoryMdStore`
+- `FileMdStore` (JSON files in `.mdcontext/`)
+### 2.2: Document Indexing
+- Index documents by path
+- Index sections by heading
+- Index links (forward and back)
+- Incremental updates (changed files only)
+**Deliverables:**
+- Path → Document lookup
+- Heading → Section lookup
+- Backlink index
+- Change detection (mtime, hash)
+### 2.3: File Watching
+- Watch directory for changes
+- Debounce rapid changes
+- Incremental re-index
+- Configurable ignore patterns
+**Deliverables:**
+- `watch(dir, options): Effect<void>`
+- `.mdcontextignore` support
+- Debounce logic (default 500ms)
+### 2.4: Cache Management
+- Cache parsed documents
+- Cache structure indexes
+- Invalidation on file change
+- Size limits and eviction
+**Deliverables:**
+- LRU cache for documents
+- Index persistence to disk
+- Cache stats (hits, misses, size)
+---
+## Phase 3: Semantic Layer
+**Goal:** Enable meaning-based search via embeddings.
+### 3.1: Embedding Interface
+- Define `Embedder` interface
+- Pluggable backends (API, local)
+- Batch embedding support
+**Deliverables:**
+- `Embedder` interface
+- `embed(texts: string[]): Effect<Vector[]>`
+- Configuration for model selection
+### 3.2: OpenAI Embeddings
+- Implement OpenAI text-embedding-3-small
+- Rate limiting and retry logic
+- Cost tracking
+**Deliverables:**
+- `OpenAIEmbedder`
+- Automatic batching (max 8k tokens)
+- Cost per query metric
+### 3.3: Local Embeddings (Optional)
+- Python subprocess for sentence-transformers
+- Or ONNX runtime in Node
+- Fallback when API unavailable
+**Deliverables:**
+- `LocalEmbedder` (stretch goal)
+- Model download management
+### 3.4: Vector Index
+- Store embeddings with document/section IDs
+- Similarity search (cosine)
+- FAISS or hnswlib integration
+**Deliverables:**
+- `VectorIndex` interface
+- `search(query: Vector, k: number): Result[]`
+- Persistence to disk
+### 3.5: Semantic Search API
+- Text query → embed → search
+- Combine with structural filters
+- Rank and return results
+**Deliverables:**
+- `semanticSearch(query: string, options): SearchResult[]`
+- Filter by path pattern, heading level
+- Result with score, snippet, location
+---
+## Phase 4: Summarization
+**Goal:** Generate token-efficient summaries at multiple granularities.
+### 4.1: Token Counting
+- Accurate token counting (tiktoken or similar)
+- Budget management
+- Truncation strategies
+**Deliverables:**
+- `countTokens(text: string): number`
+- `truncateToTokens(text, limit): string`
+- Model-specific tokenizers (GPT-4, Claude)
+### 4.2: Section Summarization
+- Extract key points from section
+- Preserve structure indicators
+- Configurable compression ratio
+**Deliverables:**
+- `summarizeSection(section, options): Summary`
+- Key sentence extraction
+- Heading preservation
+### 4.3: Document Summarization
+- Hierarchical: summarize sections, then combine
+- TOC generation
+- Key topics extraction
+**Deliverables:**
+- `summarizeDocument(doc, options): DocSummary`
+- Multi-level output (100, 500, 2000 tokens)
+- Topic list
+### 4.4: Context Assembly
+- Build LLM-ready context from multiple sources
+- Priority-based inclusion
+- Token budget management
+**Deliverables:**
+- `assembleContext(sources, budget): string`
+- Source attribution
+- Overflow handling (truncate vs omit)
+---
+## Phase 5: Analytics
+**Goal:** Built-in observability for performance and usage.
+### 5.1: Metrics Foundation
+- Effect Metrics integration
+- Counter, Gauge, Histogram types
+- Metric naming conventions
+**Deliverables:**
+- Metrics layer setup
+- Standard metric types
+- Tagging (operation, status)
+### 5.2: Performance Metrics
+- Query latency (p50, p95, p99)
+- Index build time
+- Cache hit/miss rates
+- Embedding API latency
+**Deliverables:**
+- `mdcontext_query_duration_ms` histogram
+- `mdcontext_cache_hits_total` counter
+- `mdcontext_index_build_duration_ms` gauge
+### 5.3: Usage Metrics
+- Queries per time period
+- Token usage (input/output)
+- Most queried documents/sections
+- Search result click-through (if applicable)
+**Deliverables:**
+- `mdcontext_queries_total` counter
+- `mdcontext_tokens_used` counter
+- Query log with timestamps
+### 5.4: Reporting
+- Metrics export (Prometheus format)
+- Simple CLI report command
+- Alerting thresholds (optional)
+**Deliverables:**
+- `mdcontext metrics` CLI command
+- JSON and text output formats
+- Configurable retention
+---
+## Phase 6: Integration
+**Goal:** Make mdcontext usable from CLI, MCP, and HumanWork.
+### 6.1: CLI Tool
+- `mdcontext index <dir>` — build index
+- `mdcontext search <query>` — semantic search
+- `mdcontext context <path>` — LLM-ready summary
+- `mdcontext structure <path>` — show document structure
+**Deliverables:**
+- CLI with subcommands
+- Output formats (text, JSON)
+- Config file support
+### 6.2: Daemon Mode
+- `mdcontext daemon` — run as background service
+- HTTP/IPC API for queries
+- Auto-rebuild on changes
+**Deliverables:**
+- Daemon process management
+- Query API (REST or IPC)
+- Health check endpoint
+### 6.3: MCP Server
+- Expose tools for Claude integration
+- `md_search` — semantic search
+- `md_context` — get context for file/section
+- `md_structure` — document outline
+**Deliverables:**
+- MCP server implementation
+- Tool definitions
+- Claude Desktop/Code integration docs
+### 6.4: HumanWork Skills
+- `hw-md-search` — search markdown in .humanwork/
+- `hw-md-context` — get context for task/session
+- Integration with session-history
+**Deliverables:**
+- Skill definitions
+- Integration with existing HumanWork skills
+- Documentation
+---
+## Progress
+| Phase              | Status      | Plans | Completed |
+| ------------------ | ----------- | ----- | --------- |
+| 1. Core Parsing    | Not started | 4     | -         |
+| 2. Index & Storage | Not started | 4     | -         |
+| 3. Semantic Layer  | Not started | 5     | -         |
+| 4. Summarization   | Not started | 4     | -         |
+| 5. Analytics       | Not started | 4     | -         |
+| 6. Integration     | Not started | 4     | -         |
+**Total: 25 tasks across 6 phases**
+---
+## Dependencies
+```
+Phase 1 ─────────────────────────────────────────┐
+    │                                            │
+    ▼                                            │
+Phase 2 ──────────────┐                          │
+    │                 │                          │
+    ▼                 ▼                          │
+Phase 3          Phase 4                         │
+    │                 │                          │
+    └────────┬────────┘                          │
+             ▼                                   │
+         Phase 5 ◄───────────────────────────────┘
+             │
+             ▼
+         Phase 6
+```
+- Phase 2 depends on Phase 1 (it needs the parser for indexing)
+- Phases 3 & 4 can run in parallel after Phase 2
+- Phase 5 spans all phases (analytics hooks are added throughout)
+- Phase 6 integrates everything
+---
+_Created: 2025-01-18_

package/docs/test-links.md ADDED Viewed

@@ -0,0 +1,9 @@
+# Test Links
+This document links to other files.
+## Links
+- See [PROJECT.md](./PROJECT.md) for project overview
+- See [DESIGN.md](./DESIGN.md) for design details
+- See [ROADMAP.md](./ROADMAP.md) for roadmap

package/package.json CHANGED Viewed

@@ -1,19 +1,78 @@
 {
   "name": "mdcontext",
-  "version": "0.0.1",
-  "description": "Intelligent markdown context extraction for LLMs. Coming soon.",
+  "version": "0.1.0",
+  "description": "Token-efficient markdown analysis tool for LLM consumption",
+  "type": "module",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/mdcontext/mdcontext.git"
+  },
+  "publishConfig": {
+    "access": "public",
+    "provenance": true
+  },
+  "main": "./dist/index.js",
+  "types": "./dist/index.d.ts",
+  "bin": {
+    "mdcontext": "./dist/cli/main.js",
+    "mdcontext-mcp": "./dist/mcp/server.js"
+  },
   "keywords": [
     "markdown",
-    "context",
     "llm",
-    "semantic-search",
+    "ai",
+    "analysis",
+    "search",
     "embeddings"
   ],
-  "author": "alphab",
+  "author": "",
   "license": "MIT",
-  "repository": {
-    "type": "git",
-    "url": "https://github.com/mdcontext/mdcontext"
+  "engines": {
+    "node": ">=18.0.0"
+  },
+  "dependencies": {
+    "@effect/cli": "^0.73.0",
+    "@effect/platform": "^0.94.0",
+    "@effect/platform-node": "^0.104.0",
+    "@modelcontextprotocol/sdk": "^1.25.2",
+    "chokidar": "^4.0.3",
+    "effect": "^3.19.0",
+    "gray-matter": "^4.0.3",
+    "hnswlib-node": "^3.0.0",
+    "openai": "^6.16.0",
+    "remark": "^15.0.1",
+    "remark-gfm": "^4.0.0",
+    "remark-parse": "^11.0.0",
+    "tiktoken": "^1.0.18",
+    "unified": "^11.0.5",
+    "unist-util-visit": "^5.0.0"
+  },
+  "devDependencies": {
+    "@biomejs/biome": "^2.3.11",
+    "@changesets/changelog-github": "^0.5.2",
+    "@changesets/cli": "^2.29.8",
+    "@types/mdast": "^4.0.4",
+    "@types/node": "^22.10.0",
+    "tsup": "^8.5.1",
+    "typescript": "^5.7.2",
+    "vitest": "^2.1.8"
   },
-  "homepage": "https://mdcontext.com"
-}
+  "scripts": {
+    "postinstall": "node scripts/rebuild-hnswlib.js",
+    "build": "tsup src/cli/main.ts src/mcp/server.ts src/index.ts --format esm --dts",
+    "dev": "tsc --watch",
+    "test": "vitest run",
+    "test:all": "INCLUDE_EMBED_TESTS=true vitest run",
+    "test:rebuild": "REBUILD_TEST_INDEX=true vitest run",
+    "test:all:rebuild": "REBUILD_TEST_INDEX=true INCLUDE_EMBED_TESTS=true vitest run",
+    "test:watch": "vitest",
+    "typecheck": "tsc --noEmit",
+    "lint": "biome lint --write .",
+    "format": "biome format --write . && biome check --write .",
+    "check": "pnpm format && pnpm lint && pnpm typecheck",
+    "clean": "rm -rf dist",
+    "quality": "pnpm build && npx publint && npx attw --pack .",
+    "changeset": "changeset",
+    "release": "changeset publish"
+  }
+}

package/pnpm-workspace.yaml ADDED Viewed

@@ -0,0 +1,5 @@
+ignoredBuiltDependencies:
+  - '@parcel/watcher'
+  - esbuild
+  - hnswlib-node
+  - msgpackr-extract