npm - mdcontext - Versions diffs - 0.0.1 → 0.1.0 - Mend

mdcontext 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (140) hide show

package/.changeset/README.md +28 -0
package/.changeset/config.json +11 -0
package/.github/workflows/ci.yml +83 -0
package/.github/workflows/release.yml +113 -0
package/.tldrignore +112 -0
package/AGENTS.md +46 -0
package/BACKLOG.md +338 -0
package/README.md +231 -11
package/biome.json +36 -0
package/cspell.config.yaml +14 -0
package/dist/chunk-KRYIFLQR.js +92 -0
package/dist/chunk-S7E6TFX6.js +742 -0
package/dist/chunk-VVTGZNBT.js +1519 -0
package/dist/cli/main.d.ts +1 -0
package/dist/cli/main.js +2015 -0
package/dist/index.d.ts +266 -0
package/dist/index.js +86 -0
package/dist/mcp/server.d.ts +1 -0
package/dist/mcp/server.js +376 -0
package/docs/019-USAGE.md +586 -0
package/docs/020-current-implementation.md +364 -0
package/docs/021-DOGFOODING-FINDINGS.md +175 -0
package/docs/BACKLOG.md +80 -0
package/docs/DESIGN.md +439 -0
package/docs/PROJECT.md +88 -0
package/docs/ROADMAP.md +407 -0
package/docs/test-links.md +9 -0
package/package.json +69 -10
package/pnpm-workspace.yaml +5 -0
package/research/config-analysis/01-current-implementation.md +470 -0
package/research/config-analysis/02-strategy-recommendation.md +428 -0
package/research/config-analysis/03-task-candidates.md +715 -0
package/research/config-analysis/033-research-configuration-management.md +828 -0
package/research/config-analysis/034-research-effect-cli-config.md +1504 -0
package/research/config-analysis/04-consolidated-task-candidates.md +277 -0
package/research/dogfood/consolidated-tool-evaluation.md +373 -0
package/research/dogfood/strategy-a/a-synthesis.md +184 -0
package/research/dogfood/strategy-a/a1-docs.md +226 -0
package/research/dogfood/strategy-a/a2-amorphic.md +156 -0
package/research/dogfood/strategy-a/a3-llm.md +164 -0
package/research/dogfood/strategy-b/b-synthesis.md +228 -0
package/research/dogfood/strategy-b/b1-architecture.md +207 -0
package/research/dogfood/strategy-b/b2-gaps.md +258 -0
package/research/dogfood/strategy-b/b3-workflows.md +250 -0
package/research/dogfood/strategy-c/c-synthesis.md +451 -0
package/research/dogfood/strategy-c/c1-explorer.md +192 -0
package/research/dogfood/strategy-c/c2-diver-memory.md +145 -0
package/research/dogfood/strategy-c/c3-diver-control.md +148 -0
package/research/dogfood/strategy-c/c4-diver-failure.md +151 -0
package/research/dogfood/strategy-c/c5-diver-execution.md +221 -0
package/research/dogfood/strategy-c/c6-diver-org.md +221 -0
package/research/effect-cli-error-handling.md +845 -0
package/research/effect-errors-as-values.md +943 -0
package/research/errors-task-analysis/00-consolidated-tasks.md +207 -0
package/research/errors-task-analysis/cli-commands-analysis.md +909 -0
package/research/errors-task-analysis/embeddings-analysis.md +709 -0
package/research/errors-task-analysis/index-search-analysis.md +812 -0
package/research/mdcontext-error-analysis.md +521 -0
package/research/npm_publish/011-npm-workflow-research-agent2.md +792 -0
package/research/npm_publish/012-npm-workflow-research-agent1.md +530 -0
package/research/npm_publish/013-npm-workflow-research-agent3.md +722 -0
package/research/npm_publish/014-npm-workflow-synthesis.md +556 -0
package/research/npm_publish/031-npm-workflow-task-analysis.md +134 -0
package/research/semantic-search/002-research-embedding-models.md +490 -0
package/research/semantic-search/003-research-rag-alternatives.md +523 -0
package/research/semantic-search/004-research-vector-search.md +841 -0
package/research/semantic-search/032-research-semantic-search.md +427 -0
package/research/task-management-2026/00-synthesis-recommendations.md +295 -0
package/research/task-management-2026/01-ai-workflow-tools.md +416 -0
package/research/task-management-2026/02-agent-framework-patterns.md +476 -0
package/research/task-management-2026/03-lightweight-file-based.md +567 -0
package/research/task-management-2026/04-established-tools-ai-features.md +541 -0
package/research/task-management-2026/linear/01-core-features-workflow.md +771 -0
package/research/task-management-2026/linear/02-api-integrations.md +930 -0
package/research/task-management-2026/linear/03-ai-features.md +368 -0
package/research/task-management-2026/linear/04-pricing-setup.md +205 -0
package/research/task-management-2026/linear/05-usage-patterns-best-practices.md +605 -0
package/scripts/rebuild-hnswlib.js +63 -0
package/src/cli/argv-preprocessor.test.ts +210 -0
package/src/cli/argv-preprocessor.ts +202 -0
package/src/cli/cli.test.ts +430 -0
package/src/cli/commands/backlinks.ts +54 -0
package/src/cli/commands/context.ts +197 -0
package/src/cli/commands/index-cmd.ts +300 -0
package/src/cli/commands/index.ts +13 -0
package/src/cli/commands/links.ts +52 -0
package/src/cli/commands/search.ts +451 -0
package/src/cli/commands/stats.ts +146 -0
package/src/cli/commands/tree.ts +107 -0
package/src/cli/flag-schemas.ts +275 -0
package/src/cli/help.ts +386 -0
package/src/cli/index.ts +9 -0
package/src/cli/main.ts +145 -0
package/src/cli/options.ts +31 -0
package/src/cli/typo-suggester.test.ts +105 -0
package/src/cli/typo-suggester.ts +130 -0
package/src/cli/utils.ts +126 -0
package/src/core/index.ts +1 -0
package/src/core/types.ts +140 -0
package/src/embeddings/index.ts +8 -0
package/src/embeddings/openai-provider.ts +165 -0
package/src/embeddings/semantic-search.ts +583 -0
package/src/embeddings/types.ts +82 -0
package/src/embeddings/vector-store.ts +299 -0
package/src/index/index.ts +4 -0
package/src/index/indexer.ts +446 -0
package/src/index/storage.ts +196 -0
package/src/index/types.ts +109 -0
package/src/index/watcher.ts +131 -0
package/src/index.ts +8 -0
package/src/mcp/server.ts +483 -0
package/src/parser/index.ts +1 -0
package/src/parser/parser.test.ts +291 -0
package/src/parser/parser.ts +395 -0
package/src/parser/section-filter.ts +270 -0
package/src/search/query-parser.test.ts +260 -0
package/src/search/query-parser.ts +319 -0
package/src/search/searcher.test.ts +182 -0
package/src/search/searcher.ts +602 -0
package/src/summarize/budget-bugs.test.ts +620 -0
package/src/summarize/formatters.ts +419 -0
package/src/summarize/index.ts +20 -0
package/src/summarize/summarizer.test.ts +275 -0
package/src/summarize/summarizer.ts +528 -0
package/src/summarize/verify-bugs.test.ts +238 -0
package/src/utils/index.ts +1 -0
package/src/utils/tokens.test.ts +142 -0
package/src/utils/tokens.ts +186 -0
package/tests/fixtures/cli/.mdcontext/config.json +8 -0
package/tests/fixtures/cli/.mdcontext/indexes/documents.json +33 -0
package/tests/fixtures/cli/.mdcontext/indexes/links.json +12 -0
package/tests/fixtures/cli/.mdcontext/indexes/sections.json +233 -0
package/tests/fixtures/cli/.mdcontext/vectors.bin +0 -0
package/tests/fixtures/cli/.mdcontext/vectors.meta.json +1264 -0
package/tests/fixtures/cli/README.md +9 -0
package/tests/fixtures/cli/api-reference.md +11 -0
package/tests/fixtures/cli/getting-started.md +11 -0
package/tsconfig.json +26 -0
package/vitest.config.ts +21 -0
package/vitest.setup.ts +12 -0

package/src/summarize/summarizer.ts ADDED Viewed

@@ -0,0 +1,528 @@
+/**
+ * Summarization engine for mdcontext
+ *
+ * Provides hierarchical summarization and multi-document context assembly
+ */
+import * as fs from 'node:fs/promises'
+import * as path from 'node:path'
+import { Effect } from 'effect'
+import type { MdDocument, MdSection } from '../core/types.js'
+import { parseFile } from '../parser/parser.js'
+import { countTokensApprox } from '../utils/tokens.js'
+import { formatSummary as formatSummaryImpl } from './formatters.js'
+// ============================================================================
+// Types
+// ============================================================================
+export type CompressionLevel = 'brief' | 'summary' | 'full'
+export interface SummarizeOptions {
+  /** Compression level */
+  readonly level?: CompressionLevel | undefined
+  /** Maximum tokens for output */
+  readonly maxTokens?: number | undefined
+}
+export interface SectionSummary {
+  readonly heading: string
+  readonly level: number
+  readonly originalTokens: number
+  readonly summaryTokens: number
+  readonly summary: string
+  readonly children: readonly SectionSummary[]
+  readonly hasCode: boolean
+  readonly hasList: boolean
+  readonly hasTable: boolean
+}
+export interface DocumentSummary {
+  readonly path: string
+  readonly title: string
+  readonly originalTokens: number
+  readonly summaryTokens: number
+  readonly compressionRatio: number
+  readonly sections: readonly SectionSummary[]
+  readonly keyTopics: readonly string[]
+  /** True if content was truncated to fit budget */
+  readonly truncated?: boolean
+  /** Number of sections that were omitted due to budget constraints */
+  readonly truncatedCount?: number
+}
+export interface AssembleContextOptions {
+  /** Total token budget */
+  readonly budget: number
+  /** Compression level for each source */
+  readonly level?: CompressionLevel | undefined
+}
+export interface AssembledContext {
+  readonly sources: readonly SourceContext[]
+  readonly totalTokens: number
+  readonly budget: number
+  readonly overflow: readonly string[]
+}
+export interface SourceContext {
+  readonly path: string
+  readonly title: string
+  readonly tokens: number
+  readonly content: string
+}
+// ============================================================================
+// Constants
+// ============================================================================
+/** Token budgets per compression level */
+const TOKEN_BUDGETS: Record<CompressionLevel, number> = {
+  brief: 100,
+  summary: 500,
+  full: Infinity,
+}
+/** Minimum character length for a sentence to be considered meaningful */
+const MIN_SENTENCE_LENGTH = 10
+/** Score weights for sentence importance heuristics */
+const SENTENCE_SCORE_DEFINITION = 2 // sentences with colons (definitions)
+const SENTENCE_SCORE_PROPER_START = 1 // sentences starting with capital
+const SENTENCE_SCORE_MEDIUM_LENGTH = 1 // sentences in ideal length range
+const SENTENCE_SCORE_EMPHASIS = 1 // sentences with emphasis or code
+/** Ideal sentence length range for summaries */
+const SENTENCE_LENGTH_MIN = 50
+const SENTENCE_LENGTH_MAX = 200
+/** Target compression ratio for summaries (30% of original) */
+const SUMMARY_COMPRESSION_RATIO = 0.3
+/** Minimum tokens for any section summary */
+const MIN_SECTION_TOKENS = 20
+/** Minimum sentences to include in any summary */
+const MIN_SUMMARY_SENTENCES = 2
+/** Approximate tokens per sentence (for calculating max sentences) */
+const TOKENS_PER_SENTENCE_ESTIMATE = 30
+/** Topic heading length constraints */
+const MIN_TOPIC_LENGTH = 2
+const MAX_TOPIC_LENGTH = 50
+/** Maximum topics to extract from a document */
+const MAX_TOPICS = 10
+/** Minimum remaining budget to include partial content */
+const MIN_PARTIAL_BUDGET = 50
+// ============================================================================
+// Section Summarization
+// ============================================================================
+const extractKeyPoints = (content: string, maxSentences: number): string[] => {
+  // Split into sentences
+  const sentences = content
+    .replace(/\n+/g, ' ')
+    .split(/(?<=[.!?])\s+/)
+    .filter((s) => s.trim().length > MIN_SENTENCE_LENGTH)
+  if (sentences.length <= maxSentences) {
+    return sentences
+  }
+  // Simple heuristic: prefer sentences with key indicators
+  const scored = sentences.map((s) => {
+    let score = 0
+    // Prefer sentences with:
+    if (s.includes(':')) score += SENTENCE_SCORE_DEFINITION
+    if (/^[A-Z]/.test(s)) score += SENTENCE_SCORE_PROPER_START
+    if (s.length > SENTENCE_LENGTH_MIN && s.length < SENTENCE_LENGTH_MAX)
+      score += SENTENCE_SCORE_MEDIUM_LENGTH
+    if (/\*\*|`/.test(s)) score += SENTENCE_SCORE_EMPHASIS
+    return { sentence: s, score }
+  })
+  // Sort by score and take top sentences
+  scored.sort((a, b) => b.score - a.score)
+  return scored.slice(0, maxSentences).map((s) => s.sentence)
+}
+const summarizeSection = (
+  section: MdSection,
+  level: CompressionLevel,
+): SectionSummary => {
+  const originalTokens = section.metadata.tokenCount
+  // Get children summaries first
+  const children = section.children.map((child) =>
+    summarizeSection(child, level),
+  )
+  // Calculate target tokens based on level
+  const targetTokens = Math.min(
+    TOKEN_BUDGETS[level],
+    Math.max(originalTokens * SUMMARY_COMPRESSION_RATIO, MIN_SECTION_TOKENS),
+  )
+  let summary: string
+  if (level === 'full' || originalTokens <= targetTokens) {
+    // Include full content for "full" level or if already small
+    // Use plainText instead of content to avoid including the heading markdown
+    // (the heading is output separately by the formatter)
+    summary = section.plainText
+  } else if (level === 'brief') {
+    // Just heading and metadata for brief
+    const meta: string[] = []
+    if (section.metadata.hasCode) meta.push('code')
+    if (section.metadata.hasList) meta.push('list')
+    if (section.metadata.hasTable) meta.push('table')
+    summary = meta.length > 0 ? `[${meta.join(', ')}]` : ''
+  } else {
+    // Summary level: extract key points
+    const maxSentences = Math.max(
+      MIN_SUMMARY_SENTENCES,
+      Math.floor(targetTokens / TOKENS_PER_SENTENCE_ESTIMATE),
+    )
+    const keyPoints = extractKeyPoints(section.plainText, maxSentences)
+    if (keyPoints.length > 0) {
+      summary = keyPoints.join(' ')
+    } else {
+      // Fallback: truncate
+      const words = section.plainText.split(/\s+/).slice(0, targetTokens)
+      summary =
+        words.join(' ') +
+        (words.length < section.plainText.split(/\s+/).length ? '...' : '')
+    }
+  }
+  const summaryTokens = countTokensApprox(summary)
+  return {
+    heading: section.heading,
+    level: section.level,
+    originalTokens,
+    summaryTokens,
+    summary,
+    children,
+    hasCode: section.metadata.hasCode,
+    hasList: section.metadata.hasList,
+    hasTable: section.metadata.hasTable,
+  }
+}
+// ============================================================================
+// Document Summarization
+// ============================================================================
+const extractTopics = (document: MdDocument): string[] => {
+  const topics: Set<string> = new Set()
+  // Extract from headings
+  const processSection = (section: MdSection) => {
+    // Clean heading and add as topic
+    const cleanHeading = section.heading
+      .replace(/[:#\-_]/g, ' ')
+      .trim()
+      .toLowerCase()
+    if (
+      cleanHeading.length > MIN_TOPIC_LENGTH &&
+      cleanHeading.length < MAX_TOPIC_LENGTH
+    ) {
+      topics.add(cleanHeading)
+    }
+    for (const child of section.children) {
+      processSection(child)
+    }
+  }
+  for (const section of document.sections) {
+    processSection(section)
+  }
+  // Also extract from frontmatter tags if present
+  const frontmatter = document.frontmatter as Record<string, unknown>
+  if (frontmatter.tags && Array.isArray(frontmatter.tags)) {
+    for (const tag of frontmatter.tags) {
+      if (typeof tag === 'string') {
+        topics.add(tag.toLowerCase())
+      }
+    }
+  }
+  return Array.from(topics).slice(0, MAX_TOPICS)
+}
+export const summarizeDocument = (
+  document: MdDocument,
+  options: SummarizeOptions = {},
+): DocumentSummary => {
+  const level = options.level ?? 'summary'
+  const maxTokens = options.maxTokens ?? TOKEN_BUDGETS[level]
+  // Summarize all sections
+  const allSections = document.sections.map((s) => summarizeSection(s, level))
+  // Calculate totals and collect all flattened sections with their tokens
+  const originalTokens = document.metadata.tokenCount
+  let totalSummaryTokens = 0
+  const flatSections: SectionSummary[] = []
+  const flattenWithTokens = (section: SectionSummary) => {
+    flatSections.push(section)
+    totalSummaryTokens += section.summaryTokens
+    for (const child of section.children) {
+      flattenWithTokens(child)
+    }
+  }
+  for (const section of allSections) {
+    flattenWithTokens(section)
+  }
+  // Calculate formatting overhead dynamically based on actual content
+  // Header includes: "# {title}\nPath: {path}\nTokens: X (Y% reduction from Z)\n"
+  // Plus topics line if present, plus possible truncation warning
+  const topics = extractTopics(document)
+  const headerTemplate = `# ${document.title}\nPath: ${document.path}\nTokens: 9999 (99% reduction from ${document.metadata.tokenCount})\n`
+  const topicsLine =
+    topics.length > 0 ? `\n**Topics:** ${topics.join(', ')}\n` : ''
+  const truncationWarning =
+    '\n⚠️ TRUNCATED: 999 sections omitted to fit token budget'
+  // Add all possible overhead plus a generous safety margin (20% of overhead + 20 base)
+  // This accounts for variance in token estimation
+  const baseOverhead = countTokensApprox(
+    headerTemplate + topicsLine + truncationWarning,
+  )
+  const formattingOverhead = Math.ceil(baseOverhead * 1.2) + 20
+  const contentBudget = maxTokens - formattingOverhead
+  // If over budget, truncate sections to fit
+  let truncated = false
+  let truncatedCount = 0
+  let sections: SectionSummary[]
+  let summaryTokens: number
+  if (totalSummaryTokens > contentBudget && contentBudget > 0) {
+    // Need to truncate - use greedy tree traversal that can include children
+    // even when parent doesn't fit (orphan rescue)
+    let tokensUsed = 0
+    // Process tree with orphan rescue: if parent doesn't fit, still try children
+    const truncateSections = (
+      sectionList: readonly SectionSummary[],
+    ): SectionSummary[] => {
+      const result: SectionSummary[] = []
+      for (const section of sectionList) {
+        const sectionOwnTokens = section.summaryTokens
+        const fitsInBudget = tokensUsed + sectionOwnTokens <= contentBudget
+        if (fitsInBudget) {
+          // Section fits - include it and recursively process children
+          tokensUsed += sectionOwnTokens
+          const truncatedChildren = truncateSections(section.children)
+          result.push({
+            ...section,
+            children: truncatedChildren,
+          })
+        } else {
+          // Section doesn't fit - but still try to rescue children (orphan rescue)
+          truncatedCount++
+          const rescuedChildren = truncateSections(section.children)
+          // Add rescued children as top-level items in result
+          result.push(...rescuedChildren)
+        }
+      }
+      return result
+    }
+    sections = truncateSections(allSections)
+    summaryTokens = tokensUsed
+    truncated = truncatedCount > 0
+  } else {
+    sections = allSections
+    summaryTokens = totalSummaryTokens
+  }
+  const compressionRatio =
+    originalTokens > 0 ? 1 - summaryTokens / originalTokens : 0
+  const result: DocumentSummary = {
+    path: document.path,
+    title: document.title,
+    originalTokens,
+    summaryTokens,
+    compressionRatio,
+    sections,
+    keyTopics: topics,
+  }
+  if (truncated) {
+    return {
+      ...result,
+      truncated: true,
+      truncatedCount,
+    }
+  }
+  return result
+}
+export const summarizeFile = (
+  filePath: string,
+  options: SummarizeOptions = {},
+): Effect.Effect<DocumentSummary, Error> =>
+  Effect.gen(function* () {
+    const document = yield* parseFile(filePath).pipe(
+      Effect.mapError((e) => new Error(`${e._tag}: ${e.message}`)),
+    )
+    return summarizeDocument(document, options)
+  })
+// ============================================================================
+// Format Summary for Output (re-exported from formatters.ts)
+// ============================================================================
+export { type FormatSummaryOptions, formatSummary } from './formatters.js'
+// ============================================================================
+// Multi-Document Context Assembly
+// ============================================================================
+export const assembleContext = (
+  rootPath: string,
+  sourcePaths: readonly string[],
+  options: AssembleContextOptions,
+): Effect.Effect<AssembledContext, Error> =>
+  Effect.gen(function* () {
+    const budget = options.budget
+    const level = options.level ?? 'summary'
+    const sources: SourceContext[] = []
+    const overflow: string[] = []
+    let totalTokens = 0
+    // Calculate per-source budget (even distribution)
+    const perSourceBudget = Math.floor(budget / sourcePaths.length)
+    for (const sourcePath of sourcePaths) {
+      const resolvedPath = path.isAbsolute(sourcePath)
+        ? sourcePath
+        : path.join(rootPath, sourcePath)
+      try {
+        const summary = yield* summarizeFile(resolvedPath, {
+          level,
+          maxTokens: perSourceBudget,
+        })
+        const content = formatSummaryImpl(summary)
+        // Count actual formatted output tokens, not pre-format summary tokens
+        const tokens = countTokensApprox(content)
+        if (totalTokens + tokens <= budget) {
+          sources.push({
+            path: path.relative(rootPath, resolvedPath),
+            title: summary.title,
+            tokens,
+            content,
+          })
+          totalTokens += tokens
+        } else {
+          // Over budget
+          const remaining = budget - totalTokens
+          if (remaining > MIN_PARTIAL_BUDGET) {
+            // Include partial if we have some room
+            const briefSummary = yield* summarizeFile(resolvedPath, {
+              level: 'brief',
+              maxTokens: remaining,
+            })
+            const briefContent = formatSummaryImpl(briefSummary)
+            // Count actual formatted output tokens, not pre-format summary tokens
+            const briefTokens = countTokensApprox(briefContent)
+            sources.push({
+              path: path.relative(rootPath, resolvedPath),
+              title: briefSummary.title,
+              tokens: briefTokens,
+              content: briefContent,
+            })
+            totalTokens += briefTokens
+          } else {
+            overflow.push(path.relative(rootPath, resolvedPath))
+          }
+        }
+      } catch (_e) {
+        // Skip files that can't be processed
+        overflow.push(sourcePath)
+      }
+    }
+    return {
+      sources,
+      totalTokens,
+      budget,
+      overflow,
+    }
+  })
+// ============================================================================
+// Format Assembled Context (re-exported from formatters.ts)
+// ============================================================================
+export { formatAssembledContext } from './formatters.js'
+// ============================================================================
+// Measure Token Reduction
+// ============================================================================
+export interface TokenReductionReport {
+  readonly originalTokens: number
+  readonly summaryTokens: number
+  readonly reduction: number
+  readonly reductionPercent: number
+}
+export const measureReduction = async (
+  filePath: string,
+  level: CompressionLevel = 'summary',
+): Promise<TokenReductionReport> => {
+  // Read original content
+  const originalContent = await fs.readFile(filePath, 'utf-8')
+  const originalTokens = countTokensApprox(originalContent)
+  // Get summary
+  const result = await Effect.runPromise(
+    summarizeFile(filePath, { level }).pipe(
+      Effect.catchAll(() => Effect.succeed(null)),
+    ),
+  )
+  if (!result) {
+    return {
+      originalTokens,
+      summaryTokens: originalTokens,
+      reduction: 0,
+      reductionPercent: 0,
+    }
+  }
+  const summaryTokens = result.summaryTokens
+  const reduction = originalTokens - summaryTokens
+  const reductionPercent = originalTokens > 0 ? reduction / originalTokens : 0
+  return {
+    originalTokens,
+    summaryTokens,
+    reduction,
+    reductionPercent,
+  }
+}