npm - inkdex - Version diff - 0.0.1 → 0.1.0 - Mend

inkdex 0.0.1 → 0.1.0

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in their respective public registries.

Files changed (35)

package/LICENSE +1 -1
package/dist/store/db.js +1 -1
package/package.json +13 -3
package/.claude/settings.local.json +0 -15
package/.github/workflows/ci.yml +0 -73
package/.github/workflows/release.yml +0 -65
package/AGENTS.md +0 -32
package/biome.json +0 -43
package/inkdex-0.0.1.tgz +0 -0
package/release.sh +0 -33
package/src/cli.ts +0 -45
package/src/embedder/embedder.ts +0 -52
package/src/ingest/chunker.ts +0 -158
package/src/ingest/index-docs.ts +0 -120
package/src/logger.ts +0 -39
package/src/search/search.ts +0 -93
package/src/server.ts +0 -96
package/src/store/db.ts +0 -217
package/src/types.ts +0 -16
package/src/version.ts +0 -16
package/test/fixtures/docs/api.md +0 -26
package/test/fixtures/docs/getting-started.md +0 -13
package/test/helpers/index.ts +0 -14
package/test/integration/embedder.test.ts +0 -52
package/test/integration/server.test.ts +0 -125
package/test/unit/chunker.test.ts +0 -193
package/test/unit/db.test.ts +0 -190
package/test/unit/index-docs.test.ts +0 -120
package/test/unit/logger.test.ts +0 -11
package/test/unit/search.test.ts +0 -93
package/test/unit/version.test.ts +0 -16
package/test-docs/api-reference.md +0 -76
package/test-docs/deployment.md +0 -55
package/test-docs/getting-started.md +0 -52
package/tsconfig.json +0 -18

package/LICENSE CHANGED Viewed

@@ -175,7 +175,7 @@
    END OF TERMS AND CONDITIONS
-   Copyright 2026 Anton Lundén
+   Copyright 2026 Kandobyte
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.

package/dist/store/db.js CHANGED Viewed

@@ -9,7 +9,7 @@ export function dbPath(docsPath) {
     const hash = createHash("sha256").update(docsPath).digest("hex").slice(0, 12);
     return join(STORE_DIR, `${hash}.db`);
 }
-const SCHEMA_VERSION = 2;
+const SCHEMA_VERSION = 1;
 const CHUNK_COLUMNS = "id, document_path, file_heading, heading, text, metadata, embedding";
 let db;
 let stmts;

package/package.json CHANGED Viewed

@@ -1,7 +1,19 @@
 {
   "name": "inkdex",
-  "version": "0.0.1",
+  "version": "0.1.0",
   "description": "MCP server that makes your markdown docs searchable",
+  "license": "Apache-2.0",
+  "author": "Anton Lundén",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/kandobyte/inkdex"
+  },
+  "bugs": {
+    "url": "https://github.com/kandobyte/inkdex/issues"
+  },
+  "files": [
+    "dist"
+  ],
   "type": "module",
   "main": "dist/cli.js",
   "bin": {
@@ -30,8 +42,6 @@
     "markdown",
     "rag"
   ],
-  "author": "Anton Lundén",
-  "license": "Apache-2.0",
   "dependencies": {
     "@huggingface/transformers": "^3.8.1",
     "@modelcontextprotocol/sdk": "^1.25.3",

package/.claude/settings.local.json DELETED Viewed

@@ -1,15 +0,0 @@
-{
-  "permissions": {
-    "allow": [
-      "WebFetch(domain:github.com)",
-      "mcp__acp__Bash",
-      "mcp__acp__Write",
-      "mcp__acp__Edit",
-      "WebFetch(domain:raw.githubusercontent.com)",
-      "WebFetch(domain:api.github.com)",
-      "WebFetch(domain:www.firecrawl.dev)",
-      "WebFetch(domain:unstructured.io)",
-      "WebFetch(domain:www.npmjs.com)"
-    ]
-  }
-}

package/.github/workflows/ci.yml DELETED Viewed

@@ -1,73 +0,0 @@
-name: CI
-on:
-  push:
-    branches: [main]
-    paths:
-      - "src/**"
-      - "test/**"
-      - "package.json"
-      - "package-lock.json"
-      - "tsconfig.json"
-      - ".github/workflows/**"
-  pull_request:
-    branches: [main]
-    paths:
-      - "src/**"
-      - "test/**"
-      - "package.json"
-      - "package-lock.json"
-      - "tsconfig.json"
-      - ".github/workflows/**"
-jobs:
-  audit:
-    name: Security Audit
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v5
-      - uses: actions/setup-node@v6
-        with:
-          node-version: "22"
-      - run: npm audit --audit-level=critical
-  check:
-    name: Lint & Format
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v5
-      - uses: actions/setup-node@v6
-        with:
-          node-version: "22"
-      - run: npm ci
-      - run: npm run check
-  test-unit:
-    name: Unit Tests
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v5
-      - uses: actions/setup-node@v6
-        with:
-          node-version: "22"
-      - run: npm ci
-      - run: npm run test:unit
-  test-integration:
-    name: Integration Tests
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v5
-      - uses: actions/setup-node@v6
-        with:
-          node-version: "22"
-      - run: npm ci
-      - run: npm run test:integration

package/.github/workflows/release.yml DELETED Viewed

@@ -1,65 +0,0 @@
-name: Release
-on:
-  push:
-    tags:
-      - "v*"
-permissions:
-  contents: write
-  id-token: write
-jobs:
-  check:
-    name: Check
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v5
-      - uses: actions/setup-node@v6
-        with:
-          node-version: "22"
-      - run: npm ci
-      - run: npm run check
-  test:
-    name: Test
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v5
-      - uses: actions/setup-node@v6
-        with:
-          node-version: "22"
-      - run: npm ci
-      - run: npm run test
-  npm:
-    name: npm
-    needs: [check, test]
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v5
-      - uses: actions/setup-node@v6
-        with:
-          node-version: "22"
-          registry-url: "https://registry.npmjs.org"
-      - run: npm install -g npm@latest
-      - run: npm ci
-      - run: npm run build
-      - run: npm publish --access public --provenance
-  release:
-    name: Release
-    needs: [npm]
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v5
-      - uses: softprops/action-gh-release@v2
-        with:
-          generate_release_notes: true

package/AGENTS.md DELETED Viewed

@@ -1,32 +0,0 @@
-# AGENTS.md
-- Use [Conventional Commits](https://www.conventionalcommits.org/) (`feat:`, `fix:`, `chore:`, `docs:`)
-## General Coding Guidelines
-- Maintain consistency with existing patterns and style in the codebase
-- Use TypeScript strictly: enable `strict: true`, prefer `unknown` over `any`, avoid type assertions unless necessary
-- Write comments that explain *why*, not *what*—update or remove stale comments when modifying code
-- Prefer renaming over commenting: if code needs a comment to explain what it does, rename instead
-- Use JSDoc (`/** */`) only for exported functions/types; use `//` for implementation notes
-- Use `@package` on exports internal to their feature package
-- Include `@example` in JSDoc when input/output isn't obvious from the signature
-- No commented-out code, no TODO/FIXME without a linked issue
-- Naming: camelCase functions/variables, PascalCase types/classes, UPPER_SNAKE_CASE constants; prefix booleans with `is`/`has`/`should`
-- Keep functions focused and single-responsibility; favor immutable patterns (`readonly`, no mutation)
-- Handle errors consistently: prefer typed errors or Result patterns, handle promise rejections explicitly
-- Use modern syntax: optional chaining (`?.`), nullish coalescing (`??`), `satisfies`, ES modules
-- After refactoring, run `npm run test` to verify tests pass and coverage requirements are met
-- Write tests covering happy path, edge cases, and error conditions with descriptive names
-- Test should validate observable behavior not implementation details
-## Development
-```bash
-npm install
-npm run build       # TypeScript compilation
-npm run dev         # Run via tsx
-npm run check       # Biome lint
-npm run format      # Biome format
-npm test            # Unit + integration
-```

package/biome.json DELETED Viewed

@@ -1,43 +0,0 @@
-{
-  "$schema": "https://biomejs.dev/schemas/2.3.14/schema.json",
-  "vcs": {
-    "enabled": true,
-    "clientKind": "git",
-    "useIgnoreFile": true
-  },
-  "assist": { "actions": { "source": { "organizeImports": "on" } } },
-  "formatter": {
-    "indentStyle": "space",
-    "indentWidth": 2
-  },
-  "linter": {
-    "enabled": true,
-    "rules": {
-      "recommended": true,
-      "suspicious": {
-        "noExplicitAny": "error"
-      },
-      "performance": {
-        "noDelete": "off"
-      },
-      "correctness": {
-        "noPrivateImports": "error"
-      }
-    }
-  },
-  "overrides": [
-    {
-      "includes": ["test/**"],
-      "linter": {
-        "rules": {
-          "correctness": {
-            "noPrivateImports": "off"
-          }
-        }
-      }
-    }
-  ],
-  "files": {
-    "includes": ["**", "!**/dist", "!**/node_modules"]
-  }
-}

package/inkdex-0.0.1.tgz DELETED Viewed

Binary file

package/release.sh DELETED Viewed

@@ -1,33 +0,0 @@
-#!/usr/bin/env bash
-set -euo pipefail
-if [[ "${1:-}" =~ ^(-h|--help)$ ]] || [[ -z "${1:-}" ]]; then
-  echo "Usage: ./release.sh <version>"
-  echo "  version: X.Y.Z (e.g., 0.1.0)"
-  exit 0
-fi
-VERSION="$1"
-if ! [[ "$VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
-  echo "Error: Version must be in format X.Y.Z (e.g., 0.1.0)"
-  exit 1
-fi
-echo "Releasing v$VERSION"
-npm version "$VERSION" --no-git-tag-version
-npm install
-npm audit --audit-level=critical
-npm run check
-npm run test:unit
-npm run test:integration
-npm run build
-git add package.json package-lock.json
-git commit -m "v$VERSION"
-git tag "v$VERSION"
-echo "Done. Push with: git push origin main v$VERSION"

package/src/cli.ts DELETED Viewed

@@ -1,45 +0,0 @@
-#!/usr/bin/env node
-import { stat } from "node:fs/promises";
-import { resolve } from "node:path";
-import { Embedder } from "./embedder/embedder.js";
-import { indexDocs } from "./ingest/index-docs.js";
-import { logger } from "./logger.js";
-import { startServer } from "./server.js";
-import { closeDb, openDb } from "./store/db.js";
-process.on("uncaughtException", (error) => {
-  logger.error({ error }, "Uncaught exception");
-  process.exit(1);
-});
-process.on("unhandledRejection", (reason) => {
-  logger.error({ reason }, "Unhandled rejection");
-  process.exit(1);
-});
-async function main(): Promise<void> {
-  const docsPath = process.env.DOCS_PATH;
-  if (!docsPath) {
-    logger.error("DOCS_PATH environment variable is required");
-    process.exit(1);
-  }
-  const resolved = resolve(docsPath);
-  const info = await stat(resolved).catch(() => null);
-  if (!info?.isDirectory()) {
-    logger.error({ path: resolved }, "DOCS_PATH is not a directory");
-    process.exit(1);
-  }
-  const embedder = await Embedder.load();
-  openDb(resolved);
-  await indexDocs(embedder, resolved);
-  await startServer(embedder);
-}
-main().catch((error) => {
-  closeDb();
-  logger.error({ error }, "Failed to start server");
-  process.exit(1);
-});

package/src/embedder/embedder.ts DELETED Viewed

@@ -1,52 +0,0 @@
-import type { FeatureExtractionPipeline } from "@huggingface/transformers";
-import { pipeline } from "@huggingface/transformers";
-const MODEL = "Xenova/all-MiniLM-L6-v2";
-const BATCH_SIZE = 32;
-export class Embedder {
-  readonly maxTokens: number;
-  private readonly pipeline: FeatureExtractionPipeline;
-  private constructor(pipe: FeatureExtractionPipeline) {
-    this.pipeline = pipe;
-    this.maxTokens = (pipe.tokenizer.model_max_length as number) ?? 256;
-  }
-  static async load(): Promise<Embedder> {
-    const pipe = await pipeline<"feature-extraction">(
-      "feature-extraction",
-      MODEL,
-    );
-    return new Embedder(pipe);
-  }
-  tokenize(text: string): number[] {
-    return this.pipeline.tokenizer.encode(text);
-  }
-  async embed(text: string): Promise<number[]> {
-    const result = await this.pipeline(text, {
-      pooling: "mean",
-      normalize: true,
-    });
-    return (result.tolist() as number[][])[0];
-  }
-  async embedBatch(texts: string[]): Promise<number[][]> {
-    if (texts.length === 0) return [];
-    const results: number[][] = [];
-    for (let i = 0; i < texts.length; i += BATCH_SIZE) {
-      const batch = texts.slice(i, i + BATCH_SIZE);
-      const result = await this.pipeline(batch, {
-        pooling: "mean",
-        normalize: true,
-      });
-      results.push(...(result.tolist() as number[][]));
-    }
-    return results;
-  }
-}

package/src/ingest/chunker.ts DELETED Viewed

@@ -1,158 +0,0 @@
-import { basename } from "node:path";
-import matter from "gray-matter";
-import type { BaseChunk } from "../types.js";
-const OVERLAP_RATIO = 0.1;
-const SUB_SEPARATORS = [/^### /m, /\n\n/, /\. /];
-export interface ChunkOptions {
-  readonly maxTokens: number;
-  readonly countTokens: (text: string) => number;
-}
-function extractH1(body: string): string | null {
-  const match = body.match(/^# (.+)$/m);
-  return match ? match[1].trim() : null;
-}
-function clean(text: string): string {
-  return text
-    .replace(/<!--.*?-->/gs, "")
-    .replace(/\n{3,}/g, "\n\n")
-    .trim();
-}
-function splitWithOverlap(
-  text: string,
-  separators: RegExp[],
-  maxTokens: number,
-  overlap: number,
-  countTokens: (text: string) => number,
-): string[] {
-  if (countTokens(text) <= maxTokens) return [text];
-  const separator = separators[0];
-  const remaining = separators.slice(1);
-  const parts = text.split(separator).filter((p) => p.trim());
-  if (parts.length <= 1) {
-    // Separator didn't help — try the next one
-    if (remaining.length > 0) {
-      return splitWithOverlap(text, remaining, maxTokens, overlap, countTokens);
-    }
-    // Last resort: hard split
-    return hardSplit(text, maxTokens, overlap, countTokens);
-  }
-  const chunks: string[] = [];
-  let current = "";
-  for (const part of parts) {
-    const combined = current ? `${current}\n\n${part}` : part;
-    if (current && countTokens(combined) > maxTokens) {
-      chunks.push(current.trim());
-      // Start next chunk with overlap from the end of the previous
-      const overlapText = current.slice(-overlap);
-      current = overlapText + part;
-    } else {
-      current = combined;
-    }
-  }
-  if (current.trim()) chunks.push(current.trim());
-  // Recursively split any chunks that are still too large
-  return chunks.flatMap((chunk) => {
-    if (countTokens(chunk) <= maxTokens) return [chunk];
-    if (remaining.length > 0) {
-      return splitWithOverlap(
-        chunk,
-        remaining,
-        maxTokens,
-        overlap,
-        countTokens,
-      );
-    }
-    return hardSplit(chunk, maxTokens, overlap, countTokens);
-  });
-}
-function hardSplit(
-  text: string,
-  maxTokens: number,
-  overlap: number,
-  countTokens: (text: string) => number,
-): string[] {
-  const chunks: string[] = [];
-  const words = text.split(/\s+/);
-  let current = "";
-  for (const word of words) {
-    const next = current ? `${current} ${word}` : word;
-    if (countTokens(next) > maxTokens && current) {
-      chunks.push(current.trim());
-      // Keep overlap from end of current chunk
-      const overlapText = current.slice(-overlap);
-      current = overlapText + word;
-    } else {
-      current = next;
-    }
-  }
-  if (current.trim()) chunks.push(current.trim());
-  return chunks;
-}
-/** @package */
-export function chunkMarkdown(
-  content: string,
-  path: string,
-  options: ChunkOptions,
-): BaseChunk[] {
-  const { maxTokens, countTokens } = options;
-  const overlap = Math.floor(maxTokens * OVERLAP_RATIO);
-  const { data: metadata, content: body } = matter(content);
-  const fileHeading = extractH1(body) || basename(path, ".md");
-  const sections = body.split(/^## /m);
-  const chunks: BaseChunk[] = [];
-  for (let i = 0; i < sections.length; i++) {
-    const section = sections[i];
-    if (!section.trim()) continue;
-    let heading: string;
-    let text: string;
-    if (i === 0) {
-      // Content before the first ## — strip the H1 line and use fileHeading
-      heading = fileHeading;
-      const withoutH1 = section.replace(/^# .+$/m, "");
-      text = clean(withoutH1);
-    } else {
-      const [headingLine, ...rest] = section.split("\n");
-      heading = headingLine.trim();
-      text = clean(rest.join("\n"));
-    }
-    if (!text) continue;
-    const subChunks = splitWithOverlap(
-      text,
-      SUB_SEPARATORS,
-      maxTokens,
-      overlap,
-      countTokens,
-    );
-    for (const sub of subChunks) {
-      chunks.push({
-        path,
-        fileHeading,
-        heading,
-        text: sub,
-        metadata,
-      });
-    }
-  }
-  return chunks;
-}

package/src/ingest/index-docs.ts DELETED Viewed

@@ -1,120 +0,0 @@
-import { createHash } from "node:crypto";
-import { glob, readFile } from "node:fs/promises";
-import { relative } from "node:path";
-import type { Embedder } from "../embedder/embedder.js";
-import { logger } from "../logger.js";
-import {
-  getAllDocumentHashes,
-  insertChunk,
-  removeDocument,
-  runInTransaction,
-  setDocumentHash,
-} from "../store/db.js";
-import { chunkMarkdown } from "./chunker.js";
-const MAX_CHUNK_FILL = 0.8;
-async function findMarkdownFiles(docsPath: string): Promise<string[]> {
-  const files: string[] = [];
-  for await (const entry of glob("**/*.md", { cwd: docsPath })) {
-    files.push(`${docsPath}/${entry}`);
-  }
-  return files.sort();
-}
-function hashContent(content: string): string {
-  return createHash("sha256").update(content).digest("hex");
-}
-export async function indexDocs(
-  embedder: Embedder,
-  docsPath: string,
-): Promise<void> {
-  const files = await findMarkdownFiles(docsPath);
-  if (files.length === 0) {
-    logger.warn({ path: docsPath }, "No markdown files found");
-    return;
-  }
-  const fileContents = new Map<string, string>();
-  for (const file of files) {
-    const key = relative(docsPath, file);
-    const content = await readFile(file, "utf-8");
-    fileContents.set(key, content);
-  }
-  const storedHashes = getAllDocumentHashes();
-  const changedKeys: string[] = [];
-  for (const [key, content] of fileContents) {
-    if (storedHashes[key] !== hashContent(content)) {
-      changedKeys.push(key);
-    }
-  }
-  const removedKeys: string[] = [];
-  for (const key of Object.keys(storedHashes)) {
-    if (!fileContents.has(key)) {
-      removedKeys.push(key);
-    }
-  }
-  if (changedKeys.length === 0 && removedKeys.length === 0) {
-    logger.info({ files: files.length }, "Index up to date");
-    return;
-  }
-  const start = performance.now();
-  logger.info(
-    { changed: changedKeys.length, removed: removedKeys.length },
-    "Indexing changed files",
-  );
-  if (removedKeys.length > 0) {
-    runInTransaction(() => {
-      for (const key of removedKeys) {
-        removeDocument(key);
-      }
-    });
-  }
-  const chunkOptions = {
-    maxTokens: Math.floor(embedder.maxTokens * MAX_CHUNK_FILL),
-    countTokens: (text: string) => embedder.tokenize(text).length,
-  };
-  let totalChunks = 0;
-  for (const key of changedKeys) {
-    const content = fileContents.get(key) as string;
-    const chunks = chunkMarkdown(content, key, chunkOptions);
-    logger.debug({ path: key, chunks: chunks.length }, "Embedding chunks");
-    const embeddings = await embedder.embedBatch(chunks.map((c) => c.text));
-    runInTransaction(() => {
-      removeDocument(key);
-      setDocumentHash(key, hashContent(content));
-      for (let i = 0; i < chunks.length; i++) {
-        const chunk = chunks[i];
-        insertChunk(
-          chunk.path,
-          chunk.fileHeading,
-          chunk.heading,
-          chunk.text,
-          chunk.metadata,
-          embeddings[i],
-        );
-      }
-    });
-    totalChunks += chunks.length;
-  }
-  const duration = ((performance.now() - start) / 1000).toFixed(1);
-  logger.info(
-    { duration: `${duration}s`, chunks: totalChunks },
-    "Indexing complete",
-  );
-}