inkdex 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE CHANGED
@@ -175,7 +175,7 @@
175
175
 
176
176
  END OF TERMS AND CONDITIONS
177
177
 
178
- Copyright 2026 Anton Lundén
178
+ Copyright 2026 Kandobyte
179
179
 
180
180
  Licensed under the Apache License, Version 2.0 (the "License");
181
181
  you may not use this file except in compliance with the License.
package/dist/store/db.js CHANGED
@@ -9,7 +9,7 @@ export function dbPath(docsPath) {
9
9
  const hash = createHash("sha256").update(docsPath).digest("hex").slice(0, 12);
10
10
  return join(STORE_DIR, `${hash}.db`);
11
11
  }
12
- const SCHEMA_VERSION = 2;
12
+ const SCHEMA_VERSION = 1;
13
13
  const CHUNK_COLUMNS = "id, document_path, file_heading, heading, text, metadata, embedding";
14
14
  let db;
15
15
  let stmts;
package/package.json CHANGED
@@ -1,7 +1,19 @@
1
1
  {
2
2
  "name": "inkdex",
3
- "version": "0.0.1",
3
+ "version": "0.1.0",
4
4
  "description": "MCP server that makes your markdown docs searchable",
5
+ "license": "Apache-2.0",
6
+ "author": "Anton Lundén",
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "https://github.com/kandobyte/inkdex"
10
+ },
11
+ "bugs": {
12
+ "url": "https://github.com/kandobyte/inkdex/issues"
13
+ },
14
+ "files": [
15
+ "dist"
16
+ ],
5
17
  "type": "module",
6
18
  "main": "dist/cli.js",
7
19
  "bin": {
@@ -30,8 +42,6 @@
30
42
  "markdown",
31
43
  "rag"
32
44
  ],
33
- "author": "Anton Lundén",
34
- "license": "Apache-2.0",
35
45
  "dependencies": {
36
46
  "@huggingface/transformers": "^3.8.1",
37
47
  "@modelcontextprotocol/sdk": "^1.25.3",
@@ -1,15 +0,0 @@
1
- {
2
- "permissions": {
3
- "allow": [
4
- "WebFetch(domain:github.com)",
5
- "mcp__acp__Bash",
6
- "mcp__acp__Write",
7
- "mcp__acp__Edit",
8
- "WebFetch(domain:raw.githubusercontent.com)",
9
- "WebFetch(domain:api.github.com)",
10
- "WebFetch(domain:www.firecrawl.dev)",
11
- "WebFetch(domain:unstructured.io)",
12
- "WebFetch(domain:www.npmjs.com)"
13
- ]
14
- }
15
- }
@@ -1,73 +0,0 @@
1
- name: CI
2
-
3
- on:
4
- push:
5
- branches: [main]
6
- paths:
7
- - "src/**"
8
- - "test/**"
9
- - "package.json"
10
- - "package-lock.json"
11
- - "tsconfig.json"
12
- - ".github/workflows/**"
13
- pull_request:
14
- branches: [main]
15
- paths:
16
- - "src/**"
17
- - "test/**"
18
- - "package.json"
19
- - "package-lock.json"
20
- - "tsconfig.json"
21
- - ".github/workflows/**"
22
-
23
- jobs:
24
- audit:
25
- name: Security Audit
26
- runs-on: ubuntu-latest
27
- steps:
28
- - uses: actions/checkout@v5
29
-
30
- - uses: actions/setup-node@v6
31
- with:
32
- node-version: "22"
33
-
34
- - run: npm audit --audit-level=critical
35
-
36
- check:
37
- name: Lint & Format
38
- runs-on: ubuntu-latest
39
- steps:
40
- - uses: actions/checkout@v5
41
-
42
- - uses: actions/setup-node@v6
43
- with:
44
- node-version: "22"
45
-
46
- - run: npm ci
47
- - run: npm run check
48
-
49
- test-unit:
50
- name: Unit Tests
51
- runs-on: ubuntu-latest
52
- steps:
53
- - uses: actions/checkout@v5
54
-
55
- - uses: actions/setup-node@v6
56
- with:
57
- node-version: "22"
58
-
59
- - run: npm ci
60
- - run: npm run test:unit
61
-
62
- test-integration:
63
- name: Integration Tests
64
- runs-on: ubuntu-latest
65
- steps:
66
- - uses: actions/checkout@v5
67
-
68
- - uses: actions/setup-node@v6
69
- with:
70
- node-version: "22"
71
-
72
- - run: npm ci
73
- - run: npm run test:integration
@@ -1,65 +0,0 @@
1
- name: Release
2
-
3
- on:
4
- push:
5
- tags:
6
- - "v*"
7
-
8
- permissions:
9
- contents: write
10
- id-token: write
11
-
12
- jobs:
13
- check:
14
- name: Check
15
- runs-on: ubuntu-latest
16
- steps:
17
- - uses: actions/checkout@v5
18
-
19
- - uses: actions/setup-node@v6
20
- with:
21
- node-version: "22"
22
-
23
- - run: npm ci
24
- - run: npm run check
25
-
26
- test:
27
- name: Test
28
- runs-on: ubuntu-latest
29
- steps:
30
- - uses: actions/checkout@v5
31
-
32
- - uses: actions/setup-node@v6
33
- with:
34
- node-version: "22"
35
-
36
- - run: npm ci
37
- - run: npm run test
38
-
39
- npm:
40
- name: npm
41
- needs: [check, test]
42
- runs-on: ubuntu-latest
43
- steps:
44
- - uses: actions/checkout@v5
45
-
46
- - uses: actions/setup-node@v6
47
- with:
48
- node-version: "22"
49
- registry-url: "https://registry.npmjs.org"
50
-
51
- - run: npm install -g npm@latest
52
- - run: npm ci
53
- - run: npm run build
54
- - run: npm publish --access public --provenance
55
-
56
- release:
57
- name: Release
58
- needs: [npm]
59
- runs-on: ubuntu-latest
60
- steps:
61
- - uses: actions/checkout@v5
62
-
63
- - uses: softprops/action-gh-release@v2
64
- with:
65
- generate_release_notes: true
package/AGENTS.md DELETED
@@ -1,32 +0,0 @@
1
- # AGENTS.md
2
-
3
- - Use [Conventional Commits](https://www.conventionalcommits.org/) (`feat:`, `fix:`, `chore:`, `docs:`)
4
-
5
- ## General Coding Guidelines
6
-
7
- - Maintain consistency with existing patterns and style in the codebase
8
- - Use TypeScript strictly: enable `strict: true`, prefer `unknown` over `any`, avoid type assertions unless necessary
9
- - Write comments that explain *why*, not *what*—update or remove stale comments when modifying code
10
- - Prefer renaming over commenting: if code needs a comment to explain what it does, rename instead
11
- - Use JSDoc (`/** */`) only for exported functions/types; use `//` for implementation notes
12
- - Use `@package` on exports internal to their feature package
13
- - Include `@example` in JSDoc when input/output isn't obvious from the signature
14
- - No commented-out code, no TODO/FIXME without a linked issue
15
- - Naming: camelCase functions/variables, PascalCase types/classes, UPPER_SNAKE_CASE constants; prefix booleans with `is`/`has`/`should`
16
- - Keep functions focused and single-responsibility; favor immutable patterns (`readonly`, no mutation)
17
- - Handle errors consistently: prefer typed errors or Result patterns, handle promise rejections explicitly
18
- - Use modern syntax: optional chaining (`?.`), nullish coalescing (`??`), `satisfies`, ES modules
19
- - After refactoring, run `npm run test` to verify tests pass and coverage requirements are met
20
- - Write tests covering happy path, edge cases, and error conditions with descriptive names
21
- - Test should validate observable behavior not implementation details
22
-
23
- ## Development
24
-
25
- ```bash
26
- npm install
27
- npm run build # TypeScript compilation
28
- npm run dev # Run via tsx
29
- npm run check # Biome lint
30
- npm run format # Biome format
31
- npm test # Unit + integration
32
- ```
package/biome.json DELETED
@@ -1,43 +0,0 @@
1
- {
2
- "$schema": "https://biomejs.dev/schemas/2.3.14/schema.json",
3
- "vcs": {
4
- "enabled": true,
5
- "clientKind": "git",
6
- "useIgnoreFile": true
7
- },
8
- "assist": { "actions": { "source": { "organizeImports": "on" } } },
9
- "formatter": {
10
- "indentStyle": "space",
11
- "indentWidth": 2
12
- },
13
- "linter": {
14
- "enabled": true,
15
- "rules": {
16
- "recommended": true,
17
- "suspicious": {
18
- "noExplicitAny": "error"
19
- },
20
- "performance": {
21
- "noDelete": "off"
22
- },
23
- "correctness": {
24
- "noPrivateImports": "error"
25
- }
26
- }
27
- },
28
- "overrides": [
29
- {
30
- "includes": ["test/**"],
31
- "linter": {
32
- "rules": {
33
- "correctness": {
34
- "noPrivateImports": "off"
35
- }
36
- }
37
- }
38
- }
39
- ],
40
- "files": {
41
- "includes": ["**", "!**/dist", "!**/node_modules"]
42
- }
43
- }
package/inkdex-0.0.1.tgz DELETED
Binary file
package/release.sh DELETED
@@ -1,33 +0,0 @@
1
- #!/usr/bin/env bash
2
- set -euo pipefail
3
-
4
- if [[ "${1:-}" =~ ^(-h|--help)$ ]] || [[ -z "${1:-}" ]]; then
5
- echo "Usage: ./release.sh <version>"
6
- echo " version: X.Y.Z (e.g., 0.1.0)"
7
- exit 0
8
- fi
9
-
10
- VERSION="$1"
11
-
12
- if ! [[ "$VERSION" =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
13
- echo "Error: Version must be in format X.Y.Z (e.g., 0.1.0)"
14
- exit 1
15
- fi
16
-
17
- echo "Releasing v$VERSION"
18
-
19
- npm version "$VERSION" --no-git-tag-version
20
- npm install
21
-
22
- npm audit --audit-level=critical
23
- npm run check
24
- npm run test:unit
25
- npm run test:integration
26
-
27
- npm run build
28
-
29
- git add package.json package-lock.json
30
- git commit -m "v$VERSION"
31
- git tag "v$VERSION"
32
-
33
- echo "Done. Push with: git push origin main v$VERSION"
package/src/cli.ts DELETED
@@ -1,45 +0,0 @@
1
- #!/usr/bin/env node
2
-
3
- import { stat } from "node:fs/promises";
4
- import { resolve } from "node:path";
5
- import { Embedder } from "./embedder/embedder.js";
6
- import { indexDocs } from "./ingest/index-docs.js";
7
- import { logger } from "./logger.js";
8
- import { startServer } from "./server.js";
9
- import { closeDb, openDb } from "./store/db.js";
10
-
11
- process.on("uncaughtException", (error) => {
12
- logger.error({ error }, "Uncaught exception");
13
- process.exit(1);
14
- });
15
-
16
- process.on("unhandledRejection", (reason) => {
17
- logger.error({ reason }, "Unhandled rejection");
18
- process.exit(1);
19
- });
20
-
21
- async function main(): Promise<void> {
22
- const docsPath = process.env.DOCS_PATH;
23
- if (!docsPath) {
24
- logger.error("DOCS_PATH environment variable is required");
25
- process.exit(1);
26
- }
27
-
28
- const resolved = resolve(docsPath);
29
- const info = await stat(resolved).catch(() => null);
30
- if (!info?.isDirectory()) {
31
- logger.error({ path: resolved }, "DOCS_PATH is not a directory");
32
- process.exit(1);
33
- }
34
-
35
- const embedder = await Embedder.load();
36
- openDb(resolved);
37
- await indexDocs(embedder, resolved);
38
- await startServer(embedder);
39
- }
40
-
41
- main().catch((error) => {
42
- closeDb();
43
- logger.error({ error }, "Failed to start server");
44
- process.exit(1);
45
- });
@@ -1,52 +0,0 @@
1
- import type { FeatureExtractionPipeline } from "@huggingface/transformers";
2
- import { pipeline } from "@huggingface/transformers";
3
-
4
- const MODEL = "Xenova/all-MiniLM-L6-v2";
5
- const BATCH_SIZE = 32;
6
-
7
- export class Embedder {
8
- readonly maxTokens: number;
9
- private readonly pipeline: FeatureExtractionPipeline;
10
-
11
- private constructor(pipe: FeatureExtractionPipeline) {
12
- this.pipeline = pipe;
13
- this.maxTokens = (pipe.tokenizer.model_max_length as number) ?? 256;
14
- }
15
-
16
- static async load(): Promise<Embedder> {
17
- const pipe = await pipeline<"feature-extraction">(
18
- "feature-extraction",
19
- MODEL,
20
- );
21
- return new Embedder(pipe);
22
- }
23
-
24
- tokenize(text: string): number[] {
25
- return this.pipeline.tokenizer.encode(text);
26
- }
27
-
28
- async embed(text: string): Promise<number[]> {
29
- const result = await this.pipeline(text, {
30
- pooling: "mean",
31
- normalize: true,
32
- });
33
- return (result.tolist() as number[][])[0];
34
- }
35
-
36
- async embedBatch(texts: string[]): Promise<number[][]> {
37
- if (texts.length === 0) return [];
38
-
39
- const results: number[][] = [];
40
-
41
- for (let i = 0; i < texts.length; i += BATCH_SIZE) {
42
- const batch = texts.slice(i, i + BATCH_SIZE);
43
- const result = await this.pipeline(batch, {
44
- pooling: "mean",
45
- normalize: true,
46
- });
47
- results.push(...(result.tolist() as number[][]));
48
- }
49
-
50
- return results;
51
- }
52
- }
@@ -1,158 +0,0 @@
1
- import { basename } from "node:path";
2
- import matter from "gray-matter";
3
- import type { BaseChunk } from "../types.js";
4
-
5
- const OVERLAP_RATIO = 0.1;
6
- const SUB_SEPARATORS = [/^### /m, /\n\n/, /\. /];
7
-
8
- export interface ChunkOptions {
9
- readonly maxTokens: number;
10
- readonly countTokens: (text: string) => number;
11
- }
12
-
13
- function extractH1(body: string): string | null {
14
- const match = body.match(/^# (.+)$/m);
15
- return match ? match[1].trim() : null;
16
- }
17
-
18
- function clean(text: string): string {
19
- return text
20
- .replace(/<!--.*?-->/gs, "")
21
- .replace(/\n{3,}/g, "\n\n")
22
- .trim();
23
- }
24
-
25
- function splitWithOverlap(
26
- text: string,
27
- separators: RegExp[],
28
- maxTokens: number,
29
- overlap: number,
30
- countTokens: (text: string) => number,
31
- ): string[] {
32
- if (countTokens(text) <= maxTokens) return [text];
33
-
34
- const separator = separators[0];
35
- const remaining = separators.slice(1);
36
-
37
- const parts = text.split(separator).filter((p) => p.trim());
38
- if (parts.length <= 1) {
39
- // Separator didn't help — try the next one
40
- if (remaining.length > 0) {
41
- return splitWithOverlap(text, remaining, maxTokens, overlap, countTokens);
42
- }
43
- // Last resort: hard split
44
- return hardSplit(text, maxTokens, overlap, countTokens);
45
- }
46
-
47
- const chunks: string[] = [];
48
- let current = "";
49
-
50
- for (const part of parts) {
51
- const combined = current ? `${current}\n\n${part}` : part;
52
- if (current && countTokens(combined) > maxTokens) {
53
- chunks.push(current.trim());
54
- // Start next chunk with overlap from the end of the previous
55
- const overlapText = current.slice(-overlap);
56
- current = overlapText + part;
57
- } else {
58
- current = combined;
59
- }
60
- }
61
- if (current.trim()) chunks.push(current.trim());
62
-
63
- // Recursively split any chunks that are still too large
64
- return chunks.flatMap((chunk) => {
65
- if (countTokens(chunk) <= maxTokens) return [chunk];
66
- if (remaining.length > 0) {
67
- return splitWithOverlap(
68
- chunk,
69
- remaining,
70
- maxTokens,
71
- overlap,
72
- countTokens,
73
- );
74
- }
75
- return hardSplit(chunk, maxTokens, overlap, countTokens);
76
- });
77
- }
78
-
79
- function hardSplit(
80
- text: string,
81
- maxTokens: number,
82
- overlap: number,
83
- countTokens: (text: string) => number,
84
- ): string[] {
85
- const chunks: string[] = [];
86
- const words = text.split(/\s+/);
87
- let current = "";
88
-
89
- for (const word of words) {
90
- const next = current ? `${current} ${word}` : word;
91
- if (countTokens(next) > maxTokens && current) {
92
- chunks.push(current.trim());
93
- // Keep overlap from end of current chunk
94
- const overlapText = current.slice(-overlap);
95
- current = overlapText + word;
96
- } else {
97
- current = next;
98
- }
99
- }
100
- if (current.trim()) chunks.push(current.trim());
101
-
102
- return chunks;
103
- }
104
-
105
- /** @package */
106
- export function chunkMarkdown(
107
- content: string,
108
- path: string,
109
- options: ChunkOptions,
110
- ): BaseChunk[] {
111
- const { maxTokens, countTokens } = options;
112
- const overlap = Math.floor(maxTokens * OVERLAP_RATIO);
113
- const { data: metadata, content: body } = matter(content);
114
- const fileHeading = extractH1(body) || basename(path, ".md");
115
- const sections = body.split(/^## /m);
116
- const chunks: BaseChunk[] = [];
117
-
118
- for (let i = 0; i < sections.length; i++) {
119
- const section = sections[i];
120
- if (!section.trim()) continue;
121
-
122
- let heading: string;
123
- let text: string;
124
-
125
- if (i === 0) {
126
- // Content before the first ## — strip the H1 line and use fileHeading
127
- heading = fileHeading;
128
- const withoutH1 = section.replace(/^# .+$/m, "");
129
- text = clean(withoutH1);
130
- } else {
131
- const [headingLine, ...rest] = section.split("\n");
132
- heading = headingLine.trim();
133
- text = clean(rest.join("\n"));
134
- }
135
-
136
- if (!text) continue;
137
-
138
- const subChunks = splitWithOverlap(
139
- text,
140
- SUB_SEPARATORS,
141
- maxTokens,
142
- overlap,
143
- countTokens,
144
- );
145
-
146
- for (const sub of subChunks) {
147
- chunks.push({
148
- path,
149
- fileHeading,
150
- heading,
151
- text: sub,
152
- metadata,
153
- });
154
- }
155
- }
156
-
157
- return chunks;
158
- }
@@ -1,120 +0,0 @@
1
- import { createHash } from "node:crypto";
2
- import { glob, readFile } from "node:fs/promises";
3
- import { relative } from "node:path";
4
- import type { Embedder } from "../embedder/embedder.js";
5
- import { logger } from "../logger.js";
6
- import {
7
- getAllDocumentHashes,
8
- insertChunk,
9
- removeDocument,
10
- runInTransaction,
11
- setDocumentHash,
12
- } from "../store/db.js";
13
- import { chunkMarkdown } from "./chunker.js";
14
-
15
- const MAX_CHUNK_FILL = 0.8;
16
-
17
- async function findMarkdownFiles(docsPath: string): Promise<string[]> {
18
- const files: string[] = [];
19
- for await (const entry of glob("**/*.md", { cwd: docsPath })) {
20
- files.push(`${docsPath}/${entry}`);
21
- }
22
- return files.sort();
23
- }
24
-
25
- function hashContent(content: string): string {
26
- return createHash("sha256").update(content).digest("hex");
27
- }
28
-
29
- export async function indexDocs(
30
- embedder: Embedder,
31
- docsPath: string,
32
- ): Promise<void> {
33
- const files = await findMarkdownFiles(docsPath);
34
-
35
- if (files.length === 0) {
36
- logger.warn({ path: docsPath }, "No markdown files found");
37
- return;
38
- }
39
-
40
- const fileContents = new Map<string, string>();
41
- for (const file of files) {
42
- const key = relative(docsPath, file);
43
- const content = await readFile(file, "utf-8");
44
- fileContents.set(key, content);
45
- }
46
-
47
- const storedHashes = getAllDocumentHashes();
48
-
49
- const changedKeys: string[] = [];
50
- for (const [key, content] of fileContents) {
51
- if (storedHashes[key] !== hashContent(content)) {
52
- changedKeys.push(key);
53
- }
54
- }
55
-
56
- const removedKeys: string[] = [];
57
- for (const key of Object.keys(storedHashes)) {
58
- if (!fileContents.has(key)) {
59
- removedKeys.push(key);
60
- }
61
- }
62
-
63
- if (changedKeys.length === 0 && removedKeys.length === 0) {
64
- logger.info({ files: files.length }, "Index up to date");
65
- return;
66
- }
67
-
68
- const start = performance.now();
69
-
70
- logger.info(
71
- { changed: changedKeys.length, removed: removedKeys.length },
72
- "Indexing changed files",
73
- );
74
-
75
- if (removedKeys.length > 0) {
76
- runInTransaction(() => {
77
- for (const key of removedKeys) {
78
- removeDocument(key);
79
- }
80
- });
81
- }
82
-
83
- const chunkOptions = {
84
- maxTokens: Math.floor(embedder.maxTokens * MAX_CHUNK_FILL),
85
- countTokens: (text: string) => embedder.tokenize(text).length,
86
- };
87
-
88
- let totalChunks = 0;
89
- for (const key of changedKeys) {
90
- const content = fileContents.get(key) as string;
91
- const chunks = chunkMarkdown(content, key, chunkOptions);
92
-
93
- logger.debug({ path: key, chunks: chunks.length }, "Embedding chunks");
94
- const embeddings = await embedder.embedBatch(chunks.map((c) => c.text));
95
-
96
- runInTransaction(() => {
97
- removeDocument(key);
98
- setDocumentHash(key, hashContent(content));
99
- for (let i = 0; i < chunks.length; i++) {
100
- const chunk = chunks[i];
101
- insertChunk(
102
- chunk.path,
103
- chunk.fileHeading,
104
- chunk.heading,
105
- chunk.text,
106
- chunk.metadata,
107
- embeddings[i],
108
- );
109
- }
110
- });
111
-
112
- totalChunks += chunks.length;
113
- }
114
-
115
- const duration = ((performance.now() - start) / 1000).toFixed(1);
116
- logger.info(
117
- { duration: `${duration}s`, chunks: totalChunks },
118
- "Indexing complete",
119
- );
120
- }