xindex 1.0.2 → 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.xindex.json CHANGED
@@ -18,5 +18,5 @@
  "utf8", "length", "map", "slice", "push", "join", "resolve", "stringify",
  "json", "settimeout", "path", "readfile"
  ],
- "ignoreFiles": ["rnd/**", ".xindex"]
+ "ignoreFiles": [".xindex"]
  }
package/README.md CHANGED
@@ -39,6 +39,73 @@ Drop this into `.mcp.json` at your project root:
 
 Open the project in Claude Code — it picks up the xindex MCP server and can call `xindex_search`, `xindex_index`, and `xindex_reset` directly. Fewer hallucinations, fewer round-trips.
 
+ ## Claude Code skills (`@xi`)
+
+ Two optional [Claude Code skills](https://docs.claude.com/en/docs/claude-code/skills) wrap the MCP tools so you don't have to think about them:
+
+ - **`ask-xi`** — read-only discovery. `@xi where is auth handled` drafts several focused queries, runs `xindex_search` for each, and returns ranked file paths with matched keywords. Use it as a cheap first step before grepping or asking a heavier model.
+ - **`xindex`** — index management (`xindex_index`, `xindex_reset`). Reset requires explicit confirmation every time.
+
+ The split keeps `@xi` safe to fire casually while destructive ops stay behind the `xindex` skill.
+
+ ### Install
+
+ Pick one — project-scoped or user-global:
+
+ ```bash
+ # Project (checked in, shared with the repo)
+ mkdir -p .claude/skills/ask-xi .claude/skills/xindex
+
+ # Or user-global (available in every project)
+ mkdir -p ~/.claude/skills/ask-xi ~/.claude/skills/xindex
+ ```
+
+ Then drop these two files in.
+
+ `ask-xi/SKILL.md`:
+
+ ````md
+ ---
+ name: ask-xi
+ description: Discovers relevant files via xindex semantic search — preps queries, auto-indexes on empty, returns file links with keywords. Triggered by @xi.
+ argument-hint: "[question]"
+ ---
+ Surface-level codebase discovery via xindex. Tool: `xindex_search` (natural-language, meaning-based).
+
+ **Steps:**
+ 1. Draft 5–10 focused queries from $ARGUMENTS (entry points, routing, config, integrations, tests, related patterns).
+ 2. Run `xindex_search` for each.
+ 3. If results are empty/sparse/stale → scoped-index the most relevant content-heavy root folders (one path per `xindex_index` call, e.g. `src`, `apps`, `features`, `components`), then re-search. Prefer scoped over full-repo.
+ 4. Refine with 2–3 narrower follow-ups.
+ 5. Return file paths + brief keywords showing why each matched.
+
+ Output = file links + keywords, not analysis. For reset or full re-index, delegate to `/xindex` (owns safety rules).
+ ````
+
+ `xindex/SKILL.md`:
+
+ ````md
+ ---
+ name: xindex
+ description: Manages xindex semantic search — index, search, reset via MCP tools. For research questions, use /ask-xi.
+ argument-hint: "[search query | index | reset]"
+ ---
+ Full xindex tool management. For research, use `/ask-xi`. Install: `npm i -g xindex`.
+
+ **Tools:**
+ - `xindex_search` — find files by meaning (synonyms, semantics). Try before grepping blindly.
+ - `xindex_index` — index a path (recursive, respects .gitignore). **MUST** run one path per call.
+ - `xindex_reset` — destructive wipe+rebuild. **MUST** get explicit user confirmation every time; if ambiguous, don't run.
+
+ **Workflow:** stale/corrupt → confirm → `xindex_reset` → `xindex_index(["."])` → `xindex_search`. Incremental → one-path `xindex_index(["changed/path"])` calls.
+
+ **Scoped indexing (preferred):** index only task-relevant content-heavy folders, sequentially. Full-repo `xindex_index(["."])` only for cross-cutting discovery.
+
+ $ARGUMENTS
+ ````
+
+ Both skills assume the `xindex` MCP server is registered (see the section above). Restart Claude Code after adding skills.
+
 ## Features
 
 - **Local** — everything runs on your machine; embeddings cached on disk
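
For context, the `.mcp.json` entry this README hunk refers to sits above the changed section and is not shown in the diff. A minimal sketch of what such an entry typically looks like — the exact command and args here are an assumption; the README's own snippet is authoritative:

```json
{
  "mcpServers": {
    "xindex": {
      "command": "npx",
      "args": ["-y", "xindex"]
    }
  }
}
```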
package/apps/mcpApp.ts CHANGED
@@ -44,8 +44,8 @@ export function McpApp({
  inputSchema: z.object({
  query: z.string()
  .describe("Natural language search query"),
- limit: z.number().int().min(1).max(100).default(5)
- .describe("Max results to return, 5 by default, 100 max"),
+ limit: z.number().int().min(1).max(50).default(7)
+ .describe("Max results to return, 7 by default, 50 max"),
  }),
  annotations: {readOnlyHint: true},
  }, async ({query, limit}) => {
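
The tightened `limit` bounds are easy to sanity-check in isolation. A minimal sketch of the schema from the hunk above (zod, as imported in `mcpApp.ts`), showing the new default and cap:

```ts
import {z} from "zod";

// Same shape as the inputSchema in the hunk above.
const inputSchema = z.object({
  query: z.string().describe("Natural language search query"),
  limit: z.number().int().min(1).max(50).default(7)
    .describe("Max results to return, 7 by default, 50 max"),
});

// An omitted limit now falls back to 7 (was 5).
console.log(inputSchema.parse({query: "auth middleware"}).limit); // 7

// Values above the new cap of 50 are rejected, not clamped.
console.log(inputSchema.safeParse({query: "auth", limit: 100}).success); // false
```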
package/apps/searchApp.ts CHANGED
@@ -3,7 +3,7 @@ import {ISearchIndex, IIndexRecord} from "../features/searchIndex.js";
 export type ISearchApp = (query: string, limit?: number) => Promise<IIndexRecord[]>;
 
 export function SearchApp({searchContentIndex}: {searchContentIndex: ISearchIndex}): ISearchApp {
- return async function search(query, limit = 10) {
+ return async function search(query, limit = 7) {
  return await searchContentIndex(query, limit);
  }
 }
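
Downstream, `SearchApp` now defaults to the same limit of 7 as the MCP schema. A quick stub-driven check — the inline `searchContentIndex` stub is hypothetical, assuming `ISearchIndex` matches the `(query, limit)` call visible in the hunk:

```ts
import {SearchApp} from "./searchApp.js";

// Hypothetical stub standing in for the real index; records the limit it receives.
const seen: number[] = [];
const searchContentIndex = async (query: string, limit: number) => {
  seen.push(limit);
  return [];
};

const search = SearchApp({searchContentIndex});
await search("where is auth handled"); // no explicit limit passed
console.log(seen[0]); // 7 — matches the MCP tool's new default
```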
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "xindex",
- "version": "1.0.2",
+ "version": "1.0.3",
  "description": "Local semantic code search — index codebase, search by meaning or keywords",
  "type": "module",
  "main": "xindex.ts",
package/rnd/hf.ts DELETED
@@ -1,14 +0,0 @@
- import { pipeline } from "@huggingface/transformers";
-
- const generator = await pipeline(
- "text-generation",
- "HuggingFaceTB/SmolLM2-135M-Instruct"
- );
-
- const messages = [
- { role: "system", content: "You are a helpful assistant." },
- { role: "user", content: "Who is Microsoft?" },
- ];
-
- const output = await generator(messages, { max_new_tokens: 64 });
- console.log(output[0].generated_text.at(-1).content);
package/rnd/keywords-compromise.ts DELETED
@@ -1,18 +0,0 @@
- import nlp from "compromise";
- import { readFile } from "fs/promises";
-
- const filePath = process.argv[2];
- if (!filePath) {
- console.error("Usage: npx tsx keywords-compromise.ts <file>");
- process.exit(1);
- }
-
- const text = await readFile(filePath, "utf8");
- const doc = nlp(text);
-
- console.log(`Keywords from: ${filePath}\n`);
- console.log("Topics:", doc.topics().out("array"));
- console.log("\nNouns:", doc.nouns().out("array"));
- console.log("\nVerbs:", doc.verbs().out("array"));
- console.log("\nPeople:", doc.people().out("array"));
- console.log("\nOrganizations:", doc.organizations().out("array"));
package/rnd/keywords-pipeline.ts DELETED
@@ -1,79 +0,0 @@
- import nlp from "compromise";
- import {createRequire} from "module";
- import {readFile} from "fs/promises";
- import {pipeline} from "@huggingface/transformers";
-
- const require = createRequire(import.meta.url);
- const keyword_extractor = require("keyword-extractor");
-
- const generator = await pipeline(
- "text-generation",
- "HuggingFaceTB/SmolLM2-135M-Instruct"
- );
-
- export async function llm(input: string): Promise<string> {
- const prompt = "Extract and list the most important keywords from the following text. Return only keywords separated by commas.";
-
- const messages = [
- {role: "system", content: prompt},
- {role: "user", content: input},
- ];
-
- const output = await generator(messages, {max_new_tokens: 128});
- return output[0].generated_text.at(-1).content;
- }
-
- const filePath = process.argv[2];
- if (!filePath) {
- console.error("Usage: npx tsx keywords-pipeline.ts <file>");
- process.exit(1);
- }
-
- // Step 1: Read file
- const text = await readFile(filePath, "utf8");
- console.log("=== Step 1: Read file ===");
- console.log(`${text.length} chars\n`);
-
- // Step 2: Compromise — extract nouns, verbs, topics
- const doc = nlp(text);
- const nouns = doc.nouns().out("array") as string[];
- const verbs = doc.verbs().out("array") as string[];
- const topics = doc.topics().out("array") as string[];
- const combined = [...topics, ...nouns, ...verbs].join(" ");
- console.log("=== Step 2: Compromise ===");
- console.log(`${topics.length} topics, ${nouns.length} nouns, ${verbs.length} verbs\n`);
-
- // Step 3: Regex — replace all non-word chars with space
- const cleaned = combined.replace(/\W+/g, " ").trim();
- console.log("=== Step 3: Regex cleanup ===");
- console.log(cleaned.slice(0, 200), "\n");
-
- // Step 4: LLM — pass cleaned text to local model
- const llmResult = await llm(cleaned);
- console.log("=== Step 4: LLM ===");
- console.log(llmResult, "\n");
-
- // Step 5: keyword-extractor
- const keywords: string[] = keyword_extractor.extract(cleaned, {
- language: "english",
- remove_digits: false,
- return_changed_case: true,
- remove_duplicates: true,
- return_max_ngrams: 3,
- });
-
- // Score by frequency
- const lower = cleaned.toLowerCase();
- const scored = keywords
- .filter((kw) => kw.length > 2)
- .map((kw) => {
- const re = new RegExp(`\\b${kw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\b`, "gi");
- const count = (lower.match(re) || []).length;
- return {keyword: kw, count};
- });
- scored.sort((a, b) => b.count - a.count);
-
- console.log("=== Step 5: Keywords ===");
- for (const {keyword, count} of scored) {
- console.log(` ${keyword.padEnd(35)} (${count}x)`);
- }
package/rnd/keywords.ts DELETED
@@ -1,38 +0,0 @@
- import { createRequire } from "module";
- import { readFile } from "fs/promises";
-
- const require = createRequire(import.meta.url);
- const keyword_extractor = require("keyword-extractor");
-
- const filePath = process.argv[2];
- if (!filePath) {
- console.error("Usage: npx tsx keywords.ts <file>");
- process.exit(1);
- }
-
- const text = await readFile(filePath, "utf8");
-
- const keywords: string[] = keyword_extractor.extract(text, {
- language: "english",
- remove_digits: false,
- return_changed_case: true,
- remove_duplicates: true,
- return_max_ngrams: 3,
- });
-
- // Count frequency of each keyword in original text (case-insensitive)
- const lower = text.toLowerCase();
- const scored = keywords
- .filter((kw) => kw.length > 2 && !/^[^a-z]*$/.test(kw))
- .map((kw) => {
- const re = new RegExp(`\\b${kw.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}\\b`, "gi");
- const count = (lower.match(re) || []).length;
- return { keyword: kw, count };
- });
-
- scored.sort((a, b) => b.count - a.count);
-
- console.log(`Keywords from: ${filePath}\n`);
- for (const { keyword, count } of scored) {
- console.log(` ${keyword.padEnd(35)} (${count}x)`);
- }
@@ -1,63 +0,0 @@
- import { LocalIndex, VirtualFileStorage } from 'vectra';
-
- async function main() {
- // 1. Create in-memory index
- const storage = new VirtualFileStorage();
- const index = new LocalIndex('mem://test', undefined, storage);
- await index.createIndex({ version: 1 });
- console.log('created index');
-
- // 2. Insert items with fake 8-dim vectors
- const dim = 8;
- await index.insertItem({
- id: 'doc-a',
- vector: Array.from({ length: dim }, (_, i) => (i + 1) / dim),
- metadata: { block: 'A', label: 'first' },
- });
- await index.insertItem({
- id: 'doc-b',
- vector: Array.from({ length: dim }, (_, i) => (dim - i) / dim),
- metadata: { block: 'B', label: 'second' },
- });
- await index.insertItem({
- id: 'doc-c',
- vector: Array.from({ length: dim }, () => 0.5),
- metadata: { block: 'C', label: 'third' },
- });
- console.log('inserted 3 items');
-
- // 3. Query — vector close to doc-a
- const queryVec = Array.from({ length: dim }, (_, i) => (i + 1) / dim);
- const results = await index.queryItems(queryVec, '', 3);
- console.log('query results:');
- for (const r of results) {
- console.log(` ${r.item.id} score=${r.score.toFixed(4)} block=${r.item.metadata.block}`);
- }
-
- // 4. Upsert — update doc-a's vector
- await index.upsertItem({
- id: 'doc-a',
- vector: Array.from({ length: dim }, () => 0.5),
- metadata: { block: 'A', label: 'updated' },
- });
- const updated = await index.getItem('doc-a');
- console.log(`upserted doc-a → label=${updated?.metadata.label}`);
-
- // 5. List all items
- const all = await index.listItems();
- console.log(`total items: ${all.length}`);
-
- // 6. Delete
- await index.deleteItem('doc-b');
- const afterDelete = await index.listItems();
- console.log(`after delete: ${afterDelete.length} items`);
-
- // 7. Verify no disk artifacts
- const { default: fs } = await import('fs');
- const exists = fs.existsSync('mem://test');
- console.log(`disk folder exists: ${exists}`);
-
- console.log('\ndone — all in-memory, nothing on disk');
- }
-
- main().catch(console.error);
package/rnd/vectra-keywords.ts DELETED
@@ -1,95 +0,0 @@
- import { LocalIndex } from "vectra";
- import { pipeline } from "@huggingface/transformers";
- import nlp from "compromise";
- import { createRequire } from "module";
- import { readFile } from "fs/promises";
- import { readdirSync } from "fs";
-
- const require = createRequire(import.meta.url);
- const keyword_extractor = require("keyword-extractor");
-
- // Init embedder + index
- const embedder = await pipeline(
- "feature-extraction",
- "sentence-transformers/all-MiniLM-L6-v2"
- );
- const index = new LocalIndex("./vectra-keyword-index");
- if (!(await index.isIndexCreated())) {
- await index.createIndex();
- }
-
- async function embed(text: string): Promise<number[]> {
- const result = await embedder(text, { pooling: "mean", normalize: true });
- return Array.from(result.data as Float32Array);
- }
-
- function extractKeywords(text: string): string[] {
- // Compromise: pull nouns, verbs, topics
- const doc = nlp(text);
- const parts = [
- ...doc.topics().out("array"),
- ...doc.nouns().out("array"),
- ...doc.verbs().out("array"),
- ] as string[];
- const cleaned = parts.join(" ").replace(/\W+/g, " ").trim();
-
- // keyword-extractor
- const keywords: string[] = keyword_extractor.extract(cleaned, {
- language: "english",
- remove_digits: false,
- return_changed_case: true,
- remove_duplicates: true,
- return_max_ngrams: 2,
- });
- return keywords.filter((kw: string) => kw.length > 2);
- }
-
- // --- Index files ---
- const files = process.argv.slice(2);
- if (!files.length) {
- console.error("Usage: npx tsx vectra-keywords.ts <file1> [file2] ...");
- process.exit(1);
- }
-
- console.log("=== Indexing ===");
- for (const filePath of files) {
- const text = await readFile(filePath, "utf8");
- const keywords = extractKeywords(text);
- const keywordStr = keywords.join(", ");
- const vector = await embed(keywordStr);
-
- await index.upsertItem({
- id: filePath,
- vector,
- metadata: { keywords: keywordStr, file: filePath },
- });
- console.log(` ${filePath} → ${keywords.slice(0, 8).join(", ")}...`);
- }
-
- // --- Search by keyword ---
- async function search(query: string, topK = 5) {
- const queryVector = await embed(query);
- return index.queryItems(queryVector, query, topK);
- }
-
- console.log("\n=== Search: keyword 'fruit' ===");
- const r1 = await search("fruit");
- for (const r of r1) {
- console.log(` ${r.score.toFixed(4)} | ${r.item.id}`);
- console.log(` keywords: ${(r.item.metadata as any).keywords.slice(0, 100)}`);
- }
-
- // --- Search by synonym ---
- console.log("\n=== Search: synonym 'automobile' (for 'cars/vehicles') ===");
- const r2 = await search("automobile vehicle transportation");
- for (const r of r2) {
- console.log(` ${r.score.toFixed(4)} | ${r.item.id}`);
- console.log(` keywords: ${(r.item.metadata as any).keywords.slice(0, 100)}`);
- }
-
- console.log("\n=== Search: synonym 'embedding model neural network' ===");
- const r3 = await search("embedding model neural network");
- for (const r of r3) {
- console.log(` ${r.score.toFixed(4)} | ${r.item.id}`);
- console.log(` keywords: ${(r.item.metadata as any).keywords.slice(0, 100)}`);
- }
package/rnd/vectra.ts DELETED
@@ -1,50 +0,0 @@
- import { LocalIndex } from "vectra";
- import { pipeline } from "@huggingface/transformers";
-
- // Create/load index (folder-based)
- const index = new LocalIndex("./vectra-index");
-
- if (!(await index.isIndexCreated())) {
- await index.createIndex();
- }
-
- // Embedding pipeline (fast local model)
- const embedder = await pipeline(
- "feature-extraction",
- "sentence-transformers/all-MiniLM-L6-v2"
- );
-
- async function embed(text: string): Promise<number[]> {
- const result = await embedder(text, { pooling: "mean", normalize: true });
- return Array.from(result.data as Float32Array);
- }
-
- // Add items with vectors + metadata
- const texts = [
- { text: "Apples are red fruit", metadata: { category: "fruit" } },
- { text: "Bananas are yellow", metadata: { category: "fruit" } },
- { text: "Cars are vehicles", metadata: { category: "transport" } },
- ];
-
- for (const item of texts) {
- const vector = await embed(item.text);
- await index.upsertItem({
- id: item.text.slice(0, 20),
- vector,
- metadata: item.metadata,
- });
- }
- console.log("Items indexed");
-
- // Query example
- const queryText = "red fruit";
- const queryVector = await embed(queryText);
- const results = await index.queryItems(queryVector, queryText, 3, {
- category: { $eq: "fruit" },
- });
-
- for (const result of results) {
- console.log(
- `Score: ${result.score.toFixed(4)} | ID: ${result.item.id} | Metadata: ${JSON.stringify(result.item.metadata)}`
- );
- }