xindex 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (160) hide show
  1. package/.ai/research/2026-04-10-file-watching.md +79 -0
  2. package/.ai/research/2026-04-10-mcp-output-format.md +129 -0
  3. package/.ai/task/INDEX.md +12 -0
  4. package/.ai/task/done/INDEX.md +3 -0
  5. package/.ai/task/done/task.2026-04-09-local-ai-research-protos.log.md +98 -0
  6. package/.ai/task/done/task.2026-04-09-local-ai-research-protos.md +102 -0
  7. package/.ai/task/task.2026-04-10-cluster-config.log.md +19 -0
  8. package/.ai/task/task.2026-04-10-cluster-config.md +118 -0
  9. package/.ai/task/task.2026-04-10-dir-indexing.log.md +8 -0
  10. package/.ai/task/task.2026-04-10-dir-indexing.md +92 -0
  11. package/.ai/task/task.2026-04-10-line-clustering.log.md +50 -0
  12. package/.ai/task/task.2026-04-10-line-clustering.md +176 -0
  13. package/.ai/task/task.2026-04-10-object-store.log.md +7 -0
  14. package/.ai/task/task.2026-04-10-object-store.md +81 -0
  15. package/.ai/task/task.2026-04-10-search-config.log.md +46 -0
  16. package/.ai/task/task.2026-04-10-search-config.md +274 -0
  17. package/.ai/task/task.2026-04-10-watch-indexing.log.md +32 -0
  18. package/.ai/task/task.2026-04-10-watch-indexing.md +101 -0
  19. package/.ai/task/task.2026-04-10-xindex-mcp.log.md +5 -0
  20. package/.ai/task/task.2026-04-10-xindex-mcp.md +92 -0
  21. package/.ai/task/task.2026-04-10-xindex-mcp.report.md +113 -0
  22. package/.claude/settings.local.json +73 -0
  23. package/.claude/skills/make-hof/SKILL.md +8 -0
  24. package/.claude/skills/make-hof/playbook.md +38 -0
  25. package/.cursor/mcp.json +8 -0
  26. package/.mcp.json +8 -0
  27. package/.xindex.json +22 -0
  28. package/CLAUDE.md +54 -0
  29. package/README.md +206 -0
  30. package/apps/indexApp.ts +31 -0
  31. package/apps/mcpApp.ts +119 -0
  32. package/apps/run.index.ts +19 -0
  33. package/apps/run.mcp.ts +49 -0
  34. package/apps/run.reset.ts +10 -0
  35. package/apps/run.search.ts +21 -0
  36. package/apps/run.watch.ts +44 -0
  37. package/apps/searchApp.ts +9 -0
  38. package/apps/watchApp.ts +53 -0
  39. package/apps/watchFileEventsApp.ts +39 -0
  40. package/bin/xindex-index +2 -0
  41. package/bin/xindex-mcp +2 -0
  42. package/bin/xindex-reset +2 -0
  43. package/bin/xindex-search +2 -0
  44. package/bin/xindex-watch +2 -0
  45. package/componets/IType.ts +1 -0
  46. package/componets/appId.ts +3 -0
  47. package/componets/buildComponents.ts +27 -0
  48. package/componets/config/loadConfig.ts +43 -0
  49. package/componets/config/xindexConfig.ts +4 -0
  50. package/componets/index/contentIndexDriver.ts +39 -0
  51. package/componets/index/formatSearchResults.ts +18 -0
  52. package/componets/index/getIndexStats.ts +11 -0
  53. package/componets/index/handleFileEvent.ts +25 -0
  54. package/componets/index/indexApi.ts +45 -0
  55. package/componets/index/vectraIndex.ts +11 -0
  56. package/componets/index/watcherLock.ts +107 -0
  57. package/componets/keywords/cleanUpKeywords.ts +38 -0
  58. package/componets/keywords/extractKeywords.ts +14 -0
  59. package/componets/keywords/refineKeywords.ts +16 -0
  60. package/componets/llm/embed.ts +18 -0
  61. package/componets/llm/queryLLM.ts +20 -0
  62. package/componets/logger.ts +34 -0
  63. package/componets/walkFiles.ts +51 -0
  64. package/componets/watchFiles.ts +106 -0
  65. package/features/indexContent.ts +16 -0
  66. package/features/removeContent.ts +9 -0
  67. package/features/resetIndex.ts +9 -0
  68. package/features/searchIndex.ts +33 -0
  69. package/package.json +32 -0
  70. package/packages/fun/src/IType.ts +5 -0
  71. package/packages/fun/src/array-finder.ts +55 -0
  72. package/packages/fun/src/array-index.ts +35 -0
  73. package/packages/fun/src/array.ts +112 -0
  74. package/packages/fun/src/assert.ts +5 -0
  75. package/packages/fun/src/asyncRequest.ts +35 -0
  76. package/packages/fun/src/callsites.ts +18 -0
  77. package/packages/fun/src/case-never.ts +9 -0
  78. package/packages/fun/src/casting.ts +41 -0
  79. package/packages/fun/src/collect.ts +13 -0
  80. package/packages/fun/src/concurrency.ts +186 -0
  81. package/packages/fun/src/container.ts +86 -0
  82. package/packages/fun/src/counter.ts +45 -0
  83. package/packages/fun/src/create-map.ts +2 -0
  84. package/packages/fun/src/dedupe.ts +2 -0
  85. package/packages/fun/src/defer.ts +55 -0
  86. package/packages/fun/src/delay.ts +5 -0
  87. package/packages/fun/src/discriminate.ts +34 -0
  88. package/packages/fun/src/enum-values.ts +12 -0
  89. package/packages/fun/src/exponential-backoff.ts +20 -0
  90. package/packages/fun/src/flatten.ts +11 -0
  91. package/packages/fun/src/hash.ts +67 -0
  92. package/packages/fun/src/hash128.ts +6 -0
  93. package/packages/fun/src/hash256.ts +6 -0
  94. package/packages/fun/src/hub.ts +53 -0
  95. package/packages/fun/src/id.ts +10 -0
  96. package/packages/fun/src/interval.ts +76 -0
  97. package/packages/fun/src/is-non-nullable.ts +2 -0
  98. package/packages/fun/src/isIterable.ts +3 -0
  99. package/packages/fun/src/mailbox.ts +13 -0
  100. package/packages/fun/src/map-record.ts +19 -0
  101. package/packages/fun/src/match-collections.ts +57 -0
  102. package/packages/fun/src/match-left-and-right-arrays.ts +78 -0
  103. package/packages/fun/src/mem.ts +26 -0
  104. package/packages/fun/src/memos.ts +28 -0
  105. package/packages/fun/src/normalizeError.ts +25 -0
  106. package/packages/fun/src/nothing.ts +3 -0
  107. package/packages/fun/src/pipe.ts +18 -0
  108. package/packages/fun/src/prettyJson.ts +3 -0
  109. package/packages/fun/src/project.ts +8 -0
  110. package/packages/fun/src/promise.ts +27 -0
  111. package/packages/fun/src/pubsub.ts +128 -0
  112. package/packages/fun/src/randomId.ts +14 -0
  113. package/packages/fun/src/regexp-escape.ts +13 -0
  114. package/packages/fun/src/retry.ts +15 -0
  115. package/packages/fun/src/serial.test.ts +107 -0
  116. package/packages/fun/src/serial.ts +17 -0
  117. package/packages/fun/src/sleep.ts +3 -0
  118. package/packages/fun/src/sort-object.ts +46 -0
  119. package/packages/fun/src/speed-test.ts +56 -0
  120. package/packages/fun/src/tick.ts +37 -0
  121. package/packages/fun/src/time-behavior.ts +50 -0
  122. package/packages/fun/src/time.ts +22 -0
  123. package/packages/fun/src/timedFallback.ts +37 -0
  124. package/packages/fun/src/timer.ts +30 -0
  125. package/packages/fun/src/value.ts +33 -0
  126. package/packages/fun/src/waitForCounter.ts +15 -0
  127. package/packages/streamx/src/batch.ts +23 -0
  128. package/packages/streamx/src/batchTimed.ts +113 -0
  129. package/packages/streamx/src/buffer.ts +72 -0
  130. package/packages/streamx/src/concatenate.ts +33 -0
  131. package/packages/streamx/src/filter.ts +14 -0
  132. package/packages/streamx/src/flat.ts +19 -0
  133. package/packages/streamx/src/flatMap.ts +9 -0
  134. package/packages/streamx/src/from.ts +30 -0
  135. package/packages/streamx/src/index.ts +49 -0
  136. package/packages/streamx/src/interval.ts +58 -0
  137. package/packages/streamx/src/loop.ts +8 -0
  138. package/packages/streamx/src/map.ts +12 -0
  139. package/packages/streamx/src/merge.ts +89 -0
  140. package/packages/streamx/src/nodeReadable.ts +6 -0
  141. package/packages/streamx/src/nodeTransform.ts +9 -0
  142. package/packages/streamx/src/nodeWritable.ts +38 -0
  143. package/packages/streamx/src/objectReader.ts +16 -0
  144. package/packages/streamx/src/polyfill.ts +20 -0
  145. package/packages/streamx/src/reader.ts +38 -0
  146. package/packages/streamx/src/reduce.ts +15 -0
  147. package/packages/streamx/src/scale.ts +93 -0
  148. package/packages/streamx/src/scaleSync.ts +13 -0
  149. package/packages/streamx/src/sequence.ts +7 -0
  150. package/packages/streamx/src/tap.ts +9 -0
  151. package/packages/streamx/src/toArray.ts +9 -0
  152. package/packages/streamx/src/writer.ts +96 -0
  153. package/rnd/hf.ts +14 -0
  154. package/rnd/keywords-compromise.ts +18 -0
  155. package/rnd/keywords-pipeline.ts +79 -0
  156. package/rnd/keywords.ts +38 -0
  157. package/rnd/test-vectra-memory.ts +63 -0
  158. package/rnd/vectra-keywords.ts +95 -0
  159. package/rnd/vectra.ts +50 -0
  160. package/tsconfig.json +14 -0
@@ -0,0 +1,38 @@
1
+ import { createRequire } from "module";
2
+ import { readFile } from "fs/promises";
3
+
4
// keyword-extractor is loaded through a CommonJS-style require created for this ES module.
const require = createRequire(import.meta.url);
const keyword_extractor = require("keyword-extractor");

/**
 * Counts case-insensitive, whole-token occurrences of `keyword` in `haystack`.
 *
 * Regex metacharacters in the keyword are escaped so it is matched literally.
 * Token edges are enforced with lookarounds instead of `\b`: `\b` only matches
 * next to a word character, so a keyword that starts or ends with a symbol
 * could never match and would always be reported with a count of 0. For
 * keywords that start and end with word characters the result is identical.
 *
 * @param haystack Text to search in.
 * @param keyword Literal keyword (may contain spaces or symbols).
 * @returns Number of non-overlapping matches.
 */
function countOccurrences(haystack: string, keyword: string): number {
  const escaped = keyword.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const pattern = new RegExp(`(?<!\\w)${escaped}(?!\\w)`, "gi");
  return haystack.match(pattern)?.length ?? 0;
}

// The file to analyse is the first CLI argument; bail out with usage help if it is missing.
const filePath = process.argv[2];
if (!filePath) {
  console.error("Usage: npx tsx keywords.ts <file>");
  process.exit(1);
}

const text = await readFile(filePath, "utf8");

const keywords: string[] = keyword_extractor.extract(text, {
  language: "english",
  remove_digits: false,
  return_changed_case: true,
  remove_duplicates: true,
  return_max_ngrams: 3,
});

// Count frequency of each keyword in original text (case-insensitive)
const lower = text.toLowerCase();
const scored = keywords
  // Drop very short keywords and keywords that contain no lowercase letter at all.
  .filter((kw) => kw.length > 2 && !/^[^a-z]*$/.test(kw))
  .map((kw) => ({ keyword: kw, count: countOccurrences(lower, kw) }));

// Most frequent keywords first.
scored.sort((a, b) => b.count - a.count);

console.log(`Keywords from: ${filePath}\n`);
for (const { keyword, count } of scored) {
  console.log(` ${keyword.padEnd(35)} (${count}x)`);
}
@@ -0,0 +1,63 @@
1
+ import { LocalIndex, VirtualFileStorage } from 'vectra';
2
+
3
/**
 * Walks a vectra LocalIndex constructed with a VirtualFileStorage through
 * create, insert, query, upsert, list and delete, logging each step, and
 * finally logs whether a 'mem://test' path exists on disk.
 */
async function main() {
  const DIMENSIONS = 8;
  // Builds a DIMENSIONS-long vector from a per-position generator.
  const makeVector = (component: (position: number) => number): number[] =>
    Array.from({ length: DIMENSIONS }, (_, position) => component(position));

  // Step 1: create the index on top of the virtual storage.
  const memoryStorage = new VirtualFileStorage();
  const memIndex = new LocalIndex('mem://test', undefined, memoryStorage);
  await memIndex.createIndex({ version: 1 });
  console.log('created index');

  // Step 2: insert three items (ascending, descending and flat vectors), one after another.
  const seedItems = [
    {
      id: 'doc-a',
      vector: makeVector((position) => (position + 1) / DIMENSIONS),
      metadata: { block: 'A', label: 'first' },
    },
    {
      id: 'doc-b',
      vector: makeVector((position) => (DIMENSIONS - position) / DIMENSIONS),
      metadata: { block: 'B', label: 'second' },
    },
    {
      id: 'doc-c',
      vector: makeVector(() => 0.5),
      metadata: { block: 'C', label: 'third' },
    },
  ];
  for (const seed of seedItems) {
    await memIndex.insertItem(seed);
  }
  console.log('inserted 3 items');

  // Step 3: query with the same ascending vector that doc-a was inserted with.
  const probe = makeVector((position) => (position + 1) / DIMENSIONS);
  const hits = await memIndex.queryItems(probe, '', 3);
  console.log('query results:');
  hits.forEach(({ item, score }) => {
    console.log(` ${item.id} score=${score.toFixed(4)} block=${item.metadata.block}`);
  });

  // Step 4: upsert doc-a with a new vector and label, then read it back.
  await memIndex.upsertItem({
    id: 'doc-a',
    vector: makeVector(() => 0.5),
    metadata: { block: 'A', label: 'updated' },
  });
  const refreshed = await memIndex.getItem('doc-a');
  console.log(`upserted doc-a → label=${refreshed?.metadata.label}`);

  // Step 5: list everything currently in the index.
  const everything = await memIndex.listItems();
  console.log(`total items: ${everything.length}`);

  // Step 6: delete doc-b and list again.
  await memIndex.deleteItem('doc-b');
  const remaining = await memIndex.listItems();
  console.log(`after delete: ${remaining.length} items`);

  // Step 7: check whether the index path shows up on the real file system.
  const fsModule = await import('fs');
  const onDisk = fsModule.default.existsSync('mem://test');
  console.log(`disk folder exists: ${onDisk}`);

  console.log('\ndone — all in-memory, nothing on disk');
}
62
+
63
// Run the script. A rejected promise is logged and also marks the process as
// failed, so shells and CI do not see exit code 0 after an error.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});
@@ -0,0 +1,95 @@
1
+ import { LocalIndex } from "vectra";
2
+ import { pipeline } from "@huggingface/transformers";
3
+ import nlp from "compromise";
4
+ import { createRequire } from "module";
5
+ import { readFile } from "fs/promises";
6
+ import { readdirSync } from "fs";
7
+
8
// keyword-extractor is pulled in via a require() created for this ES module.
const require = createRequire(import.meta.url);
const keyword_extractor = require("keyword-extractor");

// Embedding model and on-disk index location used by this script.
const EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2";
const INDEX_FOLDER = "./vectra-keyword-index";

// Set up the feature-extraction pipeline, then open the index and create it on first use.
const embedder = await pipeline("feature-extraction", EMBEDDING_MODEL);
const index = new LocalIndex(INDEX_FOLDER);
const indexAlreadyExists = await index.isIndexCreated();
if (!indexAlreadyExists) {
  await index.createIndex();
}
20
+
21
/**
 * Runs `text` through the module-level embedder with mean pooling and
 * normalization enabled, and returns the output data as a plain number array.
 */
async function embed(text: string): Promise<number[]> {
  const { data } = await embedder(text, { pooling: "mean", normalize: true });
  return [...(data as Float32Array)];
}
25
+
26
/**
 * Extracts keywords from `text` in two passes: compromise selects topics,
 * nouns and verbs, and the joined, punctuation-stripped result is handed to
 * keyword-extractor. Keywords of two characters or fewer are discarded.
 */
function extractKeywords(text: string): string[] {
  // Pass 1 (compromise): collect topics, then nouns, then verbs.
  const parsed = nlp(text);
  const topics: string[] = parsed.topics().out("array");
  const nouns: string[] = parsed.nouns().out("array");
  const verbs: string[] = parsed.verbs().out("array");

  // Collapse every run of non-word characters into a single space.
  const normalized = topics
    .concat(nouns, verbs)
    .join(" ")
    .replace(/\W+/g, " ")
    .trim();

  // Pass 2 (keyword-extractor): keywords of up to two words.
  const candidates: string[] = keyword_extractor.extract(normalized, {
    language: "english",
    remove_digits: false,
    return_changed_case: true,
    remove_duplicates: true,
    return_max_ngrams: 2,
  });

  return candidates.filter((candidate: string) => candidate.length > 2);
}
46
+
47
// --- Index files ---
// Everything after the script name is treated as a file to index.
const [, , ...files] = process.argv;
if (files.length === 0) {
  console.error("Usage: npx tsx vectra-keywords.ts <file1> [file2] ...");
  process.exit(1);
}

console.log("=== Indexing ===");
// Files are processed one at a time: read, extract keywords, embed the
// comma-joined keyword string, and upsert it under the file path as id.
for (const sourcePath of files) {
  const contents = await readFile(sourcePath, "utf8");
  const extracted = extractKeywords(contents);
  const joinedKeywords = extracted.join(", ");
  const embedding = await embed(joinedKeywords);

  await index.upsertItem({
    id: sourcePath,
    vector: embedding,
    metadata: { keywords: joinedKeywords, file: sourcePath },
  });

  // Show only the first eight keywords as a preview.
  const preview = extracted.slice(0, 8).join(", ");
  console.log(` ${sourcePath} → ${preview}...`);
}
68
+
69
+ // --- Search by keyword ---
70
/**
 * Embeds `query` and asks the index for the `topK` closest items, passing
 * the raw query string along to `queryItems` as well.
 */
async function search(query: string, topK = 5) {
  return index.queryItems(await embed(query), query, topK);
}
74
+
75
/**
 * Prints `heading`, runs `search(query)` and logs each hit's score and id
 * followed by the first 100 characters of its stored `keywords` metadata.
 *
 * Replaces three copies of the same loop. The metadata value is converted
 * with `String(...)` instead of being cast through `any`, so an item without
 * a `keywords` field prints "undefined" rather than throwing.
 */
async function reportSearch(heading: string, query: string): Promise<void> {
  console.log(heading);
  const hits = await search(query);
  for (const hit of hits) {
    console.log(` ${hit.score.toFixed(4)} | ${hit.item.id}`);
    console.log(` keywords: ${String(hit.item.metadata.keywords).slice(0, 100)}`);
  }
}

// Direct keyword lookup.
await reportSearch("\n=== Search: keyword 'fruit' ===", "fruit");

// --- Search by synonym ---
await reportSearch(
  "\n=== Search: synonym 'automobile' (for 'cars/vehicles') ===",
  "automobile vehicle transportation",
);

await reportSearch(
  "\n=== Search: synonym 'embedding model neural network' ===",
  "embedding model neural network",
);
package/rnd/vectra.ts ADDED
@@ -0,0 +1,50 @@
1
+ import { LocalIndex } from "vectra";
2
+ import { pipeline } from "@huggingface/transformers";
3
+
4
// Folder that backs the index, and the local embedding model to load.
const INDEX_FOLDER = "./vectra-index";
const EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2";

// Open the folder-based index, creating it when it does not exist yet.
const index = new LocalIndex(INDEX_FOLDER);
const indexAlreadyExists = await index.isIndexCreated();
if (!indexAlreadyExists) {
  await index.createIndex();
}

// Feature-extraction pipeline used to turn text into vectors.
const embedder = await pipeline("feature-extraction", EMBEDDING_MODEL);
16
+
17
/**
 * Embeds `text` with the module-level pipeline (mean pooling, normalized)
 * and converts the resulting typed array into a regular number array.
 */
async function embed(text: string): Promise<number[]> {
  const output = await embedder(text, { pooling: "mean", normalize: true });
  const values = output.data as Float32Array;
  return Array.from(values);
}
21
+
22
// Add items with vectors + metadata
const texts = [
  { text: "Apples are red fruit", metadata: { category: "fruit" } },
  { text: "Bananas are yellow", metadata: { category: "fruit" } },
  { text: "Cars are vehicles", metadata: { category: "transport" } },
];

// Embed and upsert each sample, one at a time.
for (const item of texts) {
  const vector = await embed(item.text);
  await index.upsertItem({
    // Use the whole text as the id. Truncating to the first 20 characters
    // would make two texts with a shared prefix collide, and upsert would
    // silently overwrite one with the other. Every sample above is at most
    // 20 characters long, so the ids produced for them do not change.
    id: item.text,
    vector,
    metadata: item.metadata,
  });
}
console.log("Items indexed");
38
+
39
// Query example: embed the query text and fetch up to 3 matches whose
// `category` metadata equals "fruit".
const queryText = "red fruit";
const categoryFilter = { category: { $eq: "fruit" } };
const queryVector = await embed(queryText);
const results = await index.queryItems(queryVector, queryText, 3, categoryFilter);

// One line per match: score, id and the JSON-encoded metadata.
results.forEach(({ score, item }) => {
  const formattedScore = score.toFixed(4);
  const metadataJson = JSON.stringify(item.metadata);
  console.log(`Score: ${formattedScore} | ID: ${item.id} | Metadata: ${metadataJson}`);
});
package/tsconfig.json ADDED
@@ -0,0 +1,14 @@
1
+ {
2
+ "compilerOptions": {
3
+ "target": "ES2022",
4
+ "module": "Node16",
5
+ "moduleResolution": "Node16",
6
+ "esModuleInterop": true,
7
+ "strict": true,
8
+ "outDir": "dist",
9
+ "rootDir": ".",
10
+ "skipLibCheck": true
11
+ },
12
+ "include": ["*.ts"],
13
+ "exclude": ["node_modules", "dist"]
14
+ }