npm - grepmax - Versions diffs - 0.7.44 → 0.8.0 - Mend

grepmax 0.7.44 → 0.8.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/dist/commands/verify.js +8 -8
package/dist/lib/workers/colbert-tokenizer.js +11 -9
package/package.json +13 -12
package/plugins/grepmax/.claude-plugin/plugin.json +1 -1

package/dist/commands/verify.js CHANGED Viewed

@@ -52,7 +52,7 @@ const MODEL_PATH = path.join(MODEL_DIR, "model.onnx");
 const SKIPLIST_PATH = path.join(MODEL_DIR, "skiplist.json");
 function main() {
     return __awaiter(this, void 0, void 0, function* () {
-        var _a, _b;
+        var _a, _b, _c;
         console.log("🔍 Starting ColBERT Integrity Check...\n");
         // --- CHECK 1: FILES EXIST ---
         if (!fs.existsSync(MODEL_PATH))
@@ -68,19 +68,19 @@ function main() {
         // Note: We use the ID we know works from your export: 50368
         // But let's see if the tokenizer resolves "[Q] " correctly.
         const encoded = yield tokenizer(queryText, { add_special_tokens: false });
-        const inputIds = encoded.input_ids; // BigInt64Array in newer transformers versions
+        const inputIds = (_a = encoded.input_ids.data) !== null && _a !== void 0 ? _a : encoded.input_ids;
         // Convert to standard array for inspection
         const ids = Array.from(inputIds).map(Number);
         // Mixedbread expects: [CLS] [Q] ...tokens... [SEP]
         // Let's verify we can construct that.
         const Q_ID = 50368;
-        const CLS_ID = (_a = tokenizer.model.tokens_to_ids.get("[CLS]")) !== null && _a !== void 0 ? _a : 50281; // Fallback to standard if null
+        const CLS_ID = (_b = tokenizer.convert_tokens_to_ids("[CLS]")) !== null && _b !== void 0 ? _b : 50281; // Fallback to standard if null
         console.log(`\n--- Tokenizer Check ---`);
         console.log(`Query: "${queryText}"`);
         console.log(`Raw IDs:`, ids);
         // Check if tokenizer recognizes the special tokens by text
-        const qCheck = tokenizer.model.tokens_to_ids.get("[Q] ");
-        const dCheck = tokenizer.model.tokens_to_ids.get("[D] ");
+        const qCheck = tokenizer.convert_tokens_to_ids("[Q] ");
+        const dCheck = tokenizer.convert_tokens_to_ids("[D] ");
         if (qCheck === 50368 && dCheck === 50369) {
             console.log(`✅ Tokenizer Map Correct: [Q] -> ${qCheck}, [D] -> ${dCheck}`);
         }
@@ -93,8 +93,8 @@ function main() {
         console.log(`\n--- Skiplist Check ---`);
         console.log(`Skiplist size: ${skiplist.size}`);
         // Check common punctuation
-        const commaId = tokenizer.model.tokens_to_ids.get(",");
-        const dotId = tokenizer.model.tokens_to_ids.get(".");
+        const commaId = tokenizer.convert_tokens_to_ids(",");
+        const dotId = tokenizer.convert_tokens_to_ids(".");
         if (skiplist.has(commaId) && skiplist.has(dotId)) {
             console.log(`✅ Skiplist contains punctuation ('.'=${dotId}, ','=${commaId})`);
         }
@@ -110,7 +110,7 @@ function main() {
             BigInt(CLS_ID),
             BigInt(Q_ID),
             BigInt(1234),
-            BigInt((_b = tokenizer.sep_token_id) !== null && _b !== void 0 ? _b : 50282),
+            BigInt((_c = tokenizer.sep_token_id) !== null && _c !== void 0 ? _c : 50282),
         ];
         const tensorIds = new ort.Tensor("int64", new BigInt64Array(batchIds), [1, 4]);
         const tensorMask = new ort.Tensor("int64", new BigInt64Array([BigInt(1), BigInt(1), BigInt(1), BigInt(1)]), [1, 4]);

package/dist/lib/workers/colbert-tokenizer.js CHANGED Viewed

@@ -23,20 +23,22 @@ class ColBERTTokenizer {
     }
     init(modelPath) {
         return __awaiter(this, void 0, void 0, function* () {
-            var _a, _b, _c, _d, _e, _f, _g, _h, _j;
+            var _a, _b, _c, _d, _e, _f;
             this.tokenizer = yield transformers_1.AutoTokenizer.from_pretrained(modelPath);
             // Get special token IDs with fallbacks
             // We use the IDs we discovered in validation: [Q]=50368, [D]=50369
             // But we still try to look them up dynamically first.
             const tokenizer = this.tokenizer;
-            const get = (token) => tokenizer === null || tokenizer === void 0 ? void 0 : tokenizer.model.tokens_to_ids.get(token);
-            const specialTokens = tokenizer;
-            const clsId = (_b = get((_a = specialTokens.cls_token) !== null && _a !== void 0 ? _a : "[CLS]")) !== null && _b !== void 0 ? _b : 50281;
-            const sepId = (_d = get((_c = specialTokens.sep_token) !== null && _c !== void 0 ? _c : "[SEP]")) !== null && _d !== void 0 ? _d : 50282;
-            const padId = (_f = get((_e = specialTokens.pad_token) !== null && _e !== void 0 ? _e : "[PAD]")) !== null && _f !== void 0 ? _f : 50283;
-            const maskId = (_g = get(MASK_TOKEN)) !== null && _g !== void 0 ? _g : 50284;
-            const queryMarkerId = (_h = get(QUERY_MARKER_TOKEN)) !== null && _h !== void 0 ? _h : 50368;
-            const docMarkerId = (_j = get(DOC_MARKER_TOKEN)) !== null && _j !== void 0 ? _j : 50369;
+            const get = (token) => {
+                const id = tokenizer === null || tokenizer === void 0 ? void 0 : tokenizer.convert_tokens_to_ids(token);
+                return typeof id === "number" && id >= 0 ? id : undefined;
+            };
+            const clsId = (_a = get("[CLS]")) !== null && _a !== void 0 ? _a : 50281;
+            const sepId = (_b = get("[SEP]")) !== null && _b !== void 0 ? _b : 50282;
+            const padId = (_c = get("[PAD]")) !== null && _c !== void 0 ? _c : 50283;
+            const maskId = (_d = get(MASK_TOKEN)) !== null && _d !== void 0 ? _d : 50284;
+            const queryMarkerId = (_e = get(QUERY_MARKER_TOKEN)) !== null && _e !== void 0 ? _e : 50368;
+            const docMarkerId = (_f = get(DOC_MARKER_TOKEN)) !== null && _f !== void 0 ? _f : 50369;
             this.specialTokenIds = {
                 cls: clsId,
                 sep: sepId,

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "grepmax",
-  "version": "0.7.44",
+  "version": "0.8.0",
   "author": "Robert Owens <robowens@me.com>",
   "homepage": "https://github.com/reowens/grepmax",
   "bugs": {
@@ -33,9 +33,9 @@
   "description": "Semantic code search for coding agents. Local embeddings, LLM summaries, call graph tracing.",
   "dependencies": {
     "@clack/prompts": "^1.1.0",
-    "@huggingface/transformers": "^3.8.0",
-    "@lancedb/lancedb": "^0.26.2",
-    "@modelcontextprotocol/sdk": "^1.24.3",
+    "@huggingface/transformers": "^4.0.0",
+    "@lancedb/lancedb": "^0.27.1",
+    "@modelcontextprotocol/sdk": "^1.29.0",
     "apache-arrow": "^18.1.0",
     "chalk": "^5.6.2",
     "chokidar": "^5.0.0",
@@ -44,23 +44,24 @@
     "dotenv": "^17.2.3",
     "fast-glob": "^3.3.3",
     "ignore": "^7.0.5",
-    "lmdb": "^3.4.4",
+    "lmdb": "^3.5.2",
     "onnxruntime-node": "1.24.3",
-    "ora": "^5.4.1",
+    "ora": "^9.3.0",
     "piscina": "^5.1.4",
     "simsimd": "^6.5.5",
     "uuid": "^13.0.0",
-    "web-tree-sitter": "^0.26.6",
+    "web-tree-sitter": "^0.26.7",
     "zod": "^4.1.12"
   },
   "devDependencies": {
-    "@anthropic-ai/claude-agent-sdk": "^0.2.76",
-    "@biomejs/biome": "2.4.7",
+    "@anthropic-ai/claude-agent-sdk": "^0.2.87",
+    "@biomejs/biome": "2.4.10",
     "@types/node": "^25.5.0",
     "node-gyp": "^12.1.0",
     "ts-node": "^10.9.2",
-    "typescript": "^5.9.3",
-    "vitest": "^1.6.1"
+    "typescript": "^6.0.2",
+    "vite": "^8.0.3",
+    "vitest": "^4.1.2"
   },
   "scripts": {
     "postinstall": "node scripts/postinstall.js",
@@ -80,6 +81,6 @@
     "typecheck": "tsc --noEmit",
     "preversion": "pnpm test && pnpm typecheck",
     "version": "bash scripts/sync-versions.sh && git add -A",
-    "postversion": "git push origin main --tags && gh release create v$npm_package_version --generate-notes --title v$npm_package_version && sleep 5 && gh run watch $(gh run list --workflow=release.yml --limit 1 --json databaseId --jq '.[0].databaseId') --exit-status && sleep 30 && npm cache clean --force && npm install -g grepmax@$npm_package_version"
+    "postversion": "git push origin main && git push origin v$npm_package_version && gh release create v$npm_package_version --generate-notes --title v$npm_package_version && sleep 5 && gh run watch $(gh run list --workflow=release.yml --branch v$npm_package_version --limit 1 --json databaseId --jq '.[0].databaseId') --exit-status && sleep 30 && npm cache clean --force && npm install -g grepmax@$npm_package_version"
   }
 }

package/plugins/grepmax/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "grepmax",
-  "version": "0.7.44",
+  "version": "0.8.0",
   "description": "Semantic code search for Claude Code. Automatically indexes your project and provides intelligent search capabilities.",
   "author": {
     "name": "Robert Owens",