grepmax 0.7.44 → 0.8.0

This diff shows the changes between two publicly released versions of this package, as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
@@ -52,7 +52,7 @@ const MODEL_PATH = path.join(MODEL_DIR, "model.onnx");
52
52
  const SKIPLIST_PATH = path.join(MODEL_DIR, "skiplist.json");
53
53
  function main() {
54
54
  return __awaiter(this, void 0, void 0, function* () {
55
- var _a, _b;
55
+ var _a, _b, _c;
56
56
  console.log("🔍 Starting ColBERT Integrity Check...\n");
57
57
  // --- CHECK 1: FILES EXIST ---
58
58
  if (!fs.existsSync(MODEL_PATH))
@@ -68,19 +68,19 @@ function main() {
68
68
  // Note: We use the ID we know works from your export: 50368
69
69
  // But let's see if the tokenizer resolves "[Q] " correctly.
70
70
  const encoded = yield tokenizer(queryText, { add_special_tokens: false });
71
- const inputIds = encoded.input_ids; // BigInt64Array in newer transformers versions
71
+ const inputIds = (_a = encoded.input_ids.data) !== null && _a !== void 0 ? _a : encoded.input_ids;
72
72
  // Convert to standard array for inspection
73
73
  const ids = Array.from(inputIds).map(Number);
74
74
  // Mixedbread expects: [CLS] [Q] ...tokens... [SEP]
75
75
  // Let's verify we can construct that.
76
76
  const Q_ID = 50368;
77
- const CLS_ID = (_a = tokenizer.model.tokens_to_ids.get("[CLS]")) !== null && _a !== void 0 ? _a : 50281; // Fallback to standard if null
77
+ const CLS_ID = (_b = tokenizer.convert_tokens_to_ids("[CLS]")) !== null && _b !== void 0 ? _b : 50281; // Fallback to standard if null
78
78
  console.log(`\n--- Tokenizer Check ---`);
79
79
  console.log(`Query: "${queryText}"`);
80
80
  console.log(`Raw IDs:`, ids);
81
81
  // Check if tokenizer recognizes the special tokens by text
82
- const qCheck = tokenizer.model.tokens_to_ids.get("[Q] ");
83
- const dCheck = tokenizer.model.tokens_to_ids.get("[D] ");
82
+ const qCheck = tokenizer.convert_tokens_to_ids("[Q] ");
83
+ const dCheck = tokenizer.convert_tokens_to_ids("[D] ");
84
84
  if (qCheck === 50368 && dCheck === 50369) {
85
85
  console.log(`✅ Tokenizer Map Correct: [Q] -> ${qCheck}, [D] -> ${dCheck}`);
86
86
  }
@@ -93,8 +93,8 @@ function main() {
93
93
  console.log(`\n--- Skiplist Check ---`);
94
94
  console.log(`Skiplist size: ${skiplist.size}`);
95
95
  // Check common punctuation
96
- const commaId = tokenizer.model.tokens_to_ids.get(",");
97
- const dotId = tokenizer.model.tokens_to_ids.get(".");
96
+ const commaId = tokenizer.convert_tokens_to_ids(",");
97
+ const dotId = tokenizer.convert_tokens_to_ids(".");
98
98
  if (skiplist.has(commaId) && skiplist.has(dotId)) {
99
99
  console.log(`✅ Skiplist contains punctuation ('.'=${dotId}, ','=${commaId})`);
100
100
  }
@@ -110,7 +110,7 @@ function main() {
110
110
  BigInt(CLS_ID),
111
111
  BigInt(Q_ID),
112
112
  BigInt(1234),
113
- BigInt((_b = tokenizer.sep_token_id) !== null && _b !== void 0 ? _b : 50282),
113
+ BigInt((_c = tokenizer.sep_token_id) !== null && _c !== void 0 ? _c : 50282),
114
114
  ];
115
115
  const tensorIds = new ort.Tensor("int64", new BigInt64Array(batchIds), [1, 4]);
116
116
  const tensorMask = new ort.Tensor("int64", new BigInt64Array([BigInt(1), BigInt(1), BigInt(1), BigInt(1)]), [1, 4]);
@@ -23,20 +23,22 @@ class ColBERTTokenizer {
23
23
  }
24
24
  init(modelPath) {
25
25
  return __awaiter(this, void 0, void 0, function* () {
26
- var _a, _b, _c, _d, _e, _f, _g, _h, _j;
26
+ var _a, _b, _c, _d, _e, _f;
27
27
  this.tokenizer = yield transformers_1.AutoTokenizer.from_pretrained(modelPath);
28
28
  // Get special token IDs with fallbacks
29
29
  // We use the IDs we discovered in validation: [Q]=50368, [D]=50369
30
30
  // But we still try to look them up dynamically first.
31
31
  const tokenizer = this.tokenizer;
32
- const get = (token) => tokenizer === null || tokenizer === void 0 ? void 0 : tokenizer.model.tokens_to_ids.get(token);
33
- const specialTokens = tokenizer;
34
- const clsId = (_b = get((_a = specialTokens.cls_token) !== null && _a !== void 0 ? _a : "[CLS]")) !== null && _b !== void 0 ? _b : 50281;
35
- const sepId = (_d = get((_c = specialTokens.sep_token) !== null && _c !== void 0 ? _c : "[SEP]")) !== null && _d !== void 0 ? _d : 50282;
36
- const padId = (_f = get((_e = specialTokens.pad_token) !== null && _e !== void 0 ? _e : "[PAD]")) !== null && _f !== void 0 ? _f : 50283;
37
- const maskId = (_g = get(MASK_TOKEN)) !== null && _g !== void 0 ? _g : 50284;
38
- const queryMarkerId = (_h = get(QUERY_MARKER_TOKEN)) !== null && _h !== void 0 ? _h : 50368;
39
- const docMarkerId = (_j = get(DOC_MARKER_TOKEN)) !== null && _j !== void 0 ? _j : 50369;
32
+ const get = (token) => {
33
+ const id = tokenizer === null || tokenizer === void 0 ? void 0 : tokenizer.convert_tokens_to_ids(token);
34
+ return typeof id === "number" && id >= 0 ? id : undefined;
35
+ };
36
+ const clsId = (_a = get("[CLS]")) !== null && _a !== void 0 ? _a : 50281;
37
+ const sepId = (_b = get("[SEP]")) !== null && _b !== void 0 ? _b : 50282;
38
+ const padId = (_c = get("[PAD]")) !== null && _c !== void 0 ? _c : 50283;
39
+ const maskId = (_d = get(MASK_TOKEN)) !== null && _d !== void 0 ? _d : 50284;
40
+ const queryMarkerId = (_e = get(QUERY_MARKER_TOKEN)) !== null && _e !== void 0 ? _e : 50368;
41
+ const docMarkerId = (_f = get(DOC_MARKER_TOKEN)) !== null && _f !== void 0 ? _f : 50369;
40
42
  this.specialTokenIds = {
41
43
  cls: clsId,
42
44
  sep: sepId,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "grepmax",
3
- "version": "0.7.44",
3
+ "version": "0.8.0",
4
4
  "author": "Robert Owens <robowens@me.com>",
5
5
  "homepage": "https://github.com/reowens/grepmax",
6
6
  "bugs": {
@@ -33,9 +33,9 @@
33
33
  "description": "Semantic code search for coding agents. Local embeddings, LLM summaries, call graph tracing.",
34
34
  "dependencies": {
35
35
  "@clack/prompts": "^1.1.0",
36
- "@huggingface/transformers": "^3.8.0",
37
- "@lancedb/lancedb": "^0.26.2",
38
- "@modelcontextprotocol/sdk": "^1.24.3",
36
+ "@huggingface/transformers": "^4.0.0",
37
+ "@lancedb/lancedb": "^0.27.1",
38
+ "@modelcontextprotocol/sdk": "^1.29.0",
39
39
  "apache-arrow": "^18.1.0",
40
40
  "chalk": "^5.6.2",
41
41
  "chokidar": "^5.0.0",
@@ -44,23 +44,24 @@
44
44
  "dotenv": "^17.2.3",
45
45
  "fast-glob": "^3.3.3",
46
46
  "ignore": "^7.0.5",
47
- "lmdb": "^3.4.4",
47
+ "lmdb": "^3.5.2",
48
48
  "onnxruntime-node": "1.24.3",
49
- "ora": "^5.4.1",
49
+ "ora": "^9.3.0",
50
50
  "piscina": "^5.1.4",
51
51
  "simsimd": "^6.5.5",
52
52
  "uuid": "^13.0.0",
53
- "web-tree-sitter": "^0.26.6",
53
+ "web-tree-sitter": "^0.26.7",
54
54
  "zod": "^4.1.12"
55
55
  },
56
56
  "devDependencies": {
57
- "@anthropic-ai/claude-agent-sdk": "^0.2.76",
58
- "@biomejs/biome": "2.4.7",
57
+ "@anthropic-ai/claude-agent-sdk": "^0.2.87",
58
+ "@biomejs/biome": "2.4.10",
59
59
  "@types/node": "^25.5.0",
60
60
  "node-gyp": "^12.1.0",
61
61
  "ts-node": "^10.9.2",
62
- "typescript": "^5.9.3",
63
- "vitest": "^1.6.1"
62
+ "typescript": "^6.0.2",
63
+ "vite": "^8.0.3",
64
+ "vitest": "^4.1.2"
64
65
  },
65
66
  "scripts": {
66
67
  "postinstall": "node scripts/postinstall.js",
@@ -80,6 +81,6 @@
80
81
  "typecheck": "tsc --noEmit",
81
82
  "preversion": "pnpm test && pnpm typecheck",
82
83
  "version": "bash scripts/sync-versions.sh && git add -A",
83
- "postversion": "git push origin main --tags && gh release create v$npm_package_version --generate-notes --title v$npm_package_version && sleep 5 && gh run watch $(gh run list --workflow=release.yml --limit 1 --json databaseId --jq '.[0].databaseId') --exit-status && sleep 30 && npm cache clean --force && npm install -g grepmax@$npm_package_version"
84
+ "postversion": "git push origin main && git push origin v$npm_package_version && gh release create v$npm_package_version --generate-notes --title v$npm_package_version && sleep 5 && gh run watch $(gh run list --workflow=release.yml --branch v$npm_package_version --limit 1 --json databaseId --jq '.[0].databaseId') --exit-status && sleep 30 && npm cache clean --force && npm install -g grepmax@$npm_package_version"
84
85
  }
85
86
  }
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "grepmax",
3
- "version": "0.7.44",
3
+ "version": "0.8.0",
4
4
  "description": "Semantic code search for Claude Code. Automatically indexes your project and provides intelligent search capabilities.",
5
5
  "author": {
6
6
  "name": "Robert Owens",