@joycodetech/qmd-ja 2.5.3 → 2.5.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [2.5.4] - 2026-06-22
6
+
5
7
  ### Documentation
6
8
 
7
9
  - README: documented collection filtering (`-c` semantics), the `collection
package/dist/cli/qmd.js CHANGED
@@ -1,4 +1,3 @@
1
- #!/usr/bin/env node
2
1
  import { isBun, openDatabase } from "../db.js";
3
2
  import fastGlob from "fast-glob";
4
3
  import { execSync, spawn as nodeSpawn } from "child_process";
@@ -7,7 +6,7 @@ import { basename, dirname, join as pathJoin, relative as relativePath, resolve
7
6
  import { parseArgs } from "util";
8
7
  import { readFileSync, readdirSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync, copyFileSync } from "fs";
9
8
  import { createInterface } from "readline/promises";
10
- import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, getEmbeddingFingerprint, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, findOrMigrateLegacyDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, structuredSearch, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_EMBED_MAX_BATCH_BYTES, DEFAULT_EMBED_MAX_DOCS_PER_BATCH, DEFAULT_RERANK_MODEL, DEFAULT_QUERY_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, reindexCollection, initializeKuromojiTokenizer, generateEmbeddings, maybeAdoptLegacyEmbeddingFingerprint, syncConfigToDb, } from "../store.js";
9
+ import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, getEmbeddingFingerprint, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, findOrMigrateLegacyDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, structuredSearch, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_EMBED_MAX_BATCH_BYTES, DEFAULT_EMBED_MAX_DOCS_PER_BATCH, DEFAULT_RERANK_MODEL, DEFAULT_QUERY_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, reindexCollection, initializeKuromojiTokenizer, initializeVaporettoTokenizer, FTS_CJK_NORMALIZED_VERSION, resolveVaporettoModelPath, generateEmbeddings, maybeAdoptLegacyEmbeddingFingerprint, syncConfigToDb, } from "../store.js";
11
10
  import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_MODEL_CACHE_DIR, resolveEmbedModel, resolveGenerateModel, resolveRerankModel, resolveModels, inspectGgufFile, isDarwinMetalMitigationActive } from "../llm.js";
12
11
  import { formatSearchResults, formatDocuments, escapeXml, escapeCSV, } from "./formatter.js";
13
12
  import { getCollection as getCollectionFromYaml, listCollections as yamlListCollections, getDefaultCollectionNames, addContext as yamlAddContext, removeContext as yamlRemoveContext, removeCollection as yamlRemoveCollectionFn, renameCollection as yamlRenameCollectionFn, setGlobalContext, listAllContexts, setConfigIndexName, loadConfig, saveConfig, setConfigSource, findLocalConfigPath, getLocalDbPath, getConfigPath, configExists, } from "../collections.js";
@@ -3490,6 +3489,27 @@ async function showDoctor() {
3490
3489
  catch (error) {
3491
3490
  doctorCheck("sqlite-vec", false, error instanceof Error ? error.message : String(error));
3492
3491
  }
3492
+ // CJK tokenizer check (qmd-ja: Vaporetto WASM)
3493
+ try {
3494
+ await initializeVaporettoTokenizer();
3495
+ const modelPath = resolveVaporettoModelPath();
3496
+ const modelName = modelPath.split("/").pop() ?? modelPath;
3497
+ let ftsLabel = "not indexed yet";
3498
+ try {
3499
+ const row = db.prepare(`SELECT value FROM store_config WHERE key = 'fts_cjk_normalized_version'`).get();
3500
+ if (row?.value === FTS_CJK_NORMALIZED_VERSION) {
3501
+ ftsLabel = `v${FTS_CJK_NORMALIZED_VERSION} (current)`;
3502
+ }
3503
+ else if (row?.value) {
3504
+ ftsLabel = `v${row.value} -> v${FTS_CJK_NORMALIZED_VERSION} (stale, run qmd-ja update)`;
3505
+ }
3506
+ }
3507
+ catch { /* ignore */ }
3508
+ doctorCheck("CJK tokenizer", true, `Vaporetto WASM — model: ${modelName}, FTS index: ${ftsLabel}`);
3509
+ }
3510
+ catch (error) {
3511
+ doctorCheck("CJK tokenizer", false, `Vaporetto WASM failed: ${error instanceof Error ? error.message : String(error)}`);
3512
+ }
3493
3513
  const configCheck = checkDoctorIndexConfig(nextSteps);
3494
3514
  const configModels = configCheck.config?.models ?? {};
3495
3515
  checkEnvironmentOverrides(activeModels, configModels);
@@ -3600,7 +3620,7 @@ async function showVersion() {
3600
3620
  // Not a git repo or git not available
3601
3621
  }
3602
3622
  const versionStr = commit ? `${pkg.version} (${commit})` : pkg.version;
3603
- console.log(`qmd ${versionStr}`);
3623
+ console.log(`qmd-ja ${versionStr}`);
3604
3624
  }
3605
3625
  // Main CLI - only run if this is the main module
3606
3626
  const __filename = fileURLToPath(import.meta.url);
package/dist/store.d.ts CHANGED
@@ -187,6 +187,12 @@ export declare function resolveVirtualPath(db: Database, virtualPath: string): s
187
187
  */
188
188
  export declare function toVirtualPath(db: Database, absolutePath: string): string | null;
189
189
  export declare function verifySqliteVecLoaded(db: Database): void;
190
+ export declare const FTS_CJK_NORMALIZED_VERSION = "3";
191
+ /**
192
+ * Resolve the Vaporetto model file path relative to this module.
193
+ * The model is bundled under vendor/vaporetto-node-wasm/../models/ relative to the project root.
194
+ */
195
+ export declare function resolveVaporettoModelPath(): string;
190
196
  /**
191
197
  * Pre-initialize the Vaporetto WASM Japanese morphological analyzer.
192
198
  * Call this before indexing or search operations involving CJK text.
package/dist/store.js CHANGED
@@ -595,7 +595,7 @@ const CJK_CHAR_PATTERN = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\
595
595
  // parts of katakana words (e.g. "ナレッジベース"). Without these, the regex splits
596
596
  // on "ー" and vaporetto receives broken sub-strings like "ナレッジベ" and "ス".
597
597
  const CJK_RUN_PATTERN = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}・ー]+/gu;
598
- const FTS_CJK_NORMALIZED_VERSION = "3"; // bumped: vaporetto WASM morphological tokenization
598
+ export const FTS_CJK_NORMALIZED_VERSION = "3"; // bumped: vaporetto WASM morphological tokenization
599
599
  // --- Vaporetto WASM Japanese morphological analyzer (lazy singleton) ---
600
600
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
601
601
  let _vaporettoTokenizer = null;
@@ -604,7 +604,7 @@ let _vaporettoInitPromise = null;
604
604
  * Resolve the Vaporetto model file path relative to this module.
605
605
  * The model is bundled under vendor/vaporetto-node-wasm/../models/ relative to the project root.
606
606
  */
607
- function _resolveVaporettoModelPath() {
607
+ export function resolveVaporettoModelPath() {
608
608
  // __dirname equivalent for ESM
609
609
  const thisDir = dirname(fileURLToPath(import.meta.url));
610
610
  // src/ → project root → models/
@@ -626,8 +626,8 @@ export async function initializeVaporettoTokenizer() {
626
626
  _vaporettoInitPromise = (async () => {
627
627
  const req = createRequire(import.meta.url);
628
628
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
629
- const { VaporettoTokenizer } = req(pathJoin(dirname(fileURLToPath(import.meta.url)), "_vendor", "vaporetto-node-wasm", "vaporetto_node_wasm.js"));
630
- const modelPath = _resolveVaporettoModelPath();
629
+ const { VaporettoTokenizer } = req(pathJoin(dirname(fileURLToPath(import.meta.url)), "..", "vendor", "vaporetto-node-wasm", "vaporetto_node_wasm.js"));
630
+ const modelPath = resolveVaporettoModelPath();
631
631
  const modelData = readFileSync(modelPath);
632
632
  return new VaporettoTokenizer(modelData);
633
633
  })();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@joycodetech/qmd-ja",
3
- "version": "2.5.3",
3
+ "version": "2.5.5",
4
4
  "description": "Japanese-enhanced fork of qmd — On-device hybrid search with Vaporetto WASM morphological tokenizer for accurate Japanese BM25 full-text search",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -12,7 +12,7 @@
12
12
  }
13
13
  },
14
14
  "bin": {
15
- "qmd": "bin/qmd"
15
+ "qmd-ja": "bin/qmd-ja"
16
16
  },
17
17
  "files": [
18
18
  "bin/",
@@ -0,0 +1,22 @@
1
+ MIT License
2
+
3
+ Copyright (c) Koichi Akabe, Shunsuke Kanda, Yusuke Oda, Shinsuke Mori
4
+ (daac-tools/vaporetto — https://github.com/daac-tools/vaporetto)
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy
7
+ of this software and associated documentation files (the "Software"), to deal
8
+ in the Software without restriction, including without limitation the rights
9
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10
+ copies of the Software, and to permit persons to whom the Software is
11
+ furnished to do so, subject to the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be included in all
14
+ copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
+ SOFTWARE.
File without changes