@joycodetech/qmd-ja 2.5.4 → 2.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +2 -0
- package/dist/cli/qmd.js +23 -3
- package/dist/store.d.ts +6 -0
- package/dist/store.js +3 -3
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
package/dist/cli/qmd.js
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
#!/usr/bin/env node
|
|
2
1
|
import { isBun, openDatabase } from "../db.js";
|
|
3
2
|
import fastGlob from "fast-glob";
|
|
4
3
|
import { execSync, spawn as nodeSpawn } from "child_process";
|
|
@@ -7,7 +6,7 @@ import { basename, dirname, join as pathJoin, relative as relativePath, resolve
|
|
|
7
6
|
import { parseArgs } from "util";
|
|
8
7
|
import { readFileSync, readdirSync, realpathSync, statSync, existsSync, unlinkSync, writeFileSync, openSync, closeSync, mkdirSync, lstatSync, rmSync, symlinkSync, readlinkSync, copyFileSync } from "fs";
|
|
9
8
|
import { createInterface } from "readline/promises";
|
|
10
|
-
import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, getEmbeddingFingerprint, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, findOrMigrateLegacyDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, structuredSearch, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_EMBED_MAX_BATCH_BYTES, DEFAULT_EMBED_MAX_DOCS_PER_BATCH, DEFAULT_RERANK_MODEL, DEFAULT_QUERY_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, reindexCollection, initializeKuromojiTokenizer, generateEmbeddings, maybeAdoptLegacyEmbeddingFingerprint, syncConfigToDb, } from "../store.js";
|
|
9
|
+
import { getPwd, getRealPath, homedir, resolve, enableProductionMode, searchFTS, extractSnippet, getContextForFile, getContextForPath, listCollections, removeCollection, renameCollection, findSimilarFiles, findDocumentByDocid, isDocid, matchFilesByGlob, getHashesNeedingEmbedding, clearAllEmbeddings, insertEmbedding, getStatus, hashContent, extractTitle, formatDocForEmbedding, getEmbeddingFingerprint, chunkDocumentByTokens, clearCache, getCacheKey, getCachedResult, setCachedResult, getIndexHealth, parseVirtualPath, buildVirtualPath, isVirtualPath, resolveVirtualPath, toVirtualPath, insertContent, insertDocument, findActiveDocument, findOrMigrateLegacyDocument, updateDocumentTitle, updateDocument, deactivateDocument, getActiveDocumentPaths, cleanupOrphanedContent, deleteLLMCache, deleteInactiveDocuments, cleanupOrphanedVectors, vacuumDatabase, getCollectionsWithoutContext, getTopLevelPathsWithoutContext, handelize, hybridQuery, vectorSearchQuery, structuredSearch, addLineNumbers, DEFAULT_EMBED_MODEL, DEFAULT_EMBED_MAX_BATCH_BYTES, DEFAULT_EMBED_MAX_DOCS_PER_BATCH, DEFAULT_RERANK_MODEL, DEFAULT_QUERY_MODEL, DEFAULT_GLOB, DEFAULT_MULTI_GET_MAX_BYTES, createStore, getDefaultDbPath, reindexCollection, initializeKuromojiTokenizer, initializeVaporettoTokenizer, FTS_CJK_NORMALIZED_VERSION, resolveVaporettoModelPath, generateEmbeddings, maybeAdoptLegacyEmbeddingFingerprint, syncConfigToDb, } from "../store.js";
|
|
11
10
|
import { disposeDefaultLlamaCpp, getDefaultLlamaCpp, setDefaultLlamaCpp, LlamaCpp, withLLMSession, pullModels, DEFAULT_MODEL_CACHE_DIR, resolveEmbedModel, resolveGenerateModel, resolveRerankModel, resolveModels, inspectGgufFile, isDarwinMetalMitigationActive } from "../llm.js";
|
|
12
11
|
import { formatSearchResults, formatDocuments, escapeXml, escapeCSV, } from "./formatter.js";
|
|
13
12
|
import { getCollection as getCollectionFromYaml, listCollections as yamlListCollections, getDefaultCollectionNames, addContext as yamlAddContext, removeContext as yamlRemoveContext, removeCollection as yamlRemoveCollectionFn, renameCollection as yamlRenameCollectionFn, setGlobalContext, listAllContexts, setConfigIndexName, loadConfig, saveConfig, setConfigSource, findLocalConfigPath, getLocalDbPath, getConfigPath, configExists, } from "../collections.js";
|
|
@@ -3490,6 +3489,27 @@ async function showDoctor() {
|
|
|
3490
3489
|
catch (error) {
|
|
3491
3490
|
doctorCheck("sqlite-vec", false, error instanceof Error ? error.message : String(error));
|
|
3492
3491
|
}
|
|
3492
|
+
// CJK tokenizer check (qmd-ja: Vaporetto WASM)
|
|
3493
|
+
try {
|
|
3494
|
+
await initializeVaporettoTokenizer();
|
|
3495
|
+
const modelPath = resolveVaporettoModelPath();
|
|
3496
|
+
const modelName = modelPath.split("/").pop() ?? modelPath;
|
|
3497
|
+
let ftsLabel = "not indexed yet";
|
|
3498
|
+
try {
|
|
3499
|
+
const row = db.prepare(`SELECT value FROM store_config WHERE key = 'fts_cjk_normalized_version'`).get();
|
|
3500
|
+
if (row?.value === FTS_CJK_NORMALIZED_VERSION) {
|
|
3501
|
+
ftsLabel = `v${FTS_CJK_NORMALIZED_VERSION} (current)`;
|
|
3502
|
+
}
|
|
3503
|
+
else if (row?.value) {
|
|
3504
|
+
ftsLabel = `v${row.value} -> v${FTS_CJK_NORMALIZED_VERSION} (stale, run qmd-ja update)`;
|
|
3505
|
+
}
|
|
3506
|
+
}
|
|
3507
|
+
catch { /* ignore */ }
|
|
3508
|
+
doctorCheck("CJK tokenizer", true, `Vaporetto WASM — model: ${modelName}, FTS index: ${ftsLabel}`);
|
|
3509
|
+
}
|
|
3510
|
+
catch (error) {
|
|
3511
|
+
doctorCheck("CJK tokenizer", false, `Vaporetto WASM failed: ${error instanceof Error ? error.message : String(error)}`);
|
|
3512
|
+
}
|
|
3493
3513
|
const configCheck = checkDoctorIndexConfig(nextSteps);
|
|
3494
3514
|
const configModels = configCheck.config?.models ?? {};
|
|
3495
3515
|
checkEnvironmentOverrides(activeModels, configModels);
|
|
@@ -3600,7 +3620,7 @@ async function showVersion() {
|
|
|
3600
3620
|
// Not a git repo or git not available
|
|
3601
3621
|
}
|
|
3602
3622
|
const versionStr = commit ? `${pkg.version} (${commit})` : pkg.version;
|
|
3603
|
-
console.log(`qmd ${versionStr}`);
|
|
3623
|
+
console.log(`qmd-ja ${versionStr}`);
|
|
3604
3624
|
}
|
|
3605
3625
|
// Main CLI - only run if this is the main module
|
|
3606
3626
|
const __filename = fileURLToPath(import.meta.url);
|
package/dist/store.d.ts
CHANGED
|
@@ -187,6 +187,12 @@ export declare function resolveVirtualPath(db: Database, virtualPath: string): s
|
|
|
187
187
|
*/
|
|
188
188
|
export declare function toVirtualPath(db: Database, absolutePath: string): string | null;
|
|
189
189
|
export declare function verifySqliteVecLoaded(db: Database): void;
|
|
190
|
+
export declare const FTS_CJK_NORMALIZED_VERSION = "3";
|
|
191
|
+
/**
|
|
192
|
+
* Resolve the Vaporetto model file path relative to this module.
|
|
193
|
+
* The model is bundled under vendor/vaporetto-node-wasm/../models/ relative to the project root.
|
|
194
|
+
*/
|
|
195
|
+
export declare function resolveVaporettoModelPath(): string;
|
|
190
196
|
/**
|
|
191
197
|
* Pre-initialize the Vaporetto WASM Japanese morphological analyzer.
|
|
192
198
|
* Call this before indexing or search operations involving CJK text.
|
package/dist/store.js
CHANGED
|
@@ -595,7 +595,7 @@ const CJK_CHAR_PATTERN = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\
|
|
|
595
595
|
// parts of katakana words (e.g. "ナレッジベース"). Without these, the regex splits
|
|
596
596
|
// on "ー" and vaporetto receives broken sub-strings like "ナレッジベ" and "ス".
|
|
597
597
|
const CJK_RUN_PATTERN = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}・ー]+/gu;
|
|
598
|
-
const FTS_CJK_NORMALIZED_VERSION = "3"; // bumped: vaporetto WASM morphological tokenization
|
|
598
|
+
export const FTS_CJK_NORMALIZED_VERSION = "3"; // bumped: vaporetto WASM morphological tokenization
|
|
599
599
|
// --- Vaporetto WASM Japanese morphological analyzer (lazy singleton) ---
|
|
600
600
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
601
601
|
let _vaporettoTokenizer = null;
|
|
@@ -604,7 +604,7 @@ let _vaporettoInitPromise = null;
|
|
|
604
604
|
* Resolve the Vaporetto model file path relative to this module.
|
|
605
605
|
* The model is bundled under vendor/vaporetto-node-wasm/../models/ relative to the project root.
|
|
606
606
|
*/
|
|
607
|
-
function
|
|
607
|
+
export function resolveVaporettoModelPath() {
|
|
608
608
|
// __dirname equivalent for ESM
|
|
609
609
|
const thisDir = dirname(fileURLToPath(import.meta.url));
|
|
610
610
|
// src/ → project root → models/
|
|
@@ -627,7 +627,7 @@ export async function initializeVaporettoTokenizer() {
|
|
|
627
627
|
const req = createRequire(import.meta.url);
|
|
628
628
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
629
629
|
const { VaporettoTokenizer } = req(pathJoin(dirname(fileURLToPath(import.meta.url)), "..", "vendor", "vaporetto-node-wasm", "vaporetto_node_wasm.js"));
|
|
630
|
-
const modelPath =
|
|
630
|
+
const modelPath = resolveVaporettoModelPath();
|
|
631
631
|
const modelData = readFileSync(modelPath);
|
|
632
632
|
return new VaporettoTokenizer(modelData);
|
|
633
633
|
})();
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@joycodetech/qmd-ja",
|
|
3
|
-
"version": "2.5.
|
|
3
|
+
"version": "2.5.5",
|
|
4
4
|
"description": "Japanese-enhanced fork of qmd — On-device hybrid search with Vaporetto WASM morphological tokenizer for accurate Japanese BM25 full-text search",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|