npm - gs-search - Versions diffs - 0.1.7 → 0.1.8 - Mend

gs-search 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/lib/core.cjs CHANGED Viewed

@@ -181,21 +181,129 @@ function murmur3_32(str, h = 305419896) {
   const remainder = len & 3;
   return remainder > 0 && (remainder >= 3 && (k1 ^= (str.charCodeAt(i + 2) & 255) << 16), remainder >= 2 && (k1 ^= (str.charCodeAt(i + 1) & 255) << 8), remainder >= 1 && (k1 ^= str.charCodeAt(i) & 255), k1 = (k1 & 65535) * 3432918353 + (((k1 >>> 16) * 3432918353 & 65535) << 16) & 4294967295, k1 = k1 << 15 | k1 >>> 17, k1 = (k1 & 65535) * 461845907 + (((k1 >>> 16) * 461845907 & 65535) << 16) & 4294967295, h ^= k1), h ^= len, h ^= h >>> 16, h = (h & 65535) * 2246822507 + (((h >>> 16) * 2246822507 & 65535) << 16) & 4294967295, h ^= h >>> 13, h = (h & 65535) * 3266489909 + (((h >>> 16) * 3266489909 & 65535) << 16) & 4294967295, h ^= h >>> 16, h >>> 0;
 }
+function murmur3_64(str, seed = 305419896) { // MurmurHash3-style hash of `str`; returns a bigint masked to the low 64 bits. The default seed is 0x12345678.
+  const len = str.length, nBlocks = len >> 3; // len counts UTF-16 code units; nBlocks = floor(len / 8)
+  let h1 = BigInt(seed), h2 = BigInt(seed); // two running hash states, both started from the seed
+  const c1 = 0x87c37b91114253d5n, c2 = 0x4cf5ad432745937fn; // multiplication constants used when mixing k-values into h1/h2
+  let i = 0; // index of the next code unit to read
+  for (; i < nBlocks; ) { // NOTE(review): each pass reads 16 code units (only the low byte of each, via `& 255`), yet `i` is compared with len >> 3. For 24 <= len <= 135 only one pass runs, so code units at index >= 16 + (len & 7) are never read and affect the hash only through `len` - confirm this is intended.
+    let k12 = BigInt(str.charCodeAt(i) & 255) | BigInt(str.charCodeAt(++i) & 255) << 8n | BigInt(str.charCodeAt(++i) & 255) << 16n | BigInt(str.charCodeAt(++i) & 255) << 24n | BigInt(str.charCodeAt(++i) & 255) << 32n | BigInt(str.charCodeAt(++i) & 255) << 40n | BigInt(str.charCodeAt(++i) & 255) << 48n | BigInt(str.charCodeAt(++i) & 255) << 56n;
+    ++i; // step past the eighth unit packed into k12
+    let k22 = BigInt(str.charCodeAt(i) & 255) | BigInt(str.charCodeAt(++i) & 255) << 8n | BigInt(str.charCodeAt(++i) & 255) << 16n | BigInt(str.charCodeAt(++i) & 255) << 24n | BigInt(str.charCodeAt(++i) & 255) << 32n | BigInt(str.charCodeAt(++i) & 255) << 40n | BigInt(str.charCodeAt(++i) & 255) << 48n | BigInt(str.charCodeAt(++i) & 255) << 56n;
+    ++i, k12 = k12 * c1 % 2n ** 64n, k12 = (k12 << 31n | k12 >> 33n) % 2n ** 64n, k12 = k12 * c2 % 2n ** 64n, h1 ^= k12, h1 = (h1 << 27n | h1 >> 37n) % 2n ** 64n, h1 = (h1 + h2) % 2n ** 64n, h1 = (h1 * 5n + 0x52dce729n) % 2n ** 64n, k22 = k22 * c2 % 2n ** 64n, k22 = (k22 << 33n | k22 >> 31n) % 2n ** 64n, k22 = k22 * c1 % 2n ** 64n, h2 ^= k22, h2 = (h2 << 31n | h2 >> 33n) % 2n ** 64n, h2 = (h2 + h1) % 2n ** 64n, h2 = (h2 * 5n + 0x38495ab5n) % 2n ** 64n;
+  }
+  let k1 = 0n, k2 = 0n; // accumulators for the tail units that follow the last block pass
+  const remainder = len & 7; // always 0..7, so the `remainder >= 8` test in the return expression below can never be true. That expression mixes the tail, XORs in `len`, runs the final mixing steps, and its last operand masks with 0xffffffffffffffffn, which also discards the `h2 << 64n` term.
+  return remainder > 0 && (remainder >= 8 && (k2 ^= BigInt(str.charCodeAt(i + 7) & 255) << 56n), remainder >= 7 && (k2 ^= BigInt(str.charCodeAt(i + 6) & 255) << 48n), remainder >= 6 && (k2 ^= BigInt(str.charCodeAt(i + 5) & 255) << 40n), remainder >= 5 && (k2 ^= BigInt(str.charCodeAt(i + 4) & 255) << 32n), remainder >= 4 && (k1 ^= BigInt(str.charCodeAt(i + 3) & 255) << 24n), remainder >= 3 && (k1 ^= BigInt(str.charCodeAt(i + 2) & 255) << 16n), remainder >= 2 && (k1 ^= BigInt(str.charCodeAt(i + 1) & 255) << 8n), remainder >= 1 && (k1 ^= BigInt(str.charCodeAt(i) & 255)), k1 = k1 * c1 % 2n ** 64n, k1 = (k1 << 31n | k1 >> 33n) % 2n ** 64n, k1 = k1 * c2 % 2n ** 64n, h1 ^= k1, k2 = k2 * c2 % 2n ** 64n, k2 = (k2 << 33n | k2 >> 31n) % 2n ** 64n, k2 = k2 * c1 % 2n ** 64n, h2 ^= k2), h1 ^= BigInt(len), h2 ^= BigInt(len), h1 = (h1 + h2) % 2n ** 64n, h2 = (h2 + h1) % 2n ** 64n, h1 = (h1 ^ h1 >> 33n) * 0xff51afd7ed558ccdn, h1 = (h1 ^ h1 >> 33n) * 0xc4ceb9fe1a85ec53n, h1 = h1 ^ h1 >> 33n, h2 = (h2 ^ h2 >> 33n) * 0xff51afd7ed558ccdn, h2 = (h2 ^ h2 >> 33n) * 0xc4ceb9fe1a85ec53n, h2 = h2 ^ h2 >> 33n, (h2 << 64n | h1) & 0xffffffffffffffffn;
+}
+class Murmur3_32 { // 32-bit hash algorithm object: wraps murmur3_32 with a fixed seed.
+  seed; // seed forwarded to murmur3_32 on every hash() call
+  /**
+   * Creates the hasher.
+   * @param seed Seed value; defaults to 0x12345678.
+   */
+  constructor(seed = 305419896) {
+    this.seed = seed;
+  }
+  /**
+   * Computes the 32-bit hash of a string.
+   * @param str Input string.
+   * @returns 32-bit hash value (a number).
+   */
+  hash(str) {
+    return murmur3_32(str, this.seed);
+  }
+}
+class Murmur3_64 { // 64-bit hash algorithm object: wraps murmur3_64 with a fixed seed.
+  seed; // seed forwarded to murmur3_64 on every hash() call
+  /**
+   * Creates the hasher.
+   * @param seed Seed value; defaults to 0x12345678.
+   */
+  constructor(seed = 305419896) {
+    this.seed = seed;
+  }
+  /**
+   * Computes the 64-bit hash of a string.
+   * @param str Input string.
+   * @returns 64-bit hash value (a bigint).
+   */
+  hash(str) {
+    return murmur3_64(str, this.seed);
+  }
+}
+class Murmur3_128 { // 128-bit hash algorithm object built from two murmur3_64 calls.
+  seed; // base seed; the second murmur3_64 call uses seed ^ 1515870810
+  /**
+   * Creates the hasher.
+   * @param seed Seed value; defaults to 0x12345678.
+   */
+  constructor(seed = 305419896) {
+    this.seed = seed;
+  }
+  /**
+   * Computes a 128-bit hash of a string.
+   * The low 64 bits are murmur3_64(str, seed); the high 64 bits are
+   * murmur3_64(str + str, seed ^ 1515870810). This concatenates two 64-bit
+   * hashes; it is not the reference 128-bit MurmurHash3 algorithm.
+   * @param str Input string.
+   * @returns 128-bit hash value (a bigint).
+   */
+  hash(str) {
+    const hash1 = murmur3_64(str, this.seed);
+    return murmur3_64(str + str, this.seed ^ 1515870810) << 64n | hash1;
+  }
+}
+class Murmur3HashFactory { // Factory that creates the Murmur3 hash algorithm objects with their default seeds.
+  /**
+   * Creates a 32-bit hash algorithm instance.
+   * @returns A new Murmur3_32 with the default seed.
+   */
+  create32() {
+    return new Murmur3_32();
+  }
+  /**
+   * Creates a 64-bit hash algorithm instance.
+   * @returns A new Murmur3_64 with the default seed.
+   */
+  create64() {
+    return new Murmur3_64();
+  }
+  /**
+   * Creates a 128-bit hash algorithm instance.
+   * @returns A new Murmur3_128 with the default seed.
+   */
+  create128() {
+    return new Murmur3_128();
+  }
+}
+const defaultHashFactory = new Murmur3HashFactory(); // shared default factory instance
 class IndexSegment {
   #filename;
   #storage;
   #buffer = null;
   #view = null;
+  #hashAlgorithm;
   /**
-   * 使用MurmurHash3计算字符串哈希值
+   * 构造函数
+   * @param filename 文件名
+   * @param storage 存储接口
+   * @param hashAlgorithm 哈希算法实例，默认为Murmur3_32
+   */
+  constructor(filename, storage, hashAlgorithm = new Murmur3_32()) {
+    this.#filename = filename, this.#storage = storage, this.#hashAlgorithm = hashAlgorithm;
+  }
+  /**
+   * 使用当前哈希算法计算字符串哈希值
    * @param str 要哈希的字符串
    * @returns 32位无符号哈希值
    */
-  static hash(str) {
-    return murmur3_32(str);
+  hash(str) {
+    return this.#hashAlgorithm.hash(str);
   }
-  constructor(filename, storage) {
-    this.#filename = filename, this.#storage = storage;
+  /**
+   * 设置哈希算法
+   * @param hashAlgorithm 新的哈希算法实例
+   */
+  setHashAlgorithm(hashAlgorithm) {
+    this.#hashAlgorithm = hashAlgorithm;
   }
   async loadIndex() {
     return this.#buffer ? !0 : (this.#buffer = await this.#storage.read(this.#filename), this.#buffer ? (this.#view = new DataView(this.#buffer), !0) : !1);
@@ -206,7 +314,7 @@ class IndexSegment {
       const uniqueTokens = /* @__PURE__ */ new Map();
       for (const token of doc.tokens)
         uniqueTokens.has(token) || (uniqueTokens.set(token, !0), tokenMap.has(token) || tokenMap.set(token, {
-          hash: IndexSegment.hash(token),
+          hash: this.hash(token),
           postings: []
         }), tokenMap.get(token).postings.push(doc.id));
     }
@@ -222,8 +330,8 @@ class IndexSegment {
     const headerSize = 12, dictSize = entries.length * 20, postingsSize = totalPostings * 4, tokensOffset = headerSize + dictSize + postingsSize, totalSize = tokensOffset + totalTokensSize, buffer = new ArrayBuffer(totalSize), view = new DataView(buffer);
     view.setUint32(0, 1229866072), view.setUint32(4, entries.length, !0), view.setUint32(8, tokensOffset, !0);
     let currentDictOffset = headerSize, currentPostingsOffset = headerSize + dictSize, currentTokenOffset = tokensOffset;
-    for (const [token, { hash: hash2, postings }] of entries) {
-      view.setUint32(currentDictOffset, hash2, !0);
+    for (const [token, { hash, postings }] of entries) {
+      view.setUint32(currentDictOffset, hash, !0);
       const tokenBytes = encoder.encode(token);
       view.setUint32(currentDictOffset + 4, tokenBytes.length, !0), view.setUint32(currentDictOffset + 8, currentTokenOffset, !0), view.setUint32(currentDictOffset + 12, currentPostingsOffset, !0), view.setUint32(currentDictOffset + 16, postings.length, !0), currentDictOffset += 20;
       for (let i = 0; i < postings.length; i++)
@@ -236,7 +344,7 @@ class IndexSegment {
   }
   search(term) {
     if (!this.#view || !this.#buffer) return [];
-    const h = IndexSegment.hash(term), count = this.#view.getUint32(4, !0);
+    const h = this.hash(term), count = this.#view.getUint32(4, !0);
     let left = 0, right = count - 1;
     const headerSize = 12, entrySize = 20, decoder = new TextDecoder();
     for (; left <= right; ) {
@@ -277,6 +385,116 @@ class IndexSegment {
     return [];
   }
 }
+class IndexSegment64 { // Index segment whose dictionary is keyed by 64-bit (bigint) token hashes.
+  #filename; // name passed to storage.read / storage.write
+  #storage;
+  #buffer = null; // ArrayBuffer holding the whole segment once built or loaded
+  #view = null; // DataView over #buffer
+  #hashAlgorithm;
+  /**
+   * Creates a segment bound to a file in the given storage.
+   * @param filename Name of the segment file.
+   * @param storage Storage interface; this class calls its read(filename) and write(filename, buffer).
+   * @param hashAlgorithm Hash algorithm instance; defaults to a new Murmur3_64.
+   */
+  constructor(filename, storage, hashAlgorithm = new Murmur3_64()) {
+    this.#filename = filename, this.#storage = storage, this.#hashAlgorithm = hashAlgorithm;
+  }
+  /**
+   * Hashes a string with the currently configured hash algorithm.
+   * @param str String to hash.
+   * @returns The algorithm's result; the rest of this class stores and reads it as an unsigned 64-bit bigint.
+   */
+  hash(str) {
+    return this.#hashAlgorithm.hash(str);
+  }
+  /**
+   * Replaces the hash algorithm used by later hash(), buildAndSave() and search() calls.
+   * NOTE(review): an already built or loaded buffer is not rebuilt here, so its
+   * stored hashes still come from the previous algorithm.
+   * @param hashAlgorithm New hash algorithm instance.
+   */
+  setHashAlgorithm(hashAlgorithm) {
+    this.#hashAlgorithm = hashAlgorithm;
+  }
+  async loadIndex() { // Resolves to true when a buffer is already cached or storage.read returns one, false otherwise. The header is not validated here.
+    return this.#buffer ? !0 : (this.#buffer = await this.#storage.read(this.#filename), this.#buffer ? (this.#view = new DataView(this.#buffer), !0) : !1);
+  }
+  async buildAndSave(docs) { // Builds the binary segment from docs (each read as { id, tokens }), writes it to storage and caches it on this instance.
+    const tokenMap = /* @__PURE__ */ new Map(); // token -> { hash, postings: doc ids }
+    for (const doc of docs) {
+      const uniqueTokens = /* @__PURE__ */ new Map(); // per-document set so each token records the doc id at most once
+      for (const token of doc.tokens)
+        uniqueTokens.has(token) || (uniqueTokens.set(token, !0), tokenMap.has(token) || tokenMap.set(token, {
+          hash: this.hash(token),
+          postings: []
+        }), tokenMap.get(token).postings.push(doc.id));
+    }
+    const entries = Array.from(tokenMap.entries());
+    entries.sort(([a, ah], [b, bh]) => ah.hash !== bh.hash ? ah.hash > bh.hash ? 1 : -1 : a.localeCompare(b)); // order by hash, ties broken by token text, so search() can binary-search the hashes
+    const encoder = new TextEncoder();
+    let totalPostings = 0, totalTokensSize = 0;
+    for (const [token, { postings }] of entries) {
+      totalPostings += postings.length;
+      const bytes = encoder.encode(token);
+      totalTokensSize += bytes.length + 1; // +1 for the 0 byte written after each token
+    }
+    const headerSize = 16, dictSize = entries.length * 28, postingsSize = totalPostings * 4, tokensOffset = headerSize + dictSize + postingsSize, totalSize = tokensOffset + totalTokensSize, buffer = new ArrayBuffer(totalSize), view = new DataView(buffer); // layout: 16-byte header | 28-byte dictionary entries | uint32 postings | token bytes
+    view.setUint32(0, 1229866072), view.setUint32(4, entries.length, !0), view.setUint32(8, tokensOffset, !0), view.setUint32(12, 64, !0); // header: magic at 0 (no littleEndian flag, so big-endian), entry count at 4, tokens offset at 8, the value 64 at 12 (presumably the hash width - confirm)
+    let currentDictOffset = headerSize, currentPostingsOffset = headerSize + dictSize, currentTokenOffset = tokensOffset;
+    for (const [token, { hash, postings }] of entries) {
+      view.setBigUint64(currentDictOffset, hash, !0);
+      const tokenBytes = encoder.encode(token);
+      view.setUint32(currentDictOffset + 8, tokenBytes.length, !0), view.setUint32(currentDictOffset + 12, currentTokenOffset, !0), view.setUint32(currentDictOffset + 16, currentPostingsOffset, !0), view.setUint32(currentDictOffset + 20, postings.length, !0), currentDictOffset += 28; // entry fields: +0 hash (u64), +8 token byte length, +12 token offset, +16 postings offset, +20 postings count; the stride is 28 although only 24 bytes are written
+      for (let i = 0; i < postings.length; i++)
+        view.setUint32(currentPostingsOffset, postings[i], !0), currentPostingsOffset += 4;
+      for (let i = 0; i < tokenBytes.length; i++)
+        view.setUint8(currentTokenOffset++, tokenBytes[i]);
+      view.setUint8(currentTokenOffset++, 0);
+    }
+    await this.#storage.write(this.#filename, buffer), this.#buffer = buffer, this.#view = view;
+  }
+  search(term) { // Returns the posting list (doc ids) stored for term, or [] when no index is loaded or nothing matches.
+    if (!this.#view || !this.#buffer) return [];
+    const h = this.hash(term), count = this.#view.getUint32(4, !0);
+    let left = 0, right = count - 1;
+    const headerSize = 16, entrySize = 28, decoder = new TextDecoder();
+    for (; left <= right; ) { // binary search over the hash field of the dictionary entries
+      const mid = left + right >>> 1, entryPos = headerSize + mid * entrySize, entryHash = this.#view.getBigUint64(entryPos, !0);
+      if (entryHash < h)
+        left = mid + 1;
+      else if (entryHash > h)
+        right = mid - 1;
+      else {
+        if (!(mid > 0 && this.#view.getBigUint64(headerSize + (mid - 1) * entrySize, !0) === h || mid < count - 1 && this.#view.getBigUint64(headerSize + (mid + 1) * entrySize, !0) === h)) { // neither neighbour shares this hash: return this entry's postings. NOTE(review): the token text is not compared on this path, so a different term with the same hash gets these postings.
+          const postingsOffset = this.#view.getUint32(headerSize + mid * entrySize + 16, !0), postingsLen = this.#view.getUint32(headerSize + mid * entrySize + 20, !0), result = [];
+          for (let j = 0; j < postingsLen; j++)
+            result.push(this.#view.getUint32(postingsOffset + j * 4, !0));
+          return result;
+        }
+        let firstMatch = mid; // several entries share the hash: rewind to the first one, then compare token text
+        for (; firstMatch > 0; ) {
+          const prevPos = headerSize + (firstMatch - 1) * entrySize;
+          if (this.#view.getBigUint64(prevPos, !0) === h)
+            firstMatch--;
+          else
+            break;
+        }
+        for (let i = firstMatch; i < count; i++) {
+          const checkPos = headerSize + i * entrySize;
+          if (this.#view.getBigUint64(checkPos, !0) !== h) break;
+          const tokenLen = this.#view.getUint32(checkPos + 8, !0), tokenOffset = this.#view.getUint32(checkPos + 12, !0), tokenBuffer = new Uint8Array(this.#buffer, tokenOffset, tokenLen);
+          if (decoder.decode(tokenBuffer) === term) {
+            const postingsOffset = this.#view.getUint32(checkPos + 16, !0), postingsLen = this.#view.getUint32(checkPos + 20, !0), result = [];
+            for (let j = 0; j < postingsLen; j++)
+              result.push(this.#view.getUint32(postingsOffset + j * 4, !0));
+            return result;
+          }
+        }
+        return [];
+      }
+    }
+    return [];
+  }
+}
 const defaultTokenize = ({ text }) => {
   try {
     if (typeof Intl < "u" && typeof Intl.Segmenter == "function" && typeof Array.from == "function") {
@@ -295,6 +513,9 @@ class SearchEngine {
   #segments;
   #initialized = !1;
   #config;
+  #isHash64Bit = !0;
+  #hashAlgorithm32;
+  #hashAlgorithm64;
   // 批处理状态
   #inBatch = !1;
   #pendingTokenCounts = { word: 0, char: 0 };
@@ -306,12 +527,19 @@ class SearchEngine {
       minCharTokenSave: 0,
       indexingTokenizer: config.indexingTokenizer || defaultTokenize,
       ...config
-    }, (this.#config.minWordTokenSave || 0) >= (this.#config.wordSegmentTokenThreshold || 1e5))
+    }, this.#processHashAlgorithmConfig(), (this.#config.minWordTokenSave || 0) >= (this.#config.wordSegmentTokenThreshold || 1e5))
       throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");
     if ((this.#config.minCharTokenSave || 0) >= (this.#config.charSegmentTokenThreshold || 5e5))
       throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");
     this.#storage = config.storage, this.#meta = new MetaManager(this.#storage), this.#cache = new IntermediateCache(this.#storage), this.#segments = /* @__PURE__ */ new Map();
   }
+  /**
+   * Resolves config.hashAlgorithm into the hash-width flag and the hash
+   * instance used when creating index segments.
+   * - 64: a new Murmur3_64, 64-bit path.
+   * - 32: a new Murmur3_32, 32-bit path.
+   * - an object with a hash function: it is probed once with hash("test");
+   *   a bigint result selects the 64-bit path, anything else the 32-bit path.
+   * - anything else, including unset: a new Murmur3_64, 64-bit path.
+   * NOTE(review): with no hashAlgorithm configured the engine now takes the
+   * 64-bit path; confirm how segment files written with the 32-bit layout are
+   * handled.
+   */
+  #processHashAlgorithmConfig() {
+    const hashConfig = this.#config.hashAlgorithm;
+    hashConfig === 64 ? (this.#isHash64Bit = !0, this.#hashAlgorithm64 = new Murmur3_64()) : hashConfig === 32 ? (this.#isHash64Bit = !1, this.#hashAlgorithm32 = new Murmur3_32()) : hashConfig && typeof hashConfig.hash == "function" ? typeof hashConfig.hash("test") == "bigint" ? (this.#isHash64Bit = !0, this.#hashAlgorithm64 = hashConfig) : (this.#isHash64Bit = !1, this.#hashAlgorithm32 = hashConfig) : (this.#isHash64Bit = !0, this.#hashAlgorithm64 = new Murmur3_64());
+  }
   /**
    * 开启批处理
    * 批处理期间 addDocuments 只写入缓存，不触发索引段构建
@@ -397,7 +625,7 @@ class SearchEngine {
       const segmentsMeta = this.#meta.getSegments(type);
       for (const meta of segmentsMeta) {
         const filename = meta.filename;
-        !this.#segments.has(filename) && !segmentsToLoad.has(filename) && segmentsToLoad.set(filename, new IndexSegment(filename, this.#storage));
+        !this.#segments.has(filename) && !segmentsToLoad.has(filename) && (this.#isHash64Bit ? segmentsToLoad.set(filename, new IndexSegment64(filename, this.#storage, this.#hashAlgorithm64)) : segmentsToLoad.set(filename, new IndexSegment(filename, this.#storage, this.#hashAlgorithm32)));
       }
     };
     collectSegments("word"), collectSegments("char"), await Promise.all(
@@ -466,7 +694,7 @@ class SearchEngine {
       ...this.#meta.getSegments("char")
     ];
     for (const seg of allSegments)
-      this.#segments.has(seg.filename) || this.#segments.set(seg.filename, new IndexSegment(seg.filename, this.#storage)), await this.#segments.get(seg.filename).loadIndex();
+      this.#segments.has(seg.filename) || (this.#isHash64Bit ? this.#segments.set(seg.filename, new IndexSegment64(seg.filename, this.#storage, this.#hashAlgorithm64)) : this.#segments.set(seg.filename, new IndexSegment(seg.filename, this.#storage, this.#hashAlgorithm32))), await this.#segments.get(seg.filename).loadIndex();
     this.#initialized = !0;
   }
   #getIndexingTokens(doc) {
@@ -498,7 +726,7 @@ class SearchEngine {
     }
     const docsToBuild = await this.#cache.readRange(cacheFilename, startOffset, currentCacheSize);
     let segment = this.#segments.get(targetSegmentName);
-    segment || (segment = new IndexSegment(targetSegmentName, this.#storage), this.#segments.set(targetSegmentName, segment)), await segment.buildAndSave(docsToBuild), this.#meta.updateSegment(type, targetSegmentName, startOffset, currentCacheSize, newTokenCountTotal, isNew);
+    segment || (this.#isHash64Bit ? segment = new IndexSegment64(targetSegmentName, this.#storage, this.#hashAlgorithm64) : segment = new IndexSegment(targetSegmentName, this.#storage, this.#hashAlgorithm32), this.#segments.set(targetSegmentName, segment)), await segment.buildAndSave(docsToBuild), this.#meta.updateSegment(type, targetSegmentName, startOffset, currentCacheSize, newTokenCountTotal, isNew);
   }
 }
-exports.SearchEngine = SearchEngine, exports.hash = murmur3_32, exports.murmur3_32 = murmur3_32;
+exports.Murmur3HashFactory = Murmur3HashFactory, exports.Murmur3_128 = Murmur3_128, exports.Murmur3_32 = Murmur3_32, exports.Murmur3_64 = Murmur3_64, exports.SearchEngine = SearchEngine, exports.defaultHashFactory = defaultHashFactory, exports.murmur3_32 = murmur3_32, exports.murmur3_64 = murmur3_64;

package/lib/core.d.ts CHANGED Viewed

@@ -1,4 +1,4 @@
-import { ISearchEngine, ISearchEngineOption, IDocument, IDocumentBase, IResult, ISearchEngineStatus } from './type';
+import { ISearchEngine, ISearchEngineOption, IDocument, IDocumentBase, IResult, ISearchEngineStatus, IHashAlgorithm32, IHashAlgorithm64, IHashAlgorithm128, IHashAlgorithmFactory } from './type';
 /**
  * 核心搜索引擎类 (多实例支持)
@@ -52,4 +52,90 @@ declare class SearchEngine implements ISearchEngine {
  */
 declare function murmur3_32(str: string, h?: number): number;
-export { SearchEngine, murmur3_32 as hash, murmur3_32 };
+/**
+ * MurmurHash3 64-bit implementation.
+ * An efficient non-cryptographic hash function, suitable for hash tables and similar data structures.
+ */
+/**
+ * Computes the 64-bit MurmurHash3 hash of a string.
+ * @param str String to hash.
+ * @param seed Seed value; defaults to 0x12345678.
+ * @returns Unsigned 64-bit hash value (a bigint).
+ */
+declare function murmur3_64(str: string, seed?: number): bigint;
+/**
+ * Murmur3 32-bit hash algorithm implementation class.
+ */
+declare class Murmur3_32 implements IHashAlgorithm32 {
+    private seed;
+    /**
+     * Creates the hasher.
+     * @param seed Seed value; defaults to 0x12345678.
+     */
+    constructor(seed?: number);
+    /**
+     * Computes the 32-bit hash of a string.
+     * @param str Input string.
+     * @returns 32-bit hash value (a number).
+     */
+    hash(str: string): number;
+}
+/**
+ * Murmur3 64-bit hash algorithm implementation class.
+ */
+declare class Murmur3_64 implements IHashAlgorithm64 {
+    private seed;
+    /**
+     * Creates the hasher.
+     * @param seed Seed value; defaults to 0x12345678.
+     */
+    constructor(seed?: number);
+    /**
+     * Computes the 64-bit hash of a string.
+     * @param str Input string.
+     * @returns 64-bit hash value (a bigint).
+     */
+    hash(str: string): bigint;
+}
+/**
+ * Murmur3 128-bit hash algorithm implementation class.
+ * Note: the project currently has no real 128-bit implementation; this is only a placeholder.
+ */
+declare class Murmur3_128 implements IHashAlgorithm128 {
+    private seed;
+    /**
+     * Creates the hasher.
+     * @param seed Seed value; defaults to 0x12345678.
+     */
+    constructor(seed?: number);
+    /**
+     * Computes the 128-bit hash of a string.
+     * @param str Input string.
+     * @returns 128-bit hash value (a bigint).
+     */
+    hash(str: string): bigint;
+}
+/**
+ * Factory class for the Murmur3 hash algorithms.
+ */
+declare class Murmur3HashFactory implements IHashAlgorithmFactory {
+    /**
+     * Creates a 32-bit hash algorithm instance.
+     * @returns A 32-bit hash algorithm instance.
+     */
+    create32(): IHashAlgorithm32;
+    /**
+     * Creates a 64-bit hash algorithm instance.
+     * @returns A 64-bit hash algorithm instance.
+     */
+    create64(): IHashAlgorithm64;
+    /**
+     * Creates a 128-bit hash algorithm instance.
+     * @returns A 128-bit hash algorithm instance.
+     */
+    create128(): IHashAlgorithm128;
+}
+declare const defaultHashFactory: Murmur3HashFactory;
+export { Murmur3HashFactory, Murmur3_128, Murmur3_32, Murmur3_64, SearchEngine, defaultHashFactory, murmur3_32, murmur3_64 };

package/lib/core.js CHANGED Viewed

@@ -180,21 +180,129 @@ function murmur3_32(str, h = 305419896) {
   const remainder = len & 3;
   return remainder > 0 && (remainder >= 3 && (k1 ^= (str.charCodeAt(i + 2) & 255) << 16), remainder >= 2 && (k1 ^= (str.charCodeAt(i + 1) & 255) << 8), remainder >= 1 && (k1 ^= str.charCodeAt(i) & 255), k1 = (k1 & 65535) * 3432918353 + (((k1 >>> 16) * 3432918353 & 65535) << 16) & 4294967295, k1 = k1 << 15 | k1 >>> 17, k1 = (k1 & 65535) * 461845907 + (((k1 >>> 16) * 461845907 & 65535) << 16) & 4294967295, h ^= k1), h ^= len, h ^= h >>> 16, h = (h & 65535) * 2246822507 + (((h >>> 16) * 2246822507 & 65535) << 16) & 4294967295, h ^= h >>> 13, h = (h & 65535) * 3266489909 + (((h >>> 16) * 3266489909 & 65535) << 16) & 4294967295, h ^= h >>> 16, h >>> 0;
 }
+function murmur3_64(str, seed = 305419896) { // MurmurHash3-style hash of `str`; returns a bigint masked to the low 64 bits. The default seed is 0x12345678.
+  const len = str.length, nBlocks = len >> 3; // len counts UTF-16 code units; nBlocks = floor(len / 8)
+  let h1 = BigInt(seed), h2 = BigInt(seed); // two running hash states, both started from the seed
+  const c1 = 0x87c37b91114253d5n, c2 = 0x4cf5ad432745937fn; // multiplication constants used when mixing k-values into h1/h2
+  let i = 0; // index of the next code unit to read
+  for (; i < nBlocks; ) { // NOTE(review): each pass reads 16 code units (only the low byte of each, via `& 255`), yet `i` is compared with len >> 3. For 24 <= len <= 135 only one pass runs, so code units at index >= 16 + (len & 7) are never read and affect the hash only through `len` - confirm this is intended.
+    let k12 = BigInt(str.charCodeAt(i) & 255) | BigInt(str.charCodeAt(++i) & 255) << 8n | BigInt(str.charCodeAt(++i) & 255) << 16n | BigInt(str.charCodeAt(++i) & 255) << 24n | BigInt(str.charCodeAt(++i) & 255) << 32n | BigInt(str.charCodeAt(++i) & 255) << 40n | BigInt(str.charCodeAt(++i) & 255) << 48n | BigInt(str.charCodeAt(++i) & 255) << 56n;
+    ++i; // step past the eighth unit packed into k12
+    let k22 = BigInt(str.charCodeAt(i) & 255) | BigInt(str.charCodeAt(++i) & 255) << 8n | BigInt(str.charCodeAt(++i) & 255) << 16n | BigInt(str.charCodeAt(++i) & 255) << 24n | BigInt(str.charCodeAt(++i) & 255) << 32n | BigInt(str.charCodeAt(++i) & 255) << 40n | BigInt(str.charCodeAt(++i) & 255) << 48n | BigInt(str.charCodeAt(++i) & 255) << 56n;
+    ++i, k12 = k12 * c1 % 2n ** 64n, k12 = (k12 << 31n | k12 >> 33n) % 2n ** 64n, k12 = k12 * c2 % 2n ** 64n, h1 ^= k12, h1 = (h1 << 27n | h1 >> 37n) % 2n ** 64n, h1 = (h1 + h2) % 2n ** 64n, h1 = (h1 * 5n + 0x52dce729n) % 2n ** 64n, k22 = k22 * c2 % 2n ** 64n, k22 = (k22 << 33n | k22 >> 31n) % 2n ** 64n, k22 = k22 * c1 % 2n ** 64n, h2 ^= k22, h2 = (h2 << 31n | h2 >> 33n) % 2n ** 64n, h2 = (h2 + h1) % 2n ** 64n, h2 = (h2 * 5n + 0x38495ab5n) % 2n ** 64n;
+  }
+  let k1 = 0n, k2 = 0n; // accumulators for the tail units that follow the last block pass
+  const remainder = len & 7; // always 0..7, so the `remainder >= 8` test in the return expression below can never be true. That expression mixes the tail, XORs in `len`, runs the final mixing steps, and its last operand masks with 0xffffffffffffffffn, which also discards the `h2 << 64n` term.
+  return remainder > 0 && (remainder >= 8 && (k2 ^= BigInt(str.charCodeAt(i + 7) & 255) << 56n), remainder >= 7 && (k2 ^= BigInt(str.charCodeAt(i + 6) & 255) << 48n), remainder >= 6 && (k2 ^= BigInt(str.charCodeAt(i + 5) & 255) << 40n), remainder >= 5 && (k2 ^= BigInt(str.charCodeAt(i + 4) & 255) << 32n), remainder >= 4 && (k1 ^= BigInt(str.charCodeAt(i + 3) & 255) << 24n), remainder >= 3 && (k1 ^= BigInt(str.charCodeAt(i + 2) & 255) << 16n), remainder >= 2 && (k1 ^= BigInt(str.charCodeAt(i + 1) & 255) << 8n), remainder >= 1 && (k1 ^= BigInt(str.charCodeAt(i) & 255)), k1 = k1 * c1 % 2n ** 64n, k1 = (k1 << 31n | k1 >> 33n) % 2n ** 64n, k1 = k1 * c2 % 2n ** 64n, h1 ^= k1, k2 = k2 * c2 % 2n ** 64n, k2 = (k2 << 33n | k2 >> 31n) % 2n ** 64n, k2 = k2 * c1 % 2n ** 64n, h2 ^= k2), h1 ^= BigInt(len), h2 ^= BigInt(len), h1 = (h1 + h2) % 2n ** 64n, h2 = (h2 + h1) % 2n ** 64n, h1 = (h1 ^ h1 >> 33n) * 0xff51afd7ed558ccdn, h1 = (h1 ^ h1 >> 33n) * 0xc4ceb9fe1a85ec53n, h1 = h1 ^ h1 >> 33n, h2 = (h2 ^ h2 >> 33n) * 0xff51afd7ed558ccdn, h2 = (h2 ^ h2 >> 33n) * 0xc4ceb9fe1a85ec53n, h2 = h2 ^ h2 >> 33n, (h2 << 64n | h1) & 0xffffffffffffffffn;
+}
+class Murmur3_32 { // 32-bit hash algorithm object: wraps murmur3_32 with a fixed seed.
+  seed; // seed forwarded to murmur3_32 on every hash() call
+  /**
+   * Creates the hasher.
+   * @param seed Seed value; defaults to 0x12345678.
+   */
+  constructor(seed = 305419896) {
+    this.seed = seed;
+  }
+  /**
+   * Computes the 32-bit hash of a string.
+   * @param str Input string.
+   * @returns 32-bit hash value (a number).
+   */
+  hash(str) {
+    return murmur3_32(str, this.seed);
+  }
+}
+class Murmur3_64 { // 64-bit hash algorithm object: wraps murmur3_64 with a fixed seed.
+  seed; // seed forwarded to murmur3_64 on every hash() call
+  /**
+   * Creates the hasher.
+   * @param seed Seed value; defaults to 0x12345678.
+   */
+  constructor(seed = 305419896) {
+    this.seed = seed;
+  }
+  /**
+   * Computes the 64-bit hash of a string.
+   * @param str Input string.
+   * @returns 64-bit hash value (a bigint).
+   */
+  hash(str) {
+    return murmur3_64(str, this.seed);
+  }
+}
+class Murmur3_128 { // 128-bit hash algorithm object built from two murmur3_64 calls.
+  seed; // base seed; the second murmur3_64 call uses seed ^ 1515870810
+  /**
+   * Creates the hasher.
+   * @param seed Seed value; defaults to 0x12345678.
+   */
+  constructor(seed = 305419896) {
+    this.seed = seed;
+  }
+  /**
+   * Computes a 128-bit hash of a string.
+   * The low 64 bits are murmur3_64(str, seed); the high 64 bits are
+   * murmur3_64(str + str, seed ^ 1515870810). This concatenates two 64-bit
+   * hashes; it is not the reference 128-bit MurmurHash3 algorithm.
+   * @param str Input string.
+   * @returns 128-bit hash value (a bigint).
+   */
+  hash(str) {
+    const hash1 = murmur3_64(str, this.seed);
+    return murmur3_64(str + str, this.seed ^ 1515870810) << 64n | hash1;
+  }
+}
+class Murmur3HashFactory { // Factory that creates the Murmur3 hash algorithm objects with their default seeds.
+  /**
+   * Creates a 32-bit hash algorithm instance.
+   * @returns A new Murmur3_32 with the default seed.
+   */
+  create32() {
+    return new Murmur3_32();
+  }
+  /**
+   * Creates a 64-bit hash algorithm instance.
+   * @returns A new Murmur3_64 with the default seed.
+   */
+  create64() {
+    return new Murmur3_64();
+  }
+  /**
+   * Creates a 128-bit hash algorithm instance.
+   * @returns A new Murmur3_128 with the default seed.
+   */
+  create128() {
+    return new Murmur3_128();
+  }
+}
+const defaultHashFactory = new Murmur3HashFactory(); // shared default factory instance
 class IndexSegment {
   #filename;
   #storage;
   #buffer = null;
   #view = null;
+  #hashAlgorithm;
   /**
-   * 使用MurmurHash3计算字符串哈希值
+   * 构造函数
+   * @param filename 文件名
+   * @param storage 存储接口
+   * @param hashAlgorithm 哈希算法实例，默认为Murmur3_32
+   */
+  constructor(filename, storage, hashAlgorithm = new Murmur3_32()) {
+    this.#filename = filename, this.#storage = storage, this.#hashAlgorithm = hashAlgorithm;
+  }
+  /**
+   * 使用当前哈希算法计算字符串哈希值
    * @param str 要哈希的字符串
    * @returns 32位无符号哈希值
    */
-  static hash(str) {
-    return murmur3_32(str);
+  hash(str) {
+    return this.#hashAlgorithm.hash(str);
   }
-  constructor(filename, storage) {
-    this.#filename = filename, this.#storage = storage;
+  /**
+   * 设置哈希算法
+   * @param hashAlgorithm 新的哈希算法实例
+   */
+  setHashAlgorithm(hashAlgorithm) {
+    this.#hashAlgorithm = hashAlgorithm;
   }
   async loadIndex() {
     return this.#buffer ? !0 : (this.#buffer = await this.#storage.read(this.#filename), this.#buffer ? (this.#view = new DataView(this.#buffer), !0) : !1);
@@ -205,7 +313,7 @@ class IndexSegment {
       const uniqueTokens = /* @__PURE__ */ new Map();
       for (const token of doc.tokens)
         uniqueTokens.has(token) || (uniqueTokens.set(token, !0), tokenMap.has(token) || tokenMap.set(token, {
-          hash: IndexSegment.hash(token),
+          hash: this.hash(token),
           postings: []
         }), tokenMap.get(token).postings.push(doc.id));
     }
@@ -221,8 +329,8 @@ class IndexSegment {
     const headerSize = 12, dictSize = entries.length * 20, postingsSize = totalPostings * 4, tokensOffset = headerSize + dictSize + postingsSize, totalSize = tokensOffset + totalTokensSize, buffer = new ArrayBuffer(totalSize), view = new DataView(buffer);
     view.setUint32(0, 1229866072), view.setUint32(4, entries.length, !0), view.setUint32(8, tokensOffset, !0);
     let currentDictOffset = headerSize, currentPostingsOffset = headerSize + dictSize, currentTokenOffset = tokensOffset;
-    for (const [token, { hash: hash2, postings }] of entries) {
-      view.setUint32(currentDictOffset, hash2, !0);
+    for (const [token, { hash, postings }] of entries) {
+      view.setUint32(currentDictOffset, hash, !0);
       const tokenBytes = encoder.encode(token);
       view.setUint32(currentDictOffset + 4, tokenBytes.length, !0), view.setUint32(currentDictOffset + 8, currentTokenOffset, !0), view.setUint32(currentDictOffset + 12, currentPostingsOffset, !0), view.setUint32(currentDictOffset + 16, postings.length, !0), currentDictOffset += 20;
       for (let i = 0; i < postings.length; i++)
@@ -235,7 +343,7 @@ class IndexSegment {
   }
   search(term) {
     if (!this.#view || !this.#buffer) return [];
-    const h = IndexSegment.hash(term), count = this.#view.getUint32(4, !0);
+    const h = this.hash(term), count = this.#view.getUint32(4, !0);
     let left = 0, right = count - 1;
     const headerSize = 12, entrySize = 20, decoder = new TextDecoder();
     for (; left <= right; ) {
@@ -276,6 +384,116 @@ class IndexSegment {
     return [];
   }
 }
+class IndexSegment64 { // Index segment whose dictionary is keyed by 64-bit (bigint) token hashes.
+  #filename; // name passed to storage.read / storage.write
+  #storage;
+  #buffer = null; // ArrayBuffer holding the whole segment once built or loaded
+  #view = null; // DataView over #buffer
+  #hashAlgorithm;
+  /**
+   * Creates a segment bound to a file in the given storage.
+   * @param filename Name of the segment file.
+   * @param storage Storage interface; this class calls its read(filename) and write(filename, buffer).
+   * @param hashAlgorithm Hash algorithm instance; defaults to a new Murmur3_64.
+   */
+  constructor(filename, storage, hashAlgorithm = new Murmur3_64()) {
+    this.#filename = filename, this.#storage = storage, this.#hashAlgorithm = hashAlgorithm;
+  }
+  /**
+   * Hashes a string with the currently configured hash algorithm.
+   * @param str String to hash.
+   * @returns The algorithm's result; the rest of this class stores and reads it as an unsigned 64-bit bigint.
+   */
+  hash(str) {
+    return this.#hashAlgorithm.hash(str);
+  }
+  /**
+   * Replaces the hash algorithm used by later hash(), buildAndSave() and search() calls.
+   * NOTE(review): an already built or loaded buffer is not rebuilt here, so its
+   * stored hashes still come from the previous algorithm.
+   * @param hashAlgorithm New hash algorithm instance.
+   */
+  setHashAlgorithm(hashAlgorithm) {
+    this.#hashAlgorithm = hashAlgorithm;
+  }
+  async loadIndex() { // Resolves to true when a buffer is already cached or storage.read returns one, false otherwise. The header is not validated here.
+    return this.#buffer ? !0 : (this.#buffer = await this.#storage.read(this.#filename), this.#buffer ? (this.#view = new DataView(this.#buffer), !0) : !1);
+  }
+  async buildAndSave(docs) { // Builds the binary segment from docs (each read as { id, tokens }), writes it to storage and caches it on this instance.
+    const tokenMap = /* @__PURE__ */ new Map(); // token -> { hash, postings: doc ids }
+    for (const doc of docs) {
+      const uniqueTokens = /* @__PURE__ */ new Map(); // per-document set so each token records the doc id at most once
+      for (const token of doc.tokens)
+        uniqueTokens.has(token) || (uniqueTokens.set(token, !0), tokenMap.has(token) || tokenMap.set(token, {
+          hash: this.hash(token),
+          postings: []
+        }), tokenMap.get(token).postings.push(doc.id));
+    }
+    const entries = Array.from(tokenMap.entries());
+    entries.sort(([a, ah], [b, bh]) => ah.hash !== bh.hash ? ah.hash > bh.hash ? 1 : -1 : a.localeCompare(b)); // order by hash, ties broken by token text, so search() can binary-search the hashes
+    const encoder = new TextEncoder();
+    let totalPostings = 0, totalTokensSize = 0;
+    for (const [token, { postings }] of entries) {
+      totalPostings += postings.length;
+      const bytes = encoder.encode(token);
+      totalTokensSize += bytes.length + 1; // +1 for the 0 byte written after each token
+    }
+    const headerSize = 16, dictSize = entries.length * 28, postingsSize = totalPostings * 4, tokensOffset = headerSize + dictSize + postingsSize, totalSize = tokensOffset + totalTokensSize, buffer = new ArrayBuffer(totalSize), view = new DataView(buffer); // layout: 16-byte header | 28-byte dictionary entries | uint32 postings | token bytes
+    view.setUint32(0, 1229866072), view.setUint32(4, entries.length, !0), view.setUint32(8, tokensOffset, !0), view.setUint32(12, 64, !0); // header: magic at 0 (no littleEndian flag, so big-endian), entry count at 4, tokens offset at 8, the value 64 at 12 (presumably the hash width - confirm)
+    let currentDictOffset = headerSize, currentPostingsOffset = headerSize + dictSize, currentTokenOffset = tokensOffset;
+    for (const [token, { hash, postings }] of entries) {
+      view.setBigUint64(currentDictOffset, hash, !0);
+      const tokenBytes = encoder.encode(token);
+      view.setUint32(currentDictOffset + 8, tokenBytes.length, !0), view.setUint32(currentDictOffset + 12, currentTokenOffset, !0), view.setUint32(currentDictOffset + 16, currentPostingsOffset, !0), view.setUint32(currentDictOffset + 20, postings.length, !0), currentDictOffset += 28; // entry fields: +0 hash (u64), +8 token byte length, +12 token offset, +16 postings offset, +20 postings count; the stride is 28 although only 24 bytes are written
+      for (let i = 0; i < postings.length; i++)
+        view.setUint32(currentPostingsOffset, postings[i], !0), currentPostingsOffset += 4;
+      for (let i = 0; i < tokenBytes.length; i++)
+        view.setUint8(currentTokenOffset++, tokenBytes[i]);
+      view.setUint8(currentTokenOffset++, 0);
+    }
+    await this.#storage.write(this.#filename, buffer), this.#buffer = buffer, this.#view = view;
+  }
+  search(term) { // Returns the posting list (doc ids) stored for term, or [] when no index is loaded or nothing matches.
+    if (!this.#view || !this.#buffer) return [];
+    const h = this.hash(term), count = this.#view.getUint32(4, !0);
+    let left = 0, right = count - 1;
+    const headerSize = 16, entrySize = 28, decoder = new TextDecoder();
+    for (; left <= right; ) { // binary search over the hash field of the dictionary entries
+      const mid = left + right >>> 1, entryPos = headerSize + mid * entrySize, entryHash = this.#view.getBigUint64(entryPos, !0);
+      if (entryHash < h)
+        left = mid + 1;
+      else if (entryHash > h)
+        right = mid - 1;
+      else {
+        if (!(mid > 0 && this.#view.getBigUint64(headerSize + (mid - 1) * entrySize, !0) === h || mid < count - 1 && this.#view.getBigUint64(headerSize + (mid + 1) * entrySize, !0) === h)) { // neither neighbour shares this hash: return this entry's postings. NOTE(review): the token text is not compared on this path, so a different term with the same hash gets these postings.
+          const postingsOffset = this.#view.getUint32(headerSize + mid * entrySize + 16, !0), postingsLen = this.#view.getUint32(headerSize + mid * entrySize + 20, !0), result = [];
+          for (let j = 0; j < postingsLen; j++)
+            result.push(this.#view.getUint32(postingsOffset + j * 4, !0));
+          return result;
+        }
+        let firstMatch = mid; // several entries share the hash: rewind to the first one, then compare token text
+        for (; firstMatch > 0; ) {
+          const prevPos = headerSize + (firstMatch - 1) * entrySize;
+          if (this.#view.getBigUint64(prevPos, !0) === h)
+            firstMatch--;
+          else
+            break;
+        }
+        for (let i = firstMatch; i < count; i++) {
+          const checkPos = headerSize + i * entrySize;
+          if (this.#view.getBigUint64(checkPos, !0) !== h) break;
+          const tokenLen = this.#view.getUint32(checkPos + 8, !0), tokenOffset = this.#view.getUint32(checkPos + 12, !0), tokenBuffer = new Uint8Array(this.#buffer, tokenOffset, tokenLen);
+          if (decoder.decode(tokenBuffer) === term) {
+            const postingsOffset = this.#view.getUint32(checkPos + 16, !0), postingsLen = this.#view.getUint32(checkPos + 20, !0), result = [];
+            for (let j = 0; j < postingsLen; j++)
+              result.push(this.#view.getUint32(postingsOffset + j * 4, !0));
+            return result;
+          }
+        }
+        return [];
+      }
+    }
+    return [];
+  }
+}
 const defaultTokenize = ({ text }) => {
   try {
     if (typeof Intl < "u" && typeof Intl.Segmenter == "function" && typeof Array.from == "function") {
@@ -294,6 +512,9 @@ class SearchEngine {
   #segments;
   #initialized = !1;
   #config;
+  #isHash64Bit = !0;
+  #hashAlgorithm32;
+  #hashAlgorithm64;
   // 批处理状态
   #inBatch = !1;
   #pendingTokenCounts = { word: 0, char: 0 };
@@ -305,12 +526,19 @@ class SearchEngine {
       minCharTokenSave: 0,
       indexingTokenizer: config.indexingTokenizer || defaultTokenize,
       ...config
-    }, (this.#config.minWordTokenSave || 0) >= (this.#config.wordSegmentTokenThreshold || 1e5))
+    }, this.#processHashAlgorithmConfig(), (this.#config.minWordTokenSave || 0) >= (this.#config.wordSegmentTokenThreshold || 1e5))
       throw new Error("minWordTokenSave must be less than wordSegmentTokenThreshold");
     if ((this.#config.minCharTokenSave || 0) >= (this.#config.charSegmentTokenThreshold || 5e5))
       throw new Error("minCharTokenSave must be less than charSegmentTokenThreshold");
     this.#storage = config.storage, this.#meta = new MetaManager(this.#storage), this.#cache = new IntermediateCache(this.#storage), this.#segments = /* @__PURE__ */ new Map();
   }
+  /**
+   * Resolves `config.hashAlgorithm` into the 32/64-bit flag and the matching hasher field: 64 or 32 installs a new Murmur3_64 / Murmur3_32, an object exposing `hash()` is probed with hash("test") (a bigint result selects the 64-bit path, anything else the 32-bit path), and every other value (including undefined) falls back to a new Murmur3_64.
+   */
+  #processHashAlgorithmConfig() {
+    const hashConfig = this.#config.hashAlgorithm;
+    hashConfig === 64 ? (this.#isHash64Bit = !0, this.#hashAlgorithm64 = new Murmur3_64()) : hashConfig === 32 ? (this.#isHash64Bit = !1, this.#hashAlgorithm32 = new Murmur3_32()) : hashConfig && typeof hashConfig.hash == "function" ? typeof hashConfig.hash("test") == "bigint" ? (this.#isHash64Bit = !0, this.#hashAlgorithm64 = hashConfig) : (this.#isHash64Bit = !1, this.#hashAlgorithm32 = hashConfig) : (this.#isHash64Bit = !0, this.#hashAlgorithm64 = new Murmur3_64());
+  }
   /**
    * 开启批处理
    * 批处理期间 addDocuments 只写入缓存，不触发索引段构建
@@ -396,7 +624,7 @@ class SearchEngine {
       const segmentsMeta = this.#meta.getSegments(type);
       for (const meta of segmentsMeta) {
         const filename = meta.filename;
-        !this.#segments.has(filename) && !segmentsToLoad.has(filename) && segmentsToLoad.set(filename, new IndexSegment(filename, this.#storage));
+        !this.#segments.has(filename) && !segmentsToLoad.has(filename) && (this.#isHash64Bit ? segmentsToLoad.set(filename, new IndexSegment64(filename, this.#storage, this.#hashAlgorithm64)) : segmentsToLoad.set(filename, new IndexSegment(filename, this.#storage, this.#hashAlgorithm32)));
       }
     };
     collectSegments("word"), collectSegments("char"), await Promise.all(
@@ -465,7 +693,7 @@ class SearchEngine {
       ...this.#meta.getSegments("char")
     ];
     for (const seg of allSegments)
-      this.#segments.has(seg.filename) || this.#segments.set(seg.filename, new IndexSegment(seg.filename, this.#storage)), await this.#segments.get(seg.filename).loadIndex();
+      this.#segments.has(seg.filename) || (this.#isHash64Bit ? this.#segments.set(seg.filename, new IndexSegment64(seg.filename, this.#storage, this.#hashAlgorithm64)) : this.#segments.set(seg.filename, new IndexSegment(seg.filename, this.#storage, this.#hashAlgorithm32))), await this.#segments.get(seg.filename).loadIndex();
     this.#initialized = !0;
   }
   #getIndexingTokens(doc) {
@@ -497,11 +725,16 @@ class SearchEngine {
     }
     const docsToBuild = await this.#cache.readRange(cacheFilename, startOffset, currentCacheSize);
     let segment = this.#segments.get(targetSegmentName);
-    segment || (segment = new IndexSegment(targetSegmentName, this.#storage), this.#segments.set(targetSegmentName, segment)), await segment.buildAndSave(docsToBuild), this.#meta.updateSegment(type, targetSegmentName, startOffset, currentCacheSize, newTokenCountTotal, isNew);
+    segment || (this.#isHash64Bit ? segment = new IndexSegment64(targetSegmentName, this.#storage, this.#hashAlgorithm64) : segment = new IndexSegment(targetSegmentName, this.#storage, this.#hashAlgorithm32), this.#segments.set(targetSegmentName, segment)), await segment.buildAndSave(docsToBuild), this.#meta.updateSegment(type, targetSegmentName, startOffset, currentCacheSize, newTokenCountTotal, isNew);
   }
 }
 export {
+  Murmur3HashFactory,
+  Murmur3_128,
+  Murmur3_32,
+  Murmur3_64,
   SearchEngine,
-  murmur3_32 as hash,
-  murmur3_32
+  defaultHashFactory,
+  murmur3_32,
+  murmur3_64
 };

package/lib/type.d.ts CHANGED Viewed

@@ -61,6 +61,35 @@ interface IStorage {
     getFileSize(filename: string): Promise<number>;
 }
+interface IHashAlgorithm<T> {
+    /**
+     * Computes the hash of a string.
+     * @param str Input string
+     * @returns The hash value
+     */
+    hash(str: string): T;
+}
+interface IHashAlgorithm32 extends IHashAlgorithm<number> { // hash() returns a `number`
+}
+interface IHashAlgorithm64 extends IHashAlgorithm<bigint> { // hash() returns a `bigint`
+}
+interface IHashAlgorithm128 extends IHashAlgorithm<bigint> { // hash() returns a `bigint`; type-identical to IHashAlgorithm64, so the two are not distinguishable structurally
+}
+interface IHashAlgorithmFactory {
+    /**
+     * Creates a 32-bit hash algorithm instance.
+     */
+    create32(): IHashAlgorithm32;
+    /**
+     * Creates a 64-bit hash algorithm instance.
+     */
+    create64(): IHashAlgorithm64;
+    /**
+     * Creates a 128-bit hash algorithm instance.
+     */
+    create128(): IHashAlgorithm128;
+}
 /**
  * 索引类型
  */
@@ -93,6 +122,15 @@ interface ISearchEngineOption {
      * - 影响: 直接决定搜索匹配的范围和结果的相关性
      */
     searchTokenizer?: SearchTokenizer;
+    /**
+     * Hash algorithm configuration (optional)
+     * - 32: use the default 32-bit hash algorithm
+     * - 64: use the default 64-bit hash algorithm
+     * - IHashAlgorithm32: use a custom 32-bit hash algorithm
+     * - IHashAlgorithm64: use a custom 64-bit hash algorithm
+     * - undefined: the default 64-bit hash algorithm is used (lib/core.cjs `#processHashAlgorithmConfig` falls back to Murmur3_64, not to a 32-bit hasher)
+     */
+    hashAlgorithm?: 32 | 64 | IHashAlgorithm32 | IHashAlgorithm64;
     /**
      * 词索引分段阈值 (Token数) - 分段算法配置
      * - 作用: 控制词索引文件的大小，超过阈值时创建新的索引段
@@ -173,4 +211,38 @@ interface ISearchEngine {
     hasDocument(id: number): Promise<boolean>;
 }
-export type { IDocument, IDocumentBase, IIndexMeta, IResult, ISearchEngine, ISearchEngineOption, ISearchEngineStatus, ISegmentMeta, IStorage, ITokenizedDoc, IndexType, IndexingTokenizer, SearchTokenizer };
+/**
+ * Index segment interface: the methods shared by IndexSegment and IndexSegment64.
+ */
+interface IIndexSegment {
+    /**
+     * Computes the hash of a string with the current hash algorithm.
+     * @param str The string to hash
+     * @returns The hash value (number | bigint)
+     */
+    hash(str: string): number | bigint;
+    /**
+     * Sets the hash algorithm.
+     * @param hashAlgorithm The new hash algorithm instance
+     */
+    setHashAlgorithm(hashAlgorithm: IHashAlgorithm32 | IHashAlgorithm64): void;
+    /**
+     * Loads the index.
+     * @returns Whether the index was loaded successfully
+     */
+    loadIndex(): Promise<boolean>;
+    /**
+     * Builds and saves the index.
+     * @param docs The documents to index
+     */
+    buildAndSave(docs: ITokenizedDoc[]): Promise<void>;
+    /**
+     * Searches the index.
+     * @param term The search term
+     * @returns Array of matching document IDs
+     */
+    search(term: string): number[];
+}
+export type { IDocument, IDocumentBase, IHashAlgorithm, IHashAlgorithm128, IHashAlgorithm32, IHashAlgorithm64, IHashAlgorithmFactory, IIndexMeta, IIndexSegment, IResult, ISearchEngine, ISearchEngineOption, ISearchEngineStatus, ISegmentMeta, IStorage, ITokenizedDoc, IndexType, IndexingTokenizer, SearchTokenizer };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "gs-search",
-  "version": "0.1.7",
+  "version": "0.1.8",
   "type": "module",
   "main": "lib/index.cjs",
   "module": "lib/index.js",