npm - document-dataply - Versions diffs - 0.0.7 → 0.0.8 - Mend

document-dataply 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/dist/cjs/index.js +146 -62
package/dist/types/core/bptree/documentComparator.d.ts +1 -1
package/dist/types/core/documentAPI.d.ts +9 -2
package/package.json +1 -1
package/readme.md +4 -0

package/dist/cjs/index.js CHANGED Viewed

@@ -10127,13 +10127,14 @@ function ngramTokenize(text, gramSize) {
   if (typeof text !== "string") return [];
   const tokens = /* @__PURE__ */ new Set();
   const words = text.split(/\s+/).filter(Boolean);
-  for (const word of words) {
+  for (let i = 0, len = words.length; i < len; i++) {
+    const word = words[i];
     if (word.length < gramSize) {
       if (word.length > 0) tokens.add(word);
       continue;
     }
-    for (let i = 0; i <= word.length - gramSize; i++) {
-      tokens.add(word.slice(i, i + gramSize));
+    for (let j = 0, wLen = word.length; j <= wLen - gramSize; j++) {
+      tokens.add(word.slice(j, j + gramSize));
     }
   }
   return Array.from(tokens);
@@ -10501,6 +10502,23 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
         };
       }
     }
+    const ftsCandidate = candidates.find(
+      (c) => c.isFtsMatch && c.matchTokens && c.matchTokens.length > 0
+    );
+    if (ftsCandidate) {
+      const hasHigherPriority = candidates.some((c) => {
+        if (c === ftsCandidate) return false;
+        const cond = c.condition;
+        return "equal" in cond || "primaryEqual" in cond;
+      });
+      if (!hasHigherPriority) {
+        return {
+          driver: ftsCandidate,
+          others: candidates.filter((c) => c !== ftsCandidate),
+          rollback
+        };
+      }
+    }
     let res = import_dataply3.BPTreeAsync.ChooseDriver(candidates);
     if (!res && candidates.length > 0) {
       res = candidates[0];
@@ -10588,6 +10606,39 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
     rollback();
     return new Float64Array(Array.from(keys || []));
   }
+  /**
+   * Fetches primary keys using only the driver index (no intersection with the other candidates).
+   * Used by selectDocuments; the remaining conditions (others) are verified while streaming via tree.verify().
+   *
+   * An empty query (an object with no keys) is normalized to { _id: { gte: 0 } } before the driver is selected.
+   * The sort order is forwarded to the driver only when orderBy is undefined or equals the driver field;
+   * otherwise undefined is passed as the order argument.
+   * Keys from an FTS driver with match tokens come from applyCandidateByFTS; all other drivers use applyCandidate.
+   * The rollback function is returned without being invoked in this method.
+   *
+   * @param query Query object to resolve.
+   * @param orderBy Optional field name to order by.
+   * @param sortOrder Sort direction; defaults to asc.
+   * @returns The driver key array (copied into a Float64Array), the list of other candidates, and the rollback function; or null when no selectivity candidate is found.
+   */
+  async getDriverKeys(query, orderBy, sortOrder = "asc") {
+    const isQueryEmpty = Object.keys(query).length === 0;
+    const normalizedQuery = isQueryEmpty ? { _id: { gte: 0 } } : query;
+    const selectivity = await this.getSelectivityCandidate(
+      this.verboseQuery(normalizedQuery),
+      orderBy
+    );
+    if (!selectivity) return null;
+    const { driver, others, rollback } = selectivity;
+    const useIndexOrder = orderBy === void 0 || driver.field === orderBy;
+    const currentOrder = useIndexOrder ? sortOrder : void 0;
+    let keys;
+    if (driver.isFtsMatch && driver.matchTokens && driver.matchTokens.length > 0) {
+      keys = await this.applyCandidateByFTS(
+        driver,
+        driver.matchTokens,
+        void 0,
+        currentOrder
+      );
+    } else {
+      keys = await this.applyCandidate(driver, void 0, currentOrder);
+    }
+    return {
+      keys: new Float64Array(Array.from(keys)),
+      others,
+      rollback
+    };
+  }
   async insertDocumentInternal(document, tx) {
     const metadata = await this.getDocumentInnerMetadata(tx);
     const id = ++metadata.lastId;
@@ -10873,9 +10924,11 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
   }
   /**
    * Prefetch 방식으로 키 배열을 청크 단위로 조회하여 문서를 순회합니다.
-   * FTS 검증을 통과한 문서만 yield 합니다.
+   * FTS 검증 및 others 후보에 대한 tree.verify() 검증을 통과한 문서만 yield 합니다.
+   * 교집합 대신 스트리밍 중 검증하여 첫 결과 반환 시간을 단축합니다.
    */
-  async *processChunkedKeys(keys, startIdx, initialChunkSize, ftsConditions, tx) {
+  async *processChunkedKeysWithVerify(keys, startIdx, initialChunkSize, ftsConditions, others, tx) {
+    const verifyOthers = others.filter((o) => !o.isFtsMatch);
     let i = startIdx;
     const totalKeys = keys.length;
     let currentChunkSize = initialChunkSize;
@@ -10900,6 +10953,24 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
         const doc = JSON.parse(s);
         chunkTotalSize += s.length * 2;
         if (ftsConditions.length > 0 && !this.verifyFts(doc, ftsConditions)) continue;
+        if (verifyOthers.length > 0) {
+          const flatDoc = this.flattenDocument(doc);
+          let passed = true;
+          for (let k = 0, kLen = verifyOthers.length; k < kLen; k++) {
+            const other = verifyOthers[k];
+            const fieldValue = flatDoc[other.field];
+            if (fieldValue === void 0) {
+              passed = false;
+              break;
+            }
+            const treeValue = { k: doc._id, v: fieldValue };
+            if (!other.tree.verify(treeValue, other.condition)) {
+              passed = false;
+              break;
+            }
+          }
+          if (!passed) continue;
+        }
         yield doc;
       }
       currentChunkSize = this.adjustChunkSize(currentChunkSize, chunkTotalSize);
@@ -10942,73 +11013,86 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
           }
         }
       }
-      const keys = await self.getKeys(query, orderByField, sortOrder);
-      if (keys.length === 0) return;
+      const driverResult = await self.getDriverKeys(query, orderByField, sortOrder);
+      if (!driverResult) return;
+      const { keys, others, rollback } = driverResult;
+      if (keys.length === 0) {
+        rollback();
+        return;
+      }
+      const isQueryEmpty = Object.keys(query).length === 0;
+      const normalizedQuery = isQueryEmpty ? { _id: { gte: 0 } } : query;
       const selectivity = await self.getSelectivityCandidate(
-        self.verboseQuery(query),
+        self.verboseQuery(normalizedQuery),
         orderByField
       );
       const isDriverOrderByField = orderByField === void 0 || selectivity && selectivity.driver.field === orderByField;
       if (selectivity) selectivity.rollback();
-      if (!isDriverOrderByField && orderByField) {
-        const topK = limit === Infinity ? Infinity : offset + limit;
-        let heap = null;
-        if (topK !== Infinity) {
-          heap = new BinaryHeap((a, b) => {
+      try {
+        if (!isDriverOrderByField && orderByField) {
+          const topK = limit === Infinity ? Infinity : offset + limit;
+          let heap = null;
+          if (topK !== Infinity) {
+            heap = new BinaryHeap((a, b) => {
+              const aVal = a[orderByField] ?? a._id;
+              const bVal = b[orderByField] ?? b._id;
+              const cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
+              return sortOrder === "asc" ? -cmp : cmp;
+            });
+          }
+          const results = [];
+          for await (const doc of self.processChunkedKeysWithVerify(
+            keys,
+            0,
+            self.options.pageSize,
+            ftsConditions,
+            others,
+            tx2
+          )) {
+            if (heap) {
+              if (heap.size < topK) heap.push(doc);
+              else {
+                const top = heap.peek();
+                if (top) {
+                  const aVal = doc[orderByField] ?? doc._id;
+                  const bVal = top[orderByField] ?? top._id;
+                  const cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
+                  if (sortOrder === "asc" ? cmp < 0 : cmp > 0) heap.replace(doc);
+                }
+              }
+            } else {
+              results.push(doc);
+            }
+          }
+          const finalDocs = heap ? heap.toArray() : results;
+          finalDocs.sort((a, b) => {
             const aVal = a[orderByField] ?? a._id;
             const bVal = b[orderByField] ?? b._id;
             const cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
-            return sortOrder === "asc" ? -cmp : cmp;
+            return sortOrder === "asc" ? cmp : -cmp;
           });
-        }
-        const results = [];
-        for await (const doc of self.processChunkedKeys(
-          keys,
-          0,
-          self.options.pageSize,
-          ftsConditions,
-          tx2
-        )) {
-          if (heap) {
-            if (heap.size < topK) heap.push(doc);
-            else {
-              const top = heap.peek();
-              if (top) {
-                const aVal = doc[orderByField] ?? doc._id;
-                const bVal = top[orderByField] ?? top._id;
-                const cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
-                if (sortOrder === "asc" ? cmp < 0 : cmp > 0) heap.replace(doc);
-              }
-            }
-          } else {
-            results.push(doc);
+          const end = limit === Infinity ? void 0 : offset + limit;
+          const limitedResults = finalDocs.slice(offset, end);
+          for (let j = 0, len = limitedResults.length; j < len; j++) {
+            yield limitedResults[j];
           }
-        }
-        const finalDocs = heap ? heap.toArray() : results;
-        finalDocs.sort((a, b) => {
-          const aVal = a[orderByField] ?? a._id;
-          const bVal = b[orderByField] ?? b._id;
-          const cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
-          return sortOrder === "asc" ? cmp : -cmp;
-        });
-        const end = limit === Infinity ? void 0 : offset + limit;
-        const limitedResults = finalDocs.slice(offset, end);
-        for (let j = 0, len = limitedResults.length; j < len; j++) {
-          yield limitedResults[j];
-        }
-      } else {
-        let yieldedCount = 0;
-        for await (const doc of self.processChunkedKeys(
-          keys,
-          offset,
-          self.options.pageSize,
-          ftsConditions,
-          tx2
-        )) {
-          if (yieldedCount >= limit) break;
-          yield doc;
-          yieldedCount++;
-        }
+        } else {
+          let yieldedCount = 0;
+          for await (const doc of self.processChunkedKeysWithVerify(
+            keys,
+            offset,
+            self.options.pageSize,
+            ftsConditions,
+            others,
+            tx2
+          )) {
+            if (yieldedCount >= limit) break;
+            yield doc;
+            yieldedCount++;
+          }
+        }
+      } finally {
+        rollback();
       }
     }, tx);
     const drain = async () => {

package/dist/types/core/bptree/documentComparator.d.ts CHANGED Viewed

@@ -1,5 +1,5 @@
-import { ValueComparator } from 'dataply';
 import type { DataplyTreeValue, Primitive } from '../../types';
+import { ValueComparator } from 'dataply';
 export declare class DocumentValueComparator<T extends DataplyTreeValue<U>, U extends Primitive> extends ValueComparator<T> {
     primaryAsc(a: T, b: T): number;
     asc(a: T, b: T): number;

package/dist/types/core/documentAPI.d.ts CHANGED Viewed

@@ -94,6 +94,12 @@ export declare class DocumentDataplyAPI<T extends DocumentJSON, IC extends Index
      * 쿼리 최적화를 통합하기 위한 내부 공통 메서드입니다.
      */
     getKeys(query: Partial<DocumentDataplyIndexedQuery<T, IC>>, orderBy?: keyof IC | '_id', sortOrder?: 'asc' | 'desc'): Promise<Float64Array>;
+    /**
+     * 드라이버 인덱스만으로 PK를 가져옵니다. (교집합 없이)
+     * selectDocuments에서 사용하며, 나머지 조건(others)은 스트리밍 중 tree.verify()로 검증합니다.
+     * @returns 드라이버 키 배열, others 후보 목록, rollback 함수. 또는 null.
+     */
+    private getDriverKeys;
     private insertDocumentInternal;
     /**
      * Insert a document into the database
@@ -157,9 +163,10 @@ export declare class DocumentDataplyAPI<T extends DocumentJSON, IC extends Index
     private adjustChunkSize;
     /**
      * Prefetch 방식으로 키 배열을 청크 단위로 조회하여 문서를 순회합니다.
-     * FTS 검증을 통과한 문서만 yield 합니다.
+     * FTS 검증 및 others 후보에 대한 tree.verify() 검증을 통과한 문서만 yield 합니다.
+     * 교집합 대신 스트리밍 중 검증하여 첫 결과 반환 시간을 단축합니다.
      */
-    private processChunkedKeys;
+    private processChunkedKeysWithVerify;
     /**
      * Select documents from the database
      * @param query The query to use (only indexed fields + _id allowed)

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "document-dataply",
-  "version": "0.0.7",
+  "version": "0.0.8",
   "description": "Simple and powerful JSON document database supporting complex queries and flexible indexing policies.",
   "license": "MIT",
   "author": "izure <admin@izure.org>",

package/readme.md CHANGED Viewed

@@ -124,9 +124,13 @@ const ids = await db.insertBatch([
 | `equal`, `notEqual` | Equality check |
 | `like` | Pattern matching |
 | `or` | Matching within an array |
+| `match` | Full-text search (Requires FTS Index) |
 For detailed operator usage, index constraints (including full scans), and sorting methods, see the [Query Guide (QUERY.md)](./docs/QUERY.md).
+> [!IMPORTANT]
+> **Full-Text Search (match)**: To use the `match` operator, you must configure the field as an FTS index (e.g., `{ type: 'fts', tokenizer: 'whitespace' }`). Standard boolean indices do not support `match`. See [QUERY.md](./docs/QUERY.md#4-full-text-search-fts-indexing) for details.
 ### Transactions
 Ensure data integrity with ACID-compliant transactions. Use `commit()` and `rollback()` to process multiple operations atomically.