document-dataply 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/index.js CHANGED
@@ -10127,13 +10127,14 @@ function ngramTokenize(text, gramSize) {
10127
10127
  if (typeof text !== "string") return [];
10128
10128
  const tokens = /* @__PURE__ */ new Set();
10129
10129
  const words = text.split(/\s+/).filter(Boolean);
10130
- for (const word of words) {
10130
+ for (let i = 0, len = words.length; i < len; i++) {
10131
+ const word = words[i];
10131
10132
  if (word.length < gramSize) {
10132
10133
  if (word.length > 0) tokens.add(word);
10133
10134
  continue;
10134
10135
  }
10135
- for (let i = 0; i <= word.length - gramSize; i++) {
10136
- tokens.add(word.slice(i, i + gramSize));
10136
+ for (let j = 0, wLen = word.length; j <= wLen - gramSize; j++) {
10137
+ tokens.add(word.slice(j, j + gramSize));
10137
10138
  }
10138
10139
  }
10139
10140
  return Array.from(tokens);
@@ -10501,6 +10502,23 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
10501
10502
  };
10502
10503
  }
10503
10504
  }
10505
+ const ftsCandidate = candidates.find(
10506
+ (c) => c.isFtsMatch && c.matchTokens && c.matchTokens.length > 0
10507
+ );
10508
+ if (ftsCandidate) {
10509
+ const hasHigherPriority = candidates.some((c) => {
10510
+ if (c === ftsCandidate) return false;
10511
+ const cond = c.condition;
10512
+ return "equal" in cond || "primaryEqual" in cond;
10513
+ });
10514
+ if (!hasHigherPriority) {
10515
+ return {
10516
+ driver: ftsCandidate,
10517
+ others: candidates.filter((c) => c !== ftsCandidate),
10518
+ rollback
10519
+ };
10520
+ }
10521
+ }
10504
10522
  let res = import_dataply3.BPTreeAsync.ChooseDriver(candidates);
10505
10523
  if (!res && candidates.length > 0) {
10506
10524
  res = candidates[0];
@@ -10588,6 +10606,39 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
10588
10606
  rollback();
10589
10607
  return new Float64Array(Array.from(keys || []));
10590
10608
  }
10609
+ /**
10610
+ * Fetches primary keys using only the driver index (no intersection with the other candidates).
10611
+ * Used by selectDocuments; the remaining conditions (others) are verified with tree.verify() while streaming. NOTE(review): rollback is handed to the caller and is not invoked here if applyCandidate/applyCandidateByFTS rejects - confirm that is intended.
10612
+ * @returns The driver key array, the list of other candidates, and the rollback function; or null when no selectivity candidate is found.
10613
+ */
10614
+ async getDriverKeys(query, orderBy, sortOrder = "asc") {
10615
+ const isQueryEmpty = Object.keys(query).length === 0;
10616
+ const normalizedQuery = isQueryEmpty ? { _id: { gte: 0 } } : query; // an empty query is replaced by an `_id >= 0` condition
10617
+ const selectivity = await this.getSelectivityCandidate(
10618
+ this.verboseQuery(normalizedQuery),
10619
+ orderBy
10620
+ );
10621
+ if (!selectivity) return null;
10622
+ const { driver, others, rollback } = selectivity;
10623
+ const useIndexOrder = orderBy === void 0 || driver.field === orderBy;
10624
+ const currentOrder = useIndexOrder ? sortOrder : void 0; // sort order is only passed on when there is no orderBy or the driver field is the orderBy field
10625
+ let keys;
10626
+ if (driver.isFtsMatch && driver.matchTokens && driver.matchTokens.length > 0) {
10627
+ keys = await this.applyCandidateByFTS(
10628
+ driver,
10629
+ driver.matchTokens,
10630
+ void 0,
10631
+ currentOrder
10632
+ );
10633
+ } else {
10634
+ keys = await this.applyCandidate(driver, void 0, currentOrder);
10635
+ }
10636
+ return {
10637
+ keys: new Float64Array(Array.from(keys)), // keys are copied into a Float64Array
10638
+ others,
10639
+ rollback
10640
+ };
10641
+ }
10591
10642
  async insertDocumentInternal(document, tx) {
10592
10643
  const metadata = await this.getDocumentInnerMetadata(tx);
10593
10644
  const id = ++metadata.lastId;
@@ -10873,9 +10924,11 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
10873
10924
  }
10874
10925
  /**
10875
10926
  * Prefetch 방식으로 키 배열을 청크 단위로 조회하여 문서를 순회합니다.
10876
- * FTS 검증을 통과한 문서만 yield 합니다.
10927
+ * FTS 검증 및 others 후보에 대한 tree.verify() 검증을 통과한 문서만 yield 합니다.
10928
+ * 교집합 대신 스트리밍 중 검증하여 첫 결과 반환 시간을 단축합니다.
10877
10929
  */
10878
- async *processChunkedKeys(keys, startIdx, initialChunkSize, ftsConditions, tx) {
10930
+ async *processChunkedKeysWithVerify(keys, startIdx, initialChunkSize, ftsConditions, others, tx) {
10931
+ const verifyOthers = others.filter((o) => !o.isFtsMatch);
10879
10932
  let i = startIdx;
10880
10933
  const totalKeys = keys.length;
10881
10934
  let currentChunkSize = initialChunkSize;
@@ -10900,6 +10953,24 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
10900
10953
  const doc = JSON.parse(s);
10901
10954
  chunkTotalSize += s.length * 2;
10902
10955
  if (ftsConditions.length > 0 && !this.verifyFts(doc, ftsConditions)) continue;
10956
+ if (verifyOthers.length > 0) {
10957
+ const flatDoc = this.flattenDocument(doc);
10958
+ let passed = true;
10959
+ for (let k = 0, kLen = verifyOthers.length; k < kLen; k++) {
10960
+ const other = verifyOthers[k];
10961
+ const fieldValue = flatDoc[other.field];
10962
+ if (fieldValue === void 0) {
10963
+ passed = false;
10964
+ break;
10965
+ }
10966
+ const treeValue = { k: doc._id, v: fieldValue };
10967
+ if (!other.tree.verify(treeValue, other.condition)) {
10968
+ passed = false;
10969
+ break;
10970
+ }
10971
+ }
10972
+ if (!passed) continue;
10973
+ }
10903
10974
  yield doc;
10904
10975
  }
10905
10976
  currentChunkSize = this.adjustChunkSize(currentChunkSize, chunkTotalSize);
@@ -10942,73 +11013,86 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
10942
11013
  }
10943
11014
  }
10944
11015
  }
10945
- const keys = await self.getKeys(query, orderByField, sortOrder);
10946
- if (keys.length === 0) return;
11016
+ const driverResult = await self.getDriverKeys(query, orderByField, sortOrder);
11017
+ if (!driverResult) return;
11018
+ const { keys, others, rollback } = driverResult;
11019
+ if (keys.length === 0) {
11020
+ rollback();
11021
+ return;
11022
+ }
11023
+ const isQueryEmpty = Object.keys(query).length === 0;
11024
+ const normalizedQuery = isQueryEmpty ? { _id: { gte: 0 } } : query;
10947
11025
  const selectivity = await self.getSelectivityCandidate(
10948
- self.verboseQuery(query),
11026
+ self.verboseQuery(normalizedQuery),
10949
11027
  orderByField
10950
11028
  );
10951
11029
  const isDriverOrderByField = orderByField === void 0 || selectivity && selectivity.driver.field === orderByField;
10952
11030
  if (selectivity) selectivity.rollback();
10953
- if (!isDriverOrderByField && orderByField) {
10954
- const topK = limit === Infinity ? Infinity : offset + limit;
10955
- let heap = null;
10956
- if (topK !== Infinity) {
10957
- heap = new BinaryHeap((a, b) => {
11031
+ try {
11032
+ if (!isDriverOrderByField && orderByField) {
11033
+ const topK = limit === Infinity ? Infinity : offset + limit;
11034
+ let heap = null;
11035
+ if (topK !== Infinity) {
11036
+ heap = new BinaryHeap((a, b) => {
11037
+ const aVal = a[orderByField] ?? a._id;
11038
+ const bVal = b[orderByField] ?? b._id;
11039
+ const cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
11040
+ return sortOrder === "asc" ? -cmp : cmp;
11041
+ });
11042
+ }
11043
+ const results = [];
11044
+ for await (const doc of self.processChunkedKeysWithVerify(
11045
+ keys,
11046
+ 0,
11047
+ self.options.pageSize,
11048
+ ftsConditions,
11049
+ others,
11050
+ tx2
11051
+ )) {
11052
+ if (heap) {
11053
+ if (heap.size < topK) heap.push(doc);
11054
+ else {
11055
+ const top = heap.peek();
11056
+ if (top) {
11057
+ const aVal = doc[orderByField] ?? doc._id;
11058
+ const bVal = top[orderByField] ?? top._id;
11059
+ const cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
11060
+ if (sortOrder === "asc" ? cmp < 0 : cmp > 0) heap.replace(doc);
11061
+ }
11062
+ }
11063
+ } else {
11064
+ results.push(doc);
11065
+ }
11066
+ }
11067
+ const finalDocs = heap ? heap.toArray() : results;
11068
+ finalDocs.sort((a, b) => {
10958
11069
  const aVal = a[orderByField] ?? a._id;
10959
11070
  const bVal = b[orderByField] ?? b._id;
10960
11071
  const cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
10961
- return sortOrder === "asc" ? -cmp : cmp;
11072
+ return sortOrder === "asc" ? cmp : -cmp;
10962
11073
  });
10963
- }
10964
- const results = [];
10965
- for await (const doc of self.processChunkedKeys(
10966
- keys,
10967
- 0,
10968
- self.options.pageSize,
10969
- ftsConditions,
10970
- tx2
10971
- )) {
10972
- if (heap) {
10973
- if (heap.size < topK) heap.push(doc);
10974
- else {
10975
- const top = heap.peek();
10976
- if (top) {
10977
- const aVal = doc[orderByField] ?? doc._id;
10978
- const bVal = top[orderByField] ?? top._id;
10979
- const cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
10980
- if (sortOrder === "asc" ? cmp < 0 : cmp > 0) heap.replace(doc);
10981
- }
10982
- }
10983
- } else {
10984
- results.push(doc);
11074
+ const end = limit === Infinity ? void 0 : offset + limit;
11075
+ const limitedResults = finalDocs.slice(offset, end);
11076
+ for (let j = 0, len = limitedResults.length; j < len; j++) {
11077
+ yield limitedResults[j];
10985
11078
  }
10986
- }
10987
- const finalDocs = heap ? heap.toArray() : results;
10988
- finalDocs.sort((a, b) => {
10989
- const aVal = a[orderByField] ?? a._id;
10990
- const bVal = b[orderByField] ?? b._id;
10991
- const cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
10992
- return sortOrder === "asc" ? cmp : -cmp;
10993
- });
10994
- const end = limit === Infinity ? void 0 : offset + limit;
10995
- const limitedResults = finalDocs.slice(offset, end);
10996
- for (let j = 0, len = limitedResults.length; j < len; j++) {
10997
- yield limitedResults[j];
10998
- }
10999
- } else {
11000
- let yieldedCount = 0;
11001
- for await (const doc of self.processChunkedKeys(
11002
- keys,
11003
- offset,
11004
- self.options.pageSize,
11005
- ftsConditions,
11006
- tx2
11007
- )) {
11008
- if (yieldedCount >= limit) break;
11009
- yield doc;
11010
- yieldedCount++;
11011
- }
11079
+ } else {
11080
+ let yieldedCount = 0;
11081
+ for await (const doc of self.processChunkedKeysWithVerify(
11082
+ keys,
11083
+ offset,
11084
+ self.options.pageSize,
11085
+ ftsConditions,
11086
+ others,
11087
+ tx2
11088
+ )) {
11089
+ if (yieldedCount >= limit) break;
11090
+ yield doc;
11091
+ yieldedCount++;
11092
+ }
11093
+ }
11094
+ } finally {
11095
+ rollback();
11012
11096
  }
11013
11097
  }, tx);
11014
11098
  const drain = async () => {
@@ -1,5 +1,5 @@
1
- import { ValueComparator } from 'dataply';
2
1
  import type { DataplyTreeValue, Primitive } from '../../types';
2
+ import { ValueComparator } from 'dataply';
3
3
  export declare class DocumentValueComparator<T extends DataplyTreeValue<U>, U extends Primitive> extends ValueComparator<T> {
4
4
  primaryAsc(a: T, b: T): number;
5
5
  asc(a: T, b: T): number;
@@ -94,6 +94,12 @@ export declare class DocumentDataplyAPI<T extends DocumentJSON, IC extends Index
94
94
  * 쿼리 최적화를 통합하기 위한 내부 공통 메서드입니다.
95
95
  */
96
96
  getKeys(query: Partial<DocumentDataplyIndexedQuery<T, IC>>, orderBy?: keyof IC | '_id', sortOrder?: 'asc' | 'desc'): Promise<Float64Array>;
97
+ /**
98
+ * Fetches primary keys using only the driver index (no intersection with the other candidates).
99
+ * Used by selectDocuments; the remaining conditions (others) are verified with tree.verify() while streaming.
100
+ * @returns The driver key array, the list of other candidates, and the rollback function; or null.
101
+ */
102
+ private getDriverKeys;
97
103
  private insertDocumentInternal;
98
104
  /**
99
105
  * Insert a document into the database
@@ -157,9 +163,10 @@ export declare class DocumentDataplyAPI<T extends DocumentJSON, IC extends Index
157
163
  private adjustChunkSize;
158
164
  /**
159
165
  * Prefetch 방식으로 키 배열을 청크 단위로 조회하여 문서를 순회합니다.
160
- * FTS 검증을 통과한 문서만 yield 합니다.
166
+ * FTS 검증 및 others 후보에 대한 tree.verify() 검증을 통과한 문서만 yield 합니다.
167
+ * 교집합 대신 스트리밍 중 검증하여 첫 결과 반환 시간을 단축합니다.
161
168
  */
162
- private processChunkedKeys;
169
+ private processChunkedKeysWithVerify;
163
170
  /**
164
171
  * Select documents from the database
165
172
  * @param query The query to use (only indexed fields + _id allowed)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "document-dataply",
3
- "version": "0.0.7",
3
+ "version": "0.0.8",
4
4
  "description": "Simple and powerful JSON document database supporting complex queries and flexible indexing policies.",
5
5
  "license": "MIT",
6
6
  "author": "izure <admin@izure.org>",
package/readme.md CHANGED
@@ -124,9 +124,13 @@ const ids = await db.insertBatch([
124
124
  | `equal`, `notEqual` | Equality check |
125
125
  | `like` | Pattern matching |
126
126
  | `or` | Matching within an array |
127
+ | `match` | Full-text search (Requires FTS Index) |
127
128
 
128
129
  For detailed operator usage, index constraints (including full scans), and sorting methods, see the [Query Guide (QUERY.md)](./docs/QUERY.md).
129
130
 
131
+ > [!IMPORTANT]
132
+ > **Full-Text Search (match)**: To use the `match` operator, you must configure the field as an FTS index (e.g., `{ type: 'fts', tokenizer: 'whitespace' }`). Standard boolean indices do not support `match`. See [QUERY.md](./docs/QUERY.md#4-full-text-search-fts-indexing) for details.
133
+
130
134
  ### Transactions
131
135
 
132
136
  Ensure data integrity with ACID-compliant transactions. Use `commit()` and `rollback()` to process multiple operations atomically.