document-dataply 0.0.7 → 0.0.9-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/index.js CHANGED
@@ -6206,6 +6206,22 @@ var require_cjs = __commonJS({
6206
6206
  }
6207
6207
  return (crc ^ -1) >>> 0;
6208
6208
  }
6209
+ function getMinMaxValue(array) {
6210
+ let i = 0;
6211
+ let min = Infinity;
6212
+ let max = -Infinity;
6213
+ let len = array.length;
6214
+ while (i < len) {
6215
+ if (array[i] < min) {
6216
+ min = array[i];
6217
+ }
6218
+ if (array[i] > max) {
6219
+ max = array[i];
6220
+ }
6221
+ i++;
6222
+ }
6223
+ return [min, max];
6224
+ }
6209
6225
  var Row = class _Row {
6210
6226
  static CONSTANT = {
6211
6227
  FLAG_DELETED: 0,
@@ -9120,8 +9136,7 @@ var require_cjs = __commonJS({
9120
9136
  for (let i = 0, len = pks.length; i < len; i++) {
9121
9137
  pkIndexMap.set(pks[i], i);
9122
9138
  }
9123
- const minPk = Math.min(...pks);
9124
- const maxPk = Math.max(...pks);
9139
+ const [minPk, maxPk] = getMinMaxValue(pks);
9125
9140
  const pkRidPairs = new Array(pks.length).fill(null);
9126
9141
  const btx = await this.getBPTreeTransaction(tx);
9127
9142
  const stream = btx.whereStream({ gte: minPk, lte: maxPk });
@@ -10127,13 +10142,14 @@ function ngramTokenize(text, gramSize) {
10127
10142
  if (typeof text !== "string") return [];
10128
10143
  const tokens = /* @__PURE__ */ new Set();
10129
10144
  const words = text.split(/\s+/).filter(Boolean);
10130
- for (const word of words) {
10145
+ for (let i = 0, len = words.length; i < len; i++) {
10146
+ const word = words[i];
10131
10147
  if (word.length < gramSize) {
10132
10148
  if (word.length > 0) tokens.add(word);
10133
10149
  continue;
10134
10150
  }
10135
- for (let i = 0; i <= word.length - gramSize; i++) {
10136
- tokens.add(word.slice(i, i + gramSize));
10151
+ for (let j = 0, wLen = word.length; j <= wLen - gramSize; j++) {
10152
+ tokens.add(word.slice(j, j + gramSize));
10137
10153
  }
10138
10154
  }
10139
10155
  return Array.from(tokens);
@@ -10501,6 +10517,23 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
10501
10517
  };
10502
10518
  }
10503
10519
  }
10520
+ const ftsCandidate = candidates.find(
10521
+ (c) => c.isFtsMatch && c.matchTokens && c.matchTokens.length > 0
10522
+ );
10523
+ if (ftsCandidate) {
10524
+ const hasHigherPriority = candidates.some((c) => {
10525
+ if (c === ftsCandidate) return false;
10526
+ const cond = c.condition;
10527
+ return "equal" in cond || "primaryEqual" in cond;
10528
+ });
10529
+ if (!hasHigherPriority) {
10530
+ return {
10531
+ driver: ftsCandidate,
10532
+ others: candidates.filter((c) => c !== ftsCandidate),
10533
+ rollback
10534
+ };
10535
+ }
10536
+ }
10504
10537
  let res = import_dataply3.BPTreeAsync.ChooseDriver(candidates);
10505
10538
  if (!res && candidates.length > 0) {
10506
10539
  res = candidates[0];
@@ -10588,6 +10621,39 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
10588
10621
  rollback();
10589
10622
  return new Float64Array(Array.from(keys || []));
10590
10623
  }
10624
+ /**
10625
+ * 드라이버 인덱스만으로 PK를 가져옵니다. (교집합 없이)
10626
+ * selectDocuments에서 사용하며, 나머지 조건(others)은 스트리밍 중 tree.verify()로 검증합니다.
10627
+ * @returns 드라이버 키 배열, others 후보 목록, rollback 함수. 또는 null.
10628
+ */
10629
+ async getDriverKeys(query, orderBy, sortOrder = "asc") {
10630
+ const isQueryEmpty = Object.keys(query).length === 0;
10631
+ const normalizedQuery = isQueryEmpty ? { _id: { gte: 0 } } : query;
10632
+ const selectivity = await this.getSelectivityCandidate(
10633
+ this.verboseQuery(normalizedQuery),
10634
+ orderBy
10635
+ );
10636
+ if (!selectivity) return null;
10637
+ const { driver, others, rollback } = selectivity;
10638
+ const useIndexOrder = orderBy === void 0 || driver.field === orderBy;
10639
+ const currentOrder = useIndexOrder ? sortOrder : void 0;
10640
+ let keys;
10641
+ if (driver.isFtsMatch && driver.matchTokens && driver.matchTokens.length > 0) {
10642
+ keys = await this.applyCandidateByFTS(
10643
+ driver,
10644
+ driver.matchTokens,
10645
+ void 0,
10646
+ currentOrder
10647
+ );
10648
+ } else {
10649
+ keys = await this.applyCandidate(driver, void 0, currentOrder);
10650
+ }
10651
+ return {
10652
+ keys: new Float64Array(Array.from(keys)),
10653
+ others,
10654
+ rollback
10655
+ };
10656
+ }
10591
10657
  async insertDocumentInternal(document, tx) {
10592
10658
  const metadata = await this.getDocumentInnerMetadata(tx);
10593
10659
  const id = ++metadata.lastId;
@@ -10873,9 +10939,11 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
10873
10939
  }
10874
10940
  /**
10875
10941
  * Prefetch 방식으로 키 배열을 청크 단위로 조회하여 문서를 순회합니다.
10876
- * FTS 검증을 통과한 문서만 yield 합니다.
10942
+ * FTS 검증 및 others 후보에 대한 tree.verify() 검증을 통과한 문서만 yield 합니다.
10943
+ * 교집합 대신 스트리밍 중 검증하여 첫 결과 반환 시간을 단축합니다.
10877
10944
  */
10878
- async *processChunkedKeys(keys, startIdx, initialChunkSize, ftsConditions, tx) {
10945
+ async *processChunkedKeysWithVerify(keys, startIdx, initialChunkSize, ftsConditions, others, tx) {
10946
+ const verifyOthers = others.filter((o) => !o.isFtsMatch);
10879
10947
  let i = startIdx;
10880
10948
  const totalKeys = keys.length;
10881
10949
  let currentChunkSize = initialChunkSize;
@@ -10900,6 +10968,24 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
10900
10968
  const doc = JSON.parse(s);
10901
10969
  chunkTotalSize += s.length * 2;
10902
10970
  if (ftsConditions.length > 0 && !this.verifyFts(doc, ftsConditions)) continue;
10971
+ if (verifyOthers.length > 0) {
10972
+ const flatDoc = this.flattenDocument(doc);
10973
+ let passed = true;
10974
+ for (let k = 0, kLen = verifyOthers.length; k < kLen; k++) {
10975
+ const other = verifyOthers[k];
10976
+ const fieldValue = flatDoc[other.field];
10977
+ if (fieldValue === void 0) {
10978
+ passed = false;
10979
+ break;
10980
+ }
10981
+ const treeValue = { k: doc._id, v: fieldValue };
10982
+ if (!other.tree.verify(treeValue, other.condition)) {
10983
+ passed = false;
10984
+ break;
10985
+ }
10986
+ }
10987
+ if (!passed) continue;
10988
+ }
10903
10989
  yield doc;
10904
10990
  }
10905
10991
  currentChunkSize = this.adjustChunkSize(currentChunkSize, chunkTotalSize);
@@ -10942,73 +11028,86 @@ var DocumentDataplyAPI = class extends import_dataply3.DataplyAPI {
10942
11028
  }
10943
11029
  }
10944
11030
  }
10945
- const keys = await self.getKeys(query, orderByField, sortOrder);
10946
- if (keys.length === 0) return;
11031
+ const driverResult = await self.getDriverKeys(query, orderByField, sortOrder);
11032
+ if (!driverResult) return;
11033
+ const { keys, others, rollback } = driverResult;
11034
+ if (keys.length === 0) {
11035
+ rollback();
11036
+ return;
11037
+ }
11038
+ const isQueryEmpty = Object.keys(query).length === 0;
11039
+ const normalizedQuery = isQueryEmpty ? { _id: { gte: 0 } } : query;
10947
11040
  const selectivity = await self.getSelectivityCandidate(
10948
- self.verboseQuery(query),
11041
+ self.verboseQuery(normalizedQuery),
10949
11042
  orderByField
10950
11043
  );
10951
11044
  const isDriverOrderByField = orderByField === void 0 || selectivity && selectivity.driver.field === orderByField;
10952
11045
  if (selectivity) selectivity.rollback();
10953
- if (!isDriverOrderByField && orderByField) {
10954
- const topK = limit === Infinity ? Infinity : offset + limit;
10955
- let heap = null;
10956
- if (topK !== Infinity) {
10957
- heap = new BinaryHeap((a, b) => {
11046
+ try {
11047
+ if (!isDriverOrderByField && orderByField) {
11048
+ const topK = limit === Infinity ? Infinity : offset + limit;
11049
+ let heap = null;
11050
+ if (topK !== Infinity) {
11051
+ heap = new BinaryHeap((a, b) => {
11052
+ const aVal = a[orderByField] ?? a._id;
11053
+ const bVal = b[orderByField] ?? b._id;
11054
+ const cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
11055
+ return sortOrder === "asc" ? -cmp : cmp;
11056
+ });
11057
+ }
11058
+ const results = [];
11059
+ for await (const doc of self.processChunkedKeysWithVerify(
11060
+ keys,
11061
+ 0,
11062
+ self.options.pageSize,
11063
+ ftsConditions,
11064
+ others,
11065
+ tx2
11066
+ )) {
11067
+ if (heap) {
11068
+ if (heap.size < topK) heap.push(doc);
11069
+ else {
11070
+ const top = heap.peek();
11071
+ if (top) {
11072
+ const aVal = doc[orderByField] ?? doc._id;
11073
+ const bVal = top[orderByField] ?? top._id;
11074
+ const cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
11075
+ if (sortOrder === "asc" ? cmp < 0 : cmp > 0) heap.replace(doc);
11076
+ }
11077
+ }
11078
+ } else {
11079
+ results.push(doc);
11080
+ }
11081
+ }
11082
+ const finalDocs = heap ? heap.toArray() : results;
11083
+ finalDocs.sort((a, b) => {
10958
11084
  const aVal = a[orderByField] ?? a._id;
10959
11085
  const bVal = b[orderByField] ?? b._id;
10960
11086
  const cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
10961
- return sortOrder === "asc" ? -cmp : cmp;
11087
+ return sortOrder === "asc" ? cmp : -cmp;
10962
11088
  });
10963
- }
10964
- const results = [];
10965
- for await (const doc of self.processChunkedKeys(
10966
- keys,
10967
- 0,
10968
- self.options.pageSize,
10969
- ftsConditions,
10970
- tx2
10971
- )) {
10972
- if (heap) {
10973
- if (heap.size < topK) heap.push(doc);
10974
- else {
10975
- const top = heap.peek();
10976
- if (top) {
10977
- const aVal = doc[orderByField] ?? doc._id;
10978
- const bVal = top[orderByField] ?? top._id;
10979
- const cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
10980
- if (sortOrder === "asc" ? cmp < 0 : cmp > 0) heap.replace(doc);
10981
- }
10982
- }
10983
- } else {
10984
- results.push(doc);
11089
+ const end = limit === Infinity ? void 0 : offset + limit;
11090
+ const limitedResults = finalDocs.slice(offset, end);
11091
+ for (let j = 0, len = limitedResults.length; j < len; j++) {
11092
+ yield limitedResults[j];
10985
11093
  }
10986
- }
10987
- const finalDocs = heap ? heap.toArray() : results;
10988
- finalDocs.sort((a, b) => {
10989
- const aVal = a[orderByField] ?? a._id;
10990
- const bVal = b[orderByField] ?? b._id;
10991
- const cmp = aVal < bVal ? -1 : aVal > bVal ? 1 : 0;
10992
- return sortOrder === "asc" ? cmp : -cmp;
10993
- });
10994
- const end = limit === Infinity ? void 0 : offset + limit;
10995
- const limitedResults = finalDocs.slice(offset, end);
10996
- for (let j = 0, len = limitedResults.length; j < len; j++) {
10997
- yield limitedResults[j];
10998
- }
10999
- } else {
11000
- let yieldedCount = 0;
11001
- for await (const doc of self.processChunkedKeys(
11002
- keys,
11003
- offset,
11004
- self.options.pageSize,
11005
- ftsConditions,
11006
- tx2
11007
- )) {
11008
- if (yieldedCount >= limit) break;
11009
- yield doc;
11010
- yieldedCount++;
11011
- }
11094
+ } else {
11095
+ let yieldedCount = 0;
11096
+ for await (const doc of self.processChunkedKeysWithVerify(
11097
+ keys,
11098
+ offset,
11099
+ self.options.pageSize,
11100
+ ftsConditions,
11101
+ others,
11102
+ tx2
11103
+ )) {
11104
+ if (yieldedCount >= limit) break;
11105
+ yield doc;
11106
+ yieldedCount++;
11107
+ }
11108
+ }
11109
+ } finally {
11110
+ rollback();
11012
11111
  }
11013
11112
  }, tx);
11014
11113
  const drain = async () => {
@@ -1,5 +1,5 @@
1
- import { ValueComparator } from 'dataply';
2
1
  import type { DataplyTreeValue, Primitive } from '../../types';
2
+ import { ValueComparator } from 'dataply';
3
3
  export declare class DocumentValueComparator<T extends DataplyTreeValue<U>, U extends Primitive> extends ValueComparator<T> {
4
4
  primaryAsc(a: T, b: T): number;
5
5
  asc(a: T, b: T): number;
@@ -94,6 +94,12 @@ export declare class DocumentDataplyAPI<T extends DocumentJSON, IC extends Index
94
94
  * 쿼리 최적화를 통합하기 위한 내부 공통 메서드입니다.
95
95
  */
96
96
  getKeys(query: Partial<DocumentDataplyIndexedQuery<T, IC>>, orderBy?: keyof IC | '_id', sortOrder?: 'asc' | 'desc'): Promise<Float64Array>;
97
+ /**
98
+ * 드라이버 인덱스만으로 PK를 가져옵니다. (교집합 없이)
99
+ * selectDocuments에서 사용하며, 나머지 조건(others)은 스트리밍 중 tree.verify()로 검증합니다.
100
+ * @returns 드라이버 키 배열, others 후보 목록, rollback 함수. 또는 null.
101
+ */
102
+ private getDriverKeys;
97
103
  private insertDocumentInternal;
98
104
  /**
99
105
  * Insert a document into the database
@@ -157,9 +163,10 @@ export declare class DocumentDataplyAPI<T extends DocumentJSON, IC extends Index
157
163
  private adjustChunkSize;
158
164
  /**
159
165
  * Prefetch 방식으로 키 배열을 청크 단위로 조회하여 문서를 순회합니다.
160
- * FTS 검증을 통과한 문서만 yield 합니다.
166
+ * FTS 검증 및 others 후보에 대한 tree.verify() 검증을 통과한 문서만 yield 합니다.
167
+ * 교집합 대신 스트리밍 중 검증하여 첫 결과 반환 시간을 단축합니다.
161
168
  */
162
- private processChunkedKeys;
169
+ private processChunkedKeysWithVerify;
163
170
  /**
164
171
  * Select documents from the database
165
172
  * @param query The query to use (only indexed fields + _id allowed)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "document-dataply",
3
- "version": "0.0.7",
3
+ "version": "0.0.9-alpha.0",
4
4
  "description": "Simple and powerful JSON document database supporting complex queries and flexible indexing policies.",
5
5
  "license": "MIT",
6
6
  "author": "izure <admin@izure.org>",
@@ -42,7 +42,7 @@
42
42
  "dataply"
43
43
  ],
44
44
  "dependencies": {
45
- "dataply": "^0.0.23"
45
+ "dataply": "^0.0.24-alpha.0"
46
46
  },
47
47
  "devDependencies": {
48
48
  "@types/jest": "^30.0.0",
package/readme.md CHANGED
@@ -124,9 +124,13 @@ const ids = await db.insertBatch([
124
124
  | `equal`, `notEqual` | Equality check |
125
125
  | `like` | Pattern matching |
126
126
  | `or` | Matching within an array |
127
+ | `match` | Full-text search (Requires FTS Index) |
127
128
 
128
129
  For detailed operator usage, index constraints (including full scans), and sorting methods, see the [Query Guide (QUERY.md)](./docs/QUERY.md).
129
130
 
131
+ > [!IMPORTANT]
132
+ > **Full-Text Search (match)**: To use the `match` operator, you must configure the field as an FTS index (e.g., `{ type: 'fts', tokenizer: 'whitespace' }`). Standard boolean indices do not support `match`. See [QUERY.md](./docs/QUERY.md#4-full-text-search-fts-indexing) for details.
133
+
130
134
  ### Transactions
131
135
 
132
136
  Ensure data integrity with ACID-compliant transactions. Use `commit()` and `rollback()` to process multiple operations atomically.