document-dataply 0.0.10-alpha.4 → 0.0.10-alpha.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/index.js CHANGED
@@ -10466,6 +10466,26 @@ function tokenize(text, options) {
10466
10466
  }
10467
10467
 
10468
10468
  // src/core/Optimizer.ts
10469
+ var SELECTIVITY = {
10470
+ /** O(log N) 포인트 룩업 */
10471
+ EQUAL: 0.01,
10472
+ /** 양쪽 바운드(gte+lte) 범위 스캔 */
10473
+ BOUNDED_RANGE: 0.33,
10474
+ /** 한쪽 바운드(gte 또는 lte)만 있을 때, 중간부터 풀스캔 */
10475
+ HALF_RANGE: 0.5,
10476
+ /** Or 조건: B+Tree 내부 풀스캔 */
10477
+ OR: 0.9,
10478
+ /** Like 조건: B+Tree 내부 풀스캔 */
10479
+ LIKE: 0.9,
10480
+ /** 알 수 없는 조건 */
10481
+ UNKNOWN: 0.9,
10482
+ /** FTS 통계 없을 때 보수적 추정 */
10483
+ FTS_DEFAULT: 0.5,
10484
+ /** 정렬 비용 가중치 (orderBy 미지원 시) */
10485
+ SORT_PENALTY: 0.3,
10486
+ /** 인메모리 정렬이 유의미해지는 임계 문서 수 */
10487
+ SORT_THRESHOLD: 1e4
10488
+ };
10469
10489
  var Optimizer = class {
10470
10490
  constructor(api) {
10471
10491
  this.api = api;
@@ -10477,7 +10497,7 @@ var Optimizer = class {
10477
10497
  const primaryField = config.fields[0];
10478
10498
  if (!queryFields.has(primaryField)) return null;
10479
10499
  const builtCondition = {};
10480
- let score = 0;
10500
+ let selectivity = 1;
10481
10501
  let isConsecutive = true;
10482
10502
  const coveredFields = [];
10483
10503
  const compositeVerifyFields = [];
@@ -10492,13 +10512,12 @@ var Optimizer = class {
10492
10512
  continue;
10493
10513
  }
10494
10514
  coveredFields.push(field);
10495
- score += 1;
10496
10515
  if (isConsecutive) {
10497
10516
  const cond = query[field];
10498
10517
  if (cond !== void 0) {
10499
10518
  let isBounded = false;
10500
10519
  if (typeof cond !== "object" || cond === null) {
10501
- score += 100;
10520
+ selectivity *= SELECTIVITY.EQUAL;
10502
10521
  startValues.push(cond);
10503
10522
  endValues.push(cond);
10504
10523
  startOperator = "primaryGte";
@@ -10506,7 +10525,7 @@ var Optimizer = class {
10506
10525
  isBounded = true;
10507
10526
  } else if ("primaryEqual" in cond || "equal" in cond) {
10508
10527
  const val = cond.primaryEqual?.v ?? cond.equal?.v ?? cond.primaryEqual ?? cond.equal;
10509
- score += 100;
10528
+ selectivity *= SELECTIVITY.EQUAL;
10510
10529
  startValues.push(val);
10511
10530
  endValues.push(val);
10512
10531
  startOperator = "primaryGte";
@@ -10514,7 +10533,7 @@ var Optimizer = class {
10514
10533
  isBounded = true;
10515
10534
  } else if ("primaryGte" in cond || "gte" in cond) {
10516
10535
  const val = cond.primaryGte?.v ?? cond.gte?.v ?? cond.primaryGte ?? cond.gte;
10517
- score += 50;
10536
+ selectivity *= SELECTIVITY.HALF_RANGE;
10518
10537
  isConsecutive = false;
10519
10538
  startValues.push(val);
10520
10539
  startOperator = "primaryGte";
@@ -10522,7 +10541,7 @@ var Optimizer = class {
10522
10541
  isBounded = true;
10523
10542
  } else if ("primaryGt" in cond || "gt" in cond) {
10524
10543
  const val = cond.primaryGt?.v ?? cond.gt?.v ?? cond.primaryGt ?? cond.gt;
10525
- score += 50;
10544
+ selectivity *= SELECTIVITY.HALF_RANGE;
10526
10545
  isConsecutive = false;
10527
10546
  startValues.push(val);
10528
10547
  startOperator = "primaryGt";
@@ -10530,7 +10549,7 @@ var Optimizer = class {
10530
10549
  isBounded = true;
10531
10550
  } else if ("primaryLte" in cond || "lte" in cond) {
10532
10551
  const val = cond.primaryLte?.v ?? cond.lte?.v ?? cond.primaryLte ?? cond.lte;
10533
- score += 50;
10552
+ selectivity *= SELECTIVITY.HALF_RANGE;
10534
10553
  isConsecutive = false;
10535
10554
  endValues.push(val);
10536
10555
  endOperator = "primaryLte";
@@ -10538,20 +10557,20 @@ var Optimizer = class {
10538
10557
  isBounded = true;
10539
10558
  } else if ("primaryLt" in cond || "lt" in cond) {
10540
10559
  const val = cond.primaryLt?.v ?? cond.lt?.v ?? cond.primaryLt ?? cond.lt;
10541
- score += 50;
10560
+ selectivity *= SELECTIVITY.HALF_RANGE;
10542
10561
  isConsecutive = false;
10543
10562
  endValues.push(val);
10544
10563
  endOperator = "primaryLt";
10545
10564
  if (startValues.length > 0) startOperator = "primaryGte";
10546
10565
  isBounded = true;
10547
10566
  } else if ("primaryOr" in cond || "or" in cond) {
10548
- score += 20;
10567
+ selectivity *= SELECTIVITY.OR;
10549
10568
  isConsecutive = false;
10550
10569
  } else if ("like" in cond) {
10551
- score += 15;
10570
+ selectivity *= SELECTIVITY.LIKE;
10552
10571
  isConsecutive = false;
10553
10572
  } else {
10554
- score += 10;
10573
+ selectivity *= SELECTIVITY.UNKNOWN;
10555
10574
  isConsecutive = false;
10556
10575
  }
10557
10576
  if (!isBounded && field !== primaryField) {
@@ -10598,9 +10617,6 @@ var Optimizer = class {
10598
10617
  }
10599
10618
  if (!isExactMatch) break;
10600
10619
  }
10601
- if (isIndexOrderSupported) {
10602
- score += 200;
10603
- }
10604
10620
  }
10605
10621
  return {
10606
10622
  tree: treeTx,
@@ -10608,7 +10624,7 @@ var Optimizer = class {
10608
10624
  field: primaryField,
10609
10625
  indexName,
10610
10626
  isFtsMatch: false,
10611
- score,
10627
+ selectivity,
10612
10628
  compositeVerifyFields,
10613
10629
  coveredFields,
10614
10630
  isIndexOrderSupported
@@ -10616,6 +10632,7 @@ var Optimizer = class {
10616
10632
  }
10617
10633
  /**
10618
10634
  * FTS 타입 인덱스의 선택도를 평가합니다.
10635
+ * FTSTermCount 통계가 있으면 실측 데이터 기반으로 선택도를 산출합니다.
10619
10636
  */
10620
10637
  evaluateFTSCandidate(indexName, config, query, queryFields, treeTx) {
10621
10638
  const field = config.fields;
@@ -10624,6 +10641,16 @@ var Optimizer = class {
10624
10641
  if (!condition || typeof condition !== "object" || !("match" in condition)) return null;
10625
10642
  const ftsConfig = this.api.indexManager.getFtsConfig(config);
10626
10643
  const matchTokens = ftsConfig ? tokenize(condition.match, ftsConfig) : [];
10644
+ let selectivity = SELECTIVITY.FTS_DEFAULT;
10645
+ const termCountProvider = this.api.analysisManager.getProvider("fts_term_count");
10646
+ if (termCountProvider && termCountProvider.hasSampleData && ftsConfig && matchTokens.length > 0) {
10647
+ const strategy = ftsConfig.tokenizer === "ngram" ? `${ftsConfig.gramSize}gram` : ftsConfig.tokenizer;
10648
+ const minCount = termCountProvider.getMinTokenCount(field, strategy, matchTokens);
10649
+ if (minCount >= 0) {
10650
+ const sampleSize = termCountProvider.getSampleSize();
10651
+ selectivity = Math.min(minCount / sampleSize, 1);
10652
+ }
10653
+ }
10627
10654
  return {
10628
10655
  tree: treeTx,
10629
10656
  condition,
@@ -10631,18 +10658,33 @@ var Optimizer = class {
10631
10658
  indexName,
10632
10659
  isFtsMatch: true,
10633
10660
  matchTokens,
10634
- score: 90,
10661
+ selectivity,
10635
10662
  compositeVerifyFields: [],
10636
10663
  coveredFields: [field],
10637
10664
  isIndexOrderSupported: false
10638
10665
  };
10639
10666
  }
10640
10667
  /**
10641
- * 실행할 최적의 인덱스를 선택합니다. (최적 드라이버 선택)
10668
+ * 비용 계산: effectiveScanCost + sortPenalty
10669
+ * - effectiveScanCost: 인덱스 순서 지원 + limit 존재 시 조기 종료 이점 반영
10670
+ * - sortPenalty: 인메모리 정렬의 절대 문서 수 기반 비용
10642
10671
  */
10643
- async getSelectivityCandidate(query, orderByField) {
10672
+ calculateCost(selectivity, isIndexOrderSupported, orderByField, N, topK) {
10673
+ const effectiveScanCost = isIndexOrderSupported && isFinite(topK) && N > 0 ? Math.min(topK / N, selectivity) : selectivity;
10674
+ const estimatedSortDocs = selectivity * N;
10675
+ const sortPenalty = orderByField && !isIndexOrderSupported ? Math.min(estimatedSortDocs / SELECTIVITY.SORT_THRESHOLD, 1) * SELECTIVITY.SORT_PENALTY : 0;
10676
+ return effectiveScanCost + sortPenalty;
10677
+ }
10678
+ /**
10679
+ * 실행할 최적의 인덱스를 선택합니다. (비용 기반 최적 드라이버 선택)
10680
+ * cost = selectivity + sortPenalty (낮을수록 좋음)
10681
+ */
10682
+ async getSelectivityCandidate(query, orderByField, limit = Infinity, offset = 0) {
10644
10683
  const queryFields = new Set(Object.keys(query));
10645
10684
  const candidates = [];
10685
+ const metadata = await this.api.getMetadata();
10686
+ const N = metadata.rowCount;
10687
+ const topK = isFinite(limit) ? offset + limit : Infinity;
10646
10688
  for (const [indexName, config] of this.api.indexManager.registeredIndices) {
10647
10689
  const tree = this.api.trees.get(indexName);
10648
10690
  if (!tree) continue;
@@ -10656,7 +10698,12 @@ var Optimizer = class {
10656
10698
  treeTx,
10657
10699
  orderByField
10658
10700
  );
10659
- if (candidate) candidates.push(candidate);
10701
+ if (candidate) {
10702
+ candidates.push({
10703
+ ...candidate,
10704
+ cost: this.calculateCost(candidate.selectivity, candidate.isIndexOrderSupported, orderByField, N, topK)
10705
+ });
10706
+ }
10660
10707
  } else if (config.type === "fts") {
10661
10708
  const treeTx = await tree.createTransaction();
10662
10709
  const candidate = this.evaluateFTSCandidate(
@@ -10666,7 +10713,12 @@ var Optimizer = class {
10666
10713
  queryFields,
10667
10714
  treeTx
10668
10715
  );
10669
- if (candidate) candidates.push(candidate);
10716
+ if (candidate) {
10717
+ candidates.push({
10718
+ ...candidate,
10719
+ cost: this.calculateCost(candidate.selectivity, candidate.isIndexOrderSupported, orderByField, N, topK)
10720
+ });
10721
+ }
10670
10722
  }
10671
10723
  }
10672
10724
  const rollback = () => {
@@ -10679,7 +10731,7 @@ var Optimizer = class {
10679
10731
  return null;
10680
10732
  }
10681
10733
  candidates.sort((a, b) => {
10682
- if (b.score !== a.score) return b.score - a.score;
10734
+ if (a.cost !== b.cost) return a.cost - b.cost;
10683
10735
  const aConfig = this.api.indexManager.registeredIndices.get(a.indexName);
10684
10736
  const bConfig = this.api.indexManager.registeredIndices.get(b.indexName);
10685
10737
  const aFieldCount = aConfig ? Array.isArray(aConfig.fields) ? aConfig.fields.length : 1 : 0;
@@ -10688,7 +10740,20 @@ var Optimizer = class {
10688
10740
  });
10689
10741
  const driver = candidates[0];
10690
10742
  const driverCoveredFields = new Set(driver.coveredFields);
10691
- const others = candidates.slice(1).filter((c) => !driverCoveredFields.has(c.field));
10743
+ const nonDriverCandidates = candidates.slice(1).filter((c) => !driverCoveredFields.has(c.field));
10744
+ const others = [];
10745
+ for (let i = 0, len = nonDriverCandidates.length; i < len; i++) {
10746
+ const candidate = nonDriverCandidates[i];
10747
+ let isSubset = false;
10748
+ for (let j = 0, oLen = others.length; j < oLen; j++) {
10749
+ const better = others[j];
10750
+ if (candidate.coveredFields.every((f) => better.coveredFields.includes(f))) {
10751
+ isSubset = true;
10752
+ break;
10753
+ }
10754
+ }
10755
+ if (!isSubset) others.push(candidate);
10756
+ }
10692
10757
  const compositeVerifyConditions = [];
10693
10758
  for (let i = 0, len = driver.compositeVerifyFields.length; i < len; i++) {
10694
10759
  const field = driver.compositeVerifyFields[i];
@@ -10887,12 +10952,14 @@ var QueryManager = class {
10887
10952
  rollback();
10888
10953
  return new Float64Array(Array.from(keys || []));
10889
10954
  }
10890
- async getDriverKeys(query, orderBy, sortOrder = "asc") {
10955
+ async getDriverKeys(query, orderBy, sortOrder = "asc", limit = Infinity, offset = 0) {
10891
10956
  const isQueryEmpty = Object.keys(query).length === 0;
10892
10957
  const normalizedQuery = isQueryEmpty ? { _id: { gte: 0 } } : query;
10893
10958
  const selectivity = await this.optimizer.getSelectivityCandidate(
10894
10959
  this.verboseQuery(normalizedQuery),
10895
- orderBy
10960
+ orderBy,
10961
+ limit,
10962
+ offset
10896
10963
  );
10897
10964
  if (!selectivity) return null;
10898
10965
  const { driver, others, compositeVerifyConditions, rollback } = selectivity;
@@ -10981,7 +11048,10 @@ var QueryManager = class {
10981
11048
  const isCompositeVerify = compositeVerifyConditions.length > 0;
10982
11049
  const isVerifyOthers = verifyOthers.length > 0;
10983
11050
  const isInfinityLimit = !isFinite(limit);
10984
- const isReadQuotaLimited = !isInfinityLimit || !isCompositeVerify || !isVerifyOthers || !isFts;
11051
+ const isReadQuotaLimited = !isInfinityLimit && // limit이 임의의 유한한 값으로 설정되어 있으며
11052
+ !isCompositeVerify && // 문서를 가져온 후 복합 인덱스 기준으로 2차 필터링할 필요가 없고
11053
+ !isVerifyOthers && // 문서를 가져온 후 다른 인덱스 기준으로 2차 필터링할 필요가 없으며
11054
+ !isFts;
10985
11055
  let currentChunkSize = isReadQuotaLimited ? limit : initialChunkSize;
10986
11056
  let chunk = [];
10987
11057
  let chunkSize = 0;
@@ -11002,10 +11072,30 @@ var QueryManager = class {
11002
11072
  let passed = true;
11003
11073
  for (let k = 0, kLen = verifyOthers.length; k < kLen; k++) {
11004
11074
  const other = verifyOthers[k];
11005
- const fieldValue = flatDoc[other.field];
11006
- if (fieldValue === void 0) {
11007
- passed = false;
11008
- break;
11075
+ const coveredFields = other.coveredFields;
11076
+ let fieldValue;
11077
+ if (coveredFields && coveredFields.length > 1) {
11078
+ const values = [];
11079
+ let hasMissing = false;
11080
+ for (let f = 0, fLen = coveredFields.length; f < fLen; f++) {
11081
+ const v = flatDoc[coveredFields[f]];
11082
+ if (v === void 0) {
11083
+ hasMissing = true;
11084
+ break;
11085
+ }
11086
+ values.push(v);
11087
+ }
11088
+ if (hasMissing) {
11089
+ passed = false;
11090
+ break;
11091
+ }
11092
+ fieldValue = values;
11093
+ } else {
11094
+ fieldValue = flatDoc[other.field];
11095
+ if (fieldValue === void 0) {
11096
+ passed = false;
11097
+ break;
11098
+ }
11009
11099
  }
11010
11100
  const treeValue = { k: doc._id, v: fieldValue };
11011
11101
  if (!other.tree.verify(treeValue, other.condition)) {
@@ -11088,12 +11178,13 @@ var QueryManager = class {
11088
11178
  }
11089
11179
  }
11090
11180
  }
11091
- const driverResult = await self.getDriverKeys(query, orderByField, sortOrder);
11181
+ const driverResult = await self.getDriverKeys(query, orderByField, sortOrder, limit, offset);
11092
11182
  if (!driverResult) return;
11093
11183
  const { keysStream, others, compositeVerifyConditions, isDriverOrderByField, rollback } = driverResult;
11094
11184
  const initialChunkSize = self.api.options.pageSize;
11185
+ const isInMemorySort = !isDriverOrderByField && orderByField;
11095
11186
  try {
11096
- if (!isDriverOrderByField && orderByField) {
11187
+ if (isInMemorySort) {
11097
11188
  const topK = limit === Infinity ? Infinity : offset + limit;
11098
11189
  let heap = null;
11099
11190
  if (topK !== Infinity) {
@@ -11665,12 +11756,6 @@ var MutationManager = class {
11665
11756
  document: dataplyDocument
11666
11757
  };
11667
11758
  }
11668
- /**
11669
- * Insert a document into the database
11670
- * @param document The document to insert
11671
- * @param tx The transaction to use
11672
- * @returns The primary key of the inserted document
11673
- */
11674
11759
  async insertSingleDocument(document, tx) {
11675
11760
  return this.api.runWithDefaultWrite(async (tx2) => {
11676
11761
  const { pk: dpk, document: dataplyDocument } = await this.insertDocumentInternal(document, tx2);
@@ -11697,15 +11782,10 @@ var MutationManager = class {
11697
11782
  if (error) throw error;
11698
11783
  }
11699
11784
  }
11785
+ await this.api.analysisManager.notifyInsert([flattenDocument], tx2);
11700
11786
  return dataplyDocument._id;
11701
11787
  }, tx);
11702
11788
  }
11703
- /**
11704
- * Insert a batch of documents into the database
11705
- * @param documents The documents to insert
11706
- * @param tx The transaction to use
11707
- * @returns The primary keys of the inserted documents
11708
- */
11709
11789
  async insertBatchDocuments(documents, tx) {
11710
11790
  return this.api.runWithDefaultWrite(async (tx2) => {
11711
11791
  const metadata = await this.api.getDocumentInnerMetadata(tx2);
@@ -11765,19 +11845,18 @@ var MutationManager = class {
11765
11845
  throw res.error;
11766
11846
  }
11767
11847
  }
11848
+ const flatDocs = [];
11849
+ for (let i = 0, len = flattenedData.length; i < len; i++) {
11850
+ flatDocs.push(flattenedData[i].data);
11851
+ }
11852
+ await this.api.analysisManager.notifyInsert(flatDocs, tx2);
11768
11853
  return ids;
11769
11854
  }, tx);
11770
11855
  }
11771
- /**
11772
- * Internal update method used by both fullUpdate and partialUpdate
11773
- * @param query The query to use
11774
- * @param computeUpdatedDoc Function that computes the updated document from the original
11775
- * @param tx The transaction to use
11776
- * @returns The number of updated documents
11777
- */
11778
11856
  async updateInternal(query, computeUpdatedDoc, tx) {
11779
11857
  const pks = await this.api.queryManager.getKeys(query);
11780
11858
  let updatedCount = 0;
11859
+ const updatePairs = [];
11781
11860
  const treeTxs = /* @__PURE__ */ new Map();
11782
11861
  for (const [indexName, tree] of this.api.trees) {
11783
11862
  treeTxs.set(indexName, await tree.createTransaction());
@@ -11825,6 +11904,7 @@ var MutationManager = class {
11825
11904
  }
11826
11905
  }
11827
11906
  }
11907
+ updatePairs.push({ oldDocument: oldFlatDoc, newDocument: newFlatDoc });
11828
11908
  await this.api.update(pk, JSON.stringify(updatedDoc), tx);
11829
11909
  updatedCount++;
11830
11910
  }
@@ -11837,15 +11917,9 @@ var MutationManager = class {
11837
11917
  throw result.error;
11838
11918
  }
11839
11919
  }
11920
+ await this.api.analysisManager.notifyUpdate(updatePairs, tx);
11840
11921
  return updatedCount;
11841
11922
  }
11842
- /**
11843
- * Fully update documents from the database that match the query
11844
- * @param query The query to use
11845
- * @param newRecord Complete document to replace with, or function that receives current document and returns new document
11846
- * @param tx The transaction to use
11847
- * @returns The number of updated documents
11848
- */
11849
11923
  async fullUpdate(query, newRecord, tx) {
11850
11924
  return this.api.runWithDefaultWrite(async (tx2) => {
11851
11925
  return this.updateInternal(query, (doc) => {
@@ -11854,13 +11928,6 @@ var MutationManager = class {
11854
11928
  }, tx2);
11855
11929
  }, tx);
11856
11930
  }
11857
- /**
11858
- * Partially update documents from the database that match the query
11859
- * @param query The query to use
11860
- * @param newRecord Partial document to merge, or function that receives current document and returns partial update
11861
- * @param tx The transaction to use
11862
- * @returns The number of updated documents
11863
- */
11864
11931
  async partialUpdate(query, newRecord, tx) {
11865
11932
  return this.api.runWithDefaultWrite(async (tx2) => {
11866
11933
  return this.updateInternal(query, (doc) => {
@@ -11871,16 +11938,11 @@ var MutationManager = class {
11871
11938
  }, tx2);
11872
11939
  }, tx);
11873
11940
  }
11874
- /**
11875
- * Delete documents from the database that match the query
11876
- * @param query The query to use
11877
- * @param tx The transaction to use
11878
- * @returns The number of deleted documents
11879
- */
11880
11941
  async deleteDocuments(query, tx) {
11881
11942
  return this.api.runWithDefaultWrite(async (tx2) => {
11882
11943
  const pks = await this.api.queryManager.getKeys(query);
11883
11944
  let deletedCount = 0;
11945
+ const deletedFlatDocs = [];
11884
11946
  for (let i = 0, len = pks.length; i < len; i++) {
11885
11947
  const pk = pks[i];
11886
11948
  const doc = await this.api.getDocument(pk, tx2);
@@ -11904,9 +11966,11 @@ var MutationManager = class {
11904
11966
  await tree.delete(pk, { k: pk, v: indexVal });
11905
11967
  }
11906
11968
  }
11969
+ deletedFlatDocs.push(flatDoc);
11907
11970
  await this.api.delete(pk, true, tx2);
11908
11971
  deletedCount++;
11909
11972
  }
11973
+ await this.api.analysisManager.notifyDelete(deletedFlatDocs, tx2);
11910
11974
  return deletedCount;
11911
11975
  }, tx);
11912
11976
  }
@@ -11945,13 +12009,6 @@ var MetadataManager = class {
11945
12009
  async updateDocumentInnerMetadata(metadata, tx) {
11946
12010
  await this.api.update(1, JSON.stringify(metadata), tx);
11947
12011
  }
11948
- /**
11949
- * Run a migration if the current schemeVersion is lower than the target version.
11950
- * After the callback completes, schemeVersion is updated to the target version.
11951
- * @param version The target scheme version
11952
- * @param callback The migration callback
11953
- * @param tx Optional transaction
11954
- */
11955
12012
  async migration(version, callback, tx) {
11956
12013
  await this.api.runWithDefaultWrite(async (tx2) => {
11957
12014
  const innerMetadata = await this.getDocumentInnerMetadata(tx2);
@@ -11980,16 +12037,392 @@ var DocumentFormatter = class {
11980
12037
  }
11981
12038
  return result;
11982
12039
  }
11983
- /**
11984
- * returns flattened document
11985
- * @param document
11986
- * @returns
11987
- */
11988
12040
  flattenDocument(document) {
11989
12041
  return this.flattenInternal(document, "", {});
11990
12042
  }
11991
12043
  };
11992
12044
 
12045
+ // src/core/AnalysisProvider.ts
12046
+ var AnalysisProvider = class {
12047
+ constructor(api) {
12048
+ this.api = api;
12049
+ }
12050
+ /** Overflow row PK assigned by AnalysisManager during initialization. */
12051
+ storageKey = -1;
12052
+ };
12053
+
12054
+ // src/core/RealtimeAnalysisProvider.ts
12055
+ var RealtimeAnalysisProvider = class extends AnalysisProvider {
12056
+ };
12057
+
12058
+ // src/core/IntervalAnalysisProvider.ts
12059
+ var IntervalAnalysisProvider = class extends AnalysisProvider {
12060
+ /**
12061
+ * Sample random documents from the entire dataset.
12062
+ * Fetches only PK index, then reads only the selected documents from disk.
12063
+ * @param sampleOptions Sampling strategy — either `{ rate }` or `{ count }`
12064
+ * @param tx Optional transaction
12065
+ * @returns Randomly selected documents
12066
+ */
12067
+ async sample(sampleOptions, tx) {
12068
+ const pks = await this.api.queryManager.getKeys({});
12069
+ const total = pks.length;
12070
+ if (total === 0) return [];
12071
+ const k = "rate" in sampleOptions && sampleOptions.rate != null ? Math.ceil(total * Math.min(Math.max(sampleOptions.rate, 0), 1)) : sampleOptions.count;
12072
+ const sampleCount = Math.min(Math.max(k, 0), total);
12073
+ if (sampleCount === 0) return [];
12074
+ for (let i = 0; i < sampleCount; i++) {
12075
+ const j = i + Math.floor(Math.random() * (total - i));
12076
+ const tmp = pks[i];
12077
+ pks[i] = pks[j];
12078
+ pks[j] = tmp;
12079
+ }
12080
+ const selectedPks = pks.slice(0, sampleCount);
12081
+ const rawResults = await this.api.selectMany(selectedPks, false, tx);
12082
+ const docs = [];
12083
+ for (let i = 0, len = rawResults.length; i < len; i++) {
12084
+ const raw = rawResults[i];
12085
+ if (raw) docs.push(JSON.parse(raw));
12086
+ }
12087
+ return docs;
12088
+ }
12089
+ };
12090
+
12091
+ // src/core/analysis/FTSTermCount.ts
12092
+ var FTSTermCount = class extends IntervalAnalysisProvider {
12093
+ name = "fts_term_count";
12094
+ termCount = {};
12095
+ sampleSize = 0;
12096
+ async serialize(tx) {
12097
+ const docs = await this.sample({ count: 1e3 }, tx);
12098
+ this.termCount = {};
12099
+ this.sampleSize = docs.length;
12100
+ if (docs.length === 0) return JSON.stringify({ _sampleSize: 0 });
12101
+ const ftsIndices = /* @__PURE__ */ new Map();
12102
+ for (const [indexName, config] of this.api.indexManager.registeredIndices) {
12103
+ if (config.type === "fts") {
12104
+ ftsIndices.set(indexName, config);
12105
+ }
12106
+ }
12107
+ if (ftsIndices.size === 0) return JSON.stringify({ _sampleSize: this.sampleSize });
12108
+ for (let i = 0, len = docs.length; i < len; i++) {
12109
+ const doc = docs[i];
12110
+ const flatDoc = this.api.flattenDocument(doc);
12111
+ for (const [indexName, config] of ftsIndices) {
12112
+ const primaryField = this.api.indexManager.getPrimaryField(config);
12113
+ const v = flatDoc[primaryField];
12114
+ if (typeof v === "string" && v.length > 0) {
12115
+ const ftsConfig = this.api.indexManager.getFtsConfig(config);
12116
+ const tokens = ftsConfig ? tokenize(v, ftsConfig) : [v];
12117
+ const tokenizerStrategy = ftsConfig ? ftsConfig.tokenizer === "ngram" ? `${ftsConfig.gramSize}gram` : ftsConfig.tokenizer : "whitespace";
12118
+ if (!this.termCount[primaryField]) {
12119
+ this.termCount[primaryField] = {};
12120
+ }
12121
+ if (!this.termCount[primaryField][tokenizerStrategy]) {
12122
+ this.termCount[primaryField][tokenizerStrategy] = {};
12123
+ }
12124
+ const targetMap = this.termCount[primaryField][tokenizerStrategy];
12125
+ for (let j = 0, len2 = tokens.length; j < len2; j++) {
12126
+ const token = tokens[j];
12127
+ targetMap[token] = (targetMap[token] || 0) + 1;
12128
+ }
12129
+ }
12130
+ }
12131
+ }
12132
+ const optimizedTermCount = {};
12133
+ for (const field in this.termCount) {
12134
+ optimizedTermCount[field] = {};
12135
+ for (const strategy in this.termCount[field]) {
12136
+ const tokenMap = this.termCount[field][strategy];
12137
+ const sorted = Object.entries(tokenMap).sort((a, b) => b[1] - a[1]).slice(0, 1e3);
12138
+ optimizedTermCount[field][strategy] = {};
12139
+ for (let i = 0, len = sorted.length; i < len; i++) {
12140
+ optimizedTermCount[field][strategy][sorted[i][0]] = sorted[i][1];
12141
+ }
12142
+ }
12143
+ }
12144
+ this.termCount = optimizedTermCount;
12145
+ return JSON.stringify({ _sampleSize: this.sampleSize, ...this.termCount });
12146
+ }
12147
+ async load(data, tx) {
12148
+ this.termCount = {};
12149
+ this.sampleSize = 0;
12150
+ if (!data) {
12151
+ return;
12152
+ }
12153
+ try {
12154
+ const parsed = JSON.parse(data);
12155
+ if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
12156
+ const { _sampleSize, ...rest } = parsed;
12157
+ this.sampleSize = typeof _sampleSize === "number" ? _sampleSize : 0;
12158
+ this.termCount = rest;
12159
+ }
12160
+ } catch (e) {
12161
+ }
12162
+ }
12163
+ /**
12164
+ * 특정 field/strategy/token의 문서 빈도를 반환합니다.
12165
+ * 통계에 없으면 0을 반환합니다.
12166
+ */
12167
+ getTermCount(field, strategy, token) {
12168
+ return this.termCount[field]?.[strategy]?.[token] ?? 0;
12169
+ }
12170
+ /**
12171
+ * 쿼리 토큰 배열에서 최소 빈도(AND 시맨틱스 상한선)를 반환합니다.
12172
+ * 통계가 없거나 sampleSize가 0이면 -1을 반환합니다.
12173
+ */
12174
+ getMinTokenCount(field, strategy, tokens) {
12175
+ if (this.sampleSize === 0 || tokens.length === 0) return -1;
12176
+ let minCount = Infinity;
12177
+ for (let i = 0, len = tokens.length; i < len; i++) {
12178
+ const count = this.getTermCount(field, strategy, tokens[i]);
12179
+ if (count < minCount) minCount = count;
12180
+ }
12181
+ return minCount === Infinity ? -1 : minCount;
12182
+ }
12183
+ /**
12184
+ * 통계가 유효한지 여부를 반환합니다.
12185
+ */
12186
+ get hasSampleData() {
12187
+ return this.sampleSize > 0;
12188
+ }
12189
+ /**
12190
+ * 통계 수집 시 사용된 샘플 크기를 반환합니다.
12191
+ */
12192
+ getSampleSize() {
12193
+ return this.sampleSize;
12194
+ }
12195
+ };
12196
+
12197
+ // src/core/analysis/index.ts
12198
+ var BuiltinAnalysisProviders = [
12199
+ FTSTermCount
12200
+ ];
12201
+
12202
+ // src/core/AnalysisManager.ts
12203
+ var AnalysisManager = class {
12204
+ constructor(api) {
12205
+ this.api = api;
12206
+ }
12207
+ providers = /* @__PURE__ */ new Map();
12208
+ /**
12209
+ * Register all built-in analysis providers.
12210
+ * Each provider class is instantiated with the API reference and registered.
12211
+ */
12212
+ registerBuiltinProviders() {
12213
+ for (const Provider of BuiltinAnalysisProviders) {
12214
+ const instance = new Provider(this.api);
12215
+ this.registerProvider(instance);
12216
+ }
12217
+ }
12218
+ /**
12219
+ * Register an analysis provider.
12220
+ * @param provider The provider instance to register
12221
+ */
12222
+ registerProvider(provider) {
12223
+ if (this.providers.has(provider.name)) {
12224
+ throw new Error(`Analysis provider "${provider.name}" is already registered.`);
12225
+ }
12226
+ this.providers.set(provider.name, provider);
12227
+ }
12228
+ /**
12229
+ * Get a registered analysis provider by name.
12230
+ * @param name The provider name
12231
+ * @returns The provider instance, or null if not found
12232
+ */
12233
+ getProvider(name) {
12234
+ return this.providers.get(name) ?? null;
12235
+ }
12236
+ /**
12237
+ * Initialize all registered providers by loading existing data from disk.
12238
+ * Should be called after database initialization.
12239
+ * @param tx The transaction to use
12240
+ */
12241
+ async initializeProviders(tx) {
12242
+ const header = await this.getOrCreateAnalysisHeader(tx);
12243
+ const metadata = await this.api.getDocumentInnerMetadata(tx);
12244
+ for (const [name, provider] of this.providers) {
12245
+ if (header[name] !== null) {
12246
+ provider.storageKey = header[name];
12247
+ const raw = await this.api.select(header[name], false, tx);
12248
+ await provider.load(raw, tx);
12249
+ } else {
12250
+ const pk = await this.api.insertAsOverflow(JSON.stringify(null), false, tx);
12251
+ provider.storageKey = pk;
12252
+ header[name] = pk;
12253
+ await this.api.update(metadata.analysis, JSON.stringify(header), tx);
12254
+ await provider.load(null, tx);
12255
+ }
12256
+ }
12257
+ }
12258
+ /**
12259
+ * Notify all realtime providers that documents were inserted.
12260
+ * Data is persisted immediately after each provider processes the mutation.
12261
+ * @param documents The flattened documents that were inserted
12262
+ * @param tx The transaction to use
12263
+ */
12264
+ async notifyInsert(documents, tx) {
12265
+ if (documents.length === 0) return;
12266
+ for (const [name, provider] of this.providers) {
12267
+ if (provider instanceof RealtimeAnalysisProvider) {
12268
+ await provider.onInsert(documents);
12269
+ await this.setAnalysisData(name, await provider.serialize(tx), tx);
12270
+ }
12271
+ }
12272
+ }
12273
+ /**
12274
+ * Notify all realtime providers that documents were deleted.
12275
+ * Data is persisted immediately after each provider processes the mutation.
12276
+ * @param documents The flattened documents that were deleted
12277
+ * @param tx The transaction to use
12278
+ */
12279
+ async notifyDelete(documents, tx) {
12280
+ if (documents.length === 0) return;
12281
+ for (const [name, provider] of this.providers) {
12282
+ if (provider instanceof RealtimeAnalysisProvider) {
12283
+ await provider.onDelete(documents);
12284
+ await this.setAnalysisData(name, await provider.serialize(tx), tx);
12285
+ }
12286
+ }
12287
+ }
12288
+ /**
12289
+ * Notify all realtime providers that documents were updated.
12290
+ * Data is persisted immediately after each provider processes the mutation.
12291
+ * @param pairs Array of { oldDocument, newDocument } pairs
12292
+ * @param tx The transaction to use
12293
+ */
12294
+ async notifyUpdate(pairs, tx) {
12295
+ if (pairs.length === 0) return;
12296
+ for (const [name, provider] of this.providers) {
12297
+ if (provider instanceof RealtimeAnalysisProvider) {
12298
+ await provider.onUpdate(pairs);
12299
+ await this.setAnalysisData(name, await provider.serialize(tx), tx);
12300
+ }
12301
+ }
12302
+ }
12303
+ /**
12304
+ * Flush all interval providers' data to disk.
12305
+ * @param tx The transaction to use (must be a write transaction)
12306
+ */
12307
+ async flush(tx) {
12308
+ for (const [name, provider] of this.providers) {
12309
+ if (provider instanceof IntervalAnalysisProvider) {
12310
+ await this.setAnalysisData(name, await provider.serialize(tx), tx);
12311
+ }
12312
+ }
12313
+ }
12314
+ /**
12315
+ * Get the analysis header row.
12316
+ * Returns null if no analysis header exists yet.
12317
+ * @param tx The transaction to use
12318
+ */
12319
+ async getAnalysisHeader(tx) {
12320
+ const metadata = await this.api.getDocumentInnerMetadata(tx);
12321
+ if (metadata.analysis == null) {
12322
+ return null;
12323
+ }
12324
+ const row = await this.api.select(metadata.analysis, false, tx);
12325
+ if (!row) {
12326
+ return null;
12327
+ }
12328
+ return JSON.parse(row);
12329
+ }
12330
+ /**
12331
+ * Get the analysis header row, creating it if it doesn't exist.
12332
+ * @param tx The transaction to use (must be a write transaction)
12333
+ */
12334
+ async getOrCreateAnalysisHeader(tx) {
12335
+ const metadata = await this.api.getDocumentInnerMetadata(tx);
12336
+ if (metadata.analysis != null) {
12337
+ const row = await this.api.select(metadata.analysis, false, tx);
12338
+ if (row) {
12339
+ return JSON.parse(row);
12340
+ }
12341
+ }
12342
+ const header = {};
12343
+ const pk = await this.api.insertAsOverflow(JSON.stringify(header), false, tx);
12344
+ metadata.analysis = pk;
12345
+ await this.api.updateDocumentInnerMetadata(metadata, tx);
12346
+ return header;
12347
+ }
12348
+ /**
12349
+ * Get analysis data for a specific type as a raw string.
12350
+ * Returns null if the type doesn't exist in the analysis header.
12351
+ * @param type The analysis type name
12352
+ * @param tx The transaction to use
12353
+ */
12354
+ async getAnalysisData(type, tx) {
12355
+ const header = await this.getAnalysisHeader(tx);
12356
+ if (!header || header[type] == null) {
12357
+ return null;
12358
+ }
12359
+ const row = await this.api.select(header[type], false, tx);
12360
+ if (!row) {
12361
+ return null;
12362
+ }
12363
+ return row;
12364
+ }
12365
+ /**
12366
+ * Set analysis data for a specific type.
12367
+ * Creates a new overflow row if the type doesn't exist yet,
12368
+ * or updates the existing row if it does.
12369
+ * @param type The analysis type name
12370
+ * @param data The raw string data to store
12371
+ * @param tx The transaction to use (must be a write transaction)
12372
+ */
12373
+ async setAnalysisData(type, data, tx) {
12374
+ const header = await this.getOrCreateAnalysisHeader(tx);
12375
+ const metadata = await this.api.getDocumentInnerMetadata(tx);
12376
+ if (header[type] != null) {
12377
+ await this.api.update(header[type], data, tx);
12378
+ } else {
12379
+ const pk = await this.api.insertAsOverflow(data, false, tx);
12380
+ header[type] = pk;
12381
+ await this.api.update(metadata.analysis, JSON.stringify(header), tx);
12382
+ }
12383
+ }
12384
+ /**
12385
+ * Delete analysis data for a specific type.
12386
+ * Removes the type entry from the analysis header.
12387
+ * @param type The analysis type name
12388
+ * @param tx The transaction to use (must be a write transaction)
12389
+ */
12390
+ async deleteAnalysisData(type, tx) {
12391
+ const metadata = await this.api.getDocumentInnerMetadata(tx);
12392
+ if (metadata.analysis == null) {
12393
+ return false;
12394
+ }
12395
+ const header = await this.getAnalysisHeader(tx);
12396
+ if (!header || header[type] == null) {
12397
+ return false;
12398
+ }
12399
+ await this.api.delete(header[type], false, tx);
12400
+ delete header[type];
12401
+ await this.api.update(metadata.analysis, JSON.stringify(header), tx);
12402
+ return true;
12403
+ }
12404
+ /**
12405
+ * Check if analysis data exists for a specific type.
12406
+ * @param type The analysis type name
12407
+ * @param tx The transaction to use
12408
+ */
12409
+ async hasAnalysisData(type, tx) {
12410
+ const header = await this.getAnalysisHeader(tx);
12411
+ return header != null && header[type] != null;
12412
+ }
12413
+ /**
12414
+ * Get all registered analysis type names.
12415
+ * @param tx The transaction to use
12416
+ */
12417
+ async getAnalysisTypes(tx) {
12418
+ const header = await this.getAnalysisHeader(tx);
12419
+ if (!header) {
12420
+ return [];
12421
+ }
12422
+ return Object.keys(header);
12423
+ }
12424
+ };
12425
+
11993
12426
  // src/core/documentAPI.ts
11994
12427
  var DocumentDataplyAPI = class extends import_dataply4.DataplyAPI {
11995
12428
  comparator = new DocumentValueComparator();
@@ -12000,6 +12433,7 @@ var DocumentDataplyAPI = class extends import_dataply4.DataplyAPI {
12000
12433
  mutationManager;
12001
12434
  metadataManager;
12002
12435
  documentFormatter;
12436
+ analysisManager;
12003
12437
  constructor(file, options) {
12004
12438
  super(file, options);
12005
12439
  this.optimizer = new Optimizer(this);
@@ -12008,6 +12442,7 @@ var DocumentDataplyAPI = class extends import_dataply4.DataplyAPI {
12008
12442
  this.mutationManager = new MutationManager(this);
12009
12443
  this.metadataManager = new MetadataManager(this);
12010
12444
  this.documentFormatter = new DocumentFormatter();
12445
+ this.analysisManager = new AnalysisManager(this);
12011
12446
  this.hook.onceAfter("init", async (tx, isNewlyCreated) => {
12012
12447
  if (isNewlyCreated) {
12013
12448
  await this.initializeDocumentFile(tx);
@@ -12017,6 +12452,8 @@ var DocumentDataplyAPI = class extends import_dataply4.DataplyAPI {
12017
12452
  }
12018
12453
  const metadata = await this.getDocumentInnerMetadata(tx);
12019
12454
  await this.indexManager.initializeIndices(metadata, isNewlyCreated, tx);
12455
+ this.analysisManager.registerBuiltinProviders();
12456
+ await this.analysisManager.initializeProviders(tx);
12020
12457
  this._initialized = true;
12021
12458
  return tx;
12022
12459
  });
@@ -12036,15 +12473,29 @@ var DocumentDataplyAPI = class extends import_dataply4.DataplyAPI {
12036
12473
  get indexedFields() {
12037
12474
  return this.indexManager.indexedFields;
12038
12475
  }
12476
+ /**
12477
+ * Register an index.
12478
+ * @param name The name of the index
12479
+ * @param option The option of the index
12480
+ * @param tx The transaction to use
12481
+ */
12039
12482
  async registerIndex(name, option, tx) {
12040
12483
  return this.indexManager.registerIndex(name, option, tx);
12041
12484
  }
12042
12485
  /**
12043
12486
  * Drop (remove) a named index.
12487
+ * @param name The name of the index
12488
+ * @param tx The transaction to use
12044
12489
  */
12045
12490
  async dropIndex(name, tx) {
12046
12491
  return this.indexManager.dropIndex(name, tx);
12047
12492
  }
12493
+ /**
12494
+ * Get a document by its primary key.
12495
+ * @param pk The primary key of the document
12496
+ * @param tx The transaction to use
12497
+ * @returns The document
12498
+ */
12048
12499
  async getDocument(pk, tx) {
12049
12500
  return this.runWithDefault(async (tx2) => {
12050
12501
  const row = await this.select(pk, false, tx2);
@@ -12061,6 +12512,16 @@ var DocumentDataplyAPI = class extends import_dataply4.DataplyAPI {
12061
12512
  async backfillIndices(tx) {
12062
12513
  return this.indexManager.backfillIndices(tx);
12063
12514
  }
12515
+ /**
12516
+ * Flush all interval analysis providers, forcing statistics to be recalculated.
12517
+ * Call this after bulk inserts or periodically to keep statistics fresh.
12518
+ * @param tx The transaction to use
12519
+ */
12520
+ async flushAnalysis(tx) {
12521
+ return this.runWithDefaultWrite(async (tx2) => {
12522
+ await this.analysisManager.flush(tx2);
12523
+ }, tx);
12524
+ }
12064
12525
  createDocumentInnerMetadata(indices) {
12065
12526
  return {
12066
12527
  magicString: "document-dataply",
@@ -12072,16 +12533,28 @@ var DocumentDataplyAPI = class extends import_dataply4.DataplyAPI {
12072
12533
  indices
12073
12534
  };
12074
12535
  }
12536
+ /**
12537
+ * Initialize the document database file.
12538
+ * @param tx The transaction to use
12539
+ */
12075
12540
  async initializeDocumentFile(tx) {
12076
12541
  const metadata = await this.select(1, false, tx);
12077
12542
  if (metadata) {
12078
12543
  throw new Error("Document metadata already exists");
12079
12544
  }
12080
12545
  const metaObj = this.createDocumentInnerMetadata({
12081
- _id: [-1, { type: "btree", fields: ["_id"] }]
12546
+ _id: [-1, {
12547
+ type: "btree",
12548
+ fields: ["_id"]
12549
+ }]
12082
12550
  });
12083
12551
  await this.insertAsOverflow(JSON.stringify(metaObj), false, tx);
12084
12552
  }
12553
+ /**
12554
+ * Verify the document database file.
12555
+ * @param tx The transaction to use
12556
+ * @returns True if the document database file is valid, false otherwise
12557
+ */
12085
12558
  async verifyDocumentFile(tx) {
12086
12559
  const row = await this.select(1, false, tx);
12087
12560
  if (!row) {
@@ -12098,12 +12571,27 @@ var DocumentDataplyAPI = class extends import_dataply4.DataplyAPI {
12098
12571
  flattenDocument(document) {
12099
12572
  return this.documentFormatter.flattenDocument(document);
12100
12573
  }
12574
+ /**
12575
+ * Get the document metadata.
12576
+ * @param tx The transaction to use
12577
+ * @returns The document metadata
12578
+ */
12101
12579
  async getDocumentMetadata(tx) {
12102
12580
  return this.metadataManager.getDocumentMetadata(tx);
12103
12581
  }
12582
+ /**
12583
+ * Get the document inner metadata.
12584
+ * @param tx The transaction to use
12585
+ * @returns The document inner metadata
12586
+ */
12104
12587
  async getDocumentInnerMetadata(tx) {
12105
12588
  return this.metadataManager.getDocumentInnerMetadata(tx);
12106
12589
  }
12590
+ /**
12591
+ * Update the document inner metadata.
12592
+ * @param metadata The document inner metadata
12593
+ * @param tx The transaction to use
12594
+ */
12107
12595
  async updateDocumentInnerMetadata(metadata, tx) {
12108
12596
  return this.metadataManager.updateDocumentInnerMetadata(metadata, tx);
12109
12597
  }
@@ -12255,6 +12743,14 @@ var DocumentDataply = class _DocumentDataply {
12255
12743
  await this.api.init();
12256
12744
  await this.api.backfillIndices();
12257
12745
  }
12746
+ /**
12747
+ * Flush all interval analysis providers, forcing statistics to be recalculated.
12748
+ * Call this after bulk inserts or periodically to keep FTS statistics fresh.
12749
+ * @param tx Optional transaction
12750
+ */
12751
+ async flushAnalysis(tx) {
12752
+ return this.api.flushAnalysis(tx);
12753
+ }
12258
12754
  /**
12259
12755
  * Run a migration if the current schemeVersion is lower than the target version.
12260
12756
  * The callback is only executed when the database's schemeVersion is below the given version.