document-dataply 0.0.10-alpha.5 → 0.0.10-alpha.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/cjs/index.js CHANGED
@@ -10466,6 +10466,26 @@ function tokenize(text, options) {
10466
10466
  }
10467
10467
 
10468
10468
  // src/core/Optimizer.ts
10469
+ var SELECTIVITY = {
10470
+ /** O(log N) point lookup (applied to literal values and equal/primaryEqual conditions). */
10471
+ EQUAL: 0.01,
10472
+ /** Range scan bounded on both sides (gte+lte). NOTE(review): not referenced in the hunks visible in this diff; the gte/gt/lte/lt branches each multiply by HALF_RANGE - confirm where this is used. */
10473
+ BOUNDED_RANGE: 0.33,
10474
+ /** Only one bound present (gte or lte): full scan starting from the middle. */
10475
+ HALF_RANGE: 0.5,
10476
+ /** Or condition: full scan inside the B+Tree. */
10477
+ OR: 0.9,
10478
+ /** Like condition: full scan inside the B+Tree. */
10479
+ LIKE: 0.9,
10480
+ /** Unknown condition. */
10481
+ UNKNOWN: 0.9,
10482
+ /** Conservative estimate used when no FTS statistics are available. */
10483
+ FTS_DEFAULT: 0.5,
10484
+ /** Sort cost weight (applied when orderBy is not supported by the index). */
10485
+ SORT_PENALTY: 0.3,
10486
+ /** Threshold document count at which in-memory sorting becomes significant. */
10487
+ SORT_THRESHOLD: 1e4
10488
+ };
10469
10489
  var Optimizer = class {
10470
10490
  constructor(api) {
10471
10491
  this.api = api;
@@ -10477,7 +10497,7 @@ var Optimizer = class {
10477
10497
  const primaryField = config.fields[0];
10478
10498
  if (!queryFields.has(primaryField)) return null;
10479
10499
  const builtCondition = {};
10480
- let score = 0;
10500
+ let selectivity = 1;
10481
10501
  let isConsecutive = true;
10482
10502
  const coveredFields = [];
10483
10503
  const compositeVerifyFields = [];
@@ -10492,13 +10512,12 @@ var Optimizer = class {
10492
10512
  continue;
10493
10513
  }
10494
10514
  coveredFields.push(field);
10495
- score += 1;
10496
10515
  if (isConsecutive) {
10497
10516
  const cond = query[field];
10498
10517
  if (cond !== void 0) {
10499
10518
  let isBounded = false;
10500
10519
  if (typeof cond !== "object" || cond === null) {
10501
- score += 100;
10520
+ selectivity *= SELECTIVITY.EQUAL;
10502
10521
  startValues.push(cond);
10503
10522
  endValues.push(cond);
10504
10523
  startOperator = "primaryGte";
@@ -10506,7 +10525,7 @@ var Optimizer = class {
10506
10525
  isBounded = true;
10507
10526
  } else if ("primaryEqual" in cond || "equal" in cond) {
10508
10527
  const val = cond.primaryEqual?.v ?? cond.equal?.v ?? cond.primaryEqual ?? cond.equal;
10509
- score += 100;
10528
+ selectivity *= SELECTIVITY.EQUAL;
10510
10529
  startValues.push(val);
10511
10530
  endValues.push(val);
10512
10531
  startOperator = "primaryGte";
@@ -10514,7 +10533,7 @@ var Optimizer = class {
10514
10533
  isBounded = true;
10515
10534
  } else if ("primaryGte" in cond || "gte" in cond) {
10516
10535
  const val = cond.primaryGte?.v ?? cond.gte?.v ?? cond.primaryGte ?? cond.gte;
10517
- score += 50;
10536
+ selectivity *= SELECTIVITY.HALF_RANGE;
10518
10537
  isConsecutive = false;
10519
10538
  startValues.push(val);
10520
10539
  startOperator = "primaryGte";
@@ -10522,7 +10541,7 @@ var Optimizer = class {
10522
10541
  isBounded = true;
10523
10542
  } else if ("primaryGt" in cond || "gt" in cond) {
10524
10543
  const val = cond.primaryGt?.v ?? cond.gt?.v ?? cond.primaryGt ?? cond.gt;
10525
- score += 50;
10544
+ selectivity *= SELECTIVITY.HALF_RANGE;
10526
10545
  isConsecutive = false;
10527
10546
  startValues.push(val);
10528
10547
  startOperator = "primaryGt";
@@ -10530,7 +10549,7 @@ var Optimizer = class {
10530
10549
  isBounded = true;
10531
10550
  } else if ("primaryLte" in cond || "lte" in cond) {
10532
10551
  const val = cond.primaryLte?.v ?? cond.lte?.v ?? cond.primaryLte ?? cond.lte;
10533
- score += 50;
10552
+ selectivity *= SELECTIVITY.HALF_RANGE;
10534
10553
  isConsecutive = false;
10535
10554
  endValues.push(val);
10536
10555
  endOperator = "primaryLte";
@@ -10538,20 +10557,20 @@ var Optimizer = class {
10538
10557
  isBounded = true;
10539
10558
  } else if ("primaryLt" in cond || "lt" in cond) {
10540
10559
  const val = cond.primaryLt?.v ?? cond.lt?.v ?? cond.primaryLt ?? cond.lt;
10541
- score += 50;
10560
+ selectivity *= SELECTIVITY.HALF_RANGE;
10542
10561
  isConsecutive = false;
10543
10562
  endValues.push(val);
10544
10563
  endOperator = "primaryLt";
10545
10564
  if (startValues.length > 0) startOperator = "primaryGte";
10546
10565
  isBounded = true;
10547
10566
  } else if ("primaryOr" in cond || "or" in cond) {
10548
- score += 20;
10567
+ selectivity *= SELECTIVITY.OR;
10549
10568
  isConsecutive = false;
10550
10569
  } else if ("like" in cond) {
10551
- score += 15;
10570
+ selectivity *= SELECTIVITY.LIKE;
10552
10571
  isConsecutive = false;
10553
10572
  } else {
10554
- score += 10;
10573
+ selectivity *= SELECTIVITY.UNKNOWN;
10555
10574
  isConsecutive = false;
10556
10575
  }
10557
10576
  if (!isBounded && field !== primaryField) {
@@ -10598,9 +10617,6 @@ var Optimizer = class {
10598
10617
  }
10599
10618
  if (!isExactMatch) break;
10600
10619
  }
10601
- if (isIndexOrderSupported) {
10602
- score += 200;
10603
- }
10604
10620
  }
10605
10621
  return {
10606
10622
  tree: treeTx,
@@ -10608,7 +10624,7 @@ var Optimizer = class {
10608
10624
  field: primaryField,
10609
10625
  indexName,
10610
10626
  isFtsMatch: false,
10611
- score,
10627
+ selectivity,
10612
10628
  compositeVerifyFields,
10613
10629
  coveredFields,
10614
10630
  isIndexOrderSupported
@@ -10616,7 +10632,7 @@ var Optimizer = class {
10616
10632
  }
10617
10633
  /**
10618
10634
  * FTS 타입 인덱스의 선택도를 평가합니다.
10619
- * FTSTermCount 통계가 있으면 토큰 빈도 기반 동적 score를 산출합니다.
10635
+ * FTSTermCount 통계가 있으면 실측 데이터 기반으로 선택도를 산출합니다.
10620
10636
  */
10621
10637
  evaluateFTSCandidate(indexName, config, query, queryFields, treeTx) {
10622
10638
  const field = config.fields;
@@ -10625,18 +10641,14 @@ var Optimizer = class {
10625
10641
  if (!condition || typeof condition !== "object" || !("match" in condition)) return null;
10626
10642
  const ftsConfig = this.api.indexManager.getFtsConfig(config);
10627
10643
  const matchTokens = ftsConfig ? tokenize(condition.match, ftsConfig) : [];
10628
- const MAX_FTS_SCORE = 400;
10629
- const MIN_FTS_SCORE = 10;
10630
- const DEFAULT_FTS_SCORE = 90;
10631
- let score = DEFAULT_FTS_SCORE;
10644
+ let selectivity = SELECTIVITY.FTS_DEFAULT;
10632
10645
  const termCountProvider = this.api.analysisManager.getProvider("fts_term_count");
10633
10646
  if (termCountProvider && termCountProvider.hasSampleData && ftsConfig && matchTokens.length > 0) {
10634
10647
  const strategy = ftsConfig.tokenizer === "ngram" ? `${ftsConfig.gramSize}gram` : ftsConfig.tokenizer;
10635
10648
  const minCount = termCountProvider.getMinTokenCount(field, strategy, matchTokens);
10636
10649
  if (minCount >= 0) {
10637
10650
  const sampleSize = termCountProvider.getSampleSize();
10638
- const selectivityRatio = Math.min(minCount / sampleSize, 1);
10639
- score = Math.round(MAX_FTS_SCORE * (1 - selectivityRatio) + MIN_FTS_SCORE);
10651
+ selectivity = Math.min(minCount / sampleSize, 1);
10640
10652
  }
10641
10653
  }
10642
10654
  return {
@@ -10646,18 +10658,33 @@ var Optimizer = class {
10646
10658
  indexName,
10647
10659
  isFtsMatch: true,
10648
10660
  matchTokens,
10649
- score,
10661
+ selectivity,
10650
10662
  compositeVerifyFields: [],
10651
10663
  coveredFields: [field],
10652
10664
  isIndexOrderSupported: false
10653
10665
  };
10654
10666
  }
10655
10667
  /**
10656
- * Selects the optimal index to execute. (optimal driver selection)
10668
+ * Cost calculation: effectiveScanCost + sortPenalty (the sum is returned as the candidate's cost).
10669
+ * - effectiveScanCost: reflects the early-termination benefit when the index supports the requested order and a limit exists; min(topK / N, selectivity) when isIndexOrderSupported is truthy, topK is finite and N > 0, otherwise selectivity.
10670
+ * - sortPenalty: cost based on the absolute number of documents sorted in memory; min(selectivity * N / SORT_THRESHOLD, 1) * SORT_PENALTY when orderByField is set and the index order is not supported, otherwise 0.
10671
+ */
10672
+ calculateCost(selectivity, isIndexOrderSupported, orderByField, N, topK) {
10673
+ const effectiveScanCost = isIndexOrderSupported && isFinite(topK) && N > 0 ? Math.min(topK / N, selectivity) : selectivity;
10674
+ const estimatedSortDocs = selectivity * N;
10675
+ const sortPenalty = orderByField && !isIndexOrderSupported ? Math.min(estimatedSortDocs / SELECTIVITY.SORT_THRESHOLD, 1) * SELECTIVITY.SORT_PENALTY : 0;
10676
+ return effectiveScanCost + sortPenalty;
10677
+ }
10678
+ /**
10679
+ * 실행할 최적의 인덱스를 선택합니다. (비용 기반 최적 드라이버 선택)
10680
+ * cost = selectivity + sortPenalty (낮을수록 좋음)
10657
10681
  */
10658
- async getSelectivityCandidate(query, orderByField) {
10682
+ async getSelectivityCandidate(query, orderByField, limit = Infinity, offset = 0) {
10659
10683
  const queryFields = new Set(Object.keys(query));
10660
10684
  const candidates = [];
10685
+ const metadata = await this.api.getMetadata();
10686
+ const N = metadata.rowCount;
10687
+ const topK = isFinite(limit) ? offset + limit : Infinity;
10661
10688
  for (const [indexName, config] of this.api.indexManager.registeredIndices) {
10662
10689
  const tree = this.api.trees.get(indexName);
10663
10690
  if (!tree) continue;
@@ -10671,7 +10698,12 @@ var Optimizer = class {
10671
10698
  treeTx,
10672
10699
  orderByField
10673
10700
  );
10674
- if (candidate) candidates.push(candidate);
10701
+ if (candidate) {
10702
+ candidates.push({
10703
+ ...candidate,
10704
+ cost: this.calculateCost(candidate.selectivity, candidate.isIndexOrderSupported, orderByField, N, topK)
10705
+ });
10706
+ }
10675
10707
  } else if (config.type === "fts") {
10676
10708
  const treeTx = await tree.createTransaction();
10677
10709
  const candidate = this.evaluateFTSCandidate(
@@ -10681,7 +10713,12 @@ var Optimizer = class {
10681
10713
  queryFields,
10682
10714
  treeTx
10683
10715
  );
10684
- if (candidate) candidates.push(candidate);
10716
+ if (candidate) {
10717
+ candidates.push({
10718
+ ...candidate,
10719
+ cost: this.calculateCost(candidate.selectivity, candidate.isIndexOrderSupported, orderByField, N, topK)
10720
+ });
10721
+ }
10685
10722
  }
10686
10723
  }
10687
10724
  const rollback = () => {
@@ -10694,7 +10731,7 @@ var Optimizer = class {
10694
10731
  return null;
10695
10732
  }
10696
10733
  candidates.sort((a, b) => {
10697
- if (b.score !== a.score) return b.score - a.score;
10734
+ if (a.cost !== b.cost) return a.cost - b.cost;
10698
10735
  const aConfig = this.api.indexManager.registeredIndices.get(a.indexName);
10699
10736
  const bConfig = this.api.indexManager.registeredIndices.get(b.indexName);
10700
10737
  const aFieldCount = aConfig ? Array.isArray(aConfig.fields) ? aConfig.fields.length : 1 : 0;
@@ -10709,8 +10746,8 @@ var Optimizer = class {
10709
10746
  const candidate = nonDriverCandidates[i];
10710
10747
  let isSubset = false;
10711
10748
  for (let j = 0, oLen = others.length; j < oLen; j++) {
10712
- const higher = others[j];
10713
- if (candidate.coveredFields.every((f) => higher.coveredFields.includes(f))) {
10749
+ const better = others[j];
10750
+ if (candidate.coveredFields.every((f) => better.coveredFields.includes(f))) {
10714
10751
  isSubset = true;
10715
10752
  break;
10716
10753
  }
@@ -10915,12 +10952,14 @@ var QueryManager = class {
10915
10952
  rollback();
10916
10953
  return new Float64Array(Array.from(keys || []));
10917
10954
  }
10918
- async getDriverKeys(query, orderBy, sortOrder = "asc") {
10955
+ async getDriverKeys(query, orderBy, sortOrder = "asc", limit = Infinity, offset = 0) {
10919
10956
  const isQueryEmpty = Object.keys(query).length === 0;
10920
10957
  const normalizedQuery = isQueryEmpty ? { _id: { gte: 0 } } : query;
10921
10958
  const selectivity = await this.optimizer.getSelectivityCandidate(
10922
10959
  this.verboseQuery(normalizedQuery),
10923
- orderBy
10960
+ orderBy,
10961
+ limit,
10962
+ offset
10924
10963
  );
10925
10964
  if (!selectivity) return null;
10926
10965
  const { driver, others, compositeVerifyConditions, rollback } = selectivity;
@@ -11139,12 +11178,13 @@ var QueryManager = class {
11139
11178
  }
11140
11179
  }
11141
11180
  }
11142
- const driverResult = await self.getDriverKeys(query, orderByField, sortOrder);
11181
+ const driverResult = await self.getDriverKeys(query, orderByField, sortOrder, limit, offset);
11143
11182
  if (!driverResult) return;
11144
11183
  const { keysStream, others, compositeVerifyConditions, isDriverOrderByField, rollback } = driverResult;
11145
11184
  const initialChunkSize = self.api.options.pageSize;
11185
+ const isInMemorySort = !isDriverOrderByField && orderByField;
11146
11186
  try {
11147
- if (!isDriverOrderByField && orderByField) {
11187
+ if (isInMemorySort) {
11148
11188
  const topK = limit === Infinity ? Infinity : offset + limit;
11149
11189
  let heap = null;
11150
11190
  if (topK !== Infinity) {
@@ -13,31 +13,38 @@ export declare class Optimizer<T extends Record<string, any>> {
13
13
  readonly field: any;
14
14
  readonly indexName: string;
15
15
  readonly isFtsMatch: false;
16
- readonly score: number;
16
+ readonly selectivity: number;
17
17
  readonly compositeVerifyFields: string[];
18
18
  readonly coveredFields: string[];
19
19
  readonly isIndexOrderSupported: boolean;
20
20
  } | null;
21
21
  /**
22
22
  * FTS 타입 인덱스의 선택도를 평가합니다.
23
- * FTSTermCount 통계가 있으면 토큰 빈도 기반 동적 score를 산출합니다.
23
+ * FTSTermCount 통계가 있으면 실측 데이터 기반으로 선택도를 산출합니다.
24
24
  */
25
25
  evaluateFTSCandidate<U extends Partial<DocumentDataplyQuery<T>>, V extends DataplyTreeValue<U>>(indexName: string, config: any, query: Partial<DocumentDataplyQuery<V>>, queryFields: Set<string>, treeTx: BPTreeAsync<string | number, V>): {
26
- readonly tree: BPTreeAsync<string | number, V>;
27
- readonly condition: any;
28
- readonly field: any;
29
- readonly indexName: string;
30
- readonly isFtsMatch: true;
31
- readonly matchTokens: string[];
32
- readonly score: number;
33
- readonly compositeVerifyFields: readonly [];
34
- readonly coveredFields: readonly [any];
35
- readonly isIndexOrderSupported: false;
26
+ tree: BPTreeAsync<string | number, V>;
27
+ condition: any;
28
+ field: any;
29
+ indexName: string;
30
+ isFtsMatch: boolean;
31
+ matchTokens: string[];
32
+ selectivity: number;
33
+ compositeVerifyFields: never[];
34
+ coveredFields: any[];
35
+ isIndexOrderSupported: boolean;
36
36
  } | null;
37
37
  /**
38
- * 실행할 최적의 인덱스를 선택합니다. (최적 드라이버 선택)
38
+ * 비용 계산: effectiveScanCost + sortPenalty
39
+ * - effectiveScanCost: 인덱스 순서 지원 + limit 존재 시 조기 종료 이점 반영
40
+ * - sortPenalty: 인메모리 정렬의 절대 문서 수 기반 비용
41
+ */
42
+ private calculateCost;
43
+ /**
44
+ * 실행할 최적의 인덱스를 선택합니다. (비용 기반 최적 드라이버 선택)
45
+ * cost = selectivity + sortPenalty (낮을수록 좋음)
39
46
  */
40
- getSelectivityCandidate<U extends Partial<DocumentDataplyQuery<T>>, V extends DataplyTreeValue<U>>(query: Partial<DocumentDataplyQuery<V>>, orderByField?: string): Promise<{
47
+ getSelectivityCandidate<U extends Partial<DocumentDataplyQuery<T>>, V extends DataplyTreeValue<U>>(query: Partial<DocumentDataplyQuery<V>>, orderByField?: string, limit?: number, offset?: number): Promise<{
41
48
  driver: ({
42
49
  tree: BPTreeAsync<number, V>;
43
50
  condition: Partial<DocumentDataplyCondition<U>>;
@@ -18,7 +18,7 @@ export declare class QueryManager<T extends DocumentJSON> {
18
18
  private applyCandidateByFTSStream;
19
19
  private applyCandidateStream;
20
20
  getKeys(query: Partial<DocumentDataplyQuery<T>>, orderBy?: string, sortOrder?: 'asc' | 'desc'): Promise<Float64Array>;
21
- getDriverKeys(query: Partial<DocumentDataplyQuery<T>>, orderBy?: string, sortOrder?: 'asc' | 'desc'): Promise<{
21
+ getDriverKeys(query: Partial<DocumentDataplyQuery<T>>, orderBy?: string, sortOrder?: 'asc' | 'desc', limit?: number, offset?: number): Promise<{
22
22
  keysStream: AsyncIterableIterator<number>;
23
23
  others: {
24
24
  tree: BPTreeAsync<string | number, DataplyTreeValue<Primitive>>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "document-dataply",
3
- "version": "0.0.10-alpha.5",
3
+ "version": "0.0.10-alpha.6",
4
4
  "description": "Simple and powerful JSON document database supporting complex queries and flexible indexing policies.",
5
5
  "license": "MIT",
6
6
  "author": "izure <admin@izure.org>",