document-dataply 0.0.10-alpha.5 → 0.0.10-alpha.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cjs/index.js +74 -34
- package/dist/types/core/Optimizer.d.ts +21 -14
- package/dist/types/core/QueryManager.d.ts +1 -1
- package/package.json +1 -1
package/dist/cjs/index.js
CHANGED
|
@@ -10466,6 +10466,26 @@ function tokenize(text, options) {
|
|
|
10466
10466
|
}
|
|
10467
10467
|
|
|
10468
10468
|
// src/core/Optimizer.ts
|
|
10469
|
+
var SELECTIVITY = {
|
|
10470
|
+
/** O(log N) 포인트 룩업 */
|
|
10471
|
+
EQUAL: 0.01,
|
|
10472
|
+
/** 양쪽 바운드(gte+lte) 범위 스캔 */
|
|
10473
|
+
BOUNDED_RANGE: 0.33,
|
|
10474
|
+
/** 한쪽 바운드(gte 또는 lte)만 있을 때, 중간부터 풀스캔 */
|
|
10475
|
+
HALF_RANGE: 0.5,
|
|
10476
|
+
/** Or 조건: B+Tree 내부 풀스캔 */
|
|
10477
|
+
OR: 0.9,
|
|
10478
|
+
/** Like 조건: B+Tree 내부 풀스캔 */
|
|
10479
|
+
LIKE: 0.9,
|
|
10480
|
+
/** 알 수 없는 조건 */
|
|
10481
|
+
UNKNOWN: 0.9,
|
|
10482
|
+
/** FTS 통계 없을 때 보수적 추정 */
|
|
10483
|
+
FTS_DEFAULT: 0.5,
|
|
10484
|
+
/** 정렬 비용 가중치 (orderBy 미지원 시) */
|
|
10485
|
+
SORT_PENALTY: 0.3,
|
|
10486
|
+
/** 인메모리 정렬이 유의미해지는 임계 문서 수 */
|
|
10487
|
+
SORT_THRESHOLD: 1e4
|
|
10488
|
+
};
|
|
10469
10489
|
var Optimizer = class {
|
|
10470
10490
|
constructor(api) {
|
|
10471
10491
|
this.api = api;
|
|
@@ -10477,7 +10497,7 @@ var Optimizer = class {
|
|
|
10477
10497
|
const primaryField = config.fields[0];
|
|
10478
10498
|
if (!queryFields.has(primaryField)) return null;
|
|
10479
10499
|
const builtCondition = {};
|
|
10480
|
-
let
|
|
10500
|
+
let selectivity = 1;
|
|
10481
10501
|
let isConsecutive = true;
|
|
10482
10502
|
const coveredFields = [];
|
|
10483
10503
|
const compositeVerifyFields = [];
|
|
@@ -10492,13 +10512,12 @@ var Optimizer = class {
|
|
|
10492
10512
|
continue;
|
|
10493
10513
|
}
|
|
10494
10514
|
coveredFields.push(field);
|
|
10495
|
-
score += 1;
|
|
10496
10515
|
if (isConsecutive) {
|
|
10497
10516
|
const cond = query[field];
|
|
10498
10517
|
if (cond !== void 0) {
|
|
10499
10518
|
let isBounded = false;
|
|
10500
10519
|
if (typeof cond !== "object" || cond === null) {
|
|
10501
|
-
|
|
10520
|
+
selectivity *= SELECTIVITY.EQUAL;
|
|
10502
10521
|
startValues.push(cond);
|
|
10503
10522
|
endValues.push(cond);
|
|
10504
10523
|
startOperator = "primaryGte";
|
|
@@ -10506,7 +10525,7 @@ var Optimizer = class {
|
|
|
10506
10525
|
isBounded = true;
|
|
10507
10526
|
} else if ("primaryEqual" in cond || "equal" in cond) {
|
|
10508
10527
|
const val = cond.primaryEqual?.v ?? cond.equal?.v ?? cond.primaryEqual ?? cond.equal;
|
|
10509
|
-
|
|
10528
|
+
selectivity *= SELECTIVITY.EQUAL;
|
|
10510
10529
|
startValues.push(val);
|
|
10511
10530
|
endValues.push(val);
|
|
10512
10531
|
startOperator = "primaryGte";
|
|
@@ -10514,7 +10533,7 @@ var Optimizer = class {
|
|
|
10514
10533
|
isBounded = true;
|
|
10515
10534
|
} else if ("primaryGte" in cond || "gte" in cond) {
|
|
10516
10535
|
const val = cond.primaryGte?.v ?? cond.gte?.v ?? cond.primaryGte ?? cond.gte;
|
|
10517
|
-
|
|
10536
|
+
selectivity *= SELECTIVITY.HALF_RANGE;
|
|
10518
10537
|
isConsecutive = false;
|
|
10519
10538
|
startValues.push(val);
|
|
10520
10539
|
startOperator = "primaryGte";
|
|
@@ -10522,7 +10541,7 @@ var Optimizer = class {
|
|
|
10522
10541
|
isBounded = true;
|
|
10523
10542
|
} else if ("primaryGt" in cond || "gt" in cond) {
|
|
10524
10543
|
const val = cond.primaryGt?.v ?? cond.gt?.v ?? cond.primaryGt ?? cond.gt;
|
|
10525
|
-
|
|
10544
|
+
selectivity *= SELECTIVITY.HALF_RANGE;
|
|
10526
10545
|
isConsecutive = false;
|
|
10527
10546
|
startValues.push(val);
|
|
10528
10547
|
startOperator = "primaryGt";
|
|
@@ -10530,7 +10549,7 @@ var Optimizer = class {
|
|
|
10530
10549
|
isBounded = true;
|
|
10531
10550
|
} else if ("primaryLte" in cond || "lte" in cond) {
|
|
10532
10551
|
const val = cond.primaryLte?.v ?? cond.lte?.v ?? cond.primaryLte ?? cond.lte;
|
|
10533
|
-
|
|
10552
|
+
selectivity *= SELECTIVITY.HALF_RANGE;
|
|
10534
10553
|
isConsecutive = false;
|
|
10535
10554
|
endValues.push(val);
|
|
10536
10555
|
endOperator = "primaryLte";
|
|
@@ -10538,20 +10557,20 @@ var Optimizer = class {
|
|
|
10538
10557
|
isBounded = true;
|
|
10539
10558
|
} else if ("primaryLt" in cond || "lt" in cond) {
|
|
10540
10559
|
const val = cond.primaryLt?.v ?? cond.lt?.v ?? cond.primaryLt ?? cond.lt;
|
|
10541
|
-
|
|
10560
|
+
selectivity *= SELECTIVITY.HALF_RANGE;
|
|
10542
10561
|
isConsecutive = false;
|
|
10543
10562
|
endValues.push(val);
|
|
10544
10563
|
endOperator = "primaryLt";
|
|
10545
10564
|
if (startValues.length > 0) startOperator = "primaryGte";
|
|
10546
10565
|
isBounded = true;
|
|
10547
10566
|
} else if ("primaryOr" in cond || "or" in cond) {
|
|
10548
|
-
|
|
10567
|
+
selectivity *= SELECTIVITY.OR;
|
|
10549
10568
|
isConsecutive = false;
|
|
10550
10569
|
} else if ("like" in cond) {
|
|
10551
|
-
|
|
10570
|
+
selectivity *= SELECTIVITY.LIKE;
|
|
10552
10571
|
isConsecutive = false;
|
|
10553
10572
|
} else {
|
|
10554
|
-
|
|
10573
|
+
selectivity *= SELECTIVITY.UNKNOWN;
|
|
10555
10574
|
isConsecutive = false;
|
|
10556
10575
|
}
|
|
10557
10576
|
if (!isBounded && field !== primaryField) {
|
|
@@ -10598,9 +10617,6 @@ var Optimizer = class {
|
|
|
10598
10617
|
}
|
|
10599
10618
|
if (!isExactMatch) break;
|
|
10600
10619
|
}
|
|
10601
|
-
if (isIndexOrderSupported) {
|
|
10602
|
-
score += 200;
|
|
10603
|
-
}
|
|
10604
10620
|
}
|
|
10605
10621
|
return {
|
|
10606
10622
|
tree: treeTx,
|
|
@@ -10608,7 +10624,7 @@ var Optimizer = class {
|
|
|
10608
10624
|
field: primaryField,
|
|
10609
10625
|
indexName,
|
|
10610
10626
|
isFtsMatch: false,
|
|
10611
|
-
|
|
10627
|
+
selectivity,
|
|
10612
10628
|
compositeVerifyFields,
|
|
10613
10629
|
coveredFields,
|
|
10614
10630
|
isIndexOrderSupported
|
|
@@ -10616,7 +10632,7 @@ var Optimizer = class {
|
|
|
10616
10632
|
}
|
|
10617
10633
|
/**
|
|
10618
10634
|
* FTS 타입 인덱스의 선택도를 평가합니다.
|
|
10619
|
-
* FTSTermCount 통계가 있으면
|
|
10635
|
+
* FTSTermCount 통계가 있으면 실측 데이터 기반으로 선택도를 산출합니다.
|
|
10620
10636
|
*/
|
|
10621
10637
|
evaluateFTSCandidate(indexName, config, query, queryFields, treeTx) {
|
|
10622
10638
|
const field = config.fields;
|
|
@@ -10625,18 +10641,14 @@ var Optimizer = class {
|
|
|
10625
10641
|
if (!condition || typeof condition !== "object" || !("match" in condition)) return null;
|
|
10626
10642
|
const ftsConfig = this.api.indexManager.getFtsConfig(config);
|
|
10627
10643
|
const matchTokens = ftsConfig ? tokenize(condition.match, ftsConfig) : [];
|
|
10628
|
-
|
|
10629
|
-
const MIN_FTS_SCORE = 10;
|
|
10630
|
-
const DEFAULT_FTS_SCORE = 90;
|
|
10631
|
-
let score = DEFAULT_FTS_SCORE;
|
|
10644
|
+
let selectivity = SELECTIVITY.FTS_DEFAULT;
|
|
10632
10645
|
const termCountProvider = this.api.analysisManager.getProvider("fts_term_count");
|
|
10633
10646
|
if (termCountProvider && termCountProvider.hasSampleData && ftsConfig && matchTokens.length > 0) {
|
|
10634
10647
|
const strategy = ftsConfig.tokenizer === "ngram" ? `${ftsConfig.gramSize}gram` : ftsConfig.tokenizer;
|
|
10635
10648
|
const minCount = termCountProvider.getMinTokenCount(field, strategy, matchTokens);
|
|
10636
10649
|
if (minCount >= 0) {
|
|
10637
10650
|
const sampleSize = termCountProvider.getSampleSize();
|
|
10638
|
-
|
|
10639
|
-
score = Math.round(MAX_FTS_SCORE * (1 - selectivityRatio) + MIN_FTS_SCORE);
|
|
10651
|
+
selectivity = Math.min(minCount / sampleSize, 1);
|
|
10640
10652
|
}
|
|
10641
10653
|
}
|
|
10642
10654
|
return {
|
|
@@ -10646,18 +10658,33 @@ var Optimizer = class {
|
|
|
10646
10658
|
indexName,
|
|
10647
10659
|
isFtsMatch: true,
|
|
10648
10660
|
matchTokens,
|
|
10649
|
-
|
|
10661
|
+
selectivity,
|
|
10650
10662
|
compositeVerifyFields: [],
|
|
10651
10663
|
coveredFields: [field],
|
|
10652
10664
|
isIndexOrderSupported: false
|
|
10653
10665
|
};
|
|
10654
10666
|
}
|
|
10655
10667
|
/**
|
|
10656
|
-
*
|
|
10668
|
+
* 비용 계산: effectiveScanCost + sortPenalty
|
|
10669
|
+
* - effectiveScanCost: 인덱스 순서 지원 + limit 존재 시 조기 종료 이점 반영
|
|
10670
|
+
* - sortPenalty: 인메모리 정렬의 절대 문서 수 기반 비용
|
|
10671
|
+
*/
|
|
10672
|
+
calculateCost(selectivity, isIndexOrderSupported, orderByField, N, topK) {
|
|
10673
|
+
const effectiveScanCost = isIndexOrderSupported && isFinite(topK) && N > 0 ? Math.min(topK / N, selectivity) : selectivity;
|
|
10674
|
+
const estimatedSortDocs = selectivity * N;
|
|
10675
|
+
const sortPenalty = orderByField && !isIndexOrderSupported ? Math.min(estimatedSortDocs / SELECTIVITY.SORT_THRESHOLD, 1) * SELECTIVITY.SORT_PENALTY : 0;
|
|
10676
|
+
return effectiveScanCost + sortPenalty;
|
|
10677
|
+
}
|
|
10678
|
+
/**
|
|
10679
|
+
* 실행할 최적의 인덱스를 선택합니다. (비용 기반 최적 드라이버 선택)
|
|
10680
|
+
* cost = selectivity + sortPenalty (낮을수록 좋음)
|
|
10657
10681
|
*/
|
|
10658
|
-
async getSelectivityCandidate(query, orderByField) {
|
|
10682
|
+
async getSelectivityCandidate(query, orderByField, limit = Infinity, offset = 0) {
|
|
10659
10683
|
const queryFields = new Set(Object.keys(query));
|
|
10660
10684
|
const candidates = [];
|
|
10685
|
+
const metadata = await this.api.getMetadata();
|
|
10686
|
+
const N = metadata.rowCount;
|
|
10687
|
+
const topK = isFinite(limit) ? offset + limit : Infinity;
|
|
10661
10688
|
for (const [indexName, config] of this.api.indexManager.registeredIndices) {
|
|
10662
10689
|
const tree = this.api.trees.get(indexName);
|
|
10663
10690
|
if (!tree) continue;
|
|
@@ -10671,7 +10698,12 @@ var Optimizer = class {
|
|
|
10671
10698
|
treeTx,
|
|
10672
10699
|
orderByField
|
|
10673
10700
|
);
|
|
10674
|
-
if (candidate)
|
|
10701
|
+
if (candidate) {
|
|
10702
|
+
candidates.push({
|
|
10703
|
+
...candidate,
|
|
10704
|
+
cost: this.calculateCost(candidate.selectivity, candidate.isIndexOrderSupported, orderByField, N, topK)
|
|
10705
|
+
});
|
|
10706
|
+
}
|
|
10675
10707
|
} else if (config.type === "fts") {
|
|
10676
10708
|
const treeTx = await tree.createTransaction();
|
|
10677
10709
|
const candidate = this.evaluateFTSCandidate(
|
|
@@ -10681,7 +10713,12 @@ var Optimizer = class {
|
|
|
10681
10713
|
queryFields,
|
|
10682
10714
|
treeTx
|
|
10683
10715
|
);
|
|
10684
|
-
if (candidate)
|
|
10716
|
+
if (candidate) {
|
|
10717
|
+
candidates.push({
|
|
10718
|
+
...candidate,
|
|
10719
|
+
cost: this.calculateCost(candidate.selectivity, candidate.isIndexOrderSupported, orderByField, N, topK)
|
|
10720
|
+
});
|
|
10721
|
+
}
|
|
10685
10722
|
}
|
|
10686
10723
|
}
|
|
10687
10724
|
const rollback = () => {
|
|
@@ -10694,7 +10731,7 @@ var Optimizer = class {
|
|
|
10694
10731
|
return null;
|
|
10695
10732
|
}
|
|
10696
10733
|
candidates.sort((a, b) => {
|
|
10697
|
-
if (
|
|
10734
|
+
if (a.cost !== b.cost) return a.cost - b.cost;
|
|
10698
10735
|
const aConfig = this.api.indexManager.registeredIndices.get(a.indexName);
|
|
10699
10736
|
const bConfig = this.api.indexManager.registeredIndices.get(b.indexName);
|
|
10700
10737
|
const aFieldCount = aConfig ? Array.isArray(aConfig.fields) ? aConfig.fields.length : 1 : 0;
|
|
@@ -10709,8 +10746,8 @@ var Optimizer = class {
|
|
|
10709
10746
|
const candidate = nonDriverCandidates[i];
|
|
10710
10747
|
let isSubset = false;
|
|
10711
10748
|
for (let j = 0, oLen = others.length; j < oLen; j++) {
|
|
10712
|
-
const
|
|
10713
|
-
if (candidate.coveredFields.every((f) =>
|
|
10749
|
+
const better = others[j];
|
|
10750
|
+
if (candidate.coveredFields.every((f) => better.coveredFields.includes(f))) {
|
|
10714
10751
|
isSubset = true;
|
|
10715
10752
|
break;
|
|
10716
10753
|
}
|
|
@@ -10915,12 +10952,14 @@ var QueryManager = class {
|
|
|
10915
10952
|
rollback();
|
|
10916
10953
|
return new Float64Array(Array.from(keys || []));
|
|
10917
10954
|
}
|
|
10918
|
-
async getDriverKeys(query, orderBy, sortOrder = "asc") {
|
|
10955
|
+
async getDriverKeys(query, orderBy, sortOrder = "asc", limit = Infinity, offset = 0) {
|
|
10919
10956
|
const isQueryEmpty = Object.keys(query).length === 0;
|
|
10920
10957
|
const normalizedQuery = isQueryEmpty ? { _id: { gte: 0 } } : query;
|
|
10921
10958
|
const selectivity = await this.optimizer.getSelectivityCandidate(
|
|
10922
10959
|
this.verboseQuery(normalizedQuery),
|
|
10923
|
-
orderBy
|
|
10960
|
+
orderBy,
|
|
10961
|
+
limit,
|
|
10962
|
+
offset
|
|
10924
10963
|
);
|
|
10925
10964
|
if (!selectivity) return null;
|
|
10926
10965
|
const { driver, others, compositeVerifyConditions, rollback } = selectivity;
|
|
@@ -11139,12 +11178,13 @@ var QueryManager = class {
|
|
|
11139
11178
|
}
|
|
11140
11179
|
}
|
|
11141
11180
|
}
|
|
11142
|
-
const driverResult = await self.getDriverKeys(query, orderByField, sortOrder);
|
|
11181
|
+
const driverResult = await self.getDriverKeys(query, orderByField, sortOrder, limit, offset);
|
|
11143
11182
|
if (!driverResult) return;
|
|
11144
11183
|
const { keysStream, others, compositeVerifyConditions, isDriverOrderByField, rollback } = driverResult;
|
|
11145
11184
|
const initialChunkSize = self.api.options.pageSize;
|
|
11185
|
+
const isInMemorySort = !isDriverOrderByField && orderByField;
|
|
11146
11186
|
try {
|
|
11147
|
-
if (
|
|
11187
|
+
if (isInMemorySort) {
|
|
11148
11188
|
const topK = limit === Infinity ? Infinity : offset + limit;
|
|
11149
11189
|
let heap = null;
|
|
11150
11190
|
if (topK !== Infinity) {
|
|
@@ -13,31 +13,38 @@ export declare class Optimizer<T extends Record<string, any>> {
|
|
|
13
13
|
readonly field: any;
|
|
14
14
|
readonly indexName: string;
|
|
15
15
|
readonly isFtsMatch: false;
|
|
16
|
-
readonly score: number;
|
|
16
|
+
readonly selectivity: number;
|
|
17
17
|
readonly compositeVerifyFields: string[];
|
|
18
18
|
readonly coveredFields: string[];
|
|
19
19
|
readonly isIndexOrderSupported: boolean;
|
|
20
20
|
} | null;
|
|
21
21
|
/**
|
|
22
22
|
* FTS 타입 인덱스의 선택도를 평가합니다.
|
|
23
|
-
* FTSTermCount 통계가 있으면
|
|
23
|
+
* FTSTermCount 통계가 있으면 실측 데이터 기반으로 선택도를 산출합니다.
|
|
24
24
|
*/
|
|
25
25
|
evaluateFTSCandidate<U extends Partial<DocumentDataplyQuery<T>>, V extends DataplyTreeValue<U>>(indexName: string, config: any, query: Partial<DocumentDataplyQuery<V>>, queryFields: Set<string>, treeTx: BPTreeAsync<string | number, V>): {
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
26
|
+
tree: BPTreeAsync<string | number, V>;
|
|
27
|
+
condition: any;
|
|
28
|
+
field: any;
|
|
29
|
+
indexName: string;
|
|
30
|
+
isFtsMatch: boolean;
|
|
31
|
+
matchTokens: string[];
|
|
32
|
+
selectivity: number;
|
|
33
|
+
compositeVerifyFields: never[];
|
|
34
|
+
coveredFields: any[];
|
|
35
|
+
isIndexOrderSupported: boolean;
|
|
36
36
|
} | null;
|
|
37
37
|
/**
|
|
38
|
-
*
|
|
38
|
+
* 비용 계산: effectiveScanCost + sortPenalty
|
|
39
|
+
* - effectiveScanCost: 인덱스 순서 지원 + limit 존재 시 조기 종료 이점 반영
|
|
40
|
+
* - sortPenalty: 인메모리 정렬의 절대 문서 수 기반 비용
|
|
41
|
+
*/
|
|
42
|
+
private calculateCost;
|
|
43
|
+
/**
|
|
44
|
+
* 실행할 최적의 인덱스를 선택합니다. (비용 기반 최적 드라이버 선택)
|
|
45
|
+
* cost = selectivity + sortPenalty (낮을수록 좋음)
|
|
39
46
|
*/
|
|
40
|
-
getSelectivityCandidate<U extends Partial<DocumentDataplyQuery<T>>, V extends DataplyTreeValue<U>>(query: Partial<DocumentDataplyQuery<V>>, orderByField?: string): Promise<{
|
|
47
|
+
getSelectivityCandidate<U extends Partial<DocumentDataplyQuery<T>>, V extends DataplyTreeValue<U>>(query: Partial<DocumentDataplyQuery<V>>, orderByField?: string, limit?: number, offset?: number): Promise<{
|
|
41
48
|
driver: ({
|
|
42
49
|
tree: BPTreeAsync<number, V>;
|
|
43
50
|
condition: Partial<DocumentDataplyCondition<U>>;
|
|
@@ -18,7 +18,7 @@ export declare class QueryManager<T extends DocumentJSON> {
|
|
|
18
18
|
private applyCandidateByFTSStream;
|
|
19
19
|
private applyCandidateStream;
|
|
20
20
|
getKeys(query: Partial<DocumentDataplyQuery<T>>, orderBy?: string, sortOrder?: 'asc' | 'desc'): Promise<Float64Array>;
|
|
21
|
-
getDriverKeys(query: Partial<DocumentDataplyQuery<T>>, orderBy?: string, sortOrder?: 'asc' | 'desc'): Promise<{
|
|
21
|
+
getDriverKeys(query: Partial<DocumentDataplyQuery<T>>, orderBy?: string, sortOrder?: 'asc' | 'desc', limit?: number, offset?: number): Promise<{
|
|
22
22
|
keysStream: AsyncIterableIterator<number>;
|
|
23
23
|
others: {
|
|
24
24
|
tree: BPTreeAsync<string | number, DataplyTreeValue<Primitive>>;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "document-dataply",
|
|
3
|
-
"version": "0.0.10-alpha.5",
|
|
3
|
+
"version": "0.0.10-alpha.6",
|
|
4
4
|
"description": "Simple and powerful JSON document database supporting complex queries and flexible indexing policies.",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "izure <admin@izure.org>",
|