document-dataply 0.0.10-alpha.4 → 0.0.10-alpha.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cjs/index.js CHANGED
@@ -10616,6 +10616,7 @@ var Optimizer = class {
10616
10616
  }
10617
10617
  /**
10618
10618
  * FTS 타입 인덱스의 선택도를 평가합니다.
10619
+ * FTSTermCount 통계가 있으면 토큰 빈도 기반 동적 score를 산출합니다.
10619
10620
  */
10620
10621
  evaluateFTSCandidate(indexName, config, query, queryFields, treeTx) {
10621
10622
  const field = config.fields;
@@ -10624,6 +10625,20 @@ var Optimizer = class {
10624
10625
  if (!condition || typeof condition !== "object" || !("match" in condition)) return null;
10625
10626
  const ftsConfig = this.api.indexManager.getFtsConfig(config);
10626
10627
  const matchTokens = ftsConfig ? tokenize(condition.match, ftsConfig) : [];
10628
+ const MAX_FTS_SCORE = 400;
10629
+ const MIN_FTS_SCORE = 10;
10630
+ const DEFAULT_FTS_SCORE = 90;
10631
+ let score = DEFAULT_FTS_SCORE;
10632
+ const termCountProvider = this.api.analysisManager.getProvider("fts_term_count");
10633
+ if (termCountProvider && termCountProvider.hasSampleData && ftsConfig && matchTokens.length > 0) {
10634
+ const strategy = ftsConfig.tokenizer === "ngram" ? `${ftsConfig.gramSize}gram` : ftsConfig.tokenizer;
10635
+ const minCount = termCountProvider.getMinTokenCount(field, strategy, matchTokens);
10636
+ if (minCount >= 0) {
10637
+ const sampleSize = termCountProvider.getSampleSize();
10638
+ const selectivityRatio = Math.min(minCount / sampleSize, 1);
10639
+ score = Math.round(MAX_FTS_SCORE * (1 - selectivityRatio) + MIN_FTS_SCORE);
10640
+ }
10641
+ }
10627
10642
  return {
10628
10643
  tree: treeTx,
10629
10644
  condition,
@@ -10631,7 +10646,7 @@ var Optimizer = class {
10631
10646
  indexName,
10632
10647
  isFtsMatch: true,
10633
10648
  matchTokens,
10634
- score: 90,
10649
+ score,
10635
10650
  compositeVerifyFields: [],
10636
10651
  coveredFields: [field],
10637
10652
  isIndexOrderSupported: false
@@ -10688,7 +10703,20 @@ var Optimizer = class {
10688
10703
  });
10689
10704
  const driver = candidates[0];
10690
10705
  const driverCoveredFields = new Set(driver.coveredFields);
10691
- const others = candidates.slice(1).filter((c) => !driverCoveredFields.has(c.field));
10706
+ const nonDriverCandidates = candidates.slice(1).filter((c) => !driverCoveredFields.has(c.field));
10707
+ const others = [];
10708
+ for (let i = 0, len = nonDriverCandidates.length; i < len; i++) {
10709
+ const candidate = nonDriverCandidates[i];
10710
+ let isSubset = false;
10711
+ for (let j = 0, oLen = others.length; j < oLen; j++) {
10712
+ const higher = others[j];
10713
+ if (candidate.coveredFields.every((f) => higher.coveredFields.includes(f))) {
10714
+ isSubset = true;
10715
+ break;
10716
+ }
10717
+ }
10718
+ if (!isSubset) others.push(candidate);
10719
+ }
10692
10720
  const compositeVerifyConditions = [];
10693
10721
  for (let i = 0, len = driver.compositeVerifyFields.length; i < len; i++) {
10694
10722
  const field = driver.compositeVerifyFields[i];
@@ -10981,7 +11009,10 @@ var QueryManager = class {
10981
11009
  const isCompositeVerify = compositeVerifyConditions.length > 0;
10982
11010
  const isVerifyOthers = verifyOthers.length > 0;
10983
11011
  const isInfinityLimit = !isFinite(limit);
10984
- const isReadQuotaLimited = !isInfinityLimit || !isCompositeVerify || !isVerifyOthers || !isFts;
11012
+ const isReadQuotaLimited = !isInfinityLimit && // limit이 임의의 유한한 값으로 설정되어 있으며
11013
+ !isCompositeVerify && // 문서를 가져온 후 복합 인덱스 기준으로 2차 필터링할 필요가 없고
11014
+ !isVerifyOthers && // 문서를 가져온 후 다른 인덱스 기준으로 2차 필터링할 필요가 없으며
11015
+ !isFts;
10985
11016
  let currentChunkSize = isReadQuotaLimited ? limit : initialChunkSize;
10986
11017
  let chunk = [];
10987
11018
  let chunkSize = 0;
@@ -11002,10 +11033,30 @@ var QueryManager = class {
11002
11033
  let passed = true;
11003
11034
  for (let k = 0, kLen = verifyOthers.length; k < kLen; k++) {
11004
11035
  const other = verifyOthers[k];
11005
- const fieldValue = flatDoc[other.field];
11006
- if (fieldValue === void 0) {
11007
- passed = false;
11008
- break;
11036
+ const coveredFields = other.coveredFields;
11037
+ let fieldValue;
11038
+ if (coveredFields && coveredFields.length > 1) {
11039
+ const values = [];
11040
+ let hasMissing = false;
11041
+ for (let f = 0, fLen = coveredFields.length; f < fLen; f++) {
11042
+ const v = flatDoc[coveredFields[f]];
11043
+ if (v === void 0) {
11044
+ hasMissing = true;
11045
+ break;
11046
+ }
11047
+ values.push(v);
11048
+ }
11049
+ if (hasMissing) {
11050
+ passed = false;
11051
+ break;
11052
+ }
11053
+ fieldValue = values;
11054
+ } else {
11055
+ fieldValue = flatDoc[other.field];
11056
+ if (fieldValue === void 0) {
11057
+ passed = false;
11058
+ break;
11059
+ }
11009
11060
  }
11010
11061
  const treeValue = { k: doc._id, v: fieldValue };
11011
11062
  if (!other.tree.verify(treeValue, other.condition)) {
@@ -11665,12 +11716,6 @@ var MutationManager = class {
11665
11716
  document: dataplyDocument
11666
11717
  };
11667
11718
  }
11668
- /**
11669
- * Insert a document into the database
11670
- * @param document The document to insert
11671
- * @param tx The transaction to use
11672
- * @returns The primary key of the inserted document
11673
- */
11674
11719
  async insertSingleDocument(document, tx) {
11675
11720
  return this.api.runWithDefaultWrite(async (tx2) => {
11676
11721
  const { pk: dpk, document: dataplyDocument } = await this.insertDocumentInternal(document, tx2);
@@ -11697,15 +11742,10 @@ var MutationManager = class {
11697
11742
  if (error) throw error;
11698
11743
  }
11699
11744
  }
11745
+ await this.api.analysisManager.notifyInsert([flattenDocument], tx2);
11700
11746
  return dataplyDocument._id;
11701
11747
  }, tx);
11702
11748
  }
11703
- /**
11704
- * Insert a batch of documents into the database
11705
- * @param documents The documents to insert
11706
- * @param tx The transaction to use
11707
- * @returns The primary keys of the inserted documents
11708
- */
11709
11749
  async insertBatchDocuments(documents, tx) {
11710
11750
  return this.api.runWithDefaultWrite(async (tx2) => {
11711
11751
  const metadata = await this.api.getDocumentInnerMetadata(tx2);
@@ -11765,19 +11805,18 @@ var MutationManager = class {
11765
11805
  throw res.error;
11766
11806
  }
11767
11807
  }
11808
+ const flatDocs = [];
11809
+ for (let i = 0, len = flattenedData.length; i < len; i++) {
11810
+ flatDocs.push(flattenedData[i].data);
11811
+ }
11812
+ await this.api.analysisManager.notifyInsert(flatDocs, tx2);
11768
11813
  return ids;
11769
11814
  }, tx);
11770
11815
  }
11771
- /**
11772
- * Internal update method used by both fullUpdate and partialUpdate
11773
- * @param query The query to use
11774
- * @param computeUpdatedDoc Function that computes the updated document from the original
11775
- * @param tx The transaction to use
11776
- * @returns The number of updated documents
11777
- */
11778
11816
  async updateInternal(query, computeUpdatedDoc, tx) {
11779
11817
  const pks = await this.api.queryManager.getKeys(query);
11780
11818
  let updatedCount = 0;
11819
+ const updatePairs = [];
11781
11820
  const treeTxs = /* @__PURE__ */ new Map();
11782
11821
  for (const [indexName, tree] of this.api.trees) {
11783
11822
  treeTxs.set(indexName, await tree.createTransaction());
@@ -11825,6 +11864,7 @@ var MutationManager = class {
11825
11864
  }
11826
11865
  }
11827
11866
  }
11867
+ updatePairs.push({ oldDocument: oldFlatDoc, newDocument: newFlatDoc });
11828
11868
  await this.api.update(pk, JSON.stringify(updatedDoc), tx);
11829
11869
  updatedCount++;
11830
11870
  }
@@ -11837,15 +11877,9 @@ var MutationManager = class {
11837
11877
  throw result.error;
11838
11878
  }
11839
11879
  }
11880
+ await this.api.analysisManager.notifyUpdate(updatePairs, tx);
11840
11881
  return updatedCount;
11841
11882
  }
11842
- /**
11843
- * Fully update documents from the database that match the query
11844
- * @param query The query to use
11845
- * @param newRecord Complete document to replace with, or function that receives current document and returns new document
11846
- * @param tx The transaction to use
11847
- * @returns The number of updated documents
11848
- */
11849
11883
  async fullUpdate(query, newRecord, tx) {
11850
11884
  return this.api.runWithDefaultWrite(async (tx2) => {
11851
11885
  return this.updateInternal(query, (doc) => {
@@ -11854,13 +11888,6 @@ var MutationManager = class {
11854
11888
  }, tx2);
11855
11889
  }, tx);
11856
11890
  }
11857
- /**
11858
- * Partially update documents from the database that match the query
11859
- * @param query The query to use
11860
- * @param newRecord Partial document to merge, or function that receives current document and returns partial update
11861
- * @param tx The transaction to use
11862
- * @returns The number of updated documents
11863
- */
11864
11891
  async partialUpdate(query, newRecord, tx) {
11865
11892
  return this.api.runWithDefaultWrite(async (tx2) => {
11866
11893
  return this.updateInternal(query, (doc) => {
@@ -11871,16 +11898,11 @@ var MutationManager = class {
11871
11898
  }, tx2);
11872
11899
  }, tx);
11873
11900
  }
11874
- /**
11875
- * Delete documents from the database that match the query
11876
- * @param query The query to use
11877
- * @param tx The transaction to use
11878
- * @returns The number of deleted documents
11879
- */
11880
11901
  async deleteDocuments(query, tx) {
11881
11902
  return this.api.runWithDefaultWrite(async (tx2) => {
11882
11903
  const pks = await this.api.queryManager.getKeys(query);
11883
11904
  let deletedCount = 0;
11905
+ const deletedFlatDocs = [];
11884
11906
  for (let i = 0, len = pks.length; i < len; i++) {
11885
11907
  const pk = pks[i];
11886
11908
  const doc = await this.api.getDocument(pk, tx2);
@@ -11904,9 +11926,11 @@ var MutationManager = class {
11904
11926
  await tree.delete(pk, { k: pk, v: indexVal });
11905
11927
  }
11906
11928
  }
11929
+ deletedFlatDocs.push(flatDoc);
11907
11930
  await this.api.delete(pk, true, tx2);
11908
11931
  deletedCount++;
11909
11932
  }
11933
+ await this.api.analysisManager.notifyDelete(deletedFlatDocs, tx2);
11910
11934
  return deletedCount;
11911
11935
  }, tx);
11912
11936
  }
@@ -11945,13 +11969,6 @@ var MetadataManager = class {
11945
11969
  async updateDocumentInnerMetadata(metadata, tx) {
11946
11970
  await this.api.update(1, JSON.stringify(metadata), tx);
11947
11971
  }
11948
- /**
11949
- * Run a migration if the current schemeVersion is lower than the target version.
11950
- * After the callback completes, schemeVersion is updated to the target version.
11951
- * @param version The target scheme version
11952
- * @param callback The migration callback
11953
- * @param tx Optional transaction
11954
- */
11955
11972
  async migration(version, callback, tx) {
11956
11973
  await this.api.runWithDefaultWrite(async (tx2) => {
11957
11974
  const innerMetadata = await this.getDocumentInnerMetadata(tx2);
@@ -11980,16 +11997,392 @@ var DocumentFormatter = class {
11980
11997
  }
11981
11998
  return result;
11982
11999
  }
11983
- /**
11984
- * returns flattened document
11985
- * @param document
11986
- * @returns
11987
- */
11988
12000
  flattenDocument(document) {
11989
12001
  return this.flattenInternal(document, "", {});
11990
12002
  }
11991
12003
  };
11992
12004
 
12005
+ // src/core/AnalysisProvider.ts
12006
+ var AnalysisProvider = class {
12007
+ constructor(api) {
12008
+ this.api = api;
12009
+ }
12010
+ /** Overflow row PK assigned by AnalysisManager during initialization. */
12011
+ storageKey = -1;
12012
+ };
12013
+
12014
+ // src/core/RealtimeAnalysisProvider.ts
12015
+ var RealtimeAnalysisProvider = class extends AnalysisProvider {
12016
+ };
12017
+
12018
+ // src/core/IntervalAnalysisProvider.ts
12019
var IntervalAnalysisProvider = class extends AnalysisProvider {
  /**
   * Pick a uniformly random subset of the stored documents.
   *
   * All primary keys are obtained through `queryManager.getKeys({})`, a partial
   * Fisher-Yates shuffle moves the chosen keys to the front of that array, and
   * only the chosen rows are then read through `api.selectMany` and JSON-parsed.
   *
   * @param sampleOptions Sampling strategy — either `{ rate }` (fraction of the
   *   dataset, clamped to [0, 1]) or `{ count }` (absolute number of documents,
   *   clamped to [0, total]).
   * @param tx Optional transaction, forwarded to `api.selectMany`.
   * @returns The parsed documents that were selected. Rows for which
   *   `selectMany` yields a falsy value are skipped, so the result may be
   *   shorter than the requested size.
   */
  async sample(sampleOptions, tx) {
    // NOTE(review): the key scan is not given `tx`; confirm whether getKeys
    // accepts a transaction before threading it through.
    const pks = await this.api.queryManager.getKeys({});
    const total = pks.length;
    if (total === 0) return [];
    const usesRate = "rate" in sampleOptions && sampleOptions.rate != null;
    const requested = Number(
      usesRate ? Math.ceil(total * Math.min(Math.max(sampleOptions.rate, 0), 1)) : sampleOptions.count
    );
    // A missing or non-numeric rate/count turns into NaN, which would silently
    // flow through Math.min/Math.max and Array.prototype.slice. Treat it
    // explicitly as "nothing to sample" instead of relying on that accident.
    if (Number.isNaN(requested)) return [];
    const sampleCount = Math.min(Math.max(Math.floor(requested), 0), total);
    if (sampleCount === 0) return [];
    // Partial Fisher-Yates: after iteration i, pks[0..i] holds a uniformly
    // random selection, so only `sampleCount` swaps are needed.
    for (let i = 0; i < sampleCount; i++) {
      const j = i + Math.floor(Math.random() * (total - i));
      const tmp = pks[i];
      pks[i] = pks[j];
      pks[j] = tmp;
    }
    const selectedPks = pks.slice(0, sampleCount);
    const rawResults = await this.api.selectMany(selectedPks, false, tx);
    const docs = [];
    for (let i = 0, len = rawResults.length; i < len; i++) {
      const raw = rawResults[i];
      if (raw) docs.push(JSON.parse(raw));
    }
    return docs;
  }
};
12050
+
12051
+ // src/core/analysis/FTSTermCount.ts
12052
var FTSTermCount = class extends IntervalAnalysisProvider {
  name = "fts_term_count";
  /** Nested map: field -> tokenizer strategy -> token -> number of sampled documents containing it. */
  termCount = {};
  /** Number of documents in the sample the current statistics were built from. */
  sampleSize = 0;
  /**
   * Rebuild the term statistics from a fresh sample of up to 1000 documents
   * and return them as a JSON string.
   *
   * For every registered index whose config type is "fts", the value of the
   * index's primary field is tokenized and each distinct token is counted once
   * per document. Only the 1000 most frequent tokens per field/strategy are
   * kept.
   *
   * @param tx The transaction forwarded to `sample`
   * @returns JSON of the shape `{ _sampleSize, [field]: { [strategy]: { [token]: count } } }`
   */
  async serialize(tx) {
    const docs = await this.sample({ count: 1e3 }, tx);
    // Prototype-less maps: tokens and field names come from document text, so
    // keys such as "constructor" or "toString" must not hit Object.prototype.
    const counts = Object.create(null);
    this.termCount = counts;
    this.sampleSize = docs.length;
    if (docs.length === 0) return JSON.stringify({ _sampleSize: 0 });
    const ftsIndices = new Map();
    for (const [indexName, config] of this.api.indexManager.registeredIndices) {
      if (config.type === "fts") {
        ftsIndices.set(indexName, config);
      }
    }
    if (ftsIndices.size === 0) return JSON.stringify({ _sampleSize: this.sampleSize });
    for (let i = 0, len = docs.length; i < len; i++) {
      const flatDoc = this.api.flattenDocument(docs[i]);
      // Tokens already counted for this document, per target map. This keeps
      // the numbers document frequencies even when a token repeats inside one
      // value or when two indices share the same field and strategy.
      const seenByTarget = new Map();
      for (const config of ftsIndices.values()) {
        const primaryField = this.api.indexManager.getPrimaryField(config);
        const v = flatDoc[primaryField];
        if (typeof v !== "string" || v.length === 0) continue;
        const ftsConfig = this.api.indexManager.getFtsConfig(config);
        const tokens = ftsConfig ? tokenize(v, ftsConfig) : [v];
        const tokenizerStrategy = ftsConfig ? ftsConfig.tokenizer === "ngram" ? `${ftsConfig.gramSize}gram` : ftsConfig.tokenizer : "whitespace";
        if (!counts[primaryField]) {
          counts[primaryField] = Object.create(null);
        }
        if (!counts[primaryField][tokenizerStrategy]) {
          counts[primaryField][tokenizerStrategy] = Object.create(null);
        }
        const targetMap = counts[primaryField][tokenizerStrategy];
        let seen = seenByTarget.get(targetMap);
        if (!seen) {
          seen = new Set();
          seenByTarget.set(targetMap, seen);
        }
        for (let j = 0, len2 = tokens.length; j < len2; j++) {
          const token = tokens[j];
          if (seen.has(token)) continue;
          seen.add(token);
          targetMap[token] = (targetMap[token] || 0) + 1;
        }
      }
    }
    // Keep only the most frequent tokens to bound the persisted size.
    const optimizedTermCount = Object.create(null);
    for (const field in counts) {
      optimizedTermCount[field] = Object.create(null);
      for (const strategy in counts[field]) {
        const sorted = Object.entries(counts[field][strategy]).sort((a, b) => b[1] - a[1]).slice(0, 1e3);
        const kept = Object.create(null);
        for (let i = 0, len = sorted.length; i < len; i++) {
          kept[sorted[i][0]] = sorted[i][1];
        }
        optimizedTermCount[field][strategy] = kept;
      }
    }
    this.termCount = optimizedTermCount;
    return JSON.stringify({ _sampleSize: this.sampleSize, ...optimizedTermCount });
  }
  /**
   * Restore statistics from a previously serialized string.
   * Any falsy, unparsable or non-object payload leaves the provider empty
   * (`termCount = {}`, `sampleSize = 0`).
   *
   * @param data The raw stored string, or null when nothing was stored
   * @param tx Unused by this provider
   */
  async load(data, tx) {
    this.termCount = {};
    this.sampleSize = 0;
    if (!data) {
      return;
    }
    try {
      const parsed = JSON.parse(data);
      if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
        const { _sampleSize, ...rest } = parsed;
        this.sampleSize = typeof _sampleSize === "number" ? _sampleSize : 0;
        this.termCount = rest;
      }
    } catch (e) {
      // Deliberate best effort: statistics are only a hint, so corrupt data is
      // ignored and the provider stays in the empty state set above.
    }
  }
  /**
   * Return the document frequency recorded for a field/strategy/token.
   * Returns 0 when the token is not present in the statistics.
   * Only own, numeric entries are considered, so inherited members of
   * Object.prototype and malformed persisted values never leak out.
   */
  getTermCount(field, strategy, token) {
    const hasOwn = Object.prototype.hasOwnProperty;
    const byField = this.termCount;
    if (byField == null || !hasOwn.call(byField, field)) return 0;
    const byStrategy = byField[field];
    if (byStrategy == null || !hasOwn.call(byStrategy, strategy)) return 0;
    const byToken = byStrategy[strategy];
    if (byToken == null || !hasOwn.call(byToken, token)) return 0;
    const count = byToken[token];
    return typeof count === "number" && Number.isFinite(count) ? count : 0;
  }
  /**
   * Return the smallest frequency among the query tokens (an upper bound on
   * the number of matches under AND semantics).
   * Returns -1 when there are no statistics, sampleSize is 0, or no tokens
   * were given.
   */
  getMinTokenCount(field, strategy, tokens) {
    if (this.sampleSize === 0 || tokens.length === 0) return -1;
    let minCount = Infinity;
    for (let i = 0, len = tokens.length; i < len; i++) {
      const count = this.getTermCount(field, strategy, tokens[i]);
      if (count < minCount) minCount = count;
    }
    return minCount === Infinity ? -1 : minCount;
  }
  /**
   * Whether usable statistics are available (sampleSize > 0).
   */
  get hasSampleData() {
    return this.sampleSize > 0;
  }
  /**
   * Return the sample size used when the statistics were collected.
   */
  getSampleSize() {
    return this.sampleSize;
  }
};
12156
+
12157
+ // src/core/analysis/index.ts
12158
+ var BuiltinAnalysisProviders = [
12159
+ FTSTermCount
12160
+ ];
12161
+
12162
+ // src/core/AnalysisManager.ts
12163
+ var AnalysisManager = class {
12164
+ constructor(api) {
12165
+ this.api = api;
12166
+ }
12167
+ providers = /* @__PURE__ */ new Map();
12168
+ /**
12169
+ * Register all built-in analysis providers.
12170
+ * Each provider class is instantiated with the API reference and registered.
12171
+ */
12172
+ registerBuiltinProviders() {
12173
+ for (const Provider of BuiltinAnalysisProviders) {
12174
+ const instance = new Provider(this.api);
12175
+ this.registerProvider(instance);
12176
+ }
12177
+ }
12178
+ /**
12179
+ * Register an analysis provider.
12180
+ * @param provider The provider instance to register
12181
+ */
12182
+ registerProvider(provider) {
12183
+ if (this.providers.has(provider.name)) {
12184
+ throw new Error(`Analysis provider "${provider.name}" is already registered.`);
12185
+ }
12186
+ this.providers.set(provider.name, provider);
12187
+ }
12188
+ /**
12189
+ * Get a registered analysis provider by name.
12190
+ * @param name The provider name
12191
+ * @returns The provider instance, or null if not found
12192
+ */
12193
+ getProvider(name) {
12194
+ return this.providers.get(name) ?? null;
12195
+ }
12196
+ /**
12197
+ * Initialize all registered providers by loading existing data from disk.
12198
+ * Should be called after database initialization.
12199
+ * @param tx The transaction to use
12200
+ */
12201
+ async initializeProviders(tx) {
12202
+ const header = await this.getOrCreateAnalysisHeader(tx);
12203
+ const metadata = await this.api.getDocumentInnerMetadata(tx);
12204
+ for (const [name, provider] of this.providers) {
12205
+ if (header[name] !== null) {
12206
+ provider.storageKey = header[name];
12207
+ const raw = await this.api.select(header[name], false, tx);
12208
+ await provider.load(raw, tx);
12209
+ } else {
12210
+ const pk = await this.api.insertAsOverflow(JSON.stringify(null), false, tx);
12211
+ provider.storageKey = pk;
12212
+ header[name] = pk;
12213
+ await this.api.update(metadata.analysis, JSON.stringify(header), tx);
12214
+ await provider.load(null, tx);
12215
+ }
12216
+ }
12217
+ }
12218
+ /**
12219
+ * Notify all realtime providers that documents were inserted.
12220
+ * Data is persisted immediately after each provider processes the mutation.
12221
+ * @param documents The flattened documents that were inserted
12222
+ * @param tx The transaction to use
12223
+ */
12224
+ async notifyInsert(documents, tx) {
12225
+ if (documents.length === 0) return;
12226
+ for (const [name, provider] of this.providers) {
12227
+ if (provider instanceof RealtimeAnalysisProvider) {
12228
+ await provider.onInsert(documents);
12229
+ await this.setAnalysisData(name, await provider.serialize(tx), tx);
12230
+ }
12231
+ }
12232
+ }
12233
+ /**
12234
+ * Notify all realtime providers that documents were deleted.
12235
+ * Data is persisted immediately after each provider processes the mutation.
12236
+ * @param documents The flattened documents that were deleted
12237
+ * @param tx The transaction to use
12238
+ */
12239
+ async notifyDelete(documents, tx) {
12240
+ if (documents.length === 0) return;
12241
+ for (const [name, provider] of this.providers) {
12242
+ if (provider instanceof RealtimeAnalysisProvider) {
12243
+ await provider.onDelete(documents);
12244
+ await this.setAnalysisData(name, await provider.serialize(tx), tx);
12245
+ }
12246
+ }
12247
+ }
12248
+ /**
12249
+ * Notify all realtime providers that documents were updated.
12250
+ * Data is persisted immediately after each provider processes the mutation.
12251
+ * @param pairs Array of { oldDocument, newDocument } pairs
12252
+ * @param tx The transaction to use
12253
+ */
12254
+ async notifyUpdate(pairs, tx) {
12255
+ if (pairs.length === 0) return;
12256
+ for (const [name, provider] of this.providers) {
12257
+ if (provider instanceof RealtimeAnalysisProvider) {
12258
+ await provider.onUpdate(pairs);
12259
+ await this.setAnalysisData(name, await provider.serialize(tx), tx);
12260
+ }
12261
+ }
12262
+ }
12263
+ /**
12264
+ * Flush all interval providers' data to disk.
12265
+ * @param tx The transaction to use (must be a write transaction)
12266
+ */
12267
+ async flush(tx) {
12268
+ for (const [name, provider] of this.providers) {
12269
+ if (provider instanceof IntervalAnalysisProvider) {
12270
+ await this.setAnalysisData(name, await provider.serialize(tx), tx);
12271
+ }
12272
+ }
12273
+ }
12274
+ /**
12275
+ * Get the analysis header row.
12276
+ * Returns null if no analysis header exists yet.
12277
+ * @param tx The transaction to use
12278
+ */
12279
+ async getAnalysisHeader(tx) {
12280
+ const metadata = await this.api.getDocumentInnerMetadata(tx);
12281
+ if (metadata.analysis == null) {
12282
+ return null;
12283
+ }
12284
+ const row = await this.api.select(metadata.analysis, false, tx);
12285
+ if (!row) {
12286
+ return null;
12287
+ }
12288
+ return JSON.parse(row);
12289
+ }
12290
+ /**
12291
+ * Get the analysis header row, creating it if it doesn't exist.
12292
+ * @param tx The transaction to use (must be a write transaction)
12293
+ */
12294
+ async getOrCreateAnalysisHeader(tx) {
12295
+ const metadata = await this.api.getDocumentInnerMetadata(tx);
12296
+ if (metadata.analysis != null) {
12297
+ const row = await this.api.select(metadata.analysis, false, tx);
12298
+ if (row) {
12299
+ return JSON.parse(row);
12300
+ }
12301
+ }
12302
+ const header = {};
12303
+ const pk = await this.api.insertAsOverflow(JSON.stringify(header), false, tx);
12304
+ metadata.analysis = pk;
12305
+ await this.api.updateDocumentInnerMetadata(metadata, tx);
12306
+ return header;
12307
+ }
12308
+ /**
12309
+ * Get analysis data for a specific type as a raw string.
12310
+ * Returns null if the type doesn't exist in the analysis header.
12311
+ * @param type The analysis type name
12312
+ * @param tx The transaction to use
12313
+ */
12314
+ async getAnalysisData(type, tx) {
12315
+ const header = await this.getAnalysisHeader(tx);
12316
+ if (!header || header[type] == null) {
12317
+ return null;
12318
+ }
12319
+ const row = await this.api.select(header[type], false, tx);
12320
+ if (!row) {
12321
+ return null;
12322
+ }
12323
+ return row;
12324
+ }
12325
+ /**
12326
+ * Set analysis data for a specific type.
12327
+ * Creates a new overflow row if the type doesn't exist yet,
12328
+ * or updates the existing row if it does.
12329
+ * @param type The analysis type name
12330
+ * @param data The raw string data to store
12331
+ * @param tx The transaction to use (must be a write transaction)
12332
+ */
12333
+ async setAnalysisData(type, data, tx) {
12334
+ const header = await this.getOrCreateAnalysisHeader(tx);
12335
+ const metadata = await this.api.getDocumentInnerMetadata(tx);
12336
+ if (header[type] != null) {
12337
+ await this.api.update(header[type], data, tx);
12338
+ } else {
12339
+ const pk = await this.api.insertAsOverflow(data, false, tx);
12340
+ header[type] = pk;
12341
+ await this.api.update(metadata.analysis, JSON.stringify(header), tx);
12342
+ }
12343
+ }
12344
+ /**
12345
+ * Delete analysis data for a specific type.
12346
+ * Removes the type entry from the analysis header.
12347
+ * @param type The analysis type name
12348
+ * @param tx The transaction to use (must be a write transaction)
12349
+ */
12350
+ async deleteAnalysisData(type, tx) {
12351
+ const metadata = await this.api.getDocumentInnerMetadata(tx);
12352
+ if (metadata.analysis == null) {
12353
+ return false;
12354
+ }
12355
+ const header = await this.getAnalysisHeader(tx);
12356
+ if (!header || header[type] == null) {
12357
+ return false;
12358
+ }
12359
+ await this.api.delete(header[type], false, tx);
12360
+ delete header[type];
12361
+ await this.api.update(metadata.analysis, JSON.stringify(header), tx);
12362
+ return true;
12363
+ }
12364
+ /**
12365
+ * Check if analysis data exists for a specific type.
12366
+ * @param type The analysis type name
12367
+ * @param tx The transaction to use
12368
+ */
12369
+ async hasAnalysisData(type, tx) {
12370
+ const header = await this.getAnalysisHeader(tx);
12371
+ return header != null && header[type] != null;
12372
+ }
12373
+ /**
12374
+ * Get all registered analysis type names.
12375
+ * @param tx The transaction to use
12376
+ */
12377
+ async getAnalysisTypes(tx) {
12378
+ const header = await this.getAnalysisHeader(tx);
12379
+ if (!header) {
12380
+ return [];
12381
+ }
12382
+ return Object.keys(header);
12383
+ }
12384
+ };
12385
+
11993
12386
  // src/core/documentAPI.ts
11994
12387
  var DocumentDataplyAPI = class extends import_dataply4.DataplyAPI {
11995
12388
  comparator = new DocumentValueComparator();
@@ -12000,6 +12393,7 @@ var DocumentDataplyAPI = class extends import_dataply4.DataplyAPI {
12000
12393
  mutationManager;
12001
12394
  metadataManager;
12002
12395
  documentFormatter;
12396
+ analysisManager;
12003
12397
  constructor(file, options) {
12004
12398
  super(file, options);
12005
12399
  this.optimizer = new Optimizer(this);
@@ -12008,6 +12402,7 @@ var DocumentDataplyAPI = class extends import_dataply4.DataplyAPI {
12008
12402
  this.mutationManager = new MutationManager(this);
12009
12403
  this.metadataManager = new MetadataManager(this);
12010
12404
  this.documentFormatter = new DocumentFormatter();
12405
+ this.analysisManager = new AnalysisManager(this);
12011
12406
  this.hook.onceAfter("init", async (tx, isNewlyCreated) => {
12012
12407
  if (isNewlyCreated) {
12013
12408
  await this.initializeDocumentFile(tx);
@@ -12017,6 +12412,8 @@ var DocumentDataplyAPI = class extends import_dataply4.DataplyAPI {
12017
12412
  }
12018
12413
  const metadata = await this.getDocumentInnerMetadata(tx);
12019
12414
  await this.indexManager.initializeIndices(metadata, isNewlyCreated, tx);
12415
+ this.analysisManager.registerBuiltinProviders();
12416
+ await this.analysisManager.initializeProviders(tx);
12020
12417
  this._initialized = true;
12021
12418
  return tx;
12022
12419
  });
@@ -12036,15 +12433,29 @@ var DocumentDataplyAPI = class extends import_dataply4.DataplyAPI {
12036
12433
  get indexedFields() {
12037
12434
  return this.indexManager.indexedFields;
12038
12435
  }
12436
+ /**
12437
+ * Register an index.
12438
+ * @param name The name of the index
12439
+ * @param option The option of the index
12440
+ * @param tx The transaction to use
12441
+ */
12039
12442
  async registerIndex(name, option, tx) {
12040
12443
  return this.indexManager.registerIndex(name, option, tx);
12041
12444
  }
12042
12445
  /**
12043
12446
  * Drop (remove) a named index.
12447
+ * @param name The name of the index
12448
+ * @param tx The transaction to use
12044
12449
  */
12045
12450
  async dropIndex(name, tx) {
12046
12451
  return this.indexManager.dropIndex(name, tx);
12047
12452
  }
12453
+ /**
12454
+ * Get a document by its primary key.
12455
+ * @param pk The primary key of the document
12456
+ * @param tx The transaction to use
12457
+ * @returns The document
12458
+ */
12048
12459
  async getDocument(pk, tx) {
12049
12460
  return this.runWithDefault(async (tx2) => {
12050
12461
  const row = await this.select(pk, false, tx2);
@@ -12061,6 +12472,16 @@ var DocumentDataplyAPI = class extends import_dataply4.DataplyAPI {
12061
12472
  async backfillIndices(tx) {
12062
12473
  return this.indexManager.backfillIndices(tx);
12063
12474
  }
12475
+ /**
12476
+ * Flush all interval analysis providers, forcing statistics to be recalculated.
12477
+ * Call this after bulk inserts or periodically to keep statistics fresh.
12478
+ * @param tx The transaction to use
12479
+ */
12480
+ async flushAnalysis(tx) {
12481
+ return this.runWithDefaultWrite(async (tx2) => {
12482
+ await this.analysisManager.flush(tx2);
12483
+ }, tx);
12484
+ }
12064
12485
  createDocumentInnerMetadata(indices) {
12065
12486
  return {
12066
12487
  magicString: "document-dataply",
@@ -12072,16 +12493,28 @@ var DocumentDataplyAPI = class extends import_dataply4.DataplyAPI {
12072
12493
  indices
12073
12494
  };
12074
12495
  }
12496
+ /**
12497
+ * Initialize the document database file.
12498
+ * @param tx The transaction to use
12499
+ */
12075
12500
  async initializeDocumentFile(tx) {
12076
12501
  const metadata = await this.select(1, false, tx);
12077
12502
  if (metadata) {
12078
12503
  throw new Error("Document metadata already exists");
12079
12504
  }
12080
12505
  const metaObj = this.createDocumentInnerMetadata({
12081
- _id: [-1, { type: "btree", fields: ["_id"] }]
12506
+ _id: [-1, {
12507
+ type: "btree",
12508
+ fields: ["_id"]
12509
+ }]
12082
12510
  });
12083
12511
  await this.insertAsOverflow(JSON.stringify(metaObj), false, tx);
12084
12512
  }
12513
+ /**
12514
+ * Verify the document database file.
12515
+ * @param tx The transaction to use
12516
+ * @returns True if the document database file is valid, false otherwise
12517
+ */
12085
12518
  async verifyDocumentFile(tx) {
12086
12519
  const row = await this.select(1, false, tx);
12087
12520
  if (!row) {
@@ -12098,12 +12531,27 @@ var DocumentDataplyAPI = class extends import_dataply4.DataplyAPI {
12098
12531
  flattenDocument(document) {
12099
12532
  return this.documentFormatter.flattenDocument(document);
12100
12533
  }
12534
+ /**
12535
+ * Get the document metadata.
12536
+ * @param tx The transaction to use
12537
+ * @returns The document metadata
12538
+ */
12101
12539
  async getDocumentMetadata(tx) {
12102
12540
  return this.metadataManager.getDocumentMetadata(tx);
12103
12541
  }
12542
+ /**
12543
+ * Get the document inner metadata.
12544
+ * @param tx The transaction to use
12545
+ * @returns The document inner metadata
12546
+ */
12104
12547
  async getDocumentInnerMetadata(tx) {
12105
12548
  return this.metadataManager.getDocumentInnerMetadata(tx);
12106
12549
  }
12550
+ /**
12551
+ * Update the document inner metadata.
12552
+ * @param metadata The document inner metadata
12553
+ * @param tx The transaction to use
12554
+ */
12107
12555
  async updateDocumentInnerMetadata(metadata, tx) {
12108
12556
  return this.metadataManager.updateDocumentInnerMetadata(metadata, tx);
12109
12557
  }
@@ -12255,6 +12703,14 @@ var DocumentDataply = class _DocumentDataply {
12255
12703
  await this.api.init();
12256
12704
  await this.api.backfillIndices();
12257
12705
  }
12706
+ /**
12707
+ * Flush all interval analysis providers, forcing statistics to be recalculated.
12708
+ * Call this after bulk inserts or periodically to keep FTS statistics fresh.
12709
+ * @param tx Optional transaction
12710
+ */
12711
+ async flushAnalysis(tx) {
12712
+ return this.api.flushAnalysis(tx);
12713
+ }
12258
12714
  /**
12259
12715
  * Run a migration if the current schemeVersion is lower than the target version.
12260
12716
  * The callback is only executed when the database's schemeVersion is below the given version.
@@ -0,0 +1,105 @@
1
+ import type { AnalysisHeader, DocumentJSON, FlattenedDocumentJSON } from '../types';
2
+ import type { DocumentDataplyAPI } from './documentAPI';
3
+ import type { AnalysisProvider } from './AnalysisProvider';
4
+ import { Transaction } from 'dataply';
5
+ export declare class AnalysisManager<T extends DocumentJSON> {
6
+ private api;
7
+ private providers;
8
+ constructor(api: DocumentDataplyAPI<T>);
9
+ /**
10
+ * Register all built-in analysis providers.
11
+ * Each provider class is instantiated with the API reference and registered.
12
+ */
13
+ registerBuiltinProviders(): void;
14
+ /**
15
+ * Register an analysis provider.
16
+ * @param provider The provider instance to register
17
+ */
18
+ registerProvider(provider: AnalysisProvider<T>): void;
19
+ /**
20
+ * Get a registered analysis provider by name.
21
+ * @param name The provider name
22
+ * @returns The provider instance, or null if not found
23
+ */
24
+ getProvider<P extends AnalysisProvider<T> = AnalysisProvider<T>>(name: string): P | null;
25
+ /**
26
+ * Initialize all registered providers by loading existing data from disk.
27
+ * Should be called after database initialization.
28
+ * @param tx The transaction to use
29
+ */
30
+ initializeProviders(tx: Transaction): Promise<void>;
31
+ /**
32
+ * Notify all realtime providers that documents were inserted.
33
+ * Data is persisted immediately after each provider processes the mutation.
34
+ * @param documents The flattened documents that were inserted
35
+ * @param tx The transaction to use
36
+ */
37
+ notifyInsert(documents: FlattenedDocumentJSON[], tx: Transaction): Promise<void>;
38
+ /**
39
+ * Notify all realtime providers that documents were deleted.
40
+ * Data is persisted immediately after each provider processes the mutation.
41
+ * @param documents The flattened documents that were deleted
42
+ * @param tx The transaction to use
43
+ */
44
+ notifyDelete(documents: FlattenedDocumentJSON[], tx: Transaction): Promise<void>;
45
+ /**
46
+ * Notify all realtime providers that documents were updated.
47
+ * Data is persisted immediately after each provider processes the mutation.
48
+ * @param pairs Array of { oldDocument, newDocument } pairs
49
+ * @param tx The transaction to use
50
+ */
51
+ notifyUpdate(pairs: {
52
+ oldDocument: FlattenedDocumentJSON;
53
+ newDocument: FlattenedDocumentJSON;
54
+ }[], tx: Transaction): Promise<void>;
55
+ /**
56
+ * Flush all interval providers' data to disk.
57
+ * @param tx The transaction to use (must be a write transaction)
58
+ */
59
+ flush(tx: Transaction): Promise<void>;
60
+ /**
61
+ * Get the analysis header row.
62
+ * Returns null if no analysis header exists yet.
63
+ * @param tx The transaction to use
64
+ */
65
+ getAnalysisHeader(tx: Transaction): Promise<AnalysisHeader | null>;
66
+ /**
67
+ * Get the analysis header row, creating it if it doesn't exist.
68
+ * @param tx The transaction to use (must be a write transaction)
69
+ */
70
+ getOrCreateAnalysisHeader(tx: Transaction): Promise<AnalysisHeader>;
71
+ /**
72
+ * Get analysis data for a specific type as a raw string.
73
+ * Returns null if the type doesn't exist in the analysis header.
74
+ * @param type The analysis type name
75
+ * @param tx The transaction to use
76
+ */
77
+ getAnalysisData(type: string, tx: Transaction): Promise<string | null>;
78
+ /**
79
+ * Set analysis data for a specific type.
80
+ * Creates a new overflow row if the type doesn't exist yet,
81
+ * or updates the existing row if it does.
82
+ * @param type The analysis type name
83
+ * @param data The raw string data to store
84
+ * @param tx The transaction to use (must be a write transaction)
85
+ */
86
+ setAnalysisData(type: string, data: string, tx: Transaction): Promise<void>;
87
+ /**
88
+ * Delete analysis data for a specific type.
89
+ * Removes the type entry from the analysis header.
90
+ * @param type The analysis type name
91
+ * @param tx The transaction to use (must be a write transaction)
92
+ */
93
+ deleteAnalysisData(type: string, tx: Transaction): Promise<boolean>;
94
+ /**
95
+ * Check if analysis data exists for a specific type.
96
+ * @param type The analysis type name
97
+ * @param tx The transaction to use
98
+ */
99
+ hasAnalysisData(type: string, tx: Transaction): Promise<boolean>;
100
+ /**
101
+ * Get all registered analysis type names.
102
+ * @param tx The transaction to use
103
+ */
104
+ getAnalysisTypes(tx: Transaction): Promise<string[]>;
105
+ }
@@ -0,0 +1,30 @@
1
+ import type { DocumentJSON } from '../types';
2
+ import type { DocumentDataplyAPI } from './documentAPI';
3
+ import type { Transaction } from 'dataply';
4
+ /**
5
+ * Abstract base class for analysis providers.
6
+ * Subclasses should extend either RealtimeAnalysisProvider or IntervalAnalysisProvider.
7
+ */
8
+ export declare abstract class AnalysisProvider<T extends DocumentJSON = DocumentJSON> {
9
+ protected api: DocumentDataplyAPI<T>;
10
+ /** Overflow row PK assigned by AnalysisManager during initialization. */
11
+ storageKey: number;
12
+ constructor(api: DocumentDataplyAPI<T>);
13
+ /**
14
+ * Unique name of this analysis type (e.g. 'ftsTermCount').
15
+ * Used as the key in the AnalysisHeader.
16
+ */
17
+ abstract readonly name: string;
18
+ /**
19
+ * Load existing statistics data from a raw string.
20
+ * Called during initialization to restore state from disk.
21
+ * @param raw The raw string from the overflow row, or null if no data exists yet
22
+ * @param tx The transaction to use
23
+ */
24
+ abstract load(raw: string | null, tx: Transaction): Promise<void>;
25
+ /**
26
+ * Serialize the current statistics data to a raw string for storage.
27
+ * @param tx The transaction to use
28
+ */
29
+ abstract serialize(tx: Transaction): Promise<string>;
30
+ }
@@ -1,10 +1,5 @@
1
1
  import type { DocumentJSON, FlattenedDocumentJSON } from '../types';
2
2
  export declare class DocumentFormatter<T extends DocumentJSON> {
3
3
  private flattenInternal;
4
- /**
5
- * returns flattened document
6
- * @param document
7
- * @returns
8
- */
9
4
  flattenDocument(document: T): FlattenedDocumentJSON;
10
5
  }
@@ -0,0 +1,31 @@
1
+ import type { DocumentJSON, DataplyDocument } from '../types';
2
+ import { AnalysisProvider } from './AnalysisProvider';
3
+ import type { Transaction } from 'dataply';
4
+ /**
5
+ * Sampling options for interval analysis providers.
6
+ * Specify either a ratio (0~1) or an exact count.
7
+ */
8
+ export type SampleOptions = {
9
+ /** Ratio of documents to sample (0 exclusive ~ 1 inclusive) */
10
+ rate: number;
11
+ count?: never;
12
+ } | {
13
+ rate?: never;
14
+ /** Exact number of documents to sample */
15
+ count: number;
16
+ };
17
+ /**
18
+ * Abstract base class for interval analysis providers.
19
+ * Data is accumulated in memory and persisted only when flush() is called.
20
+ * No mutation hooks — state is computed independently (e.g. on a schedule or at init).
21
+ */
22
+ export declare abstract class IntervalAnalysisProvider<T extends DocumentJSON = DocumentJSON> extends AnalysisProvider<T> {
23
+ /**
24
+ * Sample random documents from the entire dataset.
25
+ * Fetches only PK index, then reads only the selected documents from disk.
26
+ * @param sampleOptions Sampling strategy — either `{ rate }` or `{ count }`
27
+ * @param tx Optional transaction
28
+ * @returns Randomly selected documents
29
+ */
30
+ sample(sampleOptions: SampleOptions, tx?: Transaction): Promise<DataplyDocument<T>[]>;
31
+ }
@@ -7,12 +7,5 @@ export declare class MetadataManager<T extends DocumentJSON> {
7
7
  getDocumentMetadata(tx: Transaction): Promise<DocumentDataplyMetadata>;
8
8
  getDocumentInnerMetadata(tx: Transaction): Promise<DocumentDataplyInnerMetadata>;
9
9
  updateDocumentInnerMetadata(metadata: DocumentDataplyInnerMetadata, tx: Transaction): Promise<void>;
10
- /**
11
- * Run a migration if the current schemeVersion is lower than the target version.
12
- * After the callback completes, schemeVersion is updated to the target version.
13
- * @param version The target scheme version
14
- * @param callback The migration callback
15
- * @param tx Optional transaction
16
- */
17
10
  migration(version: number, callback: (tx: Transaction) => Promise<void>, tx?: Transaction): Promise<void>;
18
11
  }
@@ -5,49 +5,10 @@ export declare class MutationManager<T extends DocumentJSON> {
5
5
  private api;
6
6
  constructor(api: DocumentDataplyAPI<T>);
7
7
  private insertDocumentInternal;
8
- /**
9
- * Insert a document into the database
10
- * @param document The document to insert
11
- * @param tx The transaction to use
12
- * @returns The primary key of the inserted document
13
- */
14
8
  insertSingleDocument(document: T, tx?: Transaction): Promise<number>;
15
- /**
16
- * Insert a batch of documents into the database
17
- * @param documents The documents to insert
18
- * @param tx The transaction to use
19
- * @returns The primary keys of the inserted documents
20
- */
21
9
  insertBatchDocuments(documents: T[], tx?: Transaction): Promise<number[]>;
22
- /**
23
- * Internal update method used by both fullUpdate and partialUpdate
24
- * @param query The query to use
25
- * @param computeUpdatedDoc Function that computes the updated document from the original
26
- * @param tx The transaction to use
27
- * @returns The number of updated documents
28
- */
29
10
  private updateInternal;
30
- /**
31
- * Fully update documents from the database that match the query
32
- * @param query The query to use
33
- * @param newRecord Complete document to replace with, or function that receives current document and returns new document
34
- * @param tx The transaction to use
35
- * @returns The number of updated documents
36
- */
37
11
  fullUpdate(query: Partial<DocumentDataplyQuery<T>>, newRecord: T | ((document: DataplyDocument<T>) => T), tx?: Transaction): Promise<number>;
38
- /**
39
- * Partially update documents from the database that match the query
40
- * @param query The query to use
41
- * @param newRecord Partial document to merge, or function that receives current document and returns partial update
42
- * @param tx The transaction to use
43
- * @returns The number of updated documents
44
- */
45
12
  partialUpdate(query: Partial<DocumentDataplyQuery<T>>, newRecord: Partial<DataplyDocument<T>> | ((document: DataplyDocument<T>) => Partial<DataplyDocument<T>>), tx?: Transaction): Promise<number>;
46
- /**
47
- * Delete documents from the database that match the query
48
- * @param query The query to use
49
- * @param tx The transaction to use
50
- * @returns The number of deleted documents
51
- */
52
13
  deleteDocuments(query: Partial<DocumentDataplyQuery<T>>, tx?: Transaction): Promise<number>;
53
14
  }
@@ -20,6 +20,7 @@ export declare class Optimizer<T extends Record<string, any>> {
20
20
  } | null;
21
21
  /**
22
22
  * FTS 타입 인덱스의 선택도를 평가합니다.
23
+ * FTSTermCount 통계가 있으면 토큰 빈도 기반 동적 score를 산출합니다.
23
24
  */
24
25
  evaluateFTSCandidate<U extends Partial<DocumentDataplyQuery<T>>, V extends DataplyTreeValue<U>>(indexName: string, config: any, query: Partial<DocumentDataplyQuery<V>>, queryFields: Set<string>, treeTx: BPTreeAsync<string | number, V>): {
25
26
  readonly tree: BPTreeAsync<string | number, V>;
@@ -28,7 +29,7 @@ export declare class Optimizer<T extends Record<string, any>> {
28
29
  readonly indexName: string;
29
30
  readonly isFtsMatch: true;
30
31
  readonly matchTokens: string[];
31
- readonly score: 90;
32
+ readonly score: number;
32
33
  readonly compositeVerifyFields: readonly [];
33
34
  readonly coveredFields: readonly [any];
34
35
  readonly isIndexOrderSupported: false;
@@ -27,6 +27,7 @@ export declare class QueryManager<T extends DocumentJSON> {
27
27
  indexName: string;
28
28
  isFtsMatch: boolean;
29
29
  matchTokens?: string[];
30
+ coveredFields?: string[];
30
31
  }[];
31
32
  compositeVerifyConditions: {
32
33
  field: string;
@@ -58,6 +59,7 @@ export declare class QueryManager<T extends DocumentJSON> {
58
59
  indexName: string;
59
60
  isFtsMatch: boolean;
60
61
  matchTokens?: string[];
62
+ coveredFields?: string[];
61
63
  }[], tx: any): AsyncGenerator<DataplyDocument<T>>;
62
64
  /**
63
65
  * Count documents from the database that match the query
@@ -0,0 +1,27 @@
1
+ import type { FlattenedDocumentJSON, DocumentJSON } from '../types';
2
+ import { AnalysisProvider } from './AnalysisProvider';
3
+ /**
4
+ * Abstract base class for realtime analysis providers.
5
+ * Mutation hooks (onInsert, onDelete, onUpdate) are called on every mutation
6
+ * and the result is persisted immediately.
7
+ */
8
+ export declare abstract class RealtimeAnalysisProvider<T extends DocumentJSON = DocumentJSON> extends AnalysisProvider<T> {
9
+ /**
10
+ * Called when documents are inserted.
11
+ * @param documents The flattened documents that were inserted
12
+ */
13
+ abstract onInsert(documents: FlattenedDocumentJSON[]): Promise<void>;
14
+ /**
15
+ * Called when documents are deleted.
16
+ * @param documents The flattened documents that were deleted
17
+ */
18
+ abstract onDelete(documents: FlattenedDocumentJSON[]): Promise<void>;
19
+ /**
20
+ * Called when documents are updated.
21
+ * @param pairs Array of { oldDocument, newDocument } pairs
22
+ */
23
+ abstract onUpdate(pairs: {
24
+ oldDocument: FlattenedDocumentJSON;
25
+ newDocument: FlattenedDocumentJSON;
26
+ }[]): Promise<void>;
27
+ }
@@ -0,0 +1,28 @@
1
+ import type { DocumentJSON } from '../../types';
2
+ import { Transaction } from 'dataply';
3
+ import { IntervalAnalysisProvider } from '../IntervalAnalysisProvider';
4
+ export declare class FTSTermCount<T extends DocumentJSON = DocumentJSON> extends IntervalAnalysisProvider<T> {
5
+ readonly name = "fts_term_count";
6
+ private termCount;
7
+ private sampleSize;
8
+ serialize(tx: Transaction): Promise<string>;
9
+ load(data: string | null, tx: Transaction): Promise<void>;
10
+ /**
11
+ * 특정 field/strategy/token의 문서 빈도를 반환합니다.
12
+ * 통계에 없으면 0을 반환합니다.
13
+ */
14
+ getTermCount(field: string, strategy: string, token: string): number;
15
+ /**
16
+ * 쿼리 토큰 배열에서 최소 빈도(AND 시맨틱스 상한선)를 반환합니다.
17
+ * 통계가 없거나 sampleSize가 0이면 -1을 반환합니다.
18
+ */
19
+ getMinTokenCount(field: string, strategy: string, tokens: string[]): number;
20
+ /**
21
+ * 통계가 유효한지 여부를 반환합니다.
22
+ */
23
+ get hasSampleData(): boolean;
24
+ /**
25
+ * 통계 수집 시 사용된 샘플 크기를 반환합니다.
26
+ */
27
+ getSampleSize(): number;
28
+ }
@@ -0,0 +1,2 @@
1
+ import { FTSTermCount } from './FTSTermCount';
2
+ export declare const BuiltinAnalysisProviders: (typeof FTSTermCount)[];
@@ -52,6 +52,12 @@ export declare class DocumentDataply<T extends DocumentJSON> {
52
52
  * Initialize the document database
53
53
  */
54
54
  init(): Promise<void>;
55
+ /**
56
+ * Flush all interval analysis providers, forcing statistics to be recalculated.
57
+ * Call this after bulk inserts or periodically to keep FTS statistics fresh.
58
+ * @param tx Optional transaction
59
+ */
60
+ flushAnalysis(tx?: Transaction): Promise<void>;
55
61
  /**
56
62
  * Run a migration if the current schemeVersion is lower than the target version.
57
63
  * The callback is only executed when the database's schemeVersion is below the given version.
@@ -7,6 +7,7 @@ import { IndexManager } from './IndexManager';
7
7
  import { MutationManager } from './MutationManager';
8
8
  import { MetadataManager } from './MetadataManager';
9
9
  import { DocumentFormatter } from './DocumentFormatter';
10
+ import { AnalysisManager } from './AnalysisManager';
10
11
  export declare class DocumentDataplyAPI<T extends DocumentJSON> extends DataplyAPI {
11
12
  runWithDefault: <T_1>(callback: (tx: Transaction) => Promise<T_1>, tx?: Transaction) => Promise<T_1>;
12
13
  runWithDefaultWrite: <T_1>(callback: (tx: Transaction) => Promise<T_1>, tx?: Transaction) => Promise<T_1>;
@@ -19,6 +20,7 @@ export declare class DocumentDataplyAPI<T extends DocumentJSON> extends DataplyA
19
20
  readonly mutationManager: MutationManager<T>;
20
21
  readonly metadataManager: MetadataManager<T>;
21
22
  readonly documentFormatter: DocumentFormatter<T>;
23
+ readonly analysisManager: AnalysisManager<T>;
22
24
  constructor(file: string, options: DocumentDataplyOptions);
23
25
  /**
24
26
  * Whether the document database has been initialized.
@@ -29,19 +31,48 @@ export declare class DocumentDataplyAPI<T extends DocumentJSON> extends DataplyA
29
31
  };
30
32
  get trees(): Map<string, import("dataply").BPTreeAsync<string | number, import("../types").DataplyTreeValue<import("../types").Primitive>>>;
31
33
  get indexedFields(): Set<string>;
34
+ /**
35
+ * Register an index.
36
+ * @param name The name of the index
37
+ * @param option The option of the index
38
+ * @param tx The transaction to use
39
+ */
32
40
  registerIndex(name: string, option: CreateIndexOption<T>, tx?: Transaction): Promise<void>;
33
41
  /**
34
42
  * Drop (remove) a named index.
43
+ * @param name The name of the index
44
+ * @param tx The transaction to use
35
45
  */
36
46
  dropIndex(name: string, tx?: Transaction): Promise<void>;
47
+ /**
48
+ * Get a document by its primary key.
49
+ * @param pk The primary key of the document
50
+ * @param tx The transaction to use
51
+ * @returns The document
52
+ */
37
53
  getDocument(pk: number, tx?: Transaction): Promise<DataplyDocument<T>>;
38
54
  /**
39
55
  * Backfill indices for newly created indices after data was inserted.
40
56
  * Delegated to IndexManager.
41
57
  */
42
58
  backfillIndices(tx?: Transaction): Promise<number>;
59
+ /**
60
+ * Flush all interval analysis providers, forcing statistics to be recalculated.
61
+ * Call this after bulk inserts or periodically to keep statistics fresh.
62
+ * @param tx The transaction to use
63
+ */
64
+ flushAnalysis(tx?: Transaction): Promise<void>;
43
65
  createDocumentInnerMetadata(indices: DocumentDataplyInnerMetadata['indices']): DocumentDataplyInnerMetadata;
66
+ /**
67
+ * Initialize the document database file.
68
+ * @param tx The transaction to use
69
+ */
44
70
  initializeDocumentFile(tx: Transaction): Promise<void>;
71
+ /**
72
+ * Verify the document database file.
73
+ * @param tx The transaction to use
74
+ * @returns True if the document database file is valid, false otherwise
75
+ */
45
76
  verifyDocumentFile(tx: Transaction): Promise<boolean>;
46
77
  /**
47
78
  * returns flattened document
@@ -49,8 +80,23 @@ export declare class DocumentDataplyAPI<T extends DocumentJSON> extends DataplyA
49
80
  * @returns
50
81
  */
51
82
  flattenDocument(document: T): FlattenedDocumentJSON;
83
+ /**
84
+ * Get the document metadata.
85
+ * @param tx The transaction to use
86
+ * @returns The document metadata
87
+ */
52
88
  getDocumentMetadata(tx: Transaction): Promise<DocumentDataplyMetadata>;
89
+ /**
90
+ * Get the document inner metadata.
91
+ * @param tx The transaction to use
92
+ * @returns The document inner metadata
93
+ */
53
94
  getDocumentInnerMetadata(tx: Transaction): Promise<DocumentDataplyInnerMetadata>;
95
+ /**
96
+ * Update the document inner metadata.
97
+ * @param metadata The document inner metadata
98
+ * @param tx The transaction to use
99
+ */
54
100
  updateDocumentInnerMetadata(metadata: DocumentDataplyInnerMetadata, tx: Transaction): Promise<void>;
55
101
  /**
56
102
  * Run a migration if the current schemeVersion is lower than the target version.
@@ -39,6 +39,14 @@ export interface DocumentDataplyInnerMetadata {
39
39
  IndexMetaConfig
40
40
  ];
41
41
  };
42
+ analysis?: number;
43
+ }
44
+ /**
45
+ * Analysis header row structure.
46
+ * Maps analysis type names to their overflow row PKs.
47
+ */
48
+ export interface AnalysisHeader {
49
+ [type: string]: number;
42
50
  }
43
51
  export interface DocumentDataplyMetadata {
44
52
  /**
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "document-dataply",
3
- "version": "0.0.10-alpha.4",
3
+ "version": "0.0.10-alpha.5",
4
4
  "description": "Simple and powerful JSON document database supporting complex queries and flexible indexing policies.",
5
5
  "license": "MIT",
6
6
  "author": "izure <admin@izure.org>",