@prisma/streams-server 0.1.1 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CONTRIBUTING.md +8 -0
- package/package.json +2 -1
- package/src/app.ts +290 -17
- package/src/app_core.ts +1833 -698
- package/src/app_local.ts +144 -4
- package/src/auto_tune.ts +62 -0
- package/src/bootstrap.ts +159 -1
- package/src/concurrency_gate.ts +108 -0
- package/src/config.ts +116 -14
- package/src/db/db.ts +1201 -131
- package/src/db/schema.ts +308 -8
- package/src/foreground_activity.ts +55 -0
- package/src/index/indexer.ts +254 -124
- package/src/index/lexicon_file_cache.ts +261 -0
- package/src/index/lexicon_format.ts +93 -0
- package/src/index/lexicon_indexer.ts +789 -0
- package/src/index/secondary_indexer.ts +824 -0
- package/src/index/secondary_schema.ts +105 -0
- package/src/ingest.ts +10 -12
- package/src/manifest.ts +143 -8
- package/src/memory.ts +183 -8
- package/src/metrics.ts +15 -29
- package/src/metrics_emitter.ts +26 -3
- package/src/notifier.ts +121 -5
- package/src/objectstore/accounting.ts +92 -0
- package/src/objectstore/mock_r2.ts +1 -1
- package/src/objectstore/r2.ts +17 -1
- package/src/profiles/evlog/schema.ts +234 -0
- package/src/profiles/evlog.ts +299 -0
- package/src/profiles/generic.ts +47 -0
- package/src/profiles/index.ts +205 -0
- package/src/profiles/metrics/block_format.ts +109 -0
- package/src/profiles/metrics/normalize.ts +366 -0
- package/src/profiles/metrics/schema.ts +319 -0
- package/src/profiles/metrics.ts +85 -0
- package/src/profiles/profile.ts +225 -0
- package/src/{touch/engine.ts → profiles/stateProtocol/changes.ts} +3 -20
- package/src/profiles/stateProtocol/routes.ts +389 -0
- package/src/profiles/stateProtocol/types.ts +6 -0
- package/src/profiles/stateProtocol/validation.ts +51 -0
- package/src/profiles/stateProtocol.ts +100 -0
- package/src/read_filter.ts +468 -0
- package/src/reader.ts +2151 -164
- package/src/runtime/host_runtime.ts +5 -0
- package/src/runtime_memory.ts +200 -0
- package/src/runtime_memory_sampler.ts +235 -0
- package/src/schema/read_json.ts +43 -0
- package/src/schema/registry.ts +563 -59
- package/src/search/agg_format.ts +638 -0
- package/src/search/aggregate.ts +389 -0
- package/src/search/binary/codec.ts +162 -0
- package/src/search/binary/docset.ts +67 -0
- package/src/search/binary/restart_strings.ts +181 -0
- package/src/search/binary/varint.ts +34 -0
- package/src/search/bitset.ts +19 -0
- package/src/search/col_format.ts +382 -0
- package/src/search/col_runtime.ts +59 -0
- package/src/search/column_encoding.ts +43 -0
- package/src/search/companion_file_cache.ts +319 -0
- package/src/search/companion_format.ts +313 -0
- package/src/search/companion_manager.ts +1086 -0
- package/src/search/companion_plan.ts +218 -0
- package/src/search/fts_format.ts +423 -0
- package/src/search/fts_runtime.ts +333 -0
- package/src/search/query.ts +875 -0
- package/src/search/schema.ts +245 -0
- package/src/segment/cache.ts +93 -2
- package/src/segment/cached_segment.ts +89 -0
- package/src/segment/format.ts +108 -36
- package/src/segment/segmenter.ts +79 -5
- package/src/segment/segmenter_worker.ts +35 -6
- package/src/segment/segmenter_workers.ts +42 -12
- package/src/server.ts +150 -36
- package/src/sqlite/adapter.ts +185 -14
- package/src/sqlite/runtime_stats.ts +163 -0
- package/src/stats.ts +3 -3
- package/src/stream_size_reconciler.ts +100 -0
- package/src/touch/canonical_change.ts +7 -0
- package/src/touch/live_metrics.ts +94 -64
- package/src/touch/live_templates.ts +15 -1
- package/src/touch/manager.ts +166 -88
- package/src/touch/{interpreter_worker.ts → processor_worker.ts} +19 -14
- package/src/touch/spec.ts +95 -92
- package/src/touch/touch_journal.ts +4 -0
- package/src/touch/worker_pool.ts +8 -14
- package/src/touch/worker_protocol.ts +3 -3
- package/src/uploader.ts +77 -6
- package/src/util/bloom256.ts +2 -2
- package/src/util/byte_lru.ts +73 -0
- package/src/util/lru.ts +8 -0
- package/src/util/stream_paths.ts +19 -0
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
import { createHash } from "node:crypto";
|
|
2
|
+
import { Result } from "better-result";
|
|
3
|
+
import type {
|
|
4
|
+
SchemaRegistry,
|
|
5
|
+
SearchFieldBinding,
|
|
6
|
+
SearchFieldConfig,
|
|
7
|
+
SearchFieldKind,
|
|
8
|
+
SearchRollupMeasureConfig,
|
|
9
|
+
} from "../schema/registry";
|
|
10
|
+
import { parseDurationMsResult } from "../util/duration";
|
|
11
|
+
import { dsError } from "../util/ds_error";
|
|
12
|
+
|
|
13
|
+
// The four companion-file families a plan can request:
// "col" = per-field column data, "fts" = full-text/prefix index,
// "agg" = rollup aggregates, "mblk" = metrics blocks.
export type SearchCompanionFamily = "col" | "fts" | "agg" | "mblk";

// One searchable field in the plan. `ordinal` is the field's stable position
// in the plan's sorted field list and is what the binary formats store.
export type SearchCompanionPlanField = {
  ordinal: number;
  name: string;
  kind: SearchFieldKind;
  // Per-schema-version JSON pointer bindings used to extract the field value.
  bindings: SearchFieldBinding[];
  normalizer: SearchFieldConfig["normalizer"] | null;
  analyzer: SearchFieldConfig["analyzer"] | null;
  // Capability flags; all default to false when absent in the registry config.
  exact: boolean;
  prefix: boolean;
  column: boolean;
  exists: boolean;
  sortable: boolean;
  aggregatable: boolean;
  contains: boolean;
  positions: boolean;
};

// One measure inside a rollup. `field_ordinal` refers into the plan's field
// list and is only set for "summary" measures (null otherwise).
export type SearchCompanionPlanRollupMeasure = {
  ordinal: number;
  name: string;
  kind: SearchRollupMeasureConfig["kind"];
  field_ordinal: number | null;
  // Histogram encoding for summary measures; only "log2_v1" is supported.
  histogram: "log2_v1" | null;
};

// One time bucket of a rollup; `ms` is the parsed duration of `name`.
export type SearchCompanionPlanRollupInterval = {
  ordinal: number;
  name: string;
  ms: number;
};

// A rollup definition with all names resolved to plan ordinals.
export type SearchCompanionPlanRollup = {
  ordinal: number;
  name: string;
  timestamp_field_ordinal: number | null;
  dimension_ordinals: number[];
  intervals: SearchCompanionPlanRollupInterval[];
  measures: SearchCompanionPlanRollupMeasure[];
};

// The full desired companion plan derived from a SchemaRegistry.
// `summary` is a free-form, human/debug-oriented echo of the plan contents;
// the whole object is hashed (see hashSearchCompanionPlan), so any change to
// it — including `summary` — changes the plan identity.
export type SearchCompanionPlan = {
  families: Record<SearchCompanionFamily, boolean>;
  fields: SearchCompanionPlanField[];
  rollups: SearchCompanionPlanRollup[];
  summary: Record<string, unknown>;
};
|
|
61
|
+
|
|
62
|
+
/**
 * Derives the desired companion plan from a schema registry.
 *
 * Field ordinals are assigned by sorting the wanted field names with
 * localeCompare; rollups, intervals, measures and dimensions are likewise
 * sorted by name so the resulting plan — and therefore its hash — is
 * deterministic for a given registry.
 *
 * Throws (via dsError) when a rollup interval string fails duration parsing.
 */
export function buildDesiredSearchCompanionPlan(registry: SchemaRegistry): SearchCompanionPlan {
  const search = registry.search;
  const families: Record<SearchCompanionFamily, boolean> = {
    col: false,
    fts: false,
    agg: false,
    mblk: false,
  };
  if (!search) {
    // No search config: empty plan with all families disabled.
    return { families, fields: [], rollups: [], summary: { search: null } };
  }

  // Collect every field name the companions will need: columnar fields,
  // FTS-eligible fields (text, or keyword with prefix), plus any field
  // referenced by a rollup (timestamp, dimensions, summary-measure fields).
  const wantedFieldNames = new Set<string>();
  for (const [name, field] of Object.entries(search.fields)) {
    if (field.column === true) wantedFieldNames.add(name);
    if (field.kind === "text" || (field.kind === "keyword" && field.prefix === true)) wantedFieldNames.add(name);
  }
  for (const rollup of Object.values(search.rollups ?? {})) {
    const timestampField = rollup.timestampField ?? search.primaryTimestampField;
    if (timestampField) wantedFieldNames.add(timestampField);
    for (const dimension of rollup.dimensions ?? []) wantedFieldNames.add(dimension);
    for (const measure of Object.values(rollup.measures)) {
      if (measure.kind === "summary") wantedFieldNames.add(measure.field);
    }
  }

  // Assign stable ordinals by sorted name; the binary companion formats
  // reference fields by these ordinals.
  const orderedFieldNames = Array.from(wantedFieldNames).sort((a, b) => a.localeCompare(b));
  const fieldOrdinalByName = new Map<string, number>();
  const fields = orderedFieldNames.map((name, ordinal) => {
    // Non-null assertion: every wanted name was taken from search.fields or
    // from rollup references — presumably validated upstream; TODO confirm
    // rollup field references are guaranteed to exist in search.fields.
    const field = search.fields[name]!;
    fieldOrdinalByName.set(name, ordinal);
    return {
      ordinal,
      name,
      kind: field.kind,
      bindings: field.bindings.map((binding) => ({ version: binding.version, jsonPointer: binding.jsonPointer })),
      normalizer: field.normalizer ?? null,
      analyzer: field.analyzer ?? null,
      exact: field.exact === true,
      prefix: field.prefix === true,
      column: field.column === true,
      exists: field.exists === true,
      sortable: field.sortable === true,
      aggregatable: field.aggregatable === true,
      contains: field.contains === true,
      positions: field.positions === true,
    } satisfies SearchCompanionPlanField;
  });

  const colFields = fields.filter((field) => field.column);
  const ftsFields = fields.filter((field) => field.kind === "text" || (field.kind === "keyword" && field.prefix));
  const rollups = Object.entries(search.rollups ?? {})
    .sort((a, b) => a[0].localeCompare(b[0]))
    .map(([name, rollup], rollupOrdinal) => {
      // NOTE(review): intervals are sorted with default Array.prototype.sort,
      // i.e. lexicographically on the interval *strings* ("10m" < "5m"), not
      // by duration. Interval ordinals therefore follow string order — looks
      // deliberate for determinism, but confirm consumers don't assume
      // duration order.
      const intervals = [...rollup.intervals]
        .sort()
        .map((intervalName, intervalOrdinal) => {
          const parsed = parseDurationMsResult(intervalName);
          if (Result.isError(parsed)) {
            throw dsError(parsed.error.message);
          }
          return {
            ordinal: intervalOrdinal,
            name: intervalName,
            ms: parsed.value,
          } satisfies SearchCompanionPlanRollupInterval;
        });
      const measures = Object.entries(rollup.measures)
        .sort((a, b) => a[0].localeCompare(b[0]))
        .map(([measureName, measure], measureOrdinal) => ({
          ordinal: measureOrdinal,
          name: measureName,
          kind: measure.kind,
          // Only summary measures reference a field/histogram; others get null.
          field_ordinal: measure.kind === "summary" ? (fieldOrdinalByName.get(measure.field) ?? null) : null,
          histogram: measure.kind === "summary" ? measure.histogram ?? null : null,
        }));
      return {
        ordinal: rollupOrdinal,
        name,
        timestamp_field_ordinal: fieldOrdinalByName.get(rollup.timestampField ?? search.primaryTimestampField) ?? null,
        // Dimensions whose names don't resolve to a plan field are silently
        // dropped by the type-predicate filter below.
        dimension_ordinals: [...(rollup.dimensions ?? [])]
          .sort((a, b) => a.localeCompare(b))
          .map((dimension) => fieldOrdinalByName.get(dimension))
          .filter((value): value is number => typeof value === "number"),
        intervals,
        measures,
      } satisfies SearchCompanionPlanRollup;
    });

  // Enable each family based on what the registry actually requires.
  families.col = colFields.length > 0;
  families.fts = ftsFields.length > 0;
  families.agg = rollups.length > 0;
  families.mblk = search.profile === "metrics";
  return {
    families,
    fields,
    rollups,
    // Debug/inspection echo of the plan; feeds into the plan hash as well.
    summary: {
      primaryTimestampField: search.primaryTimestampField ?? null,
      primaryTimestampFieldOrdinal: fieldOrdinalByName.get(search.primaryTimestampField) ?? null,
      profile: search.profile ?? null,
      colFields: colFields.map((field) => ({
        ordinal: field.ordinal,
        name: field.name,
        kind: field.kind,
        bindings: field.bindings,
        exists: field.exists,
        sortable: field.sortable,
      })),
      ftsFields: ftsFields.map((field) => ({
        ordinal: field.ordinal,
        name: field.name,
        kind: field.kind,
        bindings: field.bindings,
        exact: field.exact,
        prefix: field.prefix,
        positions: field.positions,
        analyzer: field.analyzer,
        normalizer: field.normalizer,
      })),
      aggRollups: rollups.map((rollup) => ({
        ordinal: rollup.ordinal,
        name: rollup.name,
        timestampFieldOrdinal: rollup.timestamp_field_ordinal,
        dimensions: rollup.dimension_ordinals,
        intervals: rollup.intervals.map((interval) => ({ ordinal: interval.ordinal, name: interval.name, ms: interval.ms })),
        measures: rollup.measures.map((measure) => ({
          ordinal: measure.ordinal,
          name: measure.name,
          kind: measure.kind,
          fieldOrdinal: measure.field_ordinal,
          histogram: measure.histogram,
        })),
      })),
    },
  };
}
|
|
199
|
+
|
|
200
|
+
export function hashSearchCompanionPlan(plan: SearchCompanionPlan): string {
|
|
201
|
+
return createHash("sha256").update(JSON.stringify(plan)).digest("hex");
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
export function getPlanFieldByName(plan: SearchCompanionPlan, fieldName: string): SearchCompanionPlanField | null {
|
|
205
|
+
return plan.fields.find((field) => field.name === fieldName) ?? null;
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
export function getPlanFieldByOrdinal(plan: SearchCompanionPlan, ordinal: number): SearchCompanionPlanField | null {
|
|
209
|
+
return plan.fields.find((field) => field.ordinal === ordinal) ?? null;
|
|
210
|
+
}
|
|
211
|
+
|
|
212
|
+
export function getPlanRollupByName(plan: SearchCompanionPlan, rollupName: string): SearchCompanionPlanRollup | null {
|
|
213
|
+
return plan.rollups.find((rollup) => rollup.name === rollupName) ?? null;
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
export function getPlanRollupByOrdinal(plan: SearchCompanionPlan, ordinal: number): SearchCompanionPlanRollup | null {
|
|
217
|
+
return plan.rollups.find((rollup) => rollup.ordinal === ordinal) ?? null;
|
|
218
|
+
}
|
|
@@ -0,0 +1,423 @@
|
|
|
1
|
+
import { Result } from "better-result";
|
|
2
|
+
import type { SearchFieldKind } from "../schema/registry";
|
|
3
|
+
import { decodeDocIds, encodeDocSet } from "./binary/docset";
|
|
4
|
+
import { BinaryCursor, BinaryPayloadError, BinaryWriter, concatBytes, readU16, readU32 } from "./binary/codec";
|
|
5
|
+
import { RestartStringTableView, encodeRestartStringTable } from "./binary/restart_strings";
|
|
6
|
+
import { readUVarint, writeUVarint } from "./binary/varint";
|
|
7
|
+
import type { SearchCompanionPlan } from "./companion_plan";
|
|
8
|
+
|
|
9
|
+
// Posting input for one term: parallel arrays where freqs[i] is the number of
// occurrences in doc_ids[i], and positions is the flat concatenation of all
// per-doc position lists (consumed freqs[i] entries at a time by the encoder).
export type FtsTermInput = {
  doc_ids: number[];
  freqs?: number[];
  positions?: number[];
};

// Input for one field of an FTS segment before encoding.
export type FtsFieldInput = {
  kind: SearchFieldKind;
  exact?: boolean;
  prefix?: boolean;
  // When true, per-occurrence positions are encoded alongside frequencies.
  positions?: boolean;
  // Doc ids in which this field exists at all (the "exists" doc set).
  exists_docs: number[];
  terms: Record<string, FtsTermInput>;
};

// Input for a whole FTS segment: total doc count plus per-field term data.
export type FtsSectionInput = {
  doc_count: number;
  fields: Record<string, FtsFieldInput>;
};

// One decoded posting block (up to BLOCK_POSTING_LIMIT docs).
// positions/posOffsets are present only when the block was encoded with
// positions; posOffsets has docIds.length + 1 entries so doc i's positions
// are positions[posOffsets[i] .. posOffsets[i+1]).
export type FtsPostingBlock = {
  docIds: Uint32Array;
  freqs: Uint32Array;
  positions?: Uint32Array;
  posOffsets?: Uint32Array;
};
|
|
35
|
+
|
|
36
|
+
// On-disk numeric code for each field kind (must stay stable; stored in the
// field directory). CODE_KIND below is the exact inverse.
const KIND_CODE: Record<SearchFieldKind, number> = {
  keyword: 0,
  text: 1,
  integer: 2,
  float: 3,
  date: 4,
  bool: 5,
};

// Inverse of KIND_CODE, used when decoding the field directory.
const CODE_KIND: Record<number, SearchFieldKind> = {
  0: "keyword",
  1: "text",
  2: "integer",
  3: "float",
  4: "date",
  5: "bool",
};

// Field-directory capability flag bits (byte at entry offset +3).
const FLAG_EXACT = 1 << 0;
const FLAG_PREFIX = 1 << 1;
const FLAG_POSITIONS = 1 << 2;
// Posting-block flag bit: the block carries a positions payload.
const BLOCK_FLAG_POSITIONS = 1 << 0;

// Fixed size of one field directory entry:
// u16 ordinal + u8 kind + u8 flags + 12 x u32 = 2 + 1 + 1 + 48 = 52 bytes.
// Must match both the writer loop in encodeFtsSegmentCompanion and the
// fixed offsets read in decodeFtsSegmentCompanionResult.
const FIELD_DIR_ENTRY_BYTES = 52;
// Max documents per posting block; encoder and PostingIterator agree on this.
const BLOCK_POSTING_LIMIT = 128;
|
|
61
|
+
|
|
62
|
+
// Error value returned when an .fts2 payload fails structural validation.
export type FtsFormatError = { kind: "invalid_fts_segment"; message: string };

// Convenience constructor: wraps a message into a Result.err FtsFormatError.
function invalidFts<T = never>(message: string): Result<T, FtsFormatError> {
  return Result.err({ kind: "invalid_fts_segment", message });
}
|
|
67
|
+
|
|
68
|
+
// In-memory form of one 52-byte field directory entry. All *Offset fields are
// absolute byte offsets into the whole segment payload; they are written as 0
// during the first encoding pass and fixed up once final layout is known.
type FieldDirectoryEntry = {
  fieldOrdinal: number;
  kind: SearchFieldKind;
  exact: boolean;
  prefix: boolean;
  positions: boolean;
  termCount: number;
  // "exists" doc-set payload (codec id + bytes).
  existsOffset: number;
  existsLength: number;
  existsCodec: number;
  // Restart-string term dictionary.
  dictOffset: number;
  dictLength: number;
  // Per-term document frequencies (u32 LE array).
  dfOffset: number;
  dfLength: number;
  // Per-term posting start offsets (u32 LE array, termCount + 1 entries).
  postingsOffsetTableOffset: number;
  postingsOffsetTableLength: number;
  // Concatenated posting lists.
  postingsDataOffset: number;
  postingsDataLength: number;
};
|
|
87
|
+
|
|
88
|
+
class U32LeView {
|
|
89
|
+
private readonly view: DataView;
|
|
90
|
+
readonly length: number;
|
|
91
|
+
|
|
92
|
+
constructor(private readonly bytes: Uint8Array) {
|
|
93
|
+
this.view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
|
|
94
|
+
this.length = Math.floor(bytes.byteLength / 4);
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
get(index: number): number {
|
|
98
|
+
if (index < 0 || index >= this.length) return 0;
|
|
99
|
+
return this.view.getUint32(index * 4, true);
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
/**
 * Streams decoded posting blocks out of one term's posting-list bytes
 * (the exact inverse of encodePostingList).
 *
 * Each block: u16 doc count, u8 flags, 1 pad byte, u32 first doc id,
 * then three u32 section lengths (doc deltas, freqs, positions), followed by
 * the three varint-encoded sections.
 */
export class PostingIterator {
  private cursor: BinaryCursor;
  private readonly endOffset: number;

  constructor(private readonly bytes: Uint8Array) {
    this.cursor = new BinaryCursor(bytes);
    this.endOffset = bytes.byteLength;
  }

  /**
   * Decodes and returns the next block, or null when the payload is
   * exhausted. Blocks hold at most BLOCK_POSTING_LIMIT documents.
   */
  nextBlock(): FtsPostingBlock | null {
    if (this.cursor.offset >= this.endOffset) return null;
    const docsInBlock = this.cursor.readU16();
    const flags = this.cursor.readU8();
    this.cursor.skip(1); // pad byte written as 0 by the encoder
    const firstDocId = this.cursor.readU32();
    const docDeltaLength = this.cursor.readU32();
    const freqLength = this.cursor.readU32();
    const posLength = this.cursor.readU32();
    const docDeltaBytes = this.cursor.readBytes(docDeltaLength);
    const freqBytes = this.cursor.readBytes(freqLength);
    const posBytes = this.cursor.readBytes(posLength);

    // Doc ids: first one stored verbatim, the rest as varint deltas.
    const docIds = new Uint32Array(docsInBlock);
    docIds[0] = firstDocId;
    if (docsInBlock > 1) {
      const deltaCursor = new BinaryCursor(docDeltaBytes);
      let previous = firstDocId;
      for (let index = 1; index < docsInBlock; index++) {
        previous += Number(readUVarint(deltaCursor));
        docIds[index] = previous;
      }
    }

    // Frequencies: one varint per doc; totalPositions sizes the positions
    // array below (freqs are always written, even without positions).
    const freqs = new Uint32Array(docsInBlock);
    const freqCursor = new BinaryCursor(freqBytes);
    let totalPositions = 0;
    for (let index = 0; index < docsInBlock; index++) {
      freqs[index] = Number(readUVarint(freqCursor));
      totalPositions += freqs[index]!;
    }

    if ((flags & BLOCK_FLAG_POSITIONS) === 0) {
      return { docIds, freqs };
    }

    // Positions: per doc, freqs[doc] delta-encoded varints; flattened into
    // one array with posOffsets (docsInBlock + 1 entries) marking doc ranges.
    const positions = new Uint32Array(totalPositions);
    const posOffsets = new Uint32Array(docsInBlock + 1);
    const posCursor = new BinaryCursor(posBytes);
    let writeOffset = 0;
    for (let docIndex = 0; docIndex < docsInBlock; docIndex++) {
      posOffsets[docIndex] = writeOffset;
      let previous = 0;
      for (let freqIndex = 0; freqIndex < freqs[docIndex]!; freqIndex++) {
        previous += Number(readUVarint(posCursor));
        positions[writeOffset++] = previous;
      }
    }
    posOffsets[docsInBlock] = writeOffset;
    return { docIds, freqs, positions, posOffsets };
  }
}
|
|
164
|
+
|
|
165
|
+
/**
 * Read-side view over one field's section of a decoded .fts2 segment.
 * Wraps the term dictionary, doc-frequency table, posting-offset table and
 * raw postings payload; decodes the "exists" doc set lazily on first use.
 */
export class FtsFieldView {
  private readonly termsView: RestartStringTableView;
  private readonly docFreqs: U32LeView;
  private readonly postingOffsets: U32LeView;
  // Lazily-decoded exists doc-id list; null until first existsDocIds() call.
  private existsDocIdsCache: number[] | null = null;

  constructor(
    readonly name: string,
    readonly kind: SearchFieldKind,
    readonly exact: boolean,
    readonly prefix: boolean,
    readonly positions: boolean,
    private readonly docCount: number,
    private readonly existsCodec: number,
    private readonly existsPayload: Uint8Array,
    dictPayload: Uint8Array,
    docFreqPayload: Uint8Array,
    postingOffsetsPayload: Uint8Array,
    private readonly postingsPayload: Uint8Array
  ) {
    this.termsView = new RestartStringTableView(dictPayload);
    this.docFreqs = new U32LeView(docFreqPayload);
    this.postingOffsets = new U32LeView(postingOffsetsPayload);
  }

  /** Doc ids in which this field exists; decoded once and cached. */
  existsDocIds(): number[] {
    if (!this.existsDocIdsCache) {
      this.existsDocIdsCache = decodeDocIds(this.docCount, this.existsCodec, this.existsPayload);
    }
    return this.existsDocIdsCache;
  }

  /** All terms in dictionary order. */
  terms(): string[] {
    return this.termsView.terms();
  }

  /** Term ordinal for an exact term, or null when not present. */
  lookupTerm(term: string): number | null {
    return this.termsView.lookup(term);
  }

  /** Term ordinals matching `prefix`, capped at `limit`. */
  expandPrefixResult(prefix: string, limit: number): Result<number[], { message: string }> {
    return this.termsView.expandPrefixResult(prefix, limit);
  }

  /** Document frequency for a term ordinal (0 for out-of-range ordinals). */
  docFreq(termOrdinal: number): number {
    return this.docFreqs.get(termOrdinal);
  }

  /**
   * Posting iterator for a term ordinal. The offset table has termCount + 1
   * entries; `|| start` yields an empty slice if the end offset reads as 0
   * (out-of-range ordinal).
   */
  postings(termOrdinal: number): PostingIterator {
    const start = this.postingOffsets.get(termOrdinal);
    const end = this.postingOffsets.get(termOrdinal + 1) || start;
    return new PostingIterator(this.postingsPayload.subarray(start, end));
  }
}
|
|
219
|
+
|
|
220
|
+
export class FtsSectionView {
|
|
221
|
+
private readonly fieldByName = new Map<string, FtsFieldView>();
|
|
222
|
+
|
|
223
|
+
constructor(readonly docCount: number, readonly fields: FtsFieldView[]) {
|
|
224
|
+
for (const field of fields) this.fieldByName.set(field.name, field);
|
|
225
|
+
}
|
|
226
|
+
|
|
227
|
+
getField(fieldName: string): FtsFieldView | null {
|
|
228
|
+
return this.fieldByName.get(fieldName) ?? null;
|
|
229
|
+
}
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
/**
 * Encodes an .fts2 segment companion.
 *
 * Layout: header (u32 doc_count, u16 field count, u16 reserved), then one
 * 52-byte directory entry per field (see FIELD_DIR_ENTRY_BYTES), then each
 * field's payloads concatenated in order: exists doc set, term dictionary,
 * doc-frequency table, posting-offset table, posting data.
 *
 * Two-pass: payloads are built first with zeroed offsets, then absolute
 * offsets are fixed up once the directory size is known. Only FTS-eligible
 * plan fields (text, or keyword+prefix) present in `input` are encoded,
 * ordered by plan ordinal.
 */
export function encodeFtsSegmentCompanion(input: FtsSectionInput, plan: SearchCompanionPlan): Uint8Array {
  const orderedFields = plan.fields
    .filter((field) => input.fields[field.name] && (field.kind === "text" || (field.kind === "keyword" && field.prefix)))
    .sort((a, b) => a.ordinal - b.ordinal);
  const fieldPayloads: Array<{
    entry: FieldDirectoryEntry;
    exists: Uint8Array;
    dict: Uint8Array;
    dfs: Uint8Array;
    postingOffsets: Uint8Array;
    postings: Uint8Array;
  }> = [];

  // Pass 1: encode every field's payloads; offsets stay 0 for now.
  for (const planField of orderedFields) {
    const field = input.fields[planField.name]!;
    // Terms sorted so dictionary order matches posting/df table order.
    const terms = Object.keys(field.terms).sort((a, b) => a.localeCompare(b));
    const dict = encodeRestartStringTable(terms);
    const encodedDocSet = encodeDocSet(input.doc_count, field.exists_docs);
    const dfWriter = new BinaryWriter();
    const postingOffsetWriter = new BinaryWriter();
    const postingsWriter = new BinaryWriter();
    let postingOffset = 0;
    for (const term of terms) {
      const postings = field.terms[term] ?? { doc_ids: [] };
      dfWriter.writeU32(postings.doc_ids.length);
      postingOffsetWriter.writeU32(postingOffset);
      const payload = encodePostingList(postings, field.positions === true);
      postingsWriter.writeBytes(payload);
      postingOffset += payload.byteLength;
    }
    // Sentinel entry so readers can compute the last term's posting extent.
    postingOffsetWriter.writeU32(postingOffset);
    fieldPayloads.push({
      entry: {
        fieldOrdinal: planField.ordinal,
        kind: field.kind,
        exact: field.exact === true,
        prefix: field.prefix === true,
        positions: field.positions === true,
        termCount: terms.length,
        existsOffset: 0, // fixed up in pass 2
        existsLength: encodedDocSet.payload.byteLength,
        existsCodec: encodedDocSet.codec,
        dictOffset: 0, // fixed up in pass 2
        dictLength: dict.byteLength,
        dfOffset: 0, // fixed up in pass 2
        dfLength: dfWriter.length,
        postingsOffsetTableOffset: 0, // fixed up in pass 2
        postingsOffsetTableLength: postingOffsetWriter.length,
        postingsDataOffset: 0, // fixed up in pass 2
        postingsDataLength: postingsWriter.length,
      },
      exists: encodedDocSet.payload,
      dict,
      dfs: dfWriter.finish(),
      postingOffsets: postingOffsetWriter.finish(),
      postings: postingsWriter.finish(),
    });
  }

  const header = new BinaryWriter();
  header.writeU32(input.doc_count);
  header.writeU16(fieldPayloads.length);
  header.writeU16(0); // reserved

  // Pass 2: assign absolute offsets, payloads laid out after the directory
  // in the same order they are concatenated below.
  let payloadOffset = header.length + FIELD_DIR_ENTRY_BYTES * fieldPayloads.length;
  for (const payload of fieldPayloads) {
    payload.entry.existsOffset = payloadOffset;
    payloadOffset += payload.exists.byteLength;
    payload.entry.dictOffset = payloadOffset;
    payloadOffset += payload.dict.byteLength;
    payload.entry.dfOffset = payloadOffset;
    payloadOffset += payload.dfs.byteLength;
    payload.entry.postingsOffsetTableOffset = payloadOffset;
    payloadOffset += payload.postingOffsets.byteLength;
    payload.entry.postingsDataOffset = payloadOffset;
    payloadOffset += payload.postings.byteLength;
  }

  // Serialize the directory; byte layout must match the fixed offsets read
  // by decodeFtsSegmentCompanionResult (52 bytes per entry).
  const directory = new BinaryWriter();
  for (const payload of fieldPayloads) {
    const flags = (payload.entry.exact ? FLAG_EXACT : 0) | (payload.entry.prefix ? FLAG_PREFIX : 0) | (payload.entry.positions ? FLAG_POSITIONS : 0);
    directory.writeU16(payload.entry.fieldOrdinal);
    directory.writeU8(KIND_CODE[payload.entry.kind] ?? 0);
    directory.writeU8(flags);
    directory.writeU32(payload.entry.termCount);
    directory.writeU32(payload.entry.existsOffset);
    directory.writeU32(payload.entry.existsLength);
    // Codec id in the top byte; low 24 bits reserved (zero).
    directory.writeU32((payload.entry.existsCodec << 24) | 0);
    directory.writeU32(payload.entry.dictOffset);
    directory.writeU32(payload.entry.dictLength);
    directory.writeU32(payload.entry.dfOffset);
    directory.writeU32(payload.entry.dfLength);
    directory.writeU32(payload.entry.postingsOffsetTableOffset);
    directory.writeU32(payload.entry.postingsOffsetTableLength);
    directory.writeU32(payload.entry.postingsDataOffset);
    directory.writeU32(payload.entry.postingsDataLength);
  }

  return concatBytes([
    header.finish(),
    directory.finish(),
    ...fieldPayloads.flatMap((payload) => [payload.exists, payload.dict, payload.dfs, payload.postingOffsets, payload.postings]),
  ]);
}
|
|
336
|
+
|
|
337
|
+
/**
 * Decodes an .fts2 segment companion into an FtsSectionView.
 *
 * Reads the header, then each 52-byte directory entry at a fixed offset
 * (mirror of the writer in encodeFtsSegmentCompanion), resolving each
 * entry's field ordinal against the plan to recover the field name.
 *
 * Returns Result.err({ kind: "invalid_fts_segment" }) on any structural
 * problem; thrown payload/cursor errors (e.g. BinaryPayloadError from
 * slicePayload) are caught and converted to the same error kind.
 */
export function decodeFtsSegmentCompanionResult(bytes: Uint8Array, plan: SearchCompanionPlan): Result<FtsSectionView, FtsFormatError> {
  try {
    const cursor = new BinaryCursor(bytes);
    const docCount = cursor.readU32();
    const fieldCount = cursor.readU16();
    cursor.readU16(); // reserved, written as 0
    const directoryOffset = cursor.offset;
    const fields: FtsFieldView[] = [];
    for (let index = 0; index < fieldCount; index++) {
      const entryOffset = directoryOffset + index * FIELD_DIR_ENTRY_BYTES;
      if (entryOffset + FIELD_DIR_ENTRY_BYTES > bytes.byteLength) return invalidFts("invalid .fts2 directory");
      // Entry layout (relative offsets): +0 ordinal(u16), +2 kind(u8),
      // +3 flags(u8), +4 termCount, +8/+12 exists off/len, +16 codec word,
      // +20/+24 dict, +28/+32 df, +36/+40 posting-offsets, +44/+48 postings.
      const fieldOrdinal = readU16(bytes, entryOffset);
      const kindCode = bytes[entryOffset + 2]!;
      const flags = bytes[entryOffset + 3]!;
      // Codec id lives in the top byte of the word at +16.
      const existsCodec = readU32(bytes, entryOffset + 16) >>> 24;
      const planField = plan.fields.find((field) => field.ordinal === fieldOrdinal);
      if (!planField) return invalidFts(`missing .fts2 plan field ordinal ${fieldOrdinal}`);
      const kind = CODE_KIND[kindCode];
      if (!kind) return invalidFts("invalid .fts2 field kind");
      fields.push(
        new FtsFieldView(
          planField.name,
          kind,
          (flags & FLAG_EXACT) !== 0,
          (flags & FLAG_PREFIX) !== 0,
          (flags & FLAG_POSITIONS) !== 0,
          docCount,
          existsCodec,
          slicePayload(bytes, readU32(bytes, entryOffset + 8), readU32(bytes, entryOffset + 12), "invalid .fts2 exists payload"),
          slicePayload(bytes, readU32(bytes, entryOffset + 20), readU32(bytes, entryOffset + 24), "invalid .fts2 dict payload"),
          slicePayload(bytes, readU32(bytes, entryOffset + 28), readU32(bytes, entryOffset + 32), "invalid .fts2 docfreq payload"),
          slicePayload(bytes, readU32(bytes, entryOffset + 36), readU32(bytes, entryOffset + 40), "invalid .fts2 posting-offset payload"),
          slicePayload(bytes, readU32(bytes, entryOffset + 44), readU32(bytes, entryOffset + 48), "invalid .fts2 postings payload")
        )
      );
    }
    return Result.ok(new FtsSectionView(docCount, fields));
  } catch (e: unknown) {
    // Normalize any thrown error (cursor overruns, slicePayload) into the
    // invalid-segment error kind.
    return invalidFts(String((e as any)?.message ?? e));
  }
}
|
|
378
|
+
|
|
379
|
+
/**
 * Encodes one term's posting list as a sequence of blocks of at most
 * BLOCK_POSTING_LIMIT docs each (the exact inverse of
 * PostingIterator.nextBlock).
 *
 * Per block: u16 doc count, u8 flags, 1 pad byte, u32 first doc id, three
 * u32 section lengths, then the sections: doc-id deltas (varint), freqs
 * (varint), and — when withPositions — delta-encoded positions.
 *
 * Without positions every freq is written as the constant 1; input freqs are
 * only honored when withPositions is true (the positions payload consumes
 * exactly freq entries per doc from the flat positions array).
 */
function encodePostingList(postings: FtsTermInput, withPositions: boolean): Uint8Array {
  const writer = new BinaryWriter();
  const docIds = postings.doc_ids;
  const freqs = postings.freqs ?? [];
  const positions = postings.positions ?? [];
  // Read cursor into the flat positions array, carried across blocks.
  let positionOffset = 0;
  for (let start = 0; start < docIds.length; start += BLOCK_POSTING_LIMIT) {
    const end = Math.min(docIds.length, start + BLOCK_POSTING_LIMIT);
    const docDeltaWriter = new BinaryWriter();
    const freqWriter = new BinaryWriter();
    const posWriter = new BinaryWriter();
    // Doc ids after the first are stored as deltas from the previous id;
    // assumes doc_ids are ascending within the list — TODO confirm callers
    // always supply sorted doc_ids.
    for (let index = start + 1; index < end; index++) {
      writeUVarint(docDeltaWriter, docIds[index]! - docIds[index - 1]!);
    }
    for (let index = start; index < end; index++) {
      const freq = withPositions ? freqs[index] ?? 0 : 1;
      writeUVarint(freqWriter, freq);
      if (!withPositions) continue;
      // Positions for this doc: freq delta-encoded varints.
      let previous = 0;
      for (let posIndex = 0; posIndex < freq; posIndex++) {
        const position = positions[positionOffset++] ?? 0;
        writeUVarint(posWriter, position - previous);
        previous = position;
      }
    }
    writer.writeU16(end - start);
    writer.writeU8(withPositions ? BLOCK_FLAG_POSITIONS : 0);
    writer.writeU8(0); // pad byte; skipped by the reader
    writer.writeU32(docIds[start] ?? 0);
    writer.writeU32(docDeltaWriter.length);
    writer.writeU32(freqWriter.length);
    writer.writeU32(posWriter.length);
    writer.writeBytes(docDeltaWriter.finish());
    writer.writeBytes(freqWriter.finish());
    writer.writeBytes(posWriter.finish());
  }
  return writer.finish();
}
|
|
417
|
+
|
|
418
|
+
function slicePayload(bytes: Uint8Array, offset: number, length: number, message: string): Uint8Array {
|
|
419
|
+
if (offset < 0 || length < 0 || offset + length > bytes.byteLength) {
|
|
420
|
+
throw new BinaryPayloadError(message);
|
|
421
|
+
}
|
|
422
|
+
return bytes.subarray(offset, offset + length);
|
|
423
|
+
}
|