@prisma/streams-server 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/CONTRIBUTING.md +8 -0
  2. package/package.json +2 -1
  3. package/src/app.ts +290 -17
  4. package/src/app_core.ts +1833 -698
  5. package/src/app_local.ts +144 -4
  6. package/src/auto_tune.ts +62 -0
  7. package/src/bootstrap.ts +159 -1
  8. package/src/concurrency_gate.ts +108 -0
  9. package/src/config.ts +116 -14
  10. package/src/db/db.ts +1201 -131
  11. package/src/db/schema.ts +308 -8
  12. package/src/foreground_activity.ts +55 -0
  13. package/src/index/indexer.ts +254 -124
  14. package/src/index/lexicon_file_cache.ts +261 -0
  15. package/src/index/lexicon_format.ts +93 -0
  16. package/src/index/lexicon_indexer.ts +789 -0
  17. package/src/index/secondary_indexer.ts +824 -0
  18. package/src/index/secondary_schema.ts +105 -0
  19. package/src/ingest.ts +10 -12
  20. package/src/manifest.ts +143 -8
  21. package/src/memory.ts +183 -8
  22. package/src/metrics.ts +15 -29
  23. package/src/metrics_emitter.ts +26 -3
  24. package/src/notifier.ts +121 -5
  25. package/src/objectstore/accounting.ts +92 -0
  26. package/src/objectstore/mock_r2.ts +1 -1
  27. package/src/objectstore/r2.ts +17 -1
  28. package/src/profiles/evlog/schema.ts +234 -0
  29. package/src/profiles/evlog.ts +299 -0
  30. package/src/profiles/generic.ts +47 -0
  31. package/src/profiles/index.ts +205 -0
  32. package/src/profiles/metrics/block_format.ts +109 -0
  33. package/src/profiles/metrics/normalize.ts +366 -0
  34. package/src/profiles/metrics/schema.ts +319 -0
  35. package/src/profiles/metrics.ts +85 -0
  36. package/src/profiles/profile.ts +225 -0
  37. package/src/{touch/engine.ts → profiles/stateProtocol/changes.ts} +3 -20
  38. package/src/profiles/stateProtocol/routes.ts +389 -0
  39. package/src/profiles/stateProtocol/types.ts +6 -0
  40. package/src/profiles/stateProtocol/validation.ts +51 -0
  41. package/src/profiles/stateProtocol.ts +100 -0
  42. package/src/read_filter.ts +468 -0
  43. package/src/reader.ts +2151 -164
  44. package/src/runtime/host_runtime.ts +5 -0
  45. package/src/runtime_memory.ts +200 -0
  46. package/src/runtime_memory_sampler.ts +235 -0
  47. package/src/schema/read_json.ts +43 -0
  48. package/src/schema/registry.ts +563 -59
  49. package/src/search/agg_format.ts +638 -0
  50. package/src/search/aggregate.ts +389 -0
  51. package/src/search/binary/codec.ts +162 -0
  52. package/src/search/binary/docset.ts +67 -0
  53. package/src/search/binary/restart_strings.ts +181 -0
  54. package/src/search/binary/varint.ts +34 -0
  55. package/src/search/bitset.ts +19 -0
  56. package/src/search/col_format.ts +382 -0
  57. package/src/search/col_runtime.ts +59 -0
  58. package/src/search/column_encoding.ts +43 -0
  59. package/src/search/companion_file_cache.ts +319 -0
  60. package/src/search/companion_format.ts +313 -0
  61. package/src/search/companion_manager.ts +1086 -0
  62. package/src/search/companion_plan.ts +218 -0
  63. package/src/search/fts_format.ts +423 -0
  64. package/src/search/fts_runtime.ts +333 -0
  65. package/src/search/query.ts +875 -0
  66. package/src/search/schema.ts +245 -0
  67. package/src/segment/cache.ts +93 -2
  68. package/src/segment/cached_segment.ts +89 -0
  69. package/src/segment/format.ts +108 -36
  70. package/src/segment/segmenter.ts +79 -5
  71. package/src/segment/segmenter_worker.ts +35 -6
  72. package/src/segment/segmenter_workers.ts +42 -12
  73. package/src/server.ts +150 -36
  74. package/src/sqlite/adapter.ts +185 -14
  75. package/src/sqlite/runtime_stats.ts +163 -0
  76. package/src/stats.ts +3 -3
  77. package/src/stream_size_reconciler.ts +100 -0
  78. package/src/touch/canonical_change.ts +7 -0
  79. package/src/touch/live_metrics.ts +94 -64
  80. package/src/touch/live_templates.ts +15 -1
  81. package/src/touch/manager.ts +166 -88
  82. package/src/touch/{interpreter_worker.ts → processor_worker.ts} +19 -14
  83. package/src/touch/spec.ts +95 -92
  84. package/src/touch/touch_journal.ts +4 -0
  85. package/src/touch/worker_pool.ts +8 -14
  86. package/src/touch/worker_protocol.ts +3 -3
  87. package/src/uploader.ts +77 -6
  88. package/src/util/bloom256.ts +2 -2
  89. package/src/util/byte_lru.ts +73 -0
  90. package/src/util/lru.ts +8 -0
  91. package/src/util/stream_paths.ts +19 -0
@@ -0,0 +1,181 @@
1
+ import { Result } from "better-result";
2
+ import { BinaryCursor, BinaryPayloadError, BinaryWriter, concatBytes } from "./codec";
3
+ import { readUVarint, writeUVarint } from "./varint";
4
+
5
+ const TEXT_ENCODER = new TextEncoder();
6
+ const TEXT_DECODER = new TextDecoder();
7
+
8
/**
 * Read-only view over a prefix-compressed ("restart") string table.
 *
 * Layout:
 *   u32 termCount
 *   u16 restartInterval
 *   u16 restartCount
 *   restartCount x u32 restart offsets (relative to the entries section)
 *   entries: per term — uvarint prefixLength, uvarint suffixLength, suffix bytes
 *
 * Every `restartInterval`-th term starts a restart block with an empty prefix
 * context, so any single term can be decoded by replaying at most one block.
 *
 * NOTE(review): lookup()/lowerBoundOrdinal() binary-search with localeCompare,
 * which assumes the encoder emitted terms in a compatible sorted order.
 * encodeRestartStringTable does not itself sort its input — verify callers
 * pre-sort with an ordering consistent with localeCompare.
 */
export class RestartStringTableView {
  private readonly termCount: number;
  private readonly restartInterval: number;
  // Byte offsets of each restart block, relative to entriesOffset.
  private readonly restartOffsets: number[];
  // Absolute byte offset where the entries section begins.
  private readonly entriesOffset: number;
  // Memoized first term of each restart block (cheap binary-search probes).
  private readonly blockFirstTerms = new Map<number, string>();
  // Memoized full decode of all terms.
  private termsCache: string[] | null = null;

  constructor(private readonly bytes: Uint8Array) {
    try {
      const cursor = new BinaryCursor(bytes);
      this.termCount = cursor.readU32();
      this.restartInterval = Math.max(1, cursor.readU16());
      const restartCount = cursor.readU16();
      this.restartOffsets = [];
      for (let i = 0; i < restartCount; i++) this.restartOffsets.push(cursor.readU32());
      this.entriesOffset = cursor.offset;
    } catch {
      // Malformed/truncated header: degrade to an empty table rather than throw.
      this.termCount = 0;
      this.restartInterval = 1;
      this.restartOffsets = [];
      this.entriesOffset = bytes.byteLength;
    }
  }

  /** Number of terms stored in the table. */
  count(): number {
    return this.termCount;
  }

  /** Decode and return every term in stored order. Result is memoized. */
  terms(): string[] {
    if (this.termsCache) return this.termsCache;
    try {
      const cursor = new BinaryCursor(this.bytes.subarray(this.entriesOffset));
      const terms: string[] = [];
      let previous = "";
      for (let index = 0; index < this.termCount; index++) {
        // Each restart block begins with an empty prefix context.
        if (index % this.restartInterval === 0) previous = "";
        const prefixLength = Number(readUVarint(cursor));
        const suffixLength = Number(readUVarint(cursor));
        const suffix = TEXT_DECODER.decode(cursor.readBytes(suffixLength));
        previous = previous.slice(0, prefixLength) + suffix;
        terms.push(previous);
      }
      this.termsCache = terms;
      return terms;
    } catch {
      // Truncated/corrupt entries section: cache and return an empty list.
      this.termsCache = [];
      return this.termsCache;
    }
  }

  /**
   * Binary-search for `term`; returns its ordinal, or null if absent
   * (or if any probed entry fails to decode).
   */
  lookup(term: string): number | null {
    let low = 0;
    let high = this.termCount - 1;
    while (low <= high) {
      const mid = (low + high) >> 1;
      const current = this.decodeTermAt(mid);
      if (current == null) return null;
      const cmp = current.localeCompare(term);
      if (cmp === 0) return mid;
      if (cmp < 0) low = mid + 1;
      else high = mid - 1;
    }
    return null;
  }

  /**
   * Ordinals of all terms starting with `prefix`, in table order.
   * Returns an error once more than `limit` matches have been collected.
   */
  expandPrefixResult(prefix: string, limit: number): Result<number[], { message: string }> {
    const start = this.lowerBoundOrdinal(prefix);
    const matches: number[] = [];
    for (let index = start; index < this.termCount; index++) {
      const term = this.decodeTermAt(index);
      if (term == null) break;
      if (!term.startsWith(prefix)) break;
      matches.push(index);
      if (matches.length > limit) {
        return Result.err({ message: `prefix expansion exceeds limit (${limit})` });
      }
    }
    return Result.ok(matches);
  }

  /** First ordinal whose term is >= `target` (localeCompare order); termCount when none. */
  lowerBoundOrdinal(target: string): number {
    let low = 0;
    let high = this.termCount;
    while (low < high) {
      const mid = (low + high) >> 1;
      const current = this.decodeTermAt(mid);
      if (current != null && current.localeCompare(target) < 0) low = mid + 1;
      else high = mid;
    }
    return low;
  }

  /** Term at `termOrdinal`, or null when out of range or undecodable. */
  termAt(termOrdinal: number): string | null {
    return this.decodeTermAt(termOrdinal);
  }

  // Decode a single term by replaying its restart block from the block start.
  private decodeTermAt(termOrdinal: number): string | null {
    if (termOrdinal < 0 || termOrdinal >= this.termCount) return null;
    const blockIndex = Math.floor(termOrdinal / this.restartInterval);
    const blockStartOrdinal = blockIndex * this.restartInterval;
    const blockOffset = this.restartOffsets[blockIndex];
    if (blockOffset == null) return null;
    // Block-first terms have a dedicated memoized fast path.
    if (termOrdinal === blockStartOrdinal) return this.decodeBlockFirstTerm(blockIndex);
    try {
      const cursor = new BinaryCursor(this.bytes.subarray(this.entriesOffset + blockOffset));
      let previous = "";
      let current = "";
      for (let ordinal = blockStartOrdinal; ordinal <= termOrdinal; ordinal++) {
        const prefixLength = Number(readUVarint(cursor));
        const suffixLength = Number(readUVarint(cursor));
        const suffix = TEXT_DECODER.decode(cursor.readBytes(suffixLength));
        current = previous.slice(0, prefixLength) + suffix;
        previous = current;
      }
      return current;
    } catch {
      return null;
    }
  }

  // Decode (and memoize) the first term of a restart block.
  // By construction that entry must carry a zero prefix length.
  private decodeBlockFirstTerm(blockIndex: number): string | null {
    const cached = this.blockFirstTerms.get(blockIndex);
    if (cached != null) return cached;
    const blockOffset = this.restartOffsets[blockIndex];
    if (blockOffset == null) return null;
    try {
      const cursor = new BinaryCursor(this.bytes.subarray(this.entriesOffset + blockOffset));
      const prefixLength = Number(readUVarint(cursor));
      const suffixLength = Number(readUVarint(cursor));
      if (prefixLength !== 0) {
        throw new BinaryPayloadError("restart block must begin with zero prefix length");
      }
      const term = TEXT_DECODER.decode(cursor.readBytes(suffixLength));
      this.blockFirstTerms.set(blockIndex, term);
      return term;
    } catch {
      return null;
    }
  }
}
149
+
150
+ export function encodeRestartStringTable(values: string[], restartInterval = 16): Uint8Array {
151
+ const sorted = [...values];
152
+ const entryWriter = new BinaryWriter();
153
+ const restartOffsets: number[] = [];
154
+ let previous = "";
155
+ for (let index = 0; index < sorted.length; index++) {
156
+ if (restartInterval > 0 && index % restartInterval === 0) {
157
+ restartOffsets.push(entryWriter.length);
158
+ previous = "";
159
+ }
160
+ const value = sorted[index]!;
161
+ const prefixLength = sharedPrefixLength(previous, value);
162
+ const suffixBytes = TEXT_ENCODER.encode(value.slice(prefixLength));
163
+ writeUVarint(entryWriter, prefixLength);
164
+ writeUVarint(entryWriter, suffixBytes.byteLength);
165
+ entryWriter.writeBytes(suffixBytes);
166
+ previous = value;
167
+ }
168
+ const header = new BinaryWriter();
169
+ header.writeU32(sorted.length);
170
+ header.writeU16(Math.max(1, restartInterval));
171
+ header.writeU16(restartOffsets.length);
172
+ for (const offset of restartOffsets) header.writeU32(offset);
173
+ return concatBytes([header.finish(), entryWriter.finish()]);
174
+ }
175
+
176
+ function sharedPrefixLength(left: string, right: string): number {
177
+ const max = Math.min(left.length, right.length);
178
+ let index = 0;
179
+ while (index < max && left.charCodeAt(index) === right.charCodeAt(index)) index += 1;
180
+ return index;
181
+ }
@@ -0,0 +1,34 @@
1
+ import { BinaryCursor, BinaryPayloadError, BinaryWriter } from "./codec";
2
+
3
+ export function writeUVarint(writer: BinaryWriter, value: number | bigint): void {
4
+ let remaining = typeof value === "bigint" ? value : BigInt(value >>> 0 === value ? value : Math.trunc(value));
5
+ if (remaining < 0n) throw new BinaryPayloadError("uvarint cannot encode negative values");
6
+ while (remaining >= 0x80n) {
7
+ writer.writeU8(Number((remaining & 0x7fn) | 0x80n));
8
+ remaining >>= 7n;
9
+ }
10
+ writer.writeU8(Number(remaining));
11
+ }
12
+
13
+ export function readUVarint(cursor: BinaryCursor): bigint {
14
+ let shift = 0n;
15
+ let value = 0n;
16
+ for (let i = 0; i < 10; i++) {
17
+ const next = BigInt(cursor.readU8());
18
+ value |= (next & 0x7fn) << shift;
19
+ if ((next & 0x80n) === 0n) return value;
20
+ shift += 7n;
21
+ }
22
+ throw new BinaryPayloadError("invalid uvarint");
23
+ }
24
+
25
+ export function writeZigZagVarint(writer: BinaryWriter, value: number | bigint): void {
26
+ const bigintValue = typeof value === "bigint" ? value : BigInt(Math.trunc(value));
27
+ const zigzag = bigintValue >= 0n ? bigintValue << 1n : ((-bigintValue) << 1n) - 1n;
28
+ writeUVarint(writer, zigzag);
29
+ }
30
+
31
+ export function readZigZagVarint(cursor: BinaryCursor): bigint {
32
+ const raw = readUVarint(cursor);
33
+ return (raw & 1n) === 0n ? raw >> 1n : -((raw + 1n) >> 1n);
34
+ }
@@ -0,0 +1,19 @@
1
+ export function createBitset(size: number): Uint8Array {
2
+ return new Uint8Array(Math.ceil(size / 8));
3
+ }
4
+
5
+ export function bitsetSet(bitset: Uint8Array, index: number): void {
6
+ bitset[index >> 3] |= 1 << (index & 7);
7
+ }
8
+
9
+ export function bitsetGet(bitset: Uint8Array, index: number): boolean {
10
+ return (bitset[index >> 3] & (1 << (index & 7))) !== 0;
11
+ }
12
+
13
+ export function listFromBitset(bitset: Uint8Array, size: number): number[] {
14
+ const out: number[] = [];
15
+ for (let i = 0; i < size; i++) {
16
+ if (bitsetGet(bitset, i)) out.push(i);
17
+ }
18
+ return out;
19
+ }
@@ -0,0 +1,382 @@
1
+ import { Result } from "better-result";
2
+ import type { SearchFieldKind } from "../schema/registry";
3
+ import { decodeDocIds, encodeDocSet } from "./binary/docset";
4
+ import { BinaryCursor, BinaryPayloadError, BinaryWriter, concatBytes, readF64, readI64, readU16, readU32 } from "./binary/codec";
5
+ import type { SearchCompanionPlan } from "./companion_plan";
6
+
7
/** Scalar stored in a .col2 column: i64 (bigint), f64 (number), or bool. */
export type ColScalar = bigint | number | boolean;

/**
 * Per-field encoding input. `doc_ids` and `values` are parallel arrays;
 * min/max are optional field-level bounds used for fast pruning.
 */
export type ColFieldInput = {
  kind: SearchFieldKind;
  doc_ids: number[];
  values: ColScalar[];
  min: ColScalar | null;
  max: ColScalar | null;
};

/** One segment's worth of columnar input, keyed by field name. */
export type ColSectionInput = {
  doc_count: number;
  primary_timestamp_field?: string;
  fields: Record<string, ColFieldInput>;
};

// In-memory shape of one fixed-size directory entry (see DIRECTORY_ENTRY_BYTES).
type ColFieldDirectoryEntry = {
  fieldOrdinal: number;
  kind: SearchFieldKind;
  presentCount: number;
  existsCodec: number;
  existsOffset: number;
  existsLength: number;
  valuesOffset: number;
  valuesLength: number;
  pageIndexOffset: number;
  pageIndexLength: number;
  minValue: ColScalar | null;
  maxValue: ColScalar | null;
};

// One page-index entry: first doc id, value-array start index, inline min/max.
type ColPageEntry = {
  firstDocId: number;
  valueStartIndex: number;
  min: ColScalar;
  max: ColScalar;
};

// Number of values per page in the optional page index.
const PAGE_SIZE = 256;

// Wire codes for field kinds; these are persisted, so they must stay stable.
const KIND_CODE: Record<SearchFieldKind, number> = {
  keyword: 0,
  text: 1,
  integer: 2,
  float: 3,
  date: 4,
  bool: 5,
};

// Inverse of KIND_CODE, used when decoding.
const CODE_KIND: Record<number, SearchFieldKind> = {
  0: "keyword",
  1: "text",
  2: "integer",
  3: "float",
  4: "date",
  5: "bool",
};

// Directory-entry flag bits.
const FLAG_HAS_MINMAX = 1 << 0;
const FLAG_HAS_PAGE_INDEX = 1 << 1;

// 2 (ordinal) + 1 (kind) + 1 (flags) + 4 (presentCount) + 1 (codec)
// + 3 (padding) + 6 x 4 (offset/length pairs) + 2 x 8 (inline min/max) = 52.
const DIRECTORY_ENTRY_BYTES = 52;
// 4 (firstDocId) + 4 (valueStartIndex) + 8 (min) + 8 (max) = 24.
const PAGE_ENTRY_BYTES = 24;

export type ColFormatError = { kind: "invalid_col_segment"; message: string };

// Shorthand for an "invalid .col2 segment" error result.
function invalidCol<T = never>(message: string): Result<T, ColFormatError> {
  return Result.err({ kind: "invalid_col_segment", message });
}
76
+
77
/**
 * Lazily-decoded view over one field of a .col2 companion section.
 * Holds raw payload slices (exists/doc-id set, fixed-width values,
 * optional page index) and decodes them on demand, with memoization.
 */
export class ColFieldView {
  // Decoded page-index entries (null until first pageEntries() call).
  private pageCache: ColPageEntry[] | null = null;
  // Decoded present-document ids (null until first docIds() call).
  private docIdsCache: number[] | null = null;

  constructor(
    readonly name: string,
    readonly kind: SearchFieldKind,
    private readonly docCount: number,
    private readonly presentCount: number,
    private readonly existsCodec: number,
    private readonly existsPayload: Uint8Array,
    private readonly valuesPayload: Uint8Array,
    private readonly pageIndexPayload: Uint8Array,
    private readonly minValueInternal: ColScalar | null,
    private readonly maxValueInternal: ColScalar | null
  ) {}

  /** Field-level minimum, or null when not recorded in the directory. */
  minValue(): ColScalar | null {
    return this.minValueInternal;
  }

  /** Field-level maximum, or null when not recorded in the directory. */
  maxValue(): ColScalar | null {
    return this.maxValueInternal;
  }

  /** True when the encoder emitted a page index for this field. */
  hasPageIndex(): boolean {
    return this.pageIndexPayload.byteLength > 0;
  }

  /** Ids of documents that have a value for this field (memoized decode). */
  docIds(): number[] {
    if (!this.docIdsCache) {
      this.docIdsCache = decodeDocIds(this.docCount, this.existsCodec, this.existsPayload);
    }
    return this.docIdsCache;
  }

  /** Invoke `fn` for every (docId, value) pair, in stored order. */
  forEachValue(fn: (docId: number, value: ColScalar) => void): void {
    if (this.presentCount === 0) return;
    const docIds = this.docIds();
    const width = valueWidth(this.kind);
    for (let index = 0; index < docIds.length; index++) {
      // Values are fixed-width, so the index maps directly to a byte offset.
      fn(docIds[index]!, decodeValue(this.kind, this.valuesPayload, index * width));
    }
  }

  /**
   * Invoke `fn` for pairs in value-index range [startValueIndex, endValueIndex),
   * clamped to the number of present documents. Used for page-scoped scans.
   */
  forEachValueRange(startValueIndex: number, endValueIndex: number, fn: (docId: number, value: ColScalar) => void): void {
    const docIds = this.docIds();
    const width = valueWidth(this.kind);
    for (let index = startValueIndex; index < endValueIndex && index < docIds.length; index++) {
      fn(docIds[index]!, decodeValue(this.kind, this.valuesPayload, index * width));
    }
  }

  /** Decode the page index (empty when absent). Result is memoized. */
  pageEntries(): ColPageEntry[] {
    if (this.pageCache) return this.pageCache;
    const entries: ColPageEntry[] = [];
    // Each entry is PAGE_ENTRY_BYTES: u32 firstDocId, u32 valueStartIndex,
    // 8-byte inline min, 8-byte inline max.
    for (let offset = 0; offset + PAGE_ENTRY_BYTES <= this.pageIndexPayload.byteLength; offset += PAGE_ENTRY_BYTES) {
      entries.push({
        firstDocId: readU32(this.pageIndexPayload, offset),
        valueStartIndex: readU32(this.pageIndexPayload, offset + 4),
        min: decodeScalarInline(this.kind, this.pageIndexPayload.subarray(offset + 8, offset + 16))!,
        max: decodeScalarInline(this.kind, this.pageIndexPayload.subarray(offset + 16, offset + 24))!,
      });
    }
    this.pageCache = entries;
    return entries;
  }
}
145
+
146
+ export class ColSectionView {
147
+ private readonly fieldByName = new Map<string, ColFieldView>();
148
+
149
+ constructor(
150
+ readonly docCount: number,
151
+ readonly primaryTimestampField: string | null,
152
+ readonly fields: ColFieldView[]
153
+ ) {
154
+ for (const field of fields) this.fieldByName.set(field.name, field);
155
+ }
156
+
157
+ getField(fieldName: string): ColFieldView | null {
158
+ return this.fieldByName.get(fieldName) ?? null;
159
+ }
160
+
161
+ minTimestampMs(): bigint | null {
162
+ const field = this.primaryTimestampField ? this.getField(this.primaryTimestampField) : null;
163
+ const value = field?.minValue();
164
+ return typeof value === "bigint" ? value : null;
165
+ }
166
+
167
+ maxTimestampMs(): bigint | null {
168
+ const field = this.primaryTimestampField ? this.getField(this.primaryTimestampField) : null;
169
+ const value = field?.maxValue();
170
+ return typeof value === "bigint" ? value : null;
171
+ }
172
+ }
173
+
174
+ export function compareColScalars(left: ColScalar, right: ColScalar): number {
175
+ if (typeof left === "bigint" && typeof right === "bigint") return left < right ? -1 : left > right ? 1 : 0;
176
+ if (typeof left === "number" && typeof right === "number") return left < right ? -1 : left > right ? 1 : 0;
177
+ if (typeof left === "boolean" && typeof right === "boolean") return left === right ? 0 : left ? 1 : -1;
178
+ return String(left).localeCompare(String(right));
179
+ }
180
+
181
/**
 * Encode a .col2 companion section: header, fixed-size field directory, then
 * concatenated per-field payloads (exists set, fixed-width values, optional
 * page index). Only numeric-ish kinds (integer/float/date/bool) present in
 * both the input and the plan are emitted, ordered by plan ordinal.
 *
 * Offsets in the directory are absolute within the returned buffer, so the
 * payload layout is computed in a first pass before the directory is written.
 */
export function encodeColSegmentCompanion(input: ColSectionInput, plan: SearchCompanionPlan): Uint8Array {
  const orderedFields = plan.fields
    .filter((field) => input.fields[field.name] && (field.kind === "integer" || field.kind === "float" || field.kind === "date" || field.kind === "bool"))
    .sort((a, b) => a.ordinal - b.ordinal);
  const fieldPayloads: Array<{ entry: ColFieldDirectoryEntry; exists: Uint8Array; values: Uint8Array; pages: Uint8Array }> = [];
  for (const planField of orderedFields) {
    const field = input.fields[planField.name]!;
    const encodedDocSet = encodeDocSet(input.doc_count, field.doc_ids);
    const values = encodeFieldValues(field.kind, field.values);
    const pages = encodePageIndex(field.kind, field.doc_ids, field.values);
    fieldPayloads.push({
      entry: {
        fieldOrdinal: planField.ordinal,
        kind: field.kind,
        presentCount: field.doc_ids.length,
        existsCodec: encodedDocSet.codec,
        // Offsets are patched after the header size is known (below).
        existsOffset: 0,
        existsLength: encodedDocSet.payload.byteLength,
        valuesOffset: 0,
        valuesLength: values.byteLength,
        pageIndexOffset: 0,
        pageIndexLength: pages.byteLength,
        minValue: field.min,
        maxValue: field.max,
      },
      exists: encodedDocSet.payload,
      values,
      pages,
    });
  }

  // Header: u32 docCount, u16 fieldCount, u16 primary-timestamp ordinal
  // (0xffff = none / not found in plan).
  const header = new BinaryWriter();
  header.writeU32(input.doc_count);
  header.writeU16(fieldPayloads.length);
  header.writeU16(
    input.primary_timestamp_field == null ? 0xffff : (plan.fields.find((field) => field.name === input.primary_timestamp_field)?.ordinal ?? 0xffff)
  );

  // First pass: assign absolute payload offsets, starting after the directory.
  const directoryBase = header.length + DIRECTORY_ENTRY_BYTES * fieldPayloads.length;
  let payloadOffset = directoryBase;
  for (const payload of fieldPayloads) {
    payload.entry.existsOffset = payloadOffset;
    payloadOffset += payload.exists.byteLength;
    payload.entry.valuesOffset = payloadOffset;
    payloadOffset += payload.values.byteLength;
    // A zero offset means "no page index" (length is 0 in that case too).
    payload.entry.pageIndexOffset = payload.pages.byteLength > 0 ? payloadOffset : 0;
    payloadOffset += payload.pages.byteLength;
  }

  // Second pass: write fixed-size directory entries (see DIRECTORY_ENTRY_BYTES).
  const directory = new BinaryWriter();
  for (const payload of fieldPayloads) {
    const flags =
      (payload.entry.minValue != null && payload.entry.maxValue != null ? FLAG_HAS_MINMAX : 0) |
      (payload.pages.byteLength > 0 ? FLAG_HAS_PAGE_INDEX : 0);
    directory.writeU16(payload.entry.fieldOrdinal);
    directory.writeU8(KIND_CODE[payload.entry.kind] ?? 0);
    directory.writeU8(flags);
    directory.writeU32(payload.entry.presentCount);
    directory.writeU8(payload.entry.existsCodec);
    // Three bytes of padding to keep subsequent u32 fields aligned.
    directory.writeU8(0);
    directory.writeU16(0);
    directory.writeU32(payload.entry.existsOffset);
    directory.writeU32(payload.entry.existsLength);
    directory.writeU32(payload.entry.valuesOffset);
    directory.writeU32(payload.entry.valuesLength);
    directory.writeU32(payload.entry.pageIndexOffset);
    directory.writeU32(payload.entry.pageIndexLength);
    directory.writeBytes(encodeScalarInline(payload.entry.kind, payload.entry.minValue));
    directory.writeBytes(encodeScalarInline(payload.entry.kind, payload.entry.maxValue));
  }

  return concatBytes([header.finish(), directory.finish(), ...fieldPayloads.flatMap((payload) => [payload.exists, payload.values, payload.pages])]);
}
254
+
255
+ export function decodeColSegmentCompanionResult(bytes: Uint8Array, plan: SearchCompanionPlan): Result<ColSectionView, ColFormatError> {
256
+ try {
257
+ const cursor = new BinaryCursor(bytes);
258
+ const docCount = cursor.readU32();
259
+ const fieldCount = cursor.readU16();
260
+ const primaryTimestampOrdinal = cursor.readU16();
261
+ const directoryOffset = cursor.offset;
262
+ const fields: ColFieldView[] = [];
263
+ for (let index = 0; index < fieldCount; index++) {
264
+ const entryOffset = directoryOffset + index * DIRECTORY_ENTRY_BYTES;
265
+ if (entryOffset + DIRECTORY_ENTRY_BYTES > bytes.byteLength) return invalidCol("invalid .col2 directory");
266
+ const fieldOrdinal = readU16(bytes, entryOffset);
267
+ const kindCode = bytes[entryOffset + 2]!;
268
+ const flags = bytes[entryOffset + 3]!;
269
+ const presentCount = readU32(bytes, entryOffset + 4);
270
+ const existsCodec = bytes[entryOffset + 8]!;
271
+ const existsOffset = readU32(bytes, entryOffset + 12);
272
+ const existsLength = readU32(bytes, entryOffset + 16);
273
+ const valuesOffset = readU32(bytes, entryOffset + 20);
274
+ const valuesLength = readU32(bytes, entryOffset + 24);
275
+ const pageIndexOffset = readU32(bytes, entryOffset + 28);
276
+ const pageIndexLength = readU32(bytes, entryOffset + 32);
277
+ const kind = CODE_KIND[kindCode];
278
+ if (!kind || (kind !== "integer" && kind !== "float" && kind !== "date" && kind !== "bool")) {
279
+ return invalidCol("invalid .col2 field kind");
280
+ }
281
+ const planField = plan.fields.find((field) => field.ordinal === fieldOrdinal);
282
+ if (!planField) return invalidCol(`missing .col2 plan field ordinal ${fieldOrdinal}`);
283
+ const minValue = (flags & FLAG_HAS_MINMAX) !== 0 ? decodeScalarInline(kind, bytes.subarray(entryOffset + 36, entryOffset + 44)) : null;
284
+ const maxValue = (flags & FLAG_HAS_MINMAX) !== 0 ? decodeScalarInline(kind, bytes.subarray(entryOffset + 44, entryOffset + 52)) : null;
285
+ fields.push(
286
+ new ColFieldView(
287
+ planField.name,
288
+ kind,
289
+ docCount,
290
+ presentCount,
291
+ existsCodec,
292
+ slicePayload(bytes, existsOffset, existsLength, "invalid .col2 exists payload"),
293
+ slicePayload(bytes, valuesOffset, valuesLength, "invalid .col2 values payload"),
294
+ pageIndexLength > 0 ? slicePayload(bytes, pageIndexOffset, pageIndexLength, "invalid .col2 page index") : new Uint8Array(),
295
+ minValue,
296
+ maxValue
297
+ )
298
+ );
299
+ }
300
+ const primaryTimestampField =
301
+ primaryTimestampOrdinal === 0xffff ? null : (plan.fields.find((field) => field.ordinal === primaryTimestampOrdinal)?.name ?? null);
302
+ return Result.ok(new ColSectionView(docCount, primaryTimestampField, fields));
303
+ } catch (e: unknown) {
304
+ return invalidCol(String((e as any)?.message ?? e));
305
+ }
306
+ }
307
+
308
+ function encodeFieldValues(kind: SearchFieldKind, values: ColScalar[]): Uint8Array {
309
+ const writer = new BinaryWriter();
310
+ for (const value of values) {
311
+ if (kind === "integer" || kind === "date") writer.writeI64(value as bigint);
312
+ else if (kind === "float") writer.writeF64(value as number);
313
+ else if (kind === "bool") writer.writeU8((value as boolean) ? 1 : 0);
314
+ }
315
+ return writer.finish();
316
+ }
317
+
318
/**
 * Build the optional page index: one entry per PAGE_SIZE values, recording
 * the page's first doc id, its start index in the values array, and inline
 * min/max for pruning. Fields that fit in a single page get no index.
 */
function encodePageIndex(kind: SearchFieldKind, docIds: number[], values: ColScalar[]): Uint8Array {
  if (docIds.length <= PAGE_SIZE) return new Uint8Array();
  const writer = new BinaryWriter();
  for (let start = 0; start < docIds.length; start += PAGE_SIZE) {
    const end = Math.min(docIds.length, start + PAGE_SIZE);
    // Scan the page once to find its min/max.
    let min = values[start]!;
    let max = values[start]!;
    for (let index = start + 1; index < end; index++) {
      const value = values[index]!;
      if (compareColScalars(value, min) < 0) min = value;
      if (compareColScalars(value, max) > 0) max = value;
    }
    writer.writeU32(docIds[start]!);
    writer.writeU32(start);
    writer.writeBytes(encodeScalarInline(kind, min));
    writer.writeBytes(encodeScalarInline(kind, max));
  }
  return writer.finish();
}
337
+
338
+ function encodeScalarInline(kind: SearchFieldKind, value: ColScalar | null): Uint8Array {
339
+ const writer = new BinaryWriter();
340
+ if (kind === "integer" || kind === "date") writer.writeI64(value == null ? 0n : (value as bigint));
341
+ else if (kind === "float") writer.writeF64(value == null ? Number.NaN : (value as number));
342
+ else if (kind === "bool") {
343
+ writer.writeU8(value == null ? 0xff : (value as boolean) ? 1 : 0);
344
+ writer.writeBytes(new Uint8Array(7));
345
+ } else {
346
+ writer.writeBytes(new Uint8Array(8));
347
+ }
348
+ return writer.finish();
349
+ }
350
+
351
+ function decodeScalarInline(kind: SearchFieldKind, bytes: Uint8Array): ColScalar | null {
352
+ if (kind === "integer" || kind === "date") return readI64(bytes, 0);
353
+ if (kind === "float") {
354
+ const value = readF64(bytes, 0);
355
+ return Number.isNaN(value) ? null : value;
356
+ }
357
+ if (kind === "bool") {
358
+ const value = bytes[0];
359
+ if (value === 0xff) return null;
360
+ return value === 1;
361
+ }
362
+ return null;
363
+ }
364
+
365
+ function decodeValue(kind: SearchFieldKind, bytes: Uint8Array, offset: number): ColScalar {
366
+ if (kind === "integer" || kind === "date") return readI64(bytes, offset);
367
+ if (kind === "float") return readF64(bytes, offset);
368
+ return bytes[offset] === 1;
369
+ }
370
+
371
+ function valueWidth(kind: SearchFieldKind): number {
372
+ if (kind === "bool") return 1;
373
+ return 8;
374
+ }
375
+
376
+ function slicePayload(bytes: Uint8Array, offset: number, length: number, message: string): Uint8Array {
377
+ if (offset < 0 || length < 0 || offset + length > bytes.byteLength) {
378
+ throw new BinaryPayloadError(message);
379
+ }
380
+ if (length === 0) return new Uint8Array();
381
+ return bytes.subarray(offset, offset + length);
382
+ }
@@ -0,0 +1,59 @@
1
+ import { Result } from "better-result";
2
+ import { ColSectionView, compareColScalars, type ColScalar } from "./col_format";
3
+
4
+ function pageMayMatch(page: { min: ColScalar; max: ColScalar }, op: "eq" | "gt" | "gte" | "lt" | "lte", target: ColScalar): boolean {
5
+ if (op === "eq") return compareColScalars(page.min, target) <= 0 && compareColScalars(page.max, target) >= 0;
6
+ if (op === "gt") return compareColScalars(page.max, target) > 0;
7
+ if (op === "gte") return compareColScalars(page.max, target) >= 0;
8
+ if (op === "lt") return compareColScalars(page.min, target) < 0;
9
+ return compareColScalars(page.min, target) <= 0;
10
+ }
11
+
12
+ function compareCurrent(op: "eq" | "gt" | "gte" | "lt" | "lte", current: ColScalar, target: ColScalar): boolean {
13
+ const cmp = compareColScalars(current, target);
14
+ if (op === "eq") return cmp === 0;
15
+ if (op === "gt") return cmp > 0;
16
+ if (op === "gte") return cmp >= 0;
17
+ if (op === "lt") return cmp < 0;
18
+ return cmp <= 0;
19
+ }
20
+
21
+ export function filterDocIdsByColumnResult(args: {
22
+ companion: ColSectionView;
23
+ field: string;
24
+ op: "eq" | "gt" | "gte" | "lt" | "lte" | "has";
25
+ value?: ColScalar;
26
+ }): Result<Set<number>, { message: string }> {
27
+ const field = args.companion.getField(args.field);
28
+ if (!field) return Result.err({ message: `missing .col2 field ${args.field}` });
29
+ if (args.op === "has") return Result.ok(new Set(field.docIds()));
30
+ const target = args.value!;
31
+ const op = args.op;
32
+ const min = field.minValue();
33
+ const max = field.maxValue();
34
+ if (min != null && max != null) {
35
+ if ((op === "gt" || op === "gte") && compareColScalars(max, target) < (op === "gt" ? 1 : 0)) return Result.ok(new Set());
36
+ if ((op === "lt" || op === "lte") && compareColScalars(min, target) > (op === "lt" ? -1 : 0)) return Result.ok(new Set());
37
+ if (op === "eq" && (compareColScalars(min, target) > 0 || compareColScalars(max, target) < 0)) return Result.ok(new Set());
38
+ }
39
+
40
+ const matches = new Set<number>();
41
+ if (!field.hasPageIndex()) {
42
+ field.forEachValue((docId, value) => {
43
+ if (compareCurrent(op, value, target)) matches.add(docId);
44
+ });
45
+ return Result.ok(matches);
46
+ }
47
+
48
+ const pages = field.pageEntries();
49
+ for (let pageIndex = 0; pageIndex < pages.length; pageIndex++) {
50
+ const page = pages[pageIndex]!;
51
+ if (!pageMayMatch(page, op, target)) continue;
52
+ const start = page.valueStartIndex;
53
+ const end = pageIndex === pages.length - 1 ? field.docIds().length : pages[pageIndex + 1]!.valueStartIndex;
54
+ field.forEachValueRange(start, end, (docId, value) => {
55
+ if (compareCurrent(op, value, target)) matches.add(docId);
56
+ });
57
+ }
58
+ return Result.ok(matches);
59
+ }