@tungthedev/streams-server 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (183) hide show
  1. package/CODE_OF_CONDUCT.md +45 -0
  2. package/CONTRIBUTING.md +76 -0
  3. package/LICENSE +201 -0
  4. package/README.md +58 -0
  5. package/SECURITY.md +42 -0
  6. package/bin/prisma-streams-server +2 -0
  7. package/package.json +46 -0
  8. package/src/app.ts +583 -0
  9. package/src/app_core.ts +3144 -0
  10. package/src/app_local.ts +206 -0
  11. package/src/auth.ts +124 -0
  12. package/src/auto_tune.ts +69 -0
  13. package/src/backpressure.ts +66 -0
  14. package/src/bootstrap.ts +613 -0
  15. package/src/compute/demo_entry.ts +415 -0
  16. package/src/compute/demo_site.ts +1242 -0
  17. package/src/compute/entry.ts +19 -0
  18. package/src/compute/package_entry.ts +4 -0
  19. package/src/compute/virtual-modules.d.ts +15 -0
  20. package/src/compute/worker_module_url.ts +9 -0
  21. package/src/concurrency_gate.ts +108 -0
  22. package/src/config.ts +402 -0
  23. package/src/db/bootstrap_store.ts +9 -0
  24. package/src/db/db.ts +2424 -0
  25. package/src/db/schema.ts +925 -0
  26. package/src/db/sqlite_manifest_snapshot.ts +81 -0
  27. package/src/db/sqlite_touch_store.ts +491 -0
  28. package/src/db/sqlite_wal_store.ts +472 -0
  29. package/src/details/full_mode_details.ts +568 -0
  30. package/src/expiry_sweeper.ts +47 -0
  31. package/src/foreground_activity.ts +55 -0
  32. package/src/hist.ts +169 -0
  33. package/src/index/binary_fuse.ts +379 -0
  34. package/src/index/indexer.ts +947 -0
  35. package/src/index/lexicon_file_cache.ts +261 -0
  36. package/src/index/lexicon_format.ts +93 -0
  37. package/src/index/lexicon_indexer.ts +863 -0
  38. package/src/index/run_cache.ts +84 -0
  39. package/src/index/run_format.ts +213 -0
  40. package/src/index/schedule.ts +28 -0
  41. package/src/index/secondary_indexer.ts +901 -0
  42. package/src/index/secondary_schema.ts +105 -0
  43. package/src/ingest.ts +309 -0
  44. package/src/lens/lens.ts +501 -0
  45. package/src/manifest.ts +249 -0
  46. package/src/memory.ts +334 -0
  47. package/src/metrics.ts +147 -0
  48. package/src/metrics_emitter.ts +83 -0
  49. package/src/notifier.ts +180 -0
  50. package/src/objectstore/accounting.ts +151 -0
  51. package/src/objectstore/interface.ts +13 -0
  52. package/src/objectstore/mock_r2.ts +269 -0
  53. package/src/objectstore/null.ts +32 -0
  54. package/src/objectstore/r2.ts +318 -0
  55. package/src/observe/pairing.ts +61 -0
  56. package/src/observe/request.ts +772 -0
  57. package/src/offset.ts +70 -0
  58. package/src/postgres/bootstrap.ts +269 -0
  59. package/src/postgres/companions.ts +197 -0
  60. package/src/postgres/control_restore.ts +109 -0
  61. package/src/postgres/details.ts +189 -0
  62. package/src/postgres/lexicon_index.ts +260 -0
  63. package/src/postgres/routing_index.ts +189 -0
  64. package/src/postgres/rows.ts +132 -0
  65. package/src/postgres/schema.ts +355 -0
  66. package/src/postgres/secondary_index.ts +238 -0
  67. package/src/postgres/segments.ts +900 -0
  68. package/src/postgres/stats.ts +103 -0
  69. package/src/postgres/store.ts +947 -0
  70. package/src/postgres/touch.ts +591 -0
  71. package/src/postgres/types.ts +32 -0
  72. package/src/profiles/evlog/schema.ts +234 -0
  73. package/src/profiles/evlog.ts +473 -0
  74. package/src/profiles/generic.ts +51 -0
  75. package/src/profiles/index.ts +237 -0
  76. package/src/profiles/metrics/block_format.ts +109 -0
  77. package/src/profiles/metrics/normalize.ts +366 -0
  78. package/src/profiles/metrics/schema.ts +319 -0
  79. package/src/profiles/metrics.ts +83 -0
  80. package/src/profiles/otelTraces/normalize.ts +955 -0
  81. package/src/profiles/otelTraces/otlp.ts +1002 -0
  82. package/src/profiles/otelTraces/schema.ts +408 -0
  83. package/src/profiles/otelTraces.ts +390 -0
  84. package/src/profiles/profile.ts +284 -0
  85. package/src/profiles/stateProtocol/change_event_conformance.typecheck.ts +35 -0
  86. package/src/profiles/stateProtocol/changes.ts +24 -0
  87. package/src/profiles/stateProtocol/ingest.ts +115 -0
  88. package/src/profiles/stateProtocol/routes.ts +511 -0
  89. package/src/profiles/stateProtocol/types.ts +6 -0
  90. package/src/profiles/stateProtocol/validation.ts +51 -0
  91. package/src/profiles/stateProtocol.ts +107 -0
  92. package/src/read_filter.ts +468 -0
  93. package/src/reader.ts +2986 -0
  94. package/src/runtime/hash.ts +156 -0
  95. package/src/runtime/hash_vendor/LICENSE.hash-wasm +38 -0
  96. package/src/runtime/hash_vendor/NOTICE.md +8 -0
  97. package/src/runtime/hash_vendor/xxhash3.umd.min.cjs +7 -0
  98. package/src/runtime/hash_vendor/xxhash32.umd.min.cjs +7 -0
  99. package/src/runtime/hash_vendor/xxhash64.umd.min.cjs +7 -0
  100. package/src/runtime/host_runtime.ts +5 -0
  101. package/src/runtime_memory.ts +200 -0
  102. package/src/runtime_memory_sampler.ts +237 -0
  103. package/src/schema/lens_schema.ts +290 -0
  104. package/src/schema/proof.ts +547 -0
  105. package/src/schema/read_json.ts +51 -0
  106. package/src/schema/registry.ts +966 -0
  107. package/src/search/agg_format.ts +638 -0
  108. package/src/search/aggregate.ts +409 -0
  109. package/src/search/binary/codec.ts +162 -0
  110. package/src/search/binary/docset.ts +67 -0
  111. package/src/search/binary/restart_strings.ts +181 -0
  112. package/src/search/binary/varint.ts +34 -0
  113. package/src/search/bitset.ts +19 -0
  114. package/src/search/col_format.ts +382 -0
  115. package/src/search/col_runtime.ts +59 -0
  116. package/src/search/column_encoding.ts +43 -0
  117. package/src/search/companion_file_cache.ts +319 -0
  118. package/src/search/companion_format.ts +327 -0
  119. package/src/search/companion_manager.ts +1305 -0
  120. package/src/search/companion_plan.ts +229 -0
  121. package/src/search/exact_format.ts +281 -0
  122. package/src/search/exact_runtime.ts +55 -0
  123. package/src/search/fts_format.ts +423 -0
  124. package/src/search/fts_runtime.ts +333 -0
  125. package/src/search/query.ts +875 -0
  126. package/src/search/schema.ts +245 -0
  127. package/src/segment/cache.ts +270 -0
  128. package/src/segment/cached_segment.ts +89 -0
  129. package/src/segment/format.ts +403 -0
  130. package/src/segment/segmenter.ts +412 -0
  131. package/src/segment/segmenter_worker.ts +72 -0
  132. package/src/segment/segmenter_workers.ts +130 -0
  133. package/src/server.ts +264 -0
  134. package/src/server_auto_tune.ts +158 -0
  135. package/src/sqlite/adapter.ts +335 -0
  136. package/src/sqlite/runtime_stats.ts +163 -0
  137. package/src/stats.ts +205 -0
  138. package/src/store/append.ts +50 -0
  139. package/src/store/bootstrap_restore_store.ts +71 -0
  140. package/src/store/capabilities.ts +86 -0
  141. package/src/store/full_mode_details_store.ts +71 -0
  142. package/src/store/index_store.ts +104 -0
  143. package/src/store/profile_touch_store.ts +1 -0
  144. package/src/store/rows.ts +144 -0
  145. package/src/store/schema_profile_store.ts +73 -0
  146. package/src/store/schema_publication.ts +6 -0
  147. package/src/store/segment_manifest_store.ts +129 -0
  148. package/src/store/segment_read_store.ts +22 -0
  149. package/src/store/stats_accounting_store.ts +83 -0
  150. package/src/store/touch_store.ts +98 -0
  151. package/src/store/wal_store.ts +21 -0
  152. package/src/stream_size_reconciler.ts +100 -0
  153. package/src/touch/canonical_change.ts +7 -0
  154. package/src/touch/live_keys.ts +158 -0
  155. package/src/touch/live_metrics.ts +841 -0
  156. package/src/touch/live_templates.ts +449 -0
  157. package/src/touch/manager.ts +1292 -0
  158. package/src/touch/process_batch.ts +576 -0
  159. package/src/touch/processor_worker.ts +85 -0
  160. package/src/touch/spec.ts +459 -0
  161. package/src/touch/touch_journal.ts +771 -0
  162. package/src/touch/touch_key_id.ts +20 -0
  163. package/src/touch/worker_pool.ts +191 -0
  164. package/src/touch/worker_protocol.ts +57 -0
  165. package/src/types/proper-lockfile.d.ts +1 -0
  166. package/src/uploader.ts +358 -0
  167. package/src/util/base32_crockford.ts +81 -0
  168. package/src/util/bloom256.ts +67 -0
  169. package/src/util/byte_lru.ts +73 -0
  170. package/src/util/cleanup.ts +22 -0
  171. package/src/util/crc32c.ts +29 -0
  172. package/src/util/ds_error.ts +15 -0
  173. package/src/util/duration.ts +17 -0
  174. package/src/util/endian.ts +53 -0
  175. package/src/util/json_pointer.ts +148 -0
  176. package/src/util/log.ts +25 -0
  177. package/src/util/lru.ts +53 -0
  178. package/src/util/retry.ts +35 -0
  179. package/src/util/siphash.ts +71 -0
  180. package/src/util/stream_paths.ts +50 -0
  181. package/src/util/time.ts +14 -0
  182. package/src/util/yield.ts +3 -0
  183. package/src/util/zstd.ts +24 -0
@@ -0,0 +1,229 @@
1
+ import { createHash } from "node:crypto";
2
+ import { Result } from "better-result";
3
+ import type {
4
+ SchemaRegistry,
5
+ SearchFieldBinding,
6
+ SearchFieldConfig,
7
+ SearchFieldKind,
8
+ SearchRollupMeasureConfig,
9
+ } from "../schema/registry";
10
+ import { parseDurationMsResult } from "../util/duration";
11
+ import { dsError } from "../util/ds_error";
12
+
13
/**
 * Names of the companion families a plan can switch on. Each name is a key of
 * `SearchCompanionPlan.families`.
 */
export type SearchCompanionFamily =
  | "exact"
  | "col"
  | "fts"
  | "agg"
  | "mblk";
14
+
15
/**
 * A search field as recorded in a companion plan. When produced by
 * `buildDesiredSearchCompanionPlan`, every optional schema flag is resolved to
 * a definite boolean and an unset normalizer/analyzer is stored as `null`.
 */
export type SearchCompanionPlanField = {
  /** Index of the field in the plan's name-sorted field list. */
  ordinal: number;
  /** Key of the field in the schema's `search.fields`. */
  name: string;
  kind: SearchFieldKind;
  /** The `version` / `jsonPointer` pair of each schema binding. */
  bindings: Array<SearchFieldBinding>;
  normalizer: SearchFieldConfig["normalizer"] | null;
  analyzer: SearchFieldConfig["analyzer"] | null;
  // Each flag below is true only when the schema sets it to exactly `true`.
  exact: boolean;
  prefix: boolean;
  column: boolean;
  exists: boolean;
  sortable: boolean;
  aggregatable: boolean;
  contains: boolean;
  positions: boolean;
};
31
+
32
/** One measure of a rollup, as recorded in a companion plan. */
export type SearchCompanionPlanRollupMeasure = {
  /** Index of the measure among the rollup's measures, sorted by name. */
  ordinal: number;
  name: string;
  kind: SearchRollupMeasureConfig["kind"];
  /** Plan ordinal of the summarized field for "summary" measures; `null` otherwise. */
  field_ordinal: null | number;
  /** Histogram setting of a "summary" measure; `null` when unset or not a summary. */
  histogram: null | "log2_v1";
};
39
+
40
/** One bucket interval of a rollup, as recorded in a companion plan. */
export type SearchCompanionPlanRollupInterval = {
  /** Index of the interval among the rollup's sorted interval names. */
  ordinal: number;
  /** The interval string exactly as written in the schema. */
  name: string;
  /** `name` parsed into milliseconds. */
  ms: number;
};
45
+
46
/** One rollup as recorded in a companion plan; fields are referenced by plan ordinal. */
export type SearchCompanionPlanRollup = {
  /** Index of the rollup in the plan's name-sorted rollup list. */
  ordinal: number;
  name: string;
  /** Ordinal of the rollup's timestamp field, or `null` when none resolves. */
  timestamp_field_ordinal: null | number;
  /** Ordinals of the dimension fields, listed in dimension-name order. */
  dimension_ordinals: Array<number>;
  intervals: Array<SearchCompanionPlanRollupInterval>;
  measures: Array<SearchCompanionPlanRollupMeasure>;
};
54
+
55
/**
 * The full companion plan: which families are enabled, the fields and rollups
 * they cover, and a free-form summary object.
 */
export type SearchCompanionPlan = {
  /** One enabled/disabled flag per companion family. */
  families: Record<SearchCompanionFamily, boolean>;
  fields: Array<SearchCompanionPlanField>;
  rollups: Array<SearchCompanionPlanRollup>;
  /** Untyped summary data; `{ search: null }` when the registry has no search config. */
  summary: Record<string, unknown>;
};
61
+
62
/**
 * Derives the companion plan that a schema registry's search configuration
 * asks for.
 *
 * A field is included when it is exact (and not text), a column, a text field,
 * a prefix keyword, or referenced by a rollup (timestamp, dimension, or
 * summary measure). Included fields and rollups are sorted by name and numbered
 * with ordinals; rollups refer to fields through those ordinals.
 *
 * @param registry Schema registry; when it has no `search` config the result
 *   has every family disabled and `summary` is `{ search: null }`.
 * @returns The plan, including a `summary` that repeats the per-family field
 *   and rollup selections.
 * @throws The error built by `dsError` when a rollup references a field that
 *   `search.fields` does not declare, or when a rollup interval fails to parse.
 */
export function buildDesiredSearchCompanionPlan(registry: SchemaRegistry): SearchCompanionPlan {
  const search = registry.search;
  const families: Record<SearchCompanionFamily, boolean> = {
    exact: false,
    col: false,
    fts: false,
    agg: false,
    mblk: false,
  };
  if (!search) {
    return { families, fields: [], rollups: [], summary: { search: null } };
  }

  // Collect every field some family needs, directly or through a rollup.
  const wantedFieldNames = new Set<string>();
  for (const [name, field] of Object.entries(search.fields)) {
    if (field.exact === true && field.kind !== "text") wantedFieldNames.add(name);
    if (field.column === true) wantedFieldNames.add(name);
    if (field.kind === "text" || (field.kind === "keyword" && field.prefix === true)) wantedFieldNames.add(name);
  }
  for (const rollup of Object.values(search.rollups ?? {})) {
    const timestampField = rollup.timestampField ?? search.primaryTimestampField;
    if (timestampField) wantedFieldNames.add(timestampField);
    for (const dimension of rollup.dimensions ?? []) wantedFieldNames.add(dimension);
    for (const measure of Object.values(rollup.measures)) {
      if (measure.kind === "summary") wantedFieldNames.add(measure.field);
    }
  }

  // NOTE(review): localeCompare without an explicit locale follows the
  // runtime's default locale, and these ordinals feed the hashed plan —
  // confirm the ordering is stable across deployments before changing it.
  const orderedFieldNames = Array.from(wantedFieldNames).sort((a, b) => a.localeCompare(b));
  const fieldOrdinalByName = new Map<string, number>();
  const fields = orderedFieldNames.map((name, ordinal) => {
    // Own-property lookup: a rollup may name a field that was never declared,
    // or one that collides with an Object.prototype member such as "toString".
    const field = Object.prototype.hasOwnProperty.call(search.fields, name) ? search.fields[name] : undefined;
    if (!field) {
      throw dsError(`search rollup references undeclared field ${JSON.stringify(name)}`);
    }
    fieldOrdinalByName.set(name, ordinal);
    return {
      ordinal,
      name,
      kind: field.kind,
      bindings: field.bindings.map((binding) => ({ version: binding.version, jsonPointer: binding.jsonPointer })),
      normalizer: field.normalizer ?? null,
      analyzer: field.analyzer ?? null,
      exact: field.exact === true,
      prefix: field.prefix === true,
      column: field.column === true,
      exists: field.exists === true,
      sortable: field.sortable === true,
      aggregatable: field.aggregatable === true,
      contains: field.contains === true,
      positions: field.positions === true,
    } satisfies SearchCompanionPlanField;
  });

  const colFields = fields.filter((field) => field.column);
  const exactFields = fields.filter((field) => field.exact && field.kind !== "text");
  const ftsFields = fields.filter((field) => field.kind === "text" || (field.kind === "keyword" && field.prefix));
  const rollups = Object.entries(search.rollups ?? {})
    .sort((a, b) => a[0].localeCompare(b[0]))
    .map(([name, rollup], rollupOrdinal) => {
      const intervals = [...rollup.intervals]
        .sort()
        .map((intervalName, intervalOrdinal) => {
          const parsed = parseDurationMsResult(intervalName);
          if (Result.isError(parsed)) {
            throw dsError(parsed.error.message);
          }
          return {
            ordinal: intervalOrdinal,
            name: intervalName,
            ms: parsed.value,
          } satisfies SearchCompanionPlanRollupInterval;
        });
      const measures = Object.entries(rollup.measures)
        .sort((a, b) => a[0].localeCompare(b[0]))
        .map(
          ([measureName, measure], measureOrdinal) =>
            ({
              ordinal: measureOrdinal,
              name: measureName,
              kind: measure.kind,
              field_ordinal: measure.kind === "summary" ? (fieldOrdinalByName.get(measure.field) ?? null) : null,
              histogram: measure.kind === "summary" ? (measure.histogram ?? null) : null,
            }) satisfies SearchCompanionPlanRollupMeasure
        );
      return {
        ordinal: rollupOrdinal,
        name,
        timestamp_field_ordinal: fieldOrdinalByName.get(rollup.timestampField ?? search.primaryTimestampField) ?? null,
        dimension_ordinals: [...(rollup.dimensions ?? [])]
          .sort((a, b) => a.localeCompare(b))
          .map((dimension) => fieldOrdinalByName.get(dimension))
          .filter((value): value is number => typeof value === "number"),
        intervals,
        measures,
      } satisfies SearchCompanionPlanRollup;
    });

  families.exact = exactFields.length > 0;
  families.col = colFields.length > 0;
  families.fts = ftsFields.length > 0;
  families.agg = rollups.length > 0;
  families.mblk = search.profile === "metrics";
  return {
    families,
    fields,
    rollups,
    summary: {
      primaryTimestampField: search.primaryTimestampField ?? null,
      primaryTimestampFieldOrdinal: fieldOrdinalByName.get(search.primaryTimestampField) ?? null,
      profile: search.profile ?? null,
      exactFields: exactFields.map((field) => ({
        ordinal: field.ordinal,
        name: field.name,
        kind: field.kind,
        bindings: field.bindings,
        normalizer: field.normalizer,
      })),
      colFields: colFields.map((field) => ({
        ordinal: field.ordinal,
        name: field.name,
        kind: field.kind,
        bindings: field.bindings,
        exists: field.exists,
        sortable: field.sortable,
      })),
      ftsFields: ftsFields.map((field) => ({
        ordinal: field.ordinal,
        name: field.name,
        kind: field.kind,
        bindings: field.bindings,
        exact: field.exact,
        prefix: field.prefix,
        positions: field.positions,
        analyzer: field.analyzer,
        normalizer: field.normalizer,
      })),
      aggRollups: rollups.map((rollup) => ({
        ordinal: rollup.ordinal,
        name: rollup.name,
        timestampFieldOrdinal: rollup.timestamp_field_ordinal,
        dimensions: rollup.dimension_ordinals,
        intervals: rollup.intervals.map((interval) => ({ ordinal: interval.ordinal, name: interval.name, ms: interval.ms })),
        measures: rollup.measures.map((measure) => ({
          ordinal: measure.ordinal,
          name: measure.name,
          kind: measure.kind,
          fieldOrdinal: measure.field_ordinal,
          histogram: measure.histogram,
        })),
      })),
    },
  };
}
210
+
211
/**
 * Fingerprints a plan: the SHA-256 of its `JSON.stringify` output, returned as
 * lowercase hex.
 */
export function hashSearchCompanionPlan(plan: SearchCompanionPlan): string {
  const serialized = JSON.stringify(plan);
  const hasher = createHash("sha256");
  hasher.update(serialized);
  return hasher.digest("hex");
}
214
+
215
/** Returns the first plan field called `fieldName`, or `null` when the plan has none. */
export function getPlanFieldByName(plan: SearchCompanionPlan, fieldName: string): SearchCompanionPlanField | null {
  for (const candidate of plan.fields) {
    if (candidate.name === fieldName) return candidate;
  }
  return null;
}
218
+
219
/** Returns the first plan field whose `ordinal` equals the argument, or `null` when none does. */
export function getPlanFieldByOrdinal(plan: SearchCompanionPlan, ordinal: number): SearchCompanionPlanField | null {
  for (const candidate of plan.fields) {
    if (candidate.ordinal === ordinal) return candidate;
  }
  return null;
}
222
+
223
/** Returns the first plan rollup called `rollupName`, or `null` when the plan has none. */
export function getPlanRollupByName(plan: SearchCompanionPlan, rollupName: string): SearchCompanionPlanRollup | null {
  for (const candidate of plan.rollups) {
    if (candidate.name === rollupName) return candidate;
  }
  return null;
}
226
+
227
/** Returns the first plan rollup whose `ordinal` equals the argument, or `null` when none does. */
export function getPlanRollupByOrdinal(plan: SearchCompanionPlan, ordinal: number): SearchCompanionPlanRollup | null {
  for (const candidate of plan.rollups) {
    if (candidate.ordinal === ordinal) return candidate;
  }
  return null;
}
@@ -0,0 +1,281 @@
1
+ import { Result } from "better-result";
2
+ import type { SearchFieldKind } from "../schema/registry";
3
+ import { decodeDocIds, encodeDocSet } from "./binary/docset";
4
+ import { BinaryCursor, BinaryPayloadError, BinaryWriter, concatBytes, readU16, readU32 } from "./binary/codec";
5
+ import { RestartStringTableView, encodeRestartStringTable } from "./binary/restart_strings";
6
+ import type { SearchCompanionPlan } from "./companion_plan";
7
+
8
/** Per-field input to `encodeExactSegmentCompanion`. */
export type ExactFieldInput = {
  kind: SearchFieldKind;
  /** Doc ids encoded as the field's "exists" doc set. */
  exists_docs: Array<number>;
  /** Doc ids per term; each entry is encoded as that term's postings. */
  terms: Record<string, Array<number>>;
};
13
+
14
/** Whole-section input to `encodeExactSegmentCompanion`. */
export type ExactSectionInput = {
  /** Written to the section header and passed to every doc-set encoding. */
  doc_count: number;
  /** Field inputs keyed by field name. */
  fields: Record<string, ExactFieldInput>;
};
18
+
19
/** Numeric code stored in a directory entry's kind byte, per field kind. */
const KIND_CODE: Record<SearchFieldKind, number> = {
  keyword: 0,
  text: 1,
  integer: 2,
  float: 3,
  date: 4,
  bool: 5,
};

/**
 * Inverse of `KIND_CODE` (code -> kind). It is derived from the forward table
 * so the two mappings always agree; unknown codes look up as `undefined`.
 */
const CODE_KIND: Record<number, SearchFieldKind> = Object.fromEntries(
  (Object.entries(KIND_CODE) as Array<[SearchFieldKind, number]>).map(
    ([kind, code]): [number, SearchFieldKind] => [code, kind]
  )
);
36
+
37
/**
 * Size of one field directory entry: a u16 field ordinal, a u8 kind code, a
 * u8 of padding, then twelve u32 values (term count, codec word, and an
 * offset/length pair for each of the five payloads).
 */
const FIELD_DIR_ENTRY_BYTES = 2 + 1 + 1 + 12 * 4;
38
+
39
/** Error value returned when an `.exact2` payload cannot be decoded. */
export type ExactFormatError = {
  kind: "invalid_exact_segment";
  message: string;
};
40
+
41
/** Wraps `message` in an `ExactFormatError` and returns it as an error `Result`. */
function invalidExact<T = never>(message: string): Result<T, ExactFormatError> {
  const failure: ExactFormatError = { kind: "invalid_exact_segment", message };
  return Result.err(failure);
}
44
+
45
/**
 * Read-only view of a byte range as consecutive little-endian u32 values.
 * Trailing bytes that do not fill a whole u32 are ignored.
 */
class U32LeView {
  /** Number of complete u32 values in the range. */
  readonly length: number;
  private readonly view: DataView;

  constructor(private readonly bytes: Uint8Array) {
    const { buffer, byteOffset, byteLength } = bytes;
    this.view = new DataView(buffer, byteOffset, byteLength);
    this.length = Math.floor(byteLength / 4);
  }

  /** Returns the value at `index`, or 0 when `index` is negative or past the end. */
  get(index: number): number {
    const outOfRange = index < 0 || index >= this.length;
    return outOfRange ? 0 : this.view.getUint32(index * 4, true);
  }
}
59
+
60
/**
 * Lazy reader for one field of a decoded exact section. Doc-id lists are
 * decoded on first request and then served from an in-memory cache.
 */
export class ExactFieldView {
  private readonly termsView: RestartStringTableView;
  private readonly docFreqs: U32LeView;
  private readonly postingOffsets: U32LeView;
  private existsDocIdsCache: number[] | null = null;
  private readonly termDocIdsCache = new Map<number, number[]>();

  constructor(
    readonly name: string,
    readonly kind: SearchFieldKind,
    private readonly docCount: number,
    private readonly existsCodec: number,
    private readonly existsPayload: Uint8Array,
    dictPayload: Uint8Array,
    docFreqPayload: Uint8Array,
    postingOffsetsPayload: Uint8Array,
    private readonly postingsPayload: Uint8Array
  ) {
    this.termsView = new RestartStringTableView(dictPayload);
    this.docFreqs = new U32LeView(docFreqPayload);
    this.postingOffsets = new U32LeView(postingOffsetsPayload);
  }

  /** Doc ids of the field's "exists" set; decoded once, then cached. */
  existsDocIds(): number[] {
    const memo = this.existsDocIdsCache;
    if (memo) return memo;
    const decoded = decodeDocIds(this.docCount, this.existsCodec, this.existsPayload);
    this.existsDocIdsCache = decoded;
    return decoded;
  }

  /** Looks `term` up in the term dictionary; returns whatever the dictionary's `lookup` yields. */
  lookupTerm(term: string): number | null {
    return this.termsView.lookup(term);
  }

  /** Doc-frequency table entry for `termOrdinal` (0 when out of range). */
  docFreq(termOrdinal: number): number {
    return this.docFreqs.get(termOrdinal);
  }

  /** Doc ids posted for `termOrdinal`; decoded once per ordinal, then cached. */
  docIds(termOrdinal: number): number[] {
    const memo = this.termDocIdsCache.get(termOrdinal);
    if (memo) return memo;
    const from = this.postingOffsets.get(termOrdinal);
    // A zero next-offset (including an out-of-range read) collapses the slice to empty.
    const to = this.postingOffsets.get(termOrdinal + 1) || from;
    const record = this.postingsPayload.subarray(from, to);
    // Each posting record is one codec byte followed by the encoded doc set.
    const codec = (record[0] ?? 0) & 0xff;
    const decoded = decodeDocIds(this.docCount, codec, record.subarray(1));
    this.termDocIdsCache.set(termOrdinal, decoded);
    return decoded;
  }
}
109
+
110
/** A decoded exact section: its doc count plus field views, indexed by field name. */
export class ExactSectionView {
  private readonly fieldByName: Map<string, ExactFieldView>;

  constructor(readonly docCount: number, readonly fields: ExactFieldView[]) {
    // When two views share a name, the later one wins the lookup.
    this.fieldByName = new Map(fields.map((field): [string, ExactFieldView] => [field.name, field]));
  }

  /** Returns the view registered under `fieldName`, or `null` when there is none. */
  getField(fieldName: string): ExactFieldView | null {
    const found = this.fieldByName.get(fieldName);
    return found === undefined ? null : found;
  }
}
121
+
122
/**
 * Serializes the exact-match section of a segment companion.
 *
 * Layout: a header (u32 doc count, u16 field count, u16 zero), then one
 * `FIELD_DIR_ENTRY_BYTES` directory entry per encoded field, then every
 * field's payloads back to back in the order: exists doc set, term
 * dictionary, doc-frequency table, posting-offset table, postings data. All
 * offsets in the directory are measured from the start of the returned bytes;
 * posting offsets are measured from the start of the field's postings data.
 *
 * @param input Doc count and per-field terms. Only fields that `input.fields`
 *   itself defines are encoded.
 * @param plan Supplies field ordinals; only plan fields that are exact and not
 *   of kind "text" are encoded, in ascending ordinal order.
 * @returns The encoded section.
 */
export function encodeExactSegmentCompanion(input: ExactSectionInput, plan: SearchCompanionPlan): Uint8Array {
  type EncodedField = {
    fieldOrdinal: number;
    kind: SearchFieldKind;
    termCount: number;
    existsCodec: number;
    exists: Uint8Array;
    dict: Uint8Array;
    dfs: Uint8Array;
    postingOffsets: Uint8Array;
    postings: Uint8Array;
  };

  // Own-property lookup: a plan field named like an Object.prototype member
  // (for example "toString") must not be mistaken for supplied input.
  const inputFor = (name: string): ExactFieldInput | undefined =>
    Object.prototype.hasOwnProperty.call(input.fields, name) ? input.fields[name] : undefined;

  const exactPlanFields = plan.fields
    .filter((field) => field.exact && field.kind !== "text")
    .sort((a, b) => a.ordinal - b.ordinal);

  const encodedFields: EncodedField[] = [];
  for (const planField of exactPlanFields) {
    const field = inputFor(planField.name);
    if (!field) continue;

    // NOTE(review): localeCompare ordering follows the runtime's default
    // locale; confirm it matches the ordering the dictionary lookup expects.
    const terms = Object.keys(field.terms).sort((a, b) => a.localeCompare(b));
    const encodedExists = encodeDocSet(input.doc_count, field.exists_docs);
    const dfWriter = new BinaryWriter();
    const postingOffsetWriter = new BinaryWriter();
    const postingsWriter = new BinaryWriter();
    let postingOffset = 0;
    for (const term of terms) {
      const encodedPostings = encodeDocSet(input.doc_count, field.terms[term] ?? []);
      dfWriter.writeU32(encodedPostings.docIds.length);
      postingOffsetWriter.writeU32(postingOffset);
      // Each posting record is one codec byte followed by the encoded doc set.
      postingsWriter.writeU8(encodedPostings.codec);
      postingsWriter.writeBytes(encodedPostings.payload);
      postingOffset += 1 + encodedPostings.payload.byteLength;
    }
    // Sentinel entry so the last term's record has an end offset.
    postingOffsetWriter.writeU32(postingOffset);

    encodedFields.push({
      fieldOrdinal: planField.ordinal,
      kind: field.kind,
      termCount: terms.length,
      existsCodec: encodedExists.codec,
      exists: encodedExists.payload,
      dict: encodeRestartStringTable(terms),
      dfs: dfWriter.finish(),
      postingOffsets: postingOffsetWriter.finish(),
      postings: postingsWriter.finish(),
    });
  }

  const header = new BinaryWriter();
  header.writeU32(input.doc_count);
  header.writeU16(encodedFields.length);
  header.writeU16(0);

  // Payloads start right after the directory; offsets accumulate as each
  // entry is written, in the same order the payloads are concatenated below.
  let payloadOffset = header.length + FIELD_DIR_ENTRY_BYTES * encodedFields.length;
  const directory = new BinaryWriter();
  for (const field of encodedFields) {
    directory.writeU16(field.fieldOrdinal);
    directory.writeU8(KIND_CODE[field.kind] ?? 0);
    directory.writeU8(0);
    directory.writeU32(field.termCount);
    directory.writeU32(payloadOffset);
    directory.writeU32(field.exists.byteLength);
    payloadOffset += field.exists.byteLength;
    // The codec lives in the top byte. `<<` produces a signed int32, so
    // `>>> 0` converts it back to the unsigned value writeU32 is given.
    directory.writeU32(((field.existsCodec & 0xff) << 24) >>> 0);
    for (const section of [field.dict, field.dfs, field.postingOffsets, field.postings]) {
      directory.writeU32(payloadOffset);
      directory.writeU32(section.byteLength);
      payloadOffset += section.byteLength;
    }
  }

  return concatBytes([
    header.finish(),
    directory.finish(),
    ...encodedFields.flatMap((field) => [field.exists, field.dict, field.dfs, field.postingOffsets, field.postings]),
  ]);
}
237
+
238
/**
 * Parses an encoded exact section into an `ExactSectionView`.
 *
 * Reads the header (u32 doc count, u16 field count, u16 unused), then one
 * directory entry per field. Each entry's ordinal is resolved to a name
 * through `plan`, and its five payload ranges are bounds-checked and sliced
 * without copying.
 *
 * @returns The view on success. Otherwise an `invalid_exact_segment` error:
 *   for a truncated directory, an ordinal missing from the plan, an unknown
 *   kind code, an out-of-bounds payload range, or anything thrown while reading.
 */
export function decodeExactSegmentCompanionResult(bytes: Uint8Array, plan: SearchCompanionPlan): Result<ExactSectionView, ExactFormatError> {
  try {
    const header = new BinaryCursor(bytes);
    const docCount = header.readU32();
    const fieldCount = header.readU16();
    header.readU16(); // skip the unused u16 that follows the field count
    const directoryStart = header.offset;

    const views: ExactFieldView[] = [];
    for (let slot = 0; slot < fieldCount; slot += 1) {
      const base = directoryStart + slot * FIELD_DIR_ENTRY_BYTES;
      if (base + FIELD_DIR_ENTRY_BYTES > bytes.byteLength) {
        return invalidExact("invalid .exact2 directory");
      }

      const fieldOrdinal = readU16(bytes, base);
      const kindCode = bytes[base + 2]!;
      // The exists codec is stored in the top byte of the u32 at +16.
      const existsCodec = readU32(bytes, base + 16) >>> 24;

      const planField = plan.fields.find((candidate) => candidate.ordinal === fieldOrdinal);
      if (!planField) {
        return invalidExact(`missing .exact2 plan field ordinal ${fieldOrdinal}`);
      }
      const kind = CODE_KIND[kindCode];
      if (!kind) {
        return invalidExact("invalid .exact2 field kind");
      }

      // Each payload is described by an (offset, length) pair of u32 values.
      const payloadAt = (pairOffset: number, message: string): Uint8Array =>
        slicePayload(bytes, readU32(bytes, base + pairOffset), readU32(bytes, base + pairOffset + 4), message);

      const existsPayload = payloadAt(8, "invalid .exact2 exists payload");
      const dictPayload = payloadAt(20, "invalid .exact2 dict payload");
      const docFreqPayload = payloadAt(28, "invalid .exact2 docfreq payload");
      const postingOffsetsPayload = payloadAt(36, "invalid .exact2 posting-offset payload");
      const postingsPayload = payloadAt(44, "invalid .exact2 postings payload");

      views.push(
        new ExactFieldView(
          planField.name,
          kind,
          docCount,
          existsCodec,
          existsPayload,
          dictPayload,
          docFreqPayload,
          postingOffsetsPayload,
          postingsPayload
        )
      );
    }
    return Result.ok(new ExactSectionView(docCount, views));
  } catch (e: unknown) {
    // Prefer the thrown value's `message`; fall back to the value itself.
    const detail = (e as { message?: unknown } | null | undefined)?.message;
    return invalidExact(String(detail ?? e));
  }
}
275
+
276
/**
 * Returns the `length` bytes of `bytes` starting at `offset`, as a view that
 * shares the underlying buffer.
 *
 * @throws BinaryPayloadError carrying `message` when the offset or length is
 *   negative or the range runs past the end of `bytes`.
 */
function slicePayload(bytes: Uint8Array, offset: number, length: number, message: string): Uint8Array {
  const end = offset + length;
  const outOfBounds = offset < 0 || length < 0 || end > bytes.byteLength;
  if (outOfBounds) {
    throw new BinaryPayloadError(message);
  }
  return bytes.subarray(offset, end);
}
@@ -0,0 +1,55 @@
1
+ import { Result } from "better-result";
2
+ import { ExactSectionView } from "./exact_format";
3
+ import type { SearchExactClause } from "./query";
4
+
5
/** Doc ids a clause's result is restricted to; `null` means no restriction. */
type CandidateDocIds = null | ReadonlySet<number>;
6
+
7
/**
 * Narrows `target` in place to the ids that also occur in `next` and returns
 * it. When there is no `target` yet, `next` itself is returned unchanged.
 */
function intersectInto(target: Set<number> | null, next: Set<number>): Set<number> {
  if (target == null) return next;
  // Work from a snapshot so removals cannot disturb the traversal.
  const stale = [...target].filter((docId) => !next.has(docId));
  stale.forEach((docId) => target.delete(docId));
  return target;
}
14
+
15
/**
 * Resolves one exact clause to the set of doc ids posted for its term.
 *
 * @param candidateDocIds When given, only doc ids contained in it are kept.
 * @returns An empty set when the term is not in the field's dictionary; an
 *   error when the companion has no such field.
 */
function docsForClauseResult(
  companion: ExactSectionView,
  clause: SearchExactClause,
  candidateDocIds: CandidateDocIds = null
): Result<Set<number>, { message: string; docFreq: number }> {
  const field = companion.getField(clause.field);
  if (!field) {
    return Result.err({ message: `missing .exact2 field ${clause.field}`, docFreq: Number.MAX_SAFE_INTEGER });
  }

  const termOrdinal = field.lookupTerm(clause.canonicalValue);
  if (termOrdinal == null) return Result.ok(new Set());

  const postings = field.docIds(termOrdinal);
  const allowed = candidateDocIds;
  const kept = allowed ? postings.filter((docId) => allowed.has(docId)) : postings;
  return Result.ok(new Set<number>(kept));
}
30
+
31
/**
 * Returns the doc ids that satisfy every exact clause.
 *
 * Clauses are evaluated from the lowest doc frequency upwards, so the running
 * intersection shrinks as early as possible, and evaluation stops once it is
 * empty.
 *
 * @returns An empty set when `clauses` is empty; an error when a clause names
 *   a field the companion does not contain.
 */
export function filterDocIdsByExactClausesResult(args: {
  companion: ExactSectionView;
  clauses: SearchExactClause[];
}): Result<Set<number>, { message: string }> {
  const { companion, clauses } = args;
  if (clauses.length === 0) return Result.ok(new Set());

  // First pass: look up each clause's doc frequency (0 for unknown terms).
  const planned: Array<{ clause: SearchExactClause; docFreq: number }> = [];
  for (const clause of clauses) {
    const field = companion.getField(clause.field);
    if (!field) {
      return Result.err({ message: `missing .exact2 field ${clause.field}` });
    }
    const termOrdinal = field.lookupTerm(clause.canonicalValue);
    const docFreq = termOrdinal == null ? 0 : field.docFreq(termOrdinal);
    planned.push({ clause, docFreq });
  }
  planned.sort((left, right) => left.docFreq - right.docFreq);

  // Second pass: intersect clause by clause, rarest term first.
  let intersection: Set<number> | null = null;
  for (const { clause } of planned) {
    const clauseRes = docsForClauseResult(companion, clause, intersection);
    if (Result.isError(clauseRes)) {
      return Result.err({ message: clauseRes.error.message });
    }
    intersection = intersectInto(intersection, clauseRes.value);
    if (intersection.size === 0) break;
  }

  return Result.ok(intersection ?? new Set());
}