@flrande/bak-extension 0.6.11 → 0.6.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,790 @@
1
+ import type {
2
+ DynamicDataSchemaHint,
3
+ FreshnessTimestampCategory,
4
+ InspectPageDataCandidateProbe,
5
+ InspectPageDataRecommendation,
6
+ InspectPageDataResult,
7
+ InspectPageDataSource,
8
+ InspectPageDataSourceMapping,
9
+ NetworkEntry,
10
+ TableExtractionMetadata,
11
+ TableHandle,
12
+ TableIntelligence,
13
+ TableSchema
14
+ } from '@flrande/bak-protocol';
15
+
16
/**
 * One date-like string discovered while walking a sampled value
 * (produced by `collectTimestampProbes`).
 */
interface TimestampProbe {
  /** Location of the string inside the walked value, built from dotted keys and `[index]` segments. */
  path: string;
  /** The string exactly as it was found; it is stored untrimmed. */
  value: string;
  /** Category assigned by `classifyTimestamp` for this path/value pair. */
  category: FreshnessTimestampCategory;
}
21
+
22
/** One inline JSON payload offered to the source-mapping report. */
export interface InlineJsonInspectionSource {
  /** Display name; copied to the resulting data source's `label`. */
  label: string;
  /** Location string; copied to the data source's `path` and embedded in its `sourceId`. */
  path: string;
  /** Payload sample from which sample rows and comparable values are derived. */
  sample: unknown;
  /** Copied verbatim to the data source's `sampleSize`. */
  sampleSize: number | null;
  /** Copied verbatim to the data source's `schemaHint`. */
  schemaHint: DynamicDataSchemaHint | null;
  /** Copied verbatim to the data source's `lastObservedAt`. */
  lastObservedAt: number | null;
  /** Timestamp probes associated with the payload. */
  timestamps: TimestampProbe[];
}
31
+
32
/** Raw observations about a table region, consumed by `buildTableIntelligence`. */
export interface TableIntelligenceInput {
  /** Table flavour; `'html'` and `'dataTables'` select the `dataSource` extraction mode. */
  kind: TableHandle['kind'];
  /** Number of rows currently mounted; floored and clamped to be non-negative before use. */
  visibleRowCount: number;
  /** Not read by `buildTableIntelligence` in this file. */
  rowCount?: number;
  /** Expected total row count; ignored unless it is a finite number greater than zero. */
  estimatedTotalRows?: number;
  /** Whether a scrollable container was detected for the table region. */
  hasScrollContainer: boolean;
  /** Whether rows carry transform offsets; on its own this marks the table as virtualized. */
  hasTranslatedRows: boolean;
  /** Highest row index observed; ignored unless finite. */
  maxObservedRowIndex?: number;
  /** Lowest row index observed; ignored unless finite. */
  minObservedRowIndex?: number;
  /** Whether `kind` was recognised as a known grid; only adds a signal. */
  knownGridKind: boolean;
}
43
+
44
/** A table handle bundled with its schema and a few extracted rows. */
export interface TableAnalysis {
  /** Handle of the analysed table. */
  table: TableHandle;
  /** Schema whose column labels are compared against candidate sources. */
  schema: TableSchema;
  /** Sample rows whose cell values are compared against source sample values. */
  sampleRows: Record<string, unknown>[];
}
49
+
50
/** A candidate data source together with the material used to score it against tables. */
export interface DynamicSourceAnalysis {
  /** Public description of the source. */
  source: InspectPageDataSource;
  /** Rows sampled from the source payload. */
  sampleRows: Record<string, unknown>[];
  /** Distinct, lower-cased primitive cell values taken from `sampleRows`. */
  sampleValues: Set<string>;
  /** Normalised column names from the source's schema hint (empty names removed). */
  schemaColumns: string[];
}
56
+
57
/** Everything `buildSourceMappingReport` needs to relate tables to data sources. */
export interface SourceMappingInput {
  /** Tables to find sources for. */
  tables: TableAnalysis[];
  /** Probed window globals; each becomes a `windowGlobal` source. */
  windowSources: InspectPageDataCandidateProbe[];
  /** Inline JSON payloads; each becomes an `inlineJson` source. */
  inlineJsonSources: InlineJsonInspectionSource[];
  /** Recent network entries; those with a parseable JSON body become `networkResponse` sources. */
  recentNetwork: NetworkEntry[];
  /** Reference time in epoch milliseconds; `Date.now()` is used when omitted. */
  now?: number;
}
64
+
65
/** Result of `buildSourceMappingReport`. */
export interface SourceMappingReport {
  /** Every discovered source, in window → inline JSON → network order. */
  dataSources: InspectPageDataSource[];
  /** Table/source pairs with at least one basis, sorted by confidence, then table id, then source id. */
  sourceMappings: InspectPageDataSourceMapping[];
  /** Suggested follow-up commands (at most six). */
  recommendedNextActions: InspectPageDataRecommendation[];
  /** The per-source analyses the mappings were scored from. */
  sourceAnalyses: DynamicSourceAnalysis[];
}
71
+
72
/** Outcome of projecting a replayed JSON response onto a known table schema. */
export interface ReplaySchemaMatch {
  /** Handle of the table that was selected. */
  table: TableHandle;
  /** Schema of the selected table. */
  schema: TableSchema;
  /** Response rows re-keyed by the schema's column labels. */
  mappedRows: Record<string, unknown>[];
  /** Path of the row collection inside the response, as reported by `extractStructuredRows`. */
  mappingSource: string;
}
78
+
79
/** A row collection located inside an arbitrary value by `extractStructuredRows`. */
interface StructuredRowsCandidate {
  /** The array that was found (the original reference, not a copy). */
  rows: unknown[];
  /** Where the array lives, starting from the caller-supplied root path. */
  path: string;
  /** Shape of the first non-null element; an empty array is reported as `'object'`. */
  rowType: 'object' | 'array' | 'scalar';
}
84
+
85
/**
 * Path keywords that classify a timestamp as `'data'`.
 * All three patterns match whole words only (`\b…\b`), so a keyword embedded in a
 * longer run of word characters (e.g. `lastupdated`) does not match.
 */
const DATA_PATTERN = /\b(updated|update|updatedat|asof|timestamp|generated|generatedat|refresh|freshness|latest|last|quote|trade|price|flow|market|time|snapshot|signal)\b/i;

/** Path keywords that classify a timestamp as `'contract'`. */
const CONTRACT_PATTERN = /\b(expiry|expiration|expires|option|contract|strike|maturity|dte|call|put|exercise)\b/i;

/** Path keywords that classify a timestamp as `'event'`. */
const EVENT_PATTERN = /\b(earnings|event|report|dividend|split|meeting|fomc|release|filing)\b/i;

/** Wrapper keys probed, in this order, when looking for a row array inside an object. */
const ROW_CANDIDATE_KEYS = ['data', 'rows', 'results', 'items', 'records', 'entries'] as const;
91
+
92
/**
 * Reduces a column label or object key to a comparison key: trimmed, lower-cased,
 * with everything that is not a letter or digit removed.
 *
 * Letters and digits are recognised through Unicode property escapes, so non-Latin
 * names (for example CJK or accented labels) keep their characters instead of all
 * collapsing to the empty string. ASCII input normalises exactly as before.
 *
 * @param value Raw label or key.
 * @returns The comparison key; empty when the input has no letters or digits.
 */
function normalizeColumnName(value: string): string {
  return value.trim().toLowerCase().replace(/[^\p{L}\p{N}]+/gu, '');
}
95
+
96
/**
 * Converts a primitive cell value into a trimmed, lower-cased string for
 * value-overlap comparison.
 *
 * @returns `null` for `null`, `undefined`, any object (arrays included) and for
 *          values whose string form is blank.
 */
function normalizedComparableValue(value: unknown): string | null {
  // `typeof null` is 'object', but null is listed explicitly for readability.
  if (value == null || typeof value === 'object') {
    return null;
  }
  const comparable = String(value).trim().toLowerCase();
  return comparable === '' ? null : comparable;
}
106
+
107
/**
 * Ascending numeric comparator for `Array.prototype.sort`.
 *
 * @returns A negative number when `left < right`, positive when `left > right`, zero when equal.
 */
function compareNumbers(left: number, right: number): number {
  const difference = left - right;
  return difference;
}
110
+
111
/**
 * Finds the most recent instant among the given timestamp probes.
 *
 * Each probe's `value` is trimmed before parsing, matching how the probes are
 * validated when collected and how they are classified; parsing the raw,
 * whitespace-padded string could otherwise fail or take a different parser path.
 *
 * @param timestamps Probes whose `value` strings are parsed with `Date.parse`.
 * @returns The largest parseable time in epoch milliseconds, or `null` when no
 *          probe parses to a finite time.
 */
function latestTimestamp(timestamps: TimestampProbe[]): number | null {
  let latest: number | null = null;
  for (const timestamp of timestamps) {
    const parsed = Date.parse(timestamp.value.trim());
    if (Number.isFinite(parsed) && (latest === null || parsed > latest)) {
      latest = parsed;
    }
  }
  return latest;
}
117
+
118
/**
 * Produces a bounded preview of an arbitrary value.
 *
 * - Strings are cut to their first 160 characters.
 * - Functions are replaced by the marker `'[Function]'`.
 * - Arrays keep their first 3 elements and objects their first 8 keys, each sampled recursively.
 * - Properties whose read throws are replaced by `'[Unreadable]'`.
 * - Once `depth` reaches 2, objects and arrays are returned as-is (by reference, unsampled).
 *
 * @param value Value to preview.
 * @param depth Current nesting level; callers normally omit it.
 * @returns The preview.
 */
export function sampleValue(value: unknown, depth = 0): unknown {
  const isExpandable = typeof value === 'object' && value !== null && depth < 2;
  if (!isExpandable) {
    if (typeof value === 'string') {
      // slice is a no-op for strings of 160 characters or fewer.
      return value.slice(0, 160);
    }
    return typeof value === 'function' ? '[Function]' : value;
  }

  if (Array.isArray(value)) {
    const head = value.slice(0, 3);
    return head.map((item) => sampleValue(item, depth + 1));
  }

  const source = value as Record<string, unknown>;
  const sampled: Record<string, unknown> = {};
  Object.keys(source)
    .slice(0, 8)
    .forEach((key) => {
      try {
        sampled[key] = sampleValue(source[key], depth + 1);
      } catch {
        // Reading the property (or sampling it) threw; keep the key visible.
        sampled[key] = '[Unreadable]';
      }
    });
  return sampled;
}
141
+
142
/**
 * Reports how many items a value holds.
 *
 * @returns The array length, the number of own enumerable keys for other objects,
 *          or `null` for primitives, `null` and `undefined`.
 */
export function estimateSampleSize(value: unknown): number | null {
  if (typeof value !== 'object' || value === null) {
    return null;
  }
  return Array.isArray(value) ? value.length : Object.keys(value).length;
}
151
+
152
/**
 * Assigns a freshness category to a timestamp from the path it was found at.
 *
 * The lower-cased path is tested against the data, contract and event keyword
 * patterns, in that order; the first hit wins. When no keyword matches, a value
 * that parses to more than 36 hours after `now` is treated as `'contract'`,
 * anything else as `'unknown'`.
 *
 * Keywords match on word boundaries of the lower-cased path, so a camelCase key
 * such as `expiryDate` (lower-cased to `expirydate`) is not matched by `expiry`.
 *
 * @param path  Location of the timestamp string.
 * @param value The timestamp string; trimmed before parsing.
 * @param now   Reference time in epoch milliseconds.
 */
function classifyTimestamp(path: string, value: string, now = Date.now()): FreshnessTimestampCategory {
  const loweredPath = path.toLowerCase();
  const pathRules: Array<[RegExp, FreshnessTimestampCategory]> = [
    [DATA_PATTERN, 'data'],
    [CONTRACT_PATTERN, 'contract'],
    [EVENT_PATTERN, 'event']
  ];
  for (const [pattern, category] of pathRules) {
    if (pattern.test(loweredPath)) {
      return category;
    }
  }

  const horizon = now + 36 * 60 * 60 * 1000;
  const instant = Date.parse(value.trim());
  if (Number.isFinite(instant) && instant > horizon) {
    return 'contract';
  }
  return 'unknown';
}
166
+
167
/**
 * Walks a value and records every non-blank string that `Date.parse` accepts.
 *
 * The walk looks at the first 3 elements of each array and the first 8 keys of
 * each object, stops descending into containers once depth 3 is reached, and
 * stops collecting once `limit` probes exist. Errors thrown while visiting an
 * object property are swallowed so one unreadable value does not end the walk.
 *
 * @param value   Root value to walk.
 * @param path    Path label for the root; child paths append `.key` / `[index]`.
 * @param options `now` is forwarded to the classifier (default `Date.now()`);
 *                `limit` caps the number of probes (floored, minimum 1, default 16).
 * @returns Probes in discovery order; `value` holds the original, untrimmed string.
 */
export function collectTimestampProbes(
  value: unknown,
  path: string,
  options: { now?: number; limit?: number } = {}
): TimestampProbe[] {
  const probes: TimestampProbe[] = [];
  const now = typeof options.now === 'number' ? options.now : Date.now();
  const limit = typeof options.limit === 'number' ? Math.max(1, Math.floor(options.limit)) : 16;

  function walk(node: unknown, nodePath: string, depth: number): void {
    if (probes.length >= limit) {
      return;
    }

    if (typeof node === 'string') {
      const text = node.trim();
      if (text.length > 0 && Number.isFinite(Date.parse(text))) {
        probes.push({
          path: nodePath,
          value: node,
          category: classifyTimestamp(nodePath, node, now)
        });
      }
      // Strings are leaves whether or not they parsed.
      return;
    }

    if (depth >= 3 || node === null || node === undefined) {
      return;
    }

    if (Array.isArray(node)) {
      for (const [index, entry] of node.slice(0, 3).entries()) {
        walk(entry, `${nodePath}[${index}]`, depth + 1);
      }
      return;
    }

    if (typeof node !== 'object') {
      return;
    }

    const record = node as Record<string, unknown>;
    for (const key of Object.keys(record).slice(0, 8)) {
      const childPath = nodePath ? `${nodePath}.${key}` : key;
      try {
        walk(record[key], childPath, depth + 1);
      } catch {
        // Ignore unreadable nested values.
      }
    }
  }

  walk(value, path, 0);
  return probes;
}
214
+
215
/**
 * Describes the rough shape of a value.
 *
 * - Row collections of objects → `rows-object` with up to 12 keys of the first plain-object row.
 * - Row collections of arrays → `rows-array` with positional names `Column 1…n` from the first array row.
 * - Any other array → `array`; any other object → `object` with up to 12 of its keys.
 * - `null` / `undefined` → `null`; remaining primitives → `scalar`.
 *
 * An empty array is reported by `extractStructuredRows` as an object row
 * collection, so it yields `rows-object` with no columns.
 */
export function inferSchemaHint(value: unknown): DynamicDataSchemaHint | null {
  const candidate = extractStructuredRows(value);
  if (candidate !== null) {
    switch (candidate.rowType) {
      case 'object': {
        const record = candidate.rows.find(
          (row): row is Record<string, unknown> => typeof row === 'object' && row !== null && !Array.isArray(row)
        );
        const columns = record ? Object.keys(record).slice(0, 12) : [];
        return { kind: 'rows-object', columns };
      }
      case 'array': {
        const tuple = candidate.rows.find((row): row is unknown[] => Array.isArray(row));
        const columns = tuple ? tuple.map((_, index) => `Column ${index + 1}`) : [];
        return { kind: 'rows-array', columns };
      }
      default:
        // Scalar rows fall through to the generic shape checks below.
        break;
    }
  }

  if (Array.isArray(value)) {
    return { kind: 'array' };
  }
  if (typeof value === 'object' && value !== null) {
    return {
      kind: 'object',
      columns: Object.keys(value as Record<string, unknown>).slice(0, 12)
    };
  }
  return value === null || value === undefined ? null : { kind: 'scalar' };
}
249
+
250
/**
 * Locates a row array inside a value.
 *
 * An array is returned directly; its `rowType` comes from the first element that
 * is neither `null` nor `undefined` (an empty array counts as `'object'`). For a
 * non-array object, the first wrapper key in `ROW_CANDIDATE_KEYS` that holds an
 * array is used and the key is appended to `path`. Anything else yields `null`.
 *
 * @param value Value to inspect.
 * @param path  Path label of `value`.
 */
export function extractStructuredRows(value: unknown, path = '$'): StructuredRowsCandidate | null {
  if (!Array.isArray(value)) {
    if (typeof value !== 'object' || value === null) {
      return null;
    }
    const container = value as Record<string, unknown>;
    const wrapperKey = ROW_CANDIDATE_KEYS.find((key) => Array.isArray(container[key]));
    return wrapperKey === undefined ? null : extractStructuredRows(container[wrapperKey], `${path}.${wrapperKey}`);
  }

  let rowType: StructuredRowsCandidate['rowType'] = 'scalar';
  if (value.length === 0) {
    rowType = 'object';
  } else {
    const firstPresent = value.find((item) => item !== null && item !== undefined);
    if (Array.isArray(firstPresent)) {
      rowType = 'array';
    } else if (firstPresent && typeof firstPresent === 'object') {
      rowType = 'object';
    }
  }
  return { rows: value, path, rowType };
}
275
+
276
/**
 * Coerces one row into an object keyed by column name.
 *
 * - Plain objects are returned unchanged (same reference).
 * - Arrays are keyed by `fallbackColumns[index]`, or `Column <n>` where no name is supplied.
 * - `null` / `undefined` give `null`.
 * - Any other primitive is wrapped as `{ Value: row }`.
 */
function toObjectRow(row: unknown, fallbackColumns: string[] = []): Record<string, unknown> | null {
  if (row === null || row === undefined) {
    return null;
  }
  if (Array.isArray(row)) {
    return row.reduce<Record<string, unknown>>((mapped, cell, index) => {
      const columnName = fallbackColumns[index] ?? `Column ${index + 1}`;
      mapped[columnName] = cell;
      return mapped;
    }, {});
  }
  if (typeof row === 'object') {
    return row as Record<string, unknown>;
  }
  return { Value: row };
}
292
+
293
/**
 * Extracts up to `limit` object-shaped sample rows from a value.
 *
 * When no row collection is found, the value itself is treated as a single row.
 * For array rows, positional column names are sized from the first row.
 * Rows that are `null` or `undefined` are dropped.
 */
function sampleRowsFromValue(value: unknown, limit = 5): Array<Record<string, unknown>> {
  const candidate = extractStructuredRows(value);
  if (candidate === null) {
    const onlyRow = toObjectRow(value);
    return onlyRow === null ? [] : [onlyRow];
  }

  const headRow = candidate.rows[0];
  const width = candidate.rowType === 'array' && Array.isArray(headRow) ? headRow.length : 0;
  const positionalNames = Array.from({ length: width }, (_, index) => `Column ${index + 1}`);

  const sampled: Array<Record<string, unknown>> = [];
  for (const row of candidate.rows.slice(0, limit)) {
    const mapped = toObjectRow(row, positionalNames);
    if (mapped !== null) {
      sampled.push(mapped);
    }
  }
  return sampled;
}
308
+
309
/**
 * Gathers distinct comparable cell values from sample rows, scanning rows in
 * order and stopping as soon as 24 distinct values have been collected.
 */
function collectSampleValues(rows: Array<Record<string, unknown>>): Set<string> {
  const MAX_VALUES = 24;
  const seen = new Set<string>();
  for (const row of rows) {
    for (const cell of Object.values(row)) {
      const comparable = normalizedComparableValue(cell);
      if (comparable !== null) {
        seen.add(comparable);
      }
      if (seen.size >= MAX_VALUES) {
        return seen;
      }
    }
  }
  return seen;
}
324
+
325
/**
 * Pairs a data source with the rows, values and normalised schema columns used
 * for scoring it against tables.
 *
 * @param source Public description of the source.
 * @param sample Payload (or row array) to sample rows from.
 */
function buildSourceAnalysis(source: InspectPageDataSource, sample: unknown): DynamicSourceAnalysis {
  const sampleRows = sampleRowsFromValue(sample);
  const sampleValues = collectSampleValues(sampleRows);
  const hintedColumns = source.schemaHint?.columns ?? [];
  const schemaColumns = hintedColumns
    .map((column) => normalizeColumnName(column))
    // Names that normalise to nothing cannot be matched meaningfully.
    .filter((column) => column.length > 0);
  return { source, sampleRows, sampleValues, schemaColumns };
}
334
+
335
/**
 * Parses a network entry's response preview as JSON.
 *
 * @returns The parsed value, or `null` when the preview is missing or blank, the
 *          body is truncated or binary, the body neither declares a JSON content
 *          type nor starts with `{` / `[`, or parsing fails. A body that is the
 *          JSON literal `null` is indistinguishable from these failures.
 */
function parseNetworkBody(entry: NetworkEntry): unknown | null {
  if (typeof entry.responseBodyPreview !== 'string') {
    return null;
  }
  const body = entry.responseBodyPreview.trim();
  if (body.length === 0 || entry.responseBodyTruncated === true || entry.binary === true) {
    return null;
  }

  const declaredJson = typeof entry.contentType === 'string' && entry.contentType.toLowerCase().includes('json');
  const looksLikeJson = body.startsWith('{') || body.startsWith('[');
  if (!declaredJson && !looksLikeJson) {
    return null;
  }

  try {
    return JSON.parse(body);
  } catch {
    return null;
  }
}
350
+
351
/**
 * Turns each probed window global into a `windowGlobal` source analysis.
 * The probe's name serves as both label and path, and as the suffix of the source id.
 */
function buildWindowSources(candidates: InspectPageDataCandidateProbe[]): DynamicSourceAnalysis[] {
  return candidates.map(({ name, sample, sampleSize, schemaHint, lastObservedAt }) => {
    const source: InspectPageDataSource = {
      sourceId: `windowGlobal:${name}`,
      type: 'windowGlobal',
      label: name,
      path: name,
      sampleSize,
      schemaHint,
      lastObservedAt
    };
    return buildSourceAnalysis(source, sample);
  });
}
365
+
366
/**
 * Turns each inline JSON payload into an `inlineJson` source analysis.
 * Source ids carry the payload's 1-based position followed by its path.
 */
function buildInlineJsonAnalyses(sources: InlineJsonInspectionSource[]): DynamicSourceAnalysis[] {
  return sources.map(({ label, path, sample, sampleSize, schemaHint, lastObservedAt }, position) => {
    const ordinal = position + 1;
    const source: InspectPageDataSource = {
      sourceId: `inlineJson:${ordinal}:${path}`,
      type: 'inlineJson',
      label,
      path,
      sampleSize,
      schemaHint,
      lastObservedAt
    };
    return buildSourceAnalysis(source, sample);
  });
}
380
+
381
/**
 * Builds a `networkResponse` source analysis for every network entry whose
 * response preview parses as JSON; other entries are skipped.
 *
 * The label is `<method> <pathname>`. Relative URLs are resolved against a dummy
 * local base. The `URL` constructor throws on malformed absolute URLs, so a
 * failure is caught and the raw URL string is used in place of the pathname;
 * one bad entry therefore no longer aborts the whole report.
 *
 * @param entries Recent network entries.
 * @returns One analysis per entry with a parseable JSON body, in input order.
 */
function buildNetworkAnalyses(entries: NetworkEntry[]): DynamicSourceAnalysis[] {
  const analyses: DynamicSourceAnalysis[] = [];
  for (const entry of entries) {
    const parsed = parseNetworkBody(entry);
    if (parsed === null) {
      continue;
    }
    const rowsCandidate = extractStructuredRows(parsed);
    const schemaHint = inferSchemaHint(parsed);

    let pathname: string;
    try {
      pathname = new URL(entry.url, 'http://127.0.0.1').pathname;
    } catch {
      // Unparseable URL: keep the entry and show the raw URL instead.
      pathname = String(entry.url);
    }

    // Prefer the located row array over the whole payload when there is one.
    const payload = rowsCandidate?.rows ?? parsed;
    const source: InspectPageDataSource = {
      sourceId: `networkResponse:${entry.id}`,
      type: 'networkResponse',
      label: `${entry.method} ${pathname}`,
      path: rowsCandidate?.path ?? pathname,
      sampleSize: estimateSampleSize(payload),
      schemaHint,
      lastObservedAt: entry.ts
    };
    analyses.push(buildSourceAnalysis(source, payload));
  }
  return analyses;
}
404
+
405
/**
 * Derives virtualization and completeness heuristics for a table from raw observations.
 *
 * Numeric inputs are sanitised first: `visibleRowCount` is floored and clamped to
 * be non-negative; `estimatedTotalRows` is used only when finite and positive and
 * is never reported below the visible count; row indexes are used only when finite.
 *
 * NOTE(review): `lazyLoaded` requires a scroll container, more estimated than
 * mounted rows, and `!virtualized`. That same combination already makes
 * `virtualized` true, so `lazyLoaded` is always false as written. Confirm the
 * intended distinction.
 *
 * @param input Observations about the table region.
 * @returns Flags, preferred extraction mode, estimated total rows, completeness and the supporting signals.
 */
export function buildTableIntelligence(input: TableIntelligenceInput): TableIntelligence {
  const toRowIndex = (raw: number | undefined): number | undefined =>
    typeof raw === 'number' && Number.isFinite(raw) ? Math.max(0, Math.floor(raw)) : undefined;

  const visibleRowCount = Math.max(0, Math.floor(input.visibleRowCount));
  const rawEstimate = input.estimatedTotalRows;
  const estimatedTotalRows =
    typeof rawEstimate === 'number' && Number.isFinite(rawEstimate) && rawEstimate > 0
      ? Math.max(visibleRowCount, Math.floor(rawEstimate))
      : undefined;
  const maxObservedRowIndex = toRowIndex(input.maxObservedRowIndex);
  const minObservedRowIndex = toRowIndex(input.minObservedRowIndex);

  const hasUnmountedRows = estimatedTotalRows !== undefined && estimatedTotalRows > visibleRowCount;
  const hasRowIndexGap = maxObservedRowIndex !== undefined && maxObservedRowIndex > visibleRowCount;

  // Signals are emitted in a fixed order.
  const signals: TableIntelligence['signals'] = [];
  if (input.knownGridKind) {
    signals.push({ code: 'known-grid-kind', detail: `Detected ${input.kind} container semantics` });
  }
  if (input.hasScrollContainer) {
    signals.push({ code: 'scroll-container', detail: 'Scrollable container detected for the table region' });
  }
  if (input.hasTranslatedRows) {
    signals.push({ code: 'row-transform-offsets', detail: 'Row transform offsets indicate viewport-based row reuse' });
  }
  if (hasRowIndexGap) {
    signals.push({
      code: 'row-index-gap',
      detail: `Observed row indexes reach ${maxObservedRowIndex} while only ${visibleRowCount} rows are mounted`
    });
  }
  if (hasUnmountedRows) {
    signals.push({
      code: 'dom-rows-less-than-expected',
      detail: `Estimated ${estimatedTotalRows} rows with ${visibleRowCount} currently mounted`
    });
  }

  // The virtualization check tolerates an index one past the mounted count; the signal above does not.
  const indexesRunPastMounted = maxObservedRowIndex !== undefined && maxObservedRowIndex > visibleRowCount + 1;
  const virtualized =
    input.hasTranslatedRows || (input.hasScrollContainer && (hasUnmountedRows || indexesRunPastMounted));
  const lazyLoaded = input.hasScrollContainer && !virtualized && hasUnmountedRows;

  let preferredExtractionMode: TableIntelligence['preferredExtractionMode'];
  if (input.kind === 'html' || input.kind === 'dataTables') {
    preferredExtractionMode = 'dataSource';
  } else if (input.hasScrollContainer) {
    preferredExtractionMode = 'scroll';
  } else {
    preferredExtractionMode = 'visibleOnly';
  }

  let completeness: TableIntelligence['completeness'] = 'unknown';
  if (preferredExtractionMode === 'dataSource') {
    completeness = 'complete';
  } else if (hasUnmountedRows || (minObservedRowIndex !== undefined && minObservedRowIndex > 1)) {
    completeness = 'partial';
  }

  return {
    virtualized,
    lazyLoaded,
    preferredExtractionMode,
    estimatedTotalRows,
    completeness,
    signals
  };
}
482
+
483
/**
 * Summarises one table extraction pass.
 *
 * The result is complete when no limit was applied and at least one of these
 * holds: the mode is `dataSource`, the pass reached the end, or the supplied
 * intelligence already reports `complete`.
 *
 * @param mode         Extraction mode that was used.
 * @param rows         Rows that were extracted; only their count is reported.
 * @param intelligence Optional table intelligence supplying the row estimate and completeness.
 * @param warnings     Warnings to attach (returned by reference).
 * @param options      `limitApplied` forces an incomplete result; `reachedEnd` marks it complete.
 */
export function buildExtractionMetadata(
  mode: TableExtractionMetadata['mode'],
  rows: Array<Record<string, unknown>>,
  intelligence?: TableIntelligence,
  warnings: string[] = [],
  options: { reachedEnd?: boolean; limitApplied?: boolean } = {}
): TableExtractionMetadata {
  const sawEverything =
    mode === 'dataSource' || options.reachedEnd === true || intelligence?.completeness === 'complete';
  const complete = !options.limitApplied && sawEverything;

  return {
    mode,
    complete,
    observedRows: rows.length,
    estimatedTotalRows: intelligence?.estimatedTotalRows,
    warnings
  };
}
507
+
508
/**
 * Scores how plausibly a data source feeds a table.
 *
 * Evidence ("basis") is collected from four independent checks:
 * 1. `columnOverlap` – source schema columns that normalise to a table column label;
 * 2. `sampleValueOverlap` – table sample cell values also present in the source sample (up to 5 listed);
 * 3. `explicitReference` – the table name or selector contains the source label, or the label contains the table name;
 * 4. `timeProximity` – a network response observed no more than 90 seconds before `now`.
 *
 * Blank labels and names are excluded from check 3: every string "contains" the
 * empty string, which would otherwise produce a reference hit for every pair.
 *
 * Confidence is `high` with enough matched columns (at least 2, or 3 for wider
 * tables) or with both column and value overlap; `medium` with either one;
 * `low` when only checks 3 or 4 fired.
 *
 * @param table  Table to score against.
 * @param source Candidate source.
 * @param now    Reference time in epoch milliseconds.
 * @returns The mapping, or `null` when no check produced evidence.
 */
function scoreSourceMapping(table: TableAnalysis, source: DynamicSourceAnalysis, now: number): InspectPageDataSourceMapping | null {
  const basis: InspectPageDataSourceMapping['basis'] = [];

  // 1. Column overlap (each table label reported once, in source column order).
  const tableColumns = table.schema.columns.map((column) => column.label);
  const labelsByNormalizedName = new Map<string, string>(
    tableColumns.map((label): [string, string] => [normalizeColumnName(label), label])
  );
  const matchedColumns = [
    ...new Set(
      source.schemaColumns.flatMap((column) => {
        const label = labelsByNormalizedName.get(column);
        return label === undefined ? [] : [label];
      })
    )
  ];
  if (matchedColumns.length > 0) {
    basis.push({
      type: 'columnOverlap',
      detail: `Column overlap on ${matchedColumns.join(', ')}`
    });
  }

  // 2. Sample value overlap.
  const overlappingValues = table.sampleRows
    .flatMap((row) => Object.values(row))
    .map((value) => normalizedComparableValue(value))
    .filter((value): value is string => value !== null)
    .filter((value) => source.sampleValues.has(value));
  const distinctOverlappingValues = [...new Set(overlappingValues)].slice(0, 5);
  if (distinctOverlappingValues.length > 0) {
    basis.push({
      type: 'sampleValueOverlap',
      detail: `Shared sample values: ${distinctOverlappingValues.join(', ')}`
    });
  }

  // 3. Explicit reference between table name/selector and source label.
  const sourceLabel = source.source.label.toLowerCase();
  const tableName = table.table.name.toLowerCase();
  const tableSelector = (table.table.selector ?? '').toLowerCase();
  const hasSourceLabel = sourceLabel.trim().length > 0;
  const hasTableName = tableName.trim().length > 0;
  const explicitReferenceHit =
    (hasSourceLabel && (tableName.includes(sourceLabel) || tableSelector.includes(sourceLabel))) ||
    (hasTableName && sourceLabel.includes(tableName));
  if (explicitReferenceHit) {
    basis.push({
      type: 'explicitReference',
      detail: `Table label and source label both mention ${source.source.label}`
    });
  }

  // 4. Recency of network responses (timestamps in the future count as recent).
  if (
    source.source.type === 'networkResponse' &&
    typeof source.source.lastObservedAt === 'number' &&
    Math.max(0, now - source.source.lastObservedAt) <= 90_000
  ) {
    basis.push({
      type: 'timeProximity',
      detail: 'Recent network response observed within the last 90 seconds'
    });
  }

  if (basis.length === 0) {
    return null;
  }

  const confidence =
    matchedColumns.length >= Math.max(2, Math.min(tableColumns.length, 3)) || (matchedColumns.length > 0 && distinctOverlappingValues.length > 0)
      ? 'high'
      : matchedColumns.length > 0 || distinctOverlappingValues.length > 0
        ? 'medium'
        : 'low';
  return {
    tableId: table.table.id,
    sourceId: source.source.sourceId,
    confidence,
    basis,
    matchedColumns
  };
}
572
+
573
/**
 * Suggests follow-up commands from the tables and their source mappings.
 *
 * - Tables whose preferred extraction mode is `scroll` get a "read all rows" command.
 * - Each mapping of medium or high confidence gets a command matching its source
 *   type: page extract for window globals, network replay for network responses,
 *   and the freshness command for everything else.
 * - If nothing was suggested, a generic freshness check is returned.
 *
 * Suggestions are de-duplicated by command string (first one wins) and capped at six.
 *
 * NOTE(review): source paths and table ids are interpolated into the command
 * text without shell escaping. Confirm these values cannot contain quotes.
 */
function buildRecommendedNextActions(
  tables: TableAnalysis[],
  mappings: InspectPageDataSourceMapping[],
  sourceAnalyses: DynamicSourceAnalysis[]
): InspectPageDataRecommendation[] {
  const recommendations: InspectPageDataRecommendation[] = [];
  const seenCommands = new Set<string>();
  const recommend = (item: InspectPageDataRecommendation): void => {
    if (seenCommands.has(item.command)) {
      return;
    }
    seenCommands.add(item.command);
    recommendations.push(item);
  };

  for (const { table } of tables) {
    if (table.intelligence?.preferredExtractionMode !== 'scroll') {
      continue;
    }
    recommend({
      title: `Read all rows from ${table.id}`,
      command: `bak table rows --table ${table.id} --all --max-rows 10000`,
      note: 'The table looks virtualized or lazy-loaded, so a scroll pass is preferred.'
    });
  }

  for (const mapping of mappings) {
    if (mapping.confidence === 'low') {
      continue;
    }
    const analysis = sourceAnalyses.find((item) => item.source.sourceId === mapping.sourceId);
    if (!analysis) {
      continue;
    }
    const { source } = analysis;
    switch (source.type) {
      case 'windowGlobal':
        recommend({
          title: `Read ${source.label} directly from page data`,
          command: `bak page extract --path "${source.path}" --resolver auto`,
          note: `Mapped to ${mapping.tableId} with ${mapping.confidence} confidence.`
        });
        break;
      case 'networkResponse': {
        const requestId = source.sourceId.replace(/^networkResponse:/, '');
        recommend({
          title: `Replay ${requestId} with table schema`,
          command: `bak network replay --request-id ${requestId} --mode json --with-schema auto`,
          note: `Recent response mapped to ${mapping.tableId} with ${mapping.confidence} confidence.`
        });
        break;
      }
      default:
        recommend({
          title: `Inspect ${source.label} inline JSON`,
          command: 'bak page freshness',
          note: `Inline JSON source mapped to ${mapping.tableId}; use freshness or capture commands to inspect it further.`
        });
        break;
    }
  }

  if (recommendations.length === 0) {
    recommend({
      title: 'Check data freshness',
      command: 'bak page freshness',
      note: 'No strong data-source mapping was found yet.'
    });
  }
  return recommendations.slice(0, 6);
}
634
+
635
/**
 * Builds the full source-mapping report: discovers sources (window globals,
 * inline JSON, then network responses), scores every table against every source,
 * and derives follow-up recommendations.
 *
 * Mappings are sorted by confidence (high, medium, low), then table id, then source id.
 *
 * @param input Tables, candidate sources and an optional reference time (`Date.now()` when omitted).
 */
export function buildSourceMappingReport(input: SourceMappingInput): SourceMappingReport {
  const now = typeof input.now === 'number' ? input.now : Date.now();

  const sourceAnalyses = [
    ...buildWindowSources(input.windowSources),
    ...buildInlineJsonAnalyses(input.inlineJsonSources),
    ...buildNetworkAnalyses(input.recentNetwork)
  ];

  const sourceMappings: InspectPageDataSourceMapping[] = [];
  for (const table of input.tables) {
    for (const source of sourceAnalyses) {
      const mapping = scoreSourceMapping(table, source, now);
      if (mapping !== null) {
        sourceMappings.push(mapping);
      }
    }
  }

  const confidenceRank = { high: 0, medium: 1, low: 2 } as const;
  sourceMappings.sort((left, right) => {
    const byConfidence = confidenceRank[left.confidence] - confidenceRank[right.confidence];
    return byConfidence || left.tableId.localeCompare(right.tableId) || left.sourceId.localeCompare(right.sourceId);
  });

  const dataSources = sourceAnalyses.map((analysis) => analysis.source);
  const recommendedNextActions = buildRecommendedNextActions(input.tables, sourceMappings, sourceAnalyses);
  return { dataSources, sourceMappings, recommendedNextActions, sourceAnalyses };
}
659
+
660
/**
 * Re-keys an object row by the table schema's column labels.
 *
 * A column takes the value of the row key whose normalised name equals the
 * column's normalised label (the last such key wins). Names that normalise to
 * the empty string (labels or keys made only of symbols) are never matched
 * through normalisation, because they would all collide on the same empty key;
 * such a column is matched only by a row key identical to its label.
 *
 * @param row    Source row.
 * @param schema Target table schema.
 * @returns The re-keyed row, or a shallow copy of `row` when no column matched.
 */
function mapObjectRowToSchema(row: Record<string, unknown>, schema: TableSchema): Record<string, unknown> {
  const keysByNormalizedName = new Map<string, string>();
  for (const key of Object.keys(row)) {
    const normalizedKey = normalizeColumnName(key);
    if (normalizedKey.length > 0) {
      keysByNormalizedName.set(normalizedKey, key);
    }
  }

  const mapped: Record<string, unknown> = {};
  for (const column of schema.columns) {
    const normalizedLabel = normalizeColumnName(column.label);
    let sourceKey: string | undefined;
    if (normalizedLabel.length > 0) {
      sourceKey = keysByNormalizedName.get(normalizedLabel);
    } else if (Object.prototype.hasOwnProperty.call(row, column.label)) {
      // Symbol-only label: accept an exact key match only.
      sourceKey = column.label;
    }
    if (sourceKey !== undefined) {
      mapped[column.label] = row[sourceKey];
    }
  }

  return Object.keys(mapped).length > 0 ? mapped : { ...row };
}
675
+
676
/**
 * Chooses the table whose schema best fits a replayed JSON response and projects
 * the response rows onto that schema.
 *
 * - Array rows: the first table with the same column count as the first row is
 *   used, otherwise the first table that has any columns; cells are assigned to
 *   column labels by position.
 * - Object rows: the table sharing the most normalised column names with the
 *   first row wins (ties keep table order); at least one shared name is required.
 *
 * When `preferredSourceId` has a non-low-confidence mapping in `mappings`, that
 * mapping's table is tried first and the rest are ordered by id.
 *
 * The representative row is the first row that is neither `null` nor `undefined`,
 * the same rule `extractStructuredRows` uses to classify the collection, so a
 * leading `null` no longer prevents a match. Keys that normalise to the empty
 * string are ignored when scoring, so symbol-only names cannot produce a match.
 *
 * @param responseJson Parsed response body.
 * @param tables       Candidate tables.
 * @param options      Optional preferred source id and the mappings to resolve it with.
 * @returns The match, or `null` when the response has no rows, there are no
 *          tables, the rows are scalars, or no table fits.
 */
export function selectReplaySchemaMatch(
  responseJson: unknown,
  tables: TableAnalysis[],
  options: { preferredSourceId?: string; mappings?: InspectPageDataSourceMapping[] } = {}
): ReplaySchemaMatch | null {
  const candidate = extractStructuredRows(responseJson);
  if (!candidate || candidate.rows.length === 0 || tables.length === 0) {
    return null;
  }

  const preferredTableId =
    options.preferredSourceId && options.mappings
      ? options.mappings.find((mapping) => mapping.sourceId === options.preferredSourceId && mapping.confidence !== 'low')?.tableId
      : undefined;
  const orderedTables = preferredTableId
    ? tables.slice().sort((left, right) => {
        if (left.table.id === preferredTableId) {
          return -1;
        }
        if (right.table.id === preferredTableId) {
          return 1;
        }
        return left.table.id.localeCompare(right.table.id);
      })
    : tables;

  const firstRow = candidate.rows.find((row) => row !== null && row !== undefined);

  if (Array.isArray(firstRow)) {
    const matchingTable =
      orderedTables.find((table) => table.schema.columns.length === firstRow.length) ??
      orderedTables.find((table) => table.schema.columns.length > 0) ??
      null;
    if (!matchingTable) {
      return null;
    }
    return {
      table: matchingTable.table,
      schema: matchingTable.schema,
      mappedRows: candidate.rows
        .filter((row): row is unknown[] => Array.isArray(row))
        .map((row) => {
          const mapped: Record<string, unknown> = {};
          matchingTable.schema.columns.forEach((column, index) => {
            mapped[column.label] = row[index];
          });
          return mapped;
        }),
      mappingSource: candidate.path
    };
  }

  if (firstRow && typeof firstRow === 'object') {
    const rowObject = firstRow as Record<string, unknown>;
    const rowKeys = new Set(
      Object.keys(rowObject)
        .map((key) => normalizeColumnName(key))
        .filter((key) => key.length > 0)
    );
    const matchingEntry =
      orderedTables
        .map((table) => ({
          table,
          score: table.schema.columns.filter((column) => rowKeys.has(normalizeColumnName(column.label))).length
        }))
        // Stable sort: equal scores keep the preferred/ordered table first.
        .sort((left, right) => compareNumbers(right.score, left.score))[0] ?? null;
    if (!matchingEntry || matchingEntry.score <= 0) {
      return null;
    }
    const matchingTable = matchingEntry.table;
    return {
      table: matchingTable.table,
      schema: matchingTable.schema,
      mappedRows: candidate.rows
        .filter((row): row is Record<string, unknown> => typeof row === 'object' && row !== null && !Array.isArray(row))
        .map((row) => mapObjectRowToSchema(row, matchingTable.schema)),
      mappingSource: candidate.path
    };
  }

  return null;
}
753
+
754
/**
 * Produces the data-source portion of an inspect-page-data result.
 *
 * Only `tableAnalyses`, `pageDataCandidates`, `inlineJsonSources`, `recentNetwork`
 * and `now` are forwarded to `buildSourceMappingReport`; the remaining input
 * fields are not read here.
 *
 * @returns The report's data sources, source mappings and recommended next actions.
 */
export function buildInspectPageDataResult(input: {
  suspiciousGlobals: string[];
  tables: TableHandle[];
  visibleTimestamps: string[];
  inlineTimestamps: string[];
  pageDataCandidates: InspectPageDataCandidateProbe[];
  recentNetwork: NetworkEntry[];
  tableAnalyses: TableAnalysis[];
  inlineJsonSources: InlineJsonInspectionSource[];
  now?: number;
}): Pick<InspectPageDataResult, 'dataSources' | 'sourceMappings' | 'recommendedNextActions'> {
  const { dataSources, sourceMappings, recommendedNextActions } = buildSourceMappingReport({
    tables: input.tableAnalyses,
    windowSources: input.pageDataCandidates,
    inlineJsonSources: input.inlineJsonSources,
    recentNetwork: input.recentNetwork,
    now: input.now
  });
  return { dataSources, sourceMappings, recommendedNextActions };
}
778
+
779
/**
 * Builds the probe record for one page-data candidate.
 *
 * Size, schema hint and timestamps are computed from the full `sample`; only the
 * stored `sample` field is the bounded preview. `lastObservedAt` is the latest
 * parseable timestamp found inside the sample, or `null` when there is none.
 *
 * @param name     Candidate name; also the root path for timestamp probes.
 * @param resolver How the candidate was resolved.
 * @param sample   The candidate's value.
 */
export function buildPageDataProbe(name: string, resolver: 'globalThis' | 'lexical', sample: unknown): InspectPageDataCandidateProbe {
  const timestamps = collectTimestampProbes(sample, name);
  const preview = sampleValue(sample);
  const sampleSize = estimateSampleSize(sample);
  const schemaHint = inferSchemaHint(sample);
  const lastObservedAt = latestTimestamp(timestamps);
  return {
    name,
    resolver,
    sample: preview,
    sampleSize,
    schemaHint,
    lastObservedAt,
    timestamps
  };
}