@blueharford/scrypted-spatial-awareness 0.1.16 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/out/plugin.zip CHANGED
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blueharford/scrypted-spatial-awareness",
3
- "version": "0.1.16",
3
+ "version": "0.2.1",
4
4
  "description": "Cross-camera object tracking for Scrypted NVR with spatial awareness",
5
5
  "author": "Joshua Seidel <blueharford>",
6
6
  "license": "Apache-2.0",
@@ -0,0 +1,700 @@
1
+ /**
2
+ * Spatial Reasoning Engine
3
+ * Uses RAG (Retrieval Augmented Generation) to provide rich contextual understanding
4
+ * of movement across the property topology
5
+ */
6
+
7
+ import sdk, {
8
+ ScryptedInterface,
9
+ ObjectDetection,
10
+ Camera,
11
+ MediaObject,
12
+ } from '@scrypted/sdk';
13
+ import {
14
+ CameraTopology,
15
+ CameraNode,
16
+ Landmark,
17
+ findCamera,
18
+ findLandmark,
19
+ findConnection,
20
+ getLandmarksVisibleFromCamera,
21
+ generateTopologyDescription,
22
+ generateMovementContext,
23
+ LandmarkSuggestion,
24
+ LANDMARK_TEMPLATES,
25
+ } from '../models/topology';
26
+ import { TrackedObject, ObjectSighting } from '../models/tracked-object';
27
+
28
const { systemManager } = sdk;

/** Configuration for the spatial reasoning engine */
export interface SpatialReasoningConfig {
  /** Enable LLM-based descriptions (requires an LLM-capable device to be discoverable) */
  enableLlm: boolean;
  /** Enable landmark learning/suggestions */
  enableLandmarkLearning: boolean;
  /** Minimum confidence (0-1) a suggestion needs before it is surfaced to the UI */
  landmarkConfidenceThreshold: number;
  /** Cache TTL for topology context (ms); generated descriptions are reused within this window */
  contextCacheTtl: number;
}

/** Result of a spatial reasoning query */
export interface SpatialReasoningResult {
  /** Rich, human-readable description of the movement */
  description: string;
  /** Landmarks involved in the movement (visible from either camera) */
  involvedLandmarks: Landmark[];
  /** Suggested path description, when a known connection links the cameras */
  pathDescription?: string;
  /** Confidence in the reasoning (0-1); LLM-backed results score higher */
  confidence: number;
  /** Whether an LLM was used to produce the description */
  usedLlm: boolean;
}

/** Context chunk for RAG retrieval — one per property/camera/landmark/connection */
interface ContextChunk {
  // Stable id, e.g. 'property', 'camera_<deviceId>', 'landmark_<id>', 'connection_<id>'
  id: string;
  type: 'camera' | 'landmark' | 'connection' | 'property';
  // Pre-rendered natural-language context fed to the LLM prompt
  content: string;
  // Lookup fields used for retrieval (ids, names, entry/exit flags)
  metadata: Record<string, any>;
}
63
+
64
+ export class SpatialReasoningEngine {
65
+ private config: SpatialReasoningConfig;
66
+ private console: Console;
67
+ private topology: CameraTopology | null = null;
68
+ private llmDevice: ObjectDetection | null = null;
69
+ private contextChunks: ContextChunk[] = [];
70
+ private topologyContextCache: string | null = null;
71
+ private contextCacheTime: number = 0;
72
+ private landmarkSuggestions: Map<string, LandmarkSuggestion> = new Map();
73
+
74
+ constructor(config: SpatialReasoningConfig, console: Console) {
75
+ this.config = config;
76
+ this.console = console;
77
+ }
78
+
79
+ /** Update the topology and rebuild context */
80
+ updateTopology(topology: CameraTopology): void {
81
+ this.topology = topology;
82
+ this.rebuildContextChunks();
83
+ this.topologyContextCache = null;
84
+ this.contextCacheTime = 0;
85
+ }
86
+
87
+ /** Build context chunks for RAG retrieval */
88
+ private rebuildContextChunks(): void {
89
+ if (!this.topology) return;
90
+
91
+ this.contextChunks = [];
92
+
93
+ // Property context
94
+ if (this.topology.property) {
95
+ this.contextChunks.push({
96
+ id: 'property',
97
+ type: 'property',
98
+ content: this.buildPropertyContext(),
99
+ metadata: { ...this.topology.property },
100
+ });
101
+ }
102
+
103
+ // Camera contexts
104
+ for (const camera of this.topology.cameras) {
105
+ this.contextChunks.push({
106
+ id: `camera_${camera.deviceId}`,
107
+ type: 'camera',
108
+ content: this.buildCameraContext(camera),
109
+ metadata: {
110
+ deviceId: camera.deviceId,
111
+ name: camera.name,
112
+ isEntryPoint: camera.isEntryPoint,
113
+ isExitPoint: camera.isExitPoint,
114
+ },
115
+ });
116
+ }
117
+
118
+ // Landmark contexts
119
+ for (const landmark of this.topology.landmarks || []) {
120
+ this.contextChunks.push({
121
+ id: `landmark_${landmark.id}`,
122
+ type: 'landmark',
123
+ content: this.buildLandmarkContext(landmark),
124
+ metadata: {
125
+ id: landmark.id,
126
+ name: landmark.name,
127
+ type: landmark.type,
128
+ isEntryPoint: landmark.isEntryPoint,
129
+ isExitPoint: landmark.isExitPoint,
130
+ },
131
+ });
132
+ }
133
+
134
+ // Connection contexts
135
+ for (const connection of this.topology.connections) {
136
+ this.contextChunks.push({
137
+ id: `connection_${connection.id}`,
138
+ type: 'connection',
139
+ content: this.buildConnectionContext(connection),
140
+ metadata: {
141
+ id: connection.id,
142
+ fromCameraId: connection.fromCameraId,
143
+ toCameraId: connection.toCameraId,
144
+ },
145
+ });
146
+ }
147
+
148
+ this.console.log(`Built ${this.contextChunks.length} context chunks for spatial reasoning`);
149
+ }
150
+
151
+ /** Build property context string */
152
+ private buildPropertyContext(): string {
153
+ if (!this.topology?.property) return '';
154
+ const p = this.topology.property;
155
+ const parts: string[] = [];
156
+
157
+ if (p.propertyType) parts.push(`Property type: ${p.propertyType}`);
158
+ if (p.description) parts.push(p.description);
159
+ if (p.frontFacing) parts.push(`Front faces ${p.frontFacing}`);
160
+ if (p.features?.length) parts.push(`Features: ${p.features.join(', ')}`);
161
+
162
+ return parts.join('. ');
163
+ }
164
+
165
+ /** Build camera context string */
166
+ private buildCameraContext(camera: CameraNode): string {
167
+ const parts: string[] = [`Camera: ${camera.name}`];
168
+
169
+ if (camera.context?.mountLocation) {
170
+ parts.push(`Mounted at: ${camera.context.mountLocation}`);
171
+ }
172
+ if (camera.context?.coverageDescription) {
173
+ parts.push(`Coverage: ${camera.context.coverageDescription}`);
174
+ }
175
+ if (camera.context?.mountHeight) {
176
+ parts.push(`Height: ${camera.context.mountHeight} feet`);
177
+ }
178
+ if (camera.isEntryPoint) parts.push('Watches property entry point');
179
+ if (camera.isExitPoint) parts.push('Watches property exit point');
180
+
181
+ // Visible landmarks
182
+ if (this.topology && camera.context?.visibleLandmarks?.length) {
183
+ const landmarkNames = camera.context.visibleLandmarks
184
+ .map(id => findLandmark(this.topology!, id)?.name)
185
+ .filter(Boolean);
186
+ if (landmarkNames.length) {
187
+ parts.push(`Can see: ${landmarkNames.join(', ')}`);
188
+ }
189
+ }
190
+
191
+ return parts.join('. ');
192
+ }
193
+
194
+ /** Build landmark context string */
195
+ private buildLandmarkContext(landmark: Landmark): string {
196
+ const parts: string[] = [`${landmark.name} (${landmark.type})`];
197
+
198
+ if (landmark.description) parts.push(landmark.description);
199
+ if (landmark.isEntryPoint) parts.push('Property entry point');
200
+ if (landmark.isExitPoint) parts.push('Property exit point');
201
+
202
+ // Adjacent landmarks
203
+ if (this.topology && landmark.adjacentTo?.length) {
204
+ const adjacentNames = landmark.adjacentTo
205
+ .map(id => findLandmark(this.topology!, id)?.name)
206
+ .filter(Boolean);
207
+ if (adjacentNames.length) {
208
+ parts.push(`Adjacent to: ${adjacentNames.join(', ')}`);
209
+ }
210
+ }
211
+
212
+ return parts.join('. ');
213
+ }
214
+
215
+ /** Build connection context string */
216
+ private buildConnectionContext(connection: any): string {
217
+ if (!this.topology) return '';
218
+
219
+ const fromCamera = findCamera(this.topology, connection.fromCameraId);
220
+ const toCamera = findCamera(this.topology, connection.toCameraId);
221
+
222
+ if (!fromCamera || !toCamera) return '';
223
+
224
+ const parts: string[] = [
225
+ `Path from ${fromCamera.name} to ${toCamera.name}`,
226
+ ];
227
+
228
+ if (connection.name) parts.push(`Called: ${connection.name}`);
229
+
230
+ const transitSecs = Math.round(connection.transitTime.typical / 1000);
231
+ parts.push(`Typical transit: ${transitSecs} seconds`);
232
+
233
+ if (connection.bidirectional) parts.push('Bidirectional path');
234
+
235
+ // Path landmarks
236
+ if (connection.pathLandmarks?.length) {
237
+ const landmarkNames = connection.pathLandmarks
238
+ .map((id: string) => findLandmark(this.topology!, id)?.name)
239
+ .filter(Boolean);
240
+ if (landmarkNames.length) {
241
+ parts.push(`Passes: ${landmarkNames.join(' → ')}`);
242
+ }
243
+ }
244
+
245
+ return parts.join('. ');
246
+ }
247
+
248
+ /** Get cached or generate topology description */
249
+ private getTopologyContext(): string {
250
+ const now = Date.now();
251
+ if (this.topologyContextCache && (now - this.contextCacheTime) < this.config.contextCacheTtl) {
252
+ return this.topologyContextCache;
253
+ }
254
+
255
+ if (!this.topology) return '';
256
+
257
+ this.topologyContextCache = generateTopologyDescription(this.topology);
258
+ this.contextCacheTime = now;
259
+
260
+ return this.topologyContextCache;
261
+ }
262
+
263
+ /** Retrieve relevant context chunks for a movement query */
264
+ private retrieveRelevantContext(
265
+ fromCameraId: string,
266
+ toCameraId: string
267
+ ): ContextChunk[] {
268
+ const relevant: ContextChunk[] = [];
269
+
270
+ // Always include property context
271
+ const propertyChunk = this.contextChunks.find(c => c.type === 'property');
272
+ if (propertyChunk) relevant.push(propertyChunk);
273
+
274
+ // Include both camera contexts
275
+ const fromChunk = this.contextChunks.find(c => c.id === `camera_${fromCameraId}`);
276
+ const toChunk = this.contextChunks.find(c => c.id === `camera_${toCameraId}`);
277
+ if (fromChunk) relevant.push(fromChunk);
278
+ if (toChunk) relevant.push(toChunk);
279
+
280
+ // Include direct connection if exists
281
+ const connectionChunk = this.contextChunks.find(c =>
282
+ c.type === 'connection' &&
283
+ ((c.metadata.fromCameraId === fromCameraId && c.metadata.toCameraId === toCameraId) ||
284
+ (c.metadata.fromCameraId === toCameraId && c.metadata.toCameraId === fromCameraId))
285
+ );
286
+ if (connectionChunk) relevant.push(connectionChunk);
287
+
288
+ // Include visible landmarks from both cameras
289
+ if (this.topology) {
290
+ const fromLandmarks = getLandmarksVisibleFromCamera(this.topology, fromCameraId);
291
+ const toLandmarks = getLandmarksVisibleFromCamera(this.topology, toCameraId);
292
+ const allLandmarkIds = new Set([
293
+ ...fromLandmarks.map(l => l.id),
294
+ ...toLandmarks.map(l => l.id),
295
+ ]);
296
+
297
+ for (const landmarkId of allLandmarkIds) {
298
+ const chunk = this.contextChunks.find(c => c.id === `landmark_${landmarkId}`);
299
+ if (chunk) relevant.push(chunk);
300
+ }
301
+ }
302
+
303
+ return relevant;
304
+ }
305
+
306
+ /** Find or initialize LLM device */
307
+ private async findLlmDevice(): Promise<ObjectDetection | null> {
308
+ if (this.llmDevice) return this.llmDevice;
309
+
310
+ try {
311
+ for (const id of Object.keys(systemManager.getSystemState())) {
312
+ const device = systemManager.getDeviceById(id);
313
+ if (device?.interfaces?.includes(ScryptedInterface.ObjectDetection)) {
314
+ const name = device.name?.toLowerCase() || '';
315
+ if (name.includes('llm') || name.includes('gpt') || name.includes('claude') ||
316
+ name.includes('ollama') || name.includes('gemini')) {
317
+ this.llmDevice = device as unknown as ObjectDetection;
318
+ this.console.log(`Found LLM device: ${device.name}`);
319
+ return this.llmDevice;
320
+ }
321
+ }
322
+ }
323
+ } catch (e) {
324
+ this.console.warn('Error finding LLM device:', e);
325
+ }
326
+
327
+ return null;
328
+ }
329
+
330
+ /** Generate rich movement description using LLM */
331
+ async generateMovementDescription(
332
+ tracked: TrackedObject,
333
+ fromCameraId: string,
334
+ toCameraId: string,
335
+ transitTime: number,
336
+ mediaObject?: MediaObject
337
+ ): Promise<SpatialReasoningResult> {
338
+ if (!this.topology) {
339
+ return {
340
+ description: `${tracked.className} moving between cameras`,
341
+ involvedLandmarks: [],
342
+ confidence: 0.5,
343
+ usedLlm: false,
344
+ };
345
+ }
346
+
347
+ const fromCamera = findCamera(this.topology, fromCameraId);
348
+ const toCamera = findCamera(this.topology, toCameraId);
349
+
350
+ if (!fromCamera || !toCamera) {
351
+ return {
352
+ description: `${tracked.className} moving between cameras`,
353
+ involvedLandmarks: [],
354
+ confidence: 0.5,
355
+ usedLlm: false,
356
+ };
357
+ }
358
+
359
+ // Get involved landmarks
360
+ const fromLandmarks = getLandmarksVisibleFromCamera(this.topology, fromCameraId);
361
+ const toLandmarks = getLandmarksVisibleFromCamera(this.topology, toCameraId);
362
+ const allLandmarks = [...new Set([...fromLandmarks, ...toLandmarks])];
363
+
364
+ // Build basic description without LLM
365
+ let basicDescription = this.buildBasicDescription(
366
+ tracked,
367
+ fromCamera,
368
+ toCamera,
369
+ transitTime,
370
+ fromLandmarks,
371
+ toLandmarks
372
+ );
373
+
374
+ // Try LLM for enhanced description
375
+ if (this.config.enableLlm && mediaObject) {
376
+ const llmDescription = await this.getLlmEnhancedDescription(
377
+ tracked,
378
+ fromCamera,
379
+ toCamera,
380
+ transitTime,
381
+ fromLandmarks,
382
+ toLandmarks,
383
+ mediaObject
384
+ );
385
+
386
+ if (llmDescription) {
387
+ return {
388
+ description: llmDescription,
389
+ involvedLandmarks: allLandmarks,
390
+ pathDescription: this.buildPathDescription(fromCamera, toCamera),
391
+ confidence: 0.9,
392
+ usedLlm: true,
393
+ };
394
+ }
395
+ }
396
+
397
+ return {
398
+ description: basicDescription,
399
+ involvedLandmarks: allLandmarks,
400
+ pathDescription: this.buildPathDescription(fromCamera, toCamera),
401
+ confidence: 0.7,
402
+ usedLlm: false,
403
+ };
404
+ }
405
+
406
+ /** Build basic movement description without LLM */
407
+ private buildBasicDescription(
408
+ tracked: TrackedObject,
409
+ fromCamera: CameraNode,
410
+ toCamera: CameraNode,
411
+ transitTime: number,
412
+ fromLandmarks: Landmark[],
413
+ toLandmarks: Landmark[]
414
+ ): string {
415
+ const objectType = this.capitalizeFirst(tracked.className);
416
+ const transitSecs = Math.round(transitTime / 1000);
417
+
418
+ // Build origin description
419
+ let origin = fromCamera.name;
420
+ if (fromLandmarks.length > 0) {
421
+ const nearLandmark = fromLandmarks[0];
422
+ origin = `near ${nearLandmark.name}`;
423
+ } else if (fromCamera.context?.coverageDescription) {
424
+ origin = fromCamera.context.coverageDescription.split('.')[0];
425
+ }
426
+
427
+ // Build destination description
428
+ let destination = toCamera.name;
429
+ if (toLandmarks.length > 0) {
430
+ const nearLandmark = toLandmarks[0];
431
+ destination = `towards ${nearLandmark.name}`;
432
+ } else if (toCamera.context?.coverageDescription) {
433
+ destination = `towards ${toCamera.context.coverageDescription.split('.')[0]}`;
434
+ }
435
+
436
+ // Build transit string
437
+ const transitStr = transitSecs > 0 ? ` (${transitSecs}s)` : '';
438
+
439
+ return `${objectType} moving from ${origin} ${destination}${transitStr}`;
440
+ }
441
+
442
+ /** Build path description from connection */
443
+ private buildPathDescription(fromCamera: CameraNode, toCamera: CameraNode): string | undefined {
444
+ if (!this.topology) return undefined;
445
+
446
+ const connection = findConnection(this.topology, fromCamera.deviceId, toCamera.deviceId);
447
+ if (!connection) return undefined;
448
+
449
+ if (connection.pathLandmarks?.length) {
450
+ const landmarkNames = connection.pathLandmarks
451
+ .map(id => findLandmark(this.topology!, id)?.name)
452
+ .filter(Boolean);
453
+ if (landmarkNames.length) {
454
+ return `Via ${landmarkNames.join(' → ')}`;
455
+ }
456
+ }
457
+
458
+ return connection.name || undefined;
459
+ }
460
+
461
+ /** Get LLM-enhanced description */
462
+ private async getLlmEnhancedDescription(
463
+ tracked: TrackedObject,
464
+ fromCamera: CameraNode,
465
+ toCamera: CameraNode,
466
+ transitTime: number,
467
+ fromLandmarks: Landmark[],
468
+ toLandmarks: Landmark[],
469
+ mediaObject: MediaObject
470
+ ): Promise<string | null> {
471
+ const llm = await this.findLlmDevice();
472
+ if (!llm) return null;
473
+
474
+ try {
475
+ // Retrieve relevant context for RAG
476
+ const relevantChunks = this.retrieveRelevantContext(
477
+ fromCamera.deviceId,
478
+ toCamera.deviceId
479
+ );
480
+
481
+ // Build RAG context
482
+ const ragContext = relevantChunks.map(c => c.content).join('\n\n');
483
+
484
+ // Build the prompt
485
+ const prompt = this.buildLlmPrompt(
486
+ tracked,
487
+ fromCamera,
488
+ toCamera,
489
+ transitTime,
490
+ fromLandmarks,
491
+ toLandmarks,
492
+ ragContext
493
+ );
494
+
495
+ // Call LLM
496
+ const result = await llm.detectObjects(mediaObject, {
497
+ settings: { prompt }
498
+ } as any);
499
+
500
+ // Extract description from result
501
+ if (result.detections?.[0]?.label) {
502
+ return result.detections[0].label;
503
+ }
504
+
505
+ return null;
506
+ } catch (e) {
507
+ this.console.warn('LLM description generation failed:', e);
508
+ return null;
509
+ }
510
+ }
511
+
512
+ /** Build LLM prompt with RAG context */
513
+ private buildLlmPrompt(
514
+ tracked: TrackedObject,
515
+ fromCamera: CameraNode,
516
+ toCamera: CameraNode,
517
+ transitTime: number,
518
+ fromLandmarks: Landmark[],
519
+ toLandmarks: Landmark[],
520
+ ragContext: string
521
+ ): string {
522
+ const transitSecs = Math.round(transitTime / 1000);
523
+
524
+ return `You are a security camera system describing movement on a property.
525
+
526
+ PROPERTY CONTEXT:
527
+ ${ragContext}
528
+
529
+ CURRENT EVENT:
530
+ - Object type: ${tracked.className}
531
+ - Moving from: ${fromCamera.name}${fromLandmarks.length ? ` (near ${fromLandmarks.map(l => l.name).join(', ')})` : ''}
532
+ - Moving to: ${toCamera.name}${toLandmarks.length ? ` (near ${toLandmarks.map(l => l.name).join(', ')})` : ''}
533
+ - Transit time: ${transitSecs} seconds
534
+
535
+ INSTRUCTIONS:
536
+ Generate a single, concise sentence describing this movement. Include:
537
+ 1. Description of the ${tracked.className} (if person: gender, clothing; if vehicle: color, type)
538
+ 2. Where they came from (using landmark names if available)
539
+ 3. Where they're heading (using landmark names if available)
540
+
541
+ Examples of good descriptions:
542
+ - "Man in blue jacket walking from the driveway towards the front door"
543
+ - "Black SUV pulling into the driveway from the street"
544
+ - "Woman with dog walking from the backyard towards the side gate"
545
+ - "Delivery person approaching the front porch from the mailbox"
546
+
547
+ Generate ONLY the description, nothing else:`;
548
+ }
549
+
550
+ /** Suggest a new landmark based on AI analysis */
551
+ async suggestLandmark(
552
+ cameraId: string,
553
+ mediaObject: MediaObject,
554
+ objectClass: string,
555
+ position: { x: number; y: number }
556
+ ): Promise<LandmarkSuggestion | null> {
557
+ if (!this.config.enableLandmarkLearning) return null;
558
+
559
+ const llm = await this.findLlmDevice();
560
+ if (!llm) return null;
561
+
562
+ try {
563
+ const prompt = `Analyze this security camera image. A ${objectClass} was detected.
564
+
565
+ Looking at the surroundings and environment, identify any notable landmarks or features visible that could help describe this location. Consider:
566
+ - Structures (house, garage, shed, porch)
567
+ - Features (mailbox, tree, pool, garden)
568
+ - Access points (driveway, walkway, gate, door)
569
+ - Boundaries (fence, wall, hedge)
570
+
571
+ If you can identify a clear landmark feature, respond with ONLY a JSON object:
572
+ {"name": "Landmark Name", "type": "structure|feature|boundary|access|vehicle|neighbor|zone|street", "description": "Brief description"}
573
+
574
+ If no clear landmark is identifiable, respond with: {"name": null}`;
575
+
576
+ const result = await llm.detectObjects(mediaObject, {
577
+ settings: { prompt }
578
+ } as any);
579
+
580
+ if (result.detections?.[0]?.label) {
581
+ try {
582
+ const parsed = JSON.parse(result.detections[0].label);
583
+ if (parsed.name && parsed.type) {
584
+ const suggestionId = `suggest_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`;
585
+
586
+ const suggestion: LandmarkSuggestion = {
587
+ id: suggestionId,
588
+ landmark: {
589
+ id: `landmark_${Date.now()}`,
590
+ name: parsed.name,
591
+ type: parsed.type,
592
+ position,
593
+ description: parsed.description,
594
+ aiSuggested: true,
595
+ aiConfidence: 0.7,
596
+ visibleFromCameras: [cameraId],
597
+ },
598
+ detectedByCameras: [cameraId],
599
+ timestamp: Date.now(),
600
+ detectionCount: 1,
601
+ status: 'pending',
602
+ };
603
+
604
+ // Store suggestion
605
+ const existingKey = this.findSimilarSuggestion(parsed.name, position);
606
+ if (existingKey) {
607
+ // Increment count for similar suggestion
608
+ const existing = this.landmarkSuggestions.get(existingKey)!;
609
+ existing.detectionCount++;
610
+ existing.landmark.aiConfidence = Math.min(0.95, existing.landmark.aiConfidence! + 0.05);
611
+ if (!existing.detectedByCameras.includes(cameraId)) {
612
+ existing.detectedByCameras.push(cameraId);
613
+ }
614
+ return existing;
615
+ } else {
616
+ this.landmarkSuggestions.set(suggestionId, suggestion);
617
+ return suggestion;
618
+ }
619
+ }
620
+ } catch (parseError) {
621
+ // LLM didn't return valid JSON
622
+ }
623
+ }
624
+
625
+ return null;
626
+ } catch (e) {
627
+ this.console.warn('Landmark suggestion failed:', e);
628
+ return null;
629
+ }
630
+ }
631
+
632
+ /** Find similar existing suggestion by name proximity and position */
633
+ private findSimilarSuggestion(name: string, position: { x: number; y: number }): string | null {
634
+ const nameLower = name.toLowerCase();
635
+ const POSITION_THRESHOLD = 100; // pixels
636
+
637
+ for (const [key, suggestion] of this.landmarkSuggestions) {
638
+ if (suggestion.status !== 'pending') continue;
639
+
640
+ const suggestionName = suggestion.landmark.name.toLowerCase();
641
+ const distance = Math.sqrt(
642
+ Math.pow(suggestion.landmark.position.x - position.x, 2) +
643
+ Math.pow(suggestion.landmark.position.y - position.y, 2)
644
+ );
645
+
646
+ // Similar name and nearby position
647
+ if ((suggestionName.includes(nameLower) || nameLower.includes(suggestionName)) &&
648
+ distance < POSITION_THRESHOLD) {
649
+ return key;
650
+ }
651
+ }
652
+
653
+ return null;
654
+ }
655
+
656
+ /** Get pending landmark suggestions above confidence threshold */
657
+ getPendingSuggestions(): LandmarkSuggestion[] {
658
+ return Array.from(this.landmarkSuggestions.values())
659
+ .filter(s =>
660
+ s.status === 'pending' &&
661
+ s.landmark.aiConfidence! >= this.config.landmarkConfidenceThreshold
662
+ )
663
+ .sort((a, b) => b.detectionCount - a.detectionCount);
664
+ }
665
+
666
+ /** Accept a landmark suggestion */
667
+ acceptSuggestion(suggestionId: string): Landmark | null {
668
+ const suggestion = this.landmarkSuggestions.get(suggestionId);
669
+ if (!suggestion) return null;
670
+
671
+ suggestion.status = 'accepted';
672
+ const landmark = { ...suggestion.landmark };
673
+ landmark.aiSuggested = false; // Mark as confirmed
674
+
675
+ this.landmarkSuggestions.delete(suggestionId);
676
+
677
+ return landmark;
678
+ }
679
+
680
+ /** Reject a landmark suggestion */
681
+ rejectSuggestion(suggestionId: string): boolean {
682
+ const suggestion = this.landmarkSuggestions.get(suggestionId);
683
+ if (!suggestion) return false;
684
+
685
+ suggestion.status = 'rejected';
686
+ this.landmarkSuggestions.delete(suggestionId);
687
+
688
+ return true;
689
+ }
690
+
691
+ /** Utility to capitalize first letter */
692
+ private capitalizeFirst(str: string): string {
693
+ return str ? str.charAt(0).toUpperCase() + str.slice(1) : 'Object';
694
+ }
695
+
696
+ /** Get landmark templates for UI */
697
+ getLandmarkTemplates(): typeof LANDMARK_TEMPLATES {
698
+ return LANDMARK_TEMPLATES;
699
+ }
700
+ }