@devo-bmad-custom/agent-orchestration 1.0.4 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (86) hide show
  1. package/package.json +1 -1
  2. package/src/.agents/skills/tmux-commands/SKILL.md +353 -0
  3. package/src/bmm/data/project-context-template.md +26 -26
  4. package/src/bmm/teams/default-party.csv +20 -20
  5. package/src/bmm/workflows/2-plan-workflows/create-prd/data/domain-complexity.csv +14 -14
  6. package/src/bmm/workflows/2-plan-workflows/create-prd/data/prd-purpose.md +197 -197
  7. package/src/bmm/workflows/2-plan-workflows/create-prd/data/project-types.csv +10 -10
  8. package/src/bmm/workflows/2-plan-workflows/create-prd/templates/prd-template.md +10 -10
  9. package/src/bmm/workflows/3-solutioning/create-architecture/data/domain-complexity.csv +12 -12
  10. package/src/bmm/workflows/4-implementation/code-review/instructions.xml +226 -226
  11. package/src/bmm/workflows/4-implementation/correct-course/checklist.md +288 -288
  12. package/src/bmm/workflows/4-implementation/correct-course/instructions.md +207 -207
  13. package/src/bmm/workflows/4-implementation/retrospective/instructions.md +1444 -1444
  14. package/src/bmm/workflows/4-implementation/sprint-planning/sprint-status-template.yaml +55 -55
  15. package/src/bmm/workflows/4-implementation/sprint-status/instructions.md +230 -230
  16. package/src/bmm/workflows/bmad-quick-flow/quick-spec/tech-spec-template.md +74 -74
  17. package/src/bmm/workflows/document-project/instructions.md +130 -130
  18. package/src/bmm/workflows/document-project/templates/project-scan-report-schema.json +160 -160
  19. package/src/bmm/workflows/document-project/workflows/deep-dive-instructions.md +298 -298
  20. package/src/bmm/workflows/document-project/workflows/deep-dive.yaml +31 -31
  21. package/src/bmm/workflows/document-project/workflows/full-scan-instructions.md +1106 -1106
  22. package/src/bmm/workflows/document-project/workflows/full-scan.yaml +31 -31
  23. package/src/bmm/workflows/qa-generate-e2e-tests/checklist.md +33 -33
  24. package/src/bmm/workflows/qa-generate-e2e-tests/instructions.md +110 -110
  25. package/src/core/agents/bmad-master.md +56 -56
  26. package/src/core/workflows/party-mode/steps/step-02-discussion-orchestration.md +187 -187
  27. package/src/core/workflows/party-mode/steps/step-03-graceful-exit.md +168 -168
  28. package/src/.agents/skills/ui-ux-pro-custom/data/swift-ios-skills/vision-framework/SKILL.md +0 -475
  29. package/src/.agents/skills/ui-ux-pro-custom/data/swift-ios-skills/vision-framework/references/vision-requests.md +0 -736
  30. package/src/.agents/skills/ui-ux-pro-custom/data/swift-ios-skills/vision-framework/references/visionkit-scanner.md +0 -738
  31. package/src/.agents/skills/ui-ux-pro-custom/data/swift-ios-skills/weatherkit/SKILL.md +0 -410
  32. package/src/.agents/skills/ui-ux-pro-custom/data/swift-ios-skills/weatherkit/references/weatherkit-patterns.md +0 -567
  33. package/src/.agents/skills/ui-ux-pro-custom/data/swift-ios-skills/widgetkit/SKILL.md +0 -497
  34. package/src/.agents/skills/ui-ux-pro-custom/data/swift-ios-skills/widgetkit/references/widgetkit-advanced.md +0 -871
  35. package/src/.agents/skills/ui-ux-pro-custom/data/typography.csv +0 -58
  36. package/src/.agents/skills/ui-ux-pro-custom/data/ui-reasoning.csv +0 -101
  37. package/src/.agents/skills/ui-ux-pro-custom/data/ux-guidelines.csv +0 -100
  38. package/src/.agents/skills/ui-ux-pro-custom/data/web-interface.csv +0 -31
  39. package/src/.agents/skills/ui-ux-pro-custom/scripts/core.py +0 -253
  40. package/src/.agents/skills/ui-ux-pro-custom/scripts/design_system.py +0 -1067
  41. package/src/.agents/skills/ui-ux-pro-custom/scripts/search.py +0 -114
  42. package/src/.agents/skills/ux-audit/SKILL.md +0 -151
  43. package/src/.agents/skills/websocket-engineer/SKILL.md +0 -168
  44. package/src/.agents/skills/websocket-engineer/references/alternatives.md +0 -391
  45. package/src/.agents/skills/websocket-engineer/references/patterns.md +0 -400
  46. package/src/.agents/skills/websocket-engineer/references/protocol.md +0 -195
  47. package/src/.agents/skills/websocket-engineer/references/scaling.md +0 -333
  48. package/src/.agents/skills/websocket-engineer/references/security.md +0 -474
  49. package/src/.agents/skills/writing-skills/SKILL.md +0 -655
  50. package/src/.agents/skills/writing-skills/anthropic-best-practices.md +0 -1150
  51. package/src/.agents/skills/writing-skills/examples/CLAUDE_MD_TESTING.md +0 -189
  52. package/src/.agents/skills/writing-skills/graphviz-conventions.dot +0 -172
  53. package/src/.agents/skills/writing-skills/persuasion-principles.md +0 -187
  54. package/src/.agents/skills/writing-skills/render-graphs.js +0 -168
  55. package/src/.agents/skills/writing-skills/testing-skills-with-subagents.md +0 -384
  56. package/src/.claude/commands/bmad-track-compact.md +0 -19
  57. package/src/.claude/commands/bmad-track-extended.md +0 -19
  58. package/src/.claude/commands/bmad-track-large.md +0 -19
  59. package/src/.claude/commands/bmad-track-medium.md +0 -19
  60. package/src/.claude/commands/bmad-track-nano.md +0 -19
  61. package/src/.claude/commands/bmad-track-rv.md +0 -18
  62. package/src/.claude/commands/bmad-track-small.md +0 -19
  63. package/src/.claude/commands/master-orchestrator.md +0 -15
  64. package/src/_memory/master-orchestrator-sidecar/docs-index.md +0 -3
  65. package/src/_memory/master-orchestrator-sidecar/instructions.md +0 -2616
  66. package/src/_memory/master-orchestrator-sidecar/memories.md +0 -8
  67. package/src/_memory/master-orchestrator-sidecar/session-state.md +0 -15
  68. package/src/_memory/master-orchestrator-sidecar/triage-history.md +0 -3
  69. package/src/_memory/master-orchestrator-sidecar/workflows-overview.html +0 -1230
  70. package/src/core/agents/master-orchestrator.md +0 -54
  71. package/src/docs/dev/tmux/actions_popup.py +0 -291
  72. package/src/docs/dev/tmux/actions_popup.sh +0 -110
  73. package/src/docs/dev/tmux/claude_usage.sh +0 -15
  74. package/src/docs/dev/tmux/colors.conf +0 -26
  75. package/src/docs/dev/tmux/cpu_usage.sh +0 -7
  76. package/src/docs/dev/tmux/dispatch.sh +0 -10
  77. package/src/docs/dev/tmux/float_init.sh +0 -13
  78. package/src/docs/dev/tmux/float_term.sh +0 -23
  79. package/src/docs/dev/tmux/open_clip.sh +0 -14
  80. package/src/docs/dev/tmux/paste_clipboard.sh +0 -13
  81. package/src/docs/dev/tmux/paste_image_wrapper.sh +0 -94
  82. package/src/docs/dev/tmux/ram_usage.sh +0 -3
  83. package/src/docs/dev/tmux/title_sync.sh +0 -54
  84. package/src/docs/dev/tmux/tmux-setup.md +0 -867
  85. package/src/docs/dev/tmux/tmux.conf +0 -127
  86. package/src/docs/dev/tmux/xclip +0 -18
@@ -1,736 +0,0 @@
1
- # Vision Request Patterns
2
-
3
- Complete implementation patterns for Vision framework requests covering text
4
- recognition, face detection, barcode scanning, segmentation, classification,
5
- and video processing. All patterns target iOS 26+ with Swift 6.2 unless noted.
6
-
7
- ## Contents
8
- - Complete Text Recognition Pipeline
9
- - Face Detection with Landmarks
10
- - Barcode Detection with All Symbologies
11
- - Person Segmentation with Mask Application
12
- - Instance Segmentation (iOS 18+)
13
- - Image Classification
14
- - Saliency Detection
15
- - Rectangle Detection
16
- - Horizon Detection
17
- - Batch Processing Multiple Requests
18
- - Video Frame Processing with CMSampleBuffer
19
- - Object Tracking Across Video Frames
20
- - Coordinate Normalization Utilities
21
- - Performance Considerations
22
-
23
- ## Complete Text Recognition Pipeline
24
-
25
- Full pipeline from image loading through text extraction with coordinate mapping.
26
-
27
- ```swift
28
- import Vision
29
- import UIKit
30
-
31
- @MainActor
32
- final class TextRecognizer {
33
- func recognizeText(in image: UIImage) async throws -> [RecognizedTextBlock] {
34
- guard let cgImage = image.cgImage else {
35
- throw TextRecognitionError.invalidImage
36
- }
37
-
38
- var request = RecognizeTextRequest()
39
- request.recognitionLevel = .accurate
40
- request.recognitionLanguages = [
41
- Locale.Language(identifier: "en-US"),
42
- ]
43
- request.usesLanguageCorrection = true
44
-
45
- let observations = try await request.perform(on: cgImage)
46
- let imageSize = CGSize(
47
- width: cgImage.width,
48
- height: cgImage.height
49
- )
50
-
51
- return observations.compactMap { observation in
52
- guard let candidate = observation.topCandidates(1).first else { return nil }
53
- let boundingBox = observation.boundingBox
54
- let imageRect = VNImageRectForNormalizedRect(
55
- boundingBox,
56
- Int(imageSize.width),
57
- Int(imageSize.height)
58
- )
59
- return RecognizedTextBlock(
60
- text: candidate.string,
61
- confidence: candidate.confidence,
62
- boundingBox: imageRect
63
- )
64
- }
65
- }
66
- }
67
-
68
- struct RecognizedTextBlock: Sendable {
69
- let text: String
70
- let confidence: Float
71
- let boundingBox: CGRect
72
- }
73
-
74
- enum TextRecognitionError: Error {
75
- case invalidImage
76
- }
77
- ```
78
-
79
- ### Text Recognition with Language Hints
80
-
81
- ```swift
82
- func recognizeMultilingualText(in cgImage: CGImage) async throws -> [String] {
83
- var request = RecognizeTextRequest()
84
- request.recognitionLevel = .accurate
85
- request.recognitionLanguages = [
86
- Locale.Language(identifier: "en-US"),
87
- Locale.Language(identifier: "fr-FR"),
88
- Locale.Language(identifier: "de-DE"),
89
- ]
90
- request.usesLanguageCorrection = true
91
- request.customWords = ["iOS", "SwiftUI", "Xcode"]
92
-
93
- let observations = try await request.perform(on: cgImage)
94
- return observations.compactMap { $0.topCandidates(1).first?.string }
95
- }
96
- ```
97
-
98
- ### Fast Text Recognition for Live Video
99
-
100
- ```swift
101
- func recognizeTextFast(in sampleBuffer: CMSampleBuffer) async throws -> [String] {
102
- var request = RecognizeTextRequest()
103
- request.recognitionLevel = .fast
104
- request.recognitionLanguages = [Locale.Language(identifier: "en-US")]
105
-
106
- let observations = try await request.perform(on: sampleBuffer)
107
- return observations.compactMap { $0.topCandidates(1).first?.string }
108
- }
109
- ```
110
-
111
- ### Legacy Text Recognition (Pre-iOS 18)
112
-
113
- ```swift
114
- import Vision
115
-
116
- func recognizeTextLegacy(
117
- in cgImage: CGImage,
118
- completion: @escaping ([String]) -> Void
119
- ) {
120
- let request = VNRecognizeTextRequest { request, error in
121
- guard error == nil,
122
- let observations = request.results as? [VNRecognizedTextObservation]
123
- else {
124
- completion([])
125
- return
126
- }
127
- let strings = observations.compactMap {
128
- $0.topCandidates(1).first?.string
129
- }
130
- completion(strings)
131
- }
132
- request.recognitionLevel = .accurate
133
- request.recognitionLanguages = ["en-US"]
134
- request.usesLanguageCorrection = true
135
-
136
- let handler = VNImageRequestHandler(cgImage: cgImage)
137
- DispatchQueue.global(qos: .userInitiated).async {
138
- try? handler.perform([request])
139
- }
140
- }
141
- ```
142
-
143
- ## Face Detection with Landmarks
144
-
145
- ```swift
146
- import Vision
147
-
148
- struct DetectedFace: Sendable {
149
- let boundingBox: CGRect
150
- let landmarks: FaceLandmarkPoints?
151
- let roll: Measurement<UnitAngle>
152
- let yaw: Measurement<UnitAngle>
153
- let captureQuality: FaceObservation.CaptureQuality?
154
- }
155
-
156
- struct FaceLandmarkPoints: Sendable {
157
- let leftEye: [CGPoint]
158
- let rightEye: [CGPoint]
159
- let nose: [CGPoint]
160
- let outerLips: [CGPoint]
161
- let faceContour: [CGPoint]
162
- }
163
-
164
- func detectFaces(in cgImage: CGImage) async throws -> [DetectedFace] {
165
- // Detect face rectangles
166
- let rectRequest = DetectFaceRectanglesRequest()
167
- let faces = try await rectRequest.perform(on: cgImage)
168
-
169
- // Detect landmarks for detailed features
170
- let landmarkRequest = DetectFaceLandmarksRequest()
171
- let landmarkFaces = try await landmarkRequest.perform(on: cgImage)
172
-
173
- // Detect capture quality for photo selection
174
- let qualityRequest = DetectFaceCaptureQualityRequest()
175
- let qualityFaces = try await qualityRequest.perform(on: cgImage)
176
-
177
- return faces.enumerated().map { index, face in
178
- let landmarks: FaceLandmarkPoints?
179
- if index < landmarkFaces.count,
180
- let lm = landmarkFaces[index].landmarks {
181
- landmarks = FaceLandmarkPoints(
182
- leftEye: lm.leftEye?.normalizedPoints ?? [],
183
- rightEye: lm.rightEye?.normalizedPoints ?? [],
184
- nose: lm.nose?.normalizedPoints ?? [],
185
- outerLips: lm.outerLips?.normalizedPoints ?? [],
186
- faceContour: lm.faceContour?.normalizedPoints ?? []
187
- )
188
- } else {
189
- landmarks = nil
190
- }
191
-
192
- let quality: FaceObservation.CaptureQuality?
193
- if index < qualityFaces.count {
194
- quality = qualityFaces[index].captureQuality
195
- } else {
196
- quality = nil
197
- }
198
-
199
- return DetectedFace(
200
- boundingBox: face.boundingBox,
201
- landmarks: landmarks,
202
- roll: face.roll,
203
- yaw: face.yaw,
204
- captureQuality: quality
205
- )
206
- }
207
- }
208
- ```
209
-
210
- ## Barcode Detection with All Symbologies
211
-
212
- ```swift
213
- import Vision
214
-
215
- struct DetectedBarcode: Sendable {
216
- let payload: String?
217
- let symbology: VNBarcodeSymbology
218
- let boundingBox: CGRect
219
- }
220
-
221
- func detectBarcodes(
222
- in cgImage: CGImage,
223
- symbologies: [VNBarcodeSymbology] = [.qr, .ean13, .code128]
224
- ) async throws -> [DetectedBarcode] {
225
- var request = DetectBarcodesRequest()
226
- request.symbologies = symbologies
227
-
228
- let observations = try await request.perform(on: cgImage)
229
- return observations.map { barcode in
230
- DetectedBarcode(
231
- payload: barcode.payloadString,
232
- symbology: barcode.symbology,
233
- boundingBox: barcode.boundingBox
234
- )
235
- }
236
- }
237
-
238
- // Detect only QR codes with URL content
239
- func detectQRCodes(in cgImage: CGImage) async throws -> [URL] {
240
- var request = DetectBarcodesRequest()
241
- request.symbologies = [.qr]
242
-
243
- let observations = try await request.perform(on: cgImage)
244
- return observations.compactMap { barcode in
245
- guard let payload = barcode.payloadString else { return nil }
246
- return URL(string: payload)
247
- }
248
- }
249
- ```
250
-
251
- ### Supported Symbologies Reference
252
-
253
- ```swift
254
- // 1D barcodes
255
- let linearSymbologies: [VNBarcodeSymbology] = [
256
- .codabar, .code39, .code39Checksum, .code39FullASCII,
257
- .code39FullASCIIChecksum, .code93, .code93i, .code128,
258
- .ean8, .ean13, .gs1DataBar, .gs1DataBarExpanded,
259
- .gs1DataBarLimited, .i2of5, .i2of5Checksum, .itf14,
260
- .msiPlessey, .upce,
261
- ]
262
-
263
- // 2D barcodes
264
- let matrixSymbologies: [VNBarcodeSymbology] = [
265
- .qr, .aztec, .dataMatrix, .pdf417, .microPDF417, .microQR,
266
- ]
267
- ```
268
-
269
- ## Person Segmentation with Mask Application
270
-
271
- ### Modern API (iOS 18+)
272
-
273
- ```swift
274
- import Vision
275
- import CoreImage
276
- import CoreImage.CIFilterBuiltins
277
-
278
- func segmentPerson(in cgImage: CGImage) async throws -> CIImage {
279
- var request = GeneratePersonSegmentationRequest()
280
- request.qualityLevel = .accurate // .balanced, .fast
281
-
282
- let observation = try await request.perform(on: cgImage)
283
- let maskBuffer = observation.pixelBuffer
284
-
285
- let originalImage = CIImage(cgImage: cgImage)
286
- let maskImage = CIImage(cvPixelBuffer: maskBuffer)
287
-
288
- // Scale mask to match original image size
289
- let scaleX = originalImage.extent.width / maskImage.extent.width
290
- let scaleY = originalImage.extent.height / maskImage.extent.height
291
- let scaledMask = maskImage.transformed(by: CGAffineTransform(
292
- scaleX: scaleX, y: scaleY
293
- ))
294
-
295
- return scaledMask
296
- }
297
-
298
- // Apply background blur using person mask
299
- func blurBackground(of cgImage: CGImage, blurRadius: Double = 20.0) async throws -> CIImage {
300
- let mask = try await segmentPerson(in: cgImage)
301
- let original = CIImage(cgImage: cgImage)
302
-
303
- let blurFilter = CIFilter.gaussianBlur()
304
- blurFilter.inputImage = original
305
- blurFilter.radius = Float(blurRadius)
306
- guard let blurredImage = blurFilter.outputImage else {
307
- throw SegmentationError.noMask
308
- }
309
-
310
- let blendFilter = CIFilter.blendWithMask()
311
- blendFilter.inputImage = original // foreground (person)
312
- blendFilter.backgroundImage = blurredImage // blurred background
313
- blendFilter.maskImage = mask
314
-
315
- guard let result = blendFilter.outputImage else {
316
- throw SegmentationError.noMask
317
- }
318
- return result
319
- }
320
-
321
- enum SegmentationError: Error {
322
- case noMask
323
- }
324
- ```
325
-
326
- ### Legacy API (Pre-iOS 18)
327
-
328
- ```swift
329
- func segmentPersonLegacy(in cgImage: CGImage) throws -> CVPixelBuffer {
330
- let request = VNGeneratePersonSegmentationRequest()
331
- request.qualityLevel = .accurate
332
- request.outputPixelFormat = kCVPixelFormatType_OneComponent8
333
-
334
- let handler = VNImageRequestHandler(cgImage: cgImage)
335
- try handler.perform([request])
336
-
337
- guard let maskBuffer = request.results?.first?.pixelBuffer else {
338
- throw SegmentationError.noMask
339
- }
340
- return maskBuffer
341
- }
342
- ```
343
-
344
- ### Instance Segmentation (iOS 18+)
345
-
346
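- Separate masks per person for individual effects. The modern `GeneratePersonInstanceMaskRequest` shown first requires iOS 18+; the legacy `VNGeneratePersonInstanceMaskRequest` variant that follows is available from iOS 17.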
347
-
348
- ```swift
349
- // Modern API (iOS 18+)
350
- func segmentIndividualPeople(in cgImage: CGImage) async throws -> [CVPixelBuffer] {
351
- let request = GeneratePersonInstanceMaskRequest()
352
- let observation = try await request.perform(on: cgImage)
353
-
354
- let indices = observation.allInstances
355
- return try indices.map { index in
356
- try observation.generateMask(forInstances: IndexSet(integer: index))
357
- }
358
- }
359
- ```
360
-
361
- ```swift
362
- // Legacy API (iOS 17+)
363
- func segmentIndividualPeopleLegacy(in cgImage: CGImage) throws -> [CVPixelBuffer] {
364
- let request = VNGeneratePersonInstanceMaskRequest()
365
- let handler = VNImageRequestHandler(cgImage: cgImage)
366
- try handler.perform([request])
367
-
368
- guard let result = request.results?.first else { return [] }
369
- let indices = result.allInstances
370
-
371
- return try indices.map { index in
372
- try result.generateMask(forInstances: IndexSet(integer: index))
373
- }
374
- }
375
- ```
376
-
377
- ## Image Classification
378
-
379
- ```swift
380
- import Vision
381
-
382
- func classifyImage(_ cgImage: CGImage, maxResults: Int = 5) async throws -> [(String, Float)] {
383
- let request = ClassifyImageRequest()
384
- let observations = try await request.perform(on: cgImage)
385
-
386
- return observations.prefix(maxResults).map { observation in
387
- (observation.identifier, observation.confidence)
388
- }
389
- }
390
- ```
391
-
392
- ## Saliency Detection
393
-
394
- Identify the most visually important or attention-grabbing regions.
395
-
396
- ```swift
397
- // Attention-based saliency (what humans would look at)
398
- func detectAttentionSaliency(in cgImage: CGImage) async throws -> [CGRect] {
399
- let request = GenerateAttentionBasedSaliencyImageRequest()
400
- let results = try await request.perform(on: cgImage)
401
- guard let saliency = results.first else { return [] }
402
- return saliency.salientObjects?.map(\.boundingBox) ?? []
403
- }
404
-
405
- // Objectness-based saliency (distinct objects)
406
- func detectObjectSaliency(in cgImage: CGImage) async throws -> [CGRect] {
407
- let request = GenerateObjectnessBasedSaliencyImageRequest()
408
- let results = try await request.perform(on: cgImage)
409
- guard let saliency = results.first else { return [] }
410
- return saliency.salientObjects?.map(\.boundingBox) ?? []
411
- }
412
- ```
413
-
414
- ## Rectangle Detection
415
-
416
- Detect rectangular shapes for document edges, business cards, etc.
417
-
418
- ```swift
419
- func detectRectangles(in cgImage: CGImage) async throws -> [CGRect] {
420
- var request = DetectRectanglesRequest()
421
- request.minimumAspectRatio = 0.3
422
- request.maximumAspectRatio = 1.0
423
- request.minimumSize = 0.1
424
- request.maximumObservations = 5
425
-
426
- let observations = try await request.perform(on: cgImage)
427
- return observations.map(\.boundingBox)
428
- }
429
- ```
430
-
431
- ## Horizon Detection
432
-
433
- Detect the horizon angle for auto-straightening photos.
434
-
435
- ```swift
436
- func detectHorizon(in cgImage: CGImage) async throws -> CGFloat? {
437
- let request = DetectHorizonRequest()
438
- let results = try await request.perform(on: cgImage)
439
- return results.first?.angle.map { CGFloat($0) }
440
- }
441
- ```
442
-
443
- ## Batch Processing Multiple Requests
444
-
445
- Run multiple requests on the same image simultaneously for efficiency.
446
-
447
- ```swift
448
- func analyzeImage(_ cgImage: CGImage) async throws -> ImageAnalysisResult {
449
- async let textResults = {
450
- var req = RecognizeTextRequest()
451
- req.recognitionLevel = .accurate
452
- return try await req.perform(on: cgImage)
453
- }()
454
-
455
- async let faceResults = {
456
- let req = DetectFaceRectanglesRequest()
457
- return try await req.perform(on: cgImage)
458
- }()
459
-
460
- async let barcodeResults = {
461
- var req = DetectBarcodesRequest()
462
- req.symbologies = [.qr, .ean13]
463
- return try await req.perform(on: cgImage)
464
- }()
465
-
466
- let text = try await textResults
467
- let faces = try await faceResults
468
- let barcodes = try await barcodeResults
469
-
470
- return ImageAnalysisResult(
471
- recognizedText: text.compactMap { $0.topCandidates(1).first?.string },
472
- faceCount: faces.count,
473
- barcodePayloads: barcodes.compactMap(\.payloadString)
474
- )
475
- }
476
-
477
- struct ImageAnalysisResult: Sendable {
478
- let recognizedText: [String]
479
- let faceCount: Int
480
- let barcodePayloads: [String]
481
- }
482
- ```
483
-
484
- ### Legacy Batch Processing
485
-
486
- With the legacy API, pass multiple requests to a single handler call.
487
-
488
- ```swift
489
- func analyzeImageLegacy(_ cgImage: CGImage) throws {
490
- let textRequest = VNRecognizeTextRequest { request, error in
491
- // Handle text results
492
- }
493
- let faceRequest = VNDetectFaceRectanglesRequest { request, error in
494
- // Handle face results
495
- }
496
- let barcodeRequest = VNDetectBarcodesRequest { request, error in
497
- // Handle barcode results
498
- }
499
-
500
- let handler = VNImageRequestHandler(cgImage: cgImage)
501
- try handler.perform([textRequest, faceRequest, barcodeRequest])
502
- }
503
- ```
504
-
505
- ## Video Frame Processing with CMSampleBuffer
506
-
507
- Process live camera frames from AVCaptureSession.
508
-
509
- ```swift
510
- import AVFoundation
511
- import Vision
512
-
513
- final class VisionVideoProcessor: NSObject, AVCaptureVideoDataOutputSampleBufferDelegate, Sendable {
514
- private let processingQueue = DispatchQueue(label: "vision.processing", qos: .userInitiated)
515
-
516
- func setupCapture(session: AVCaptureSession) {
517
- let output = AVCaptureVideoDataOutput()
518
- output.setSampleBufferDelegate(self, queue: processingQueue)
519
- output.alwaysDiscardsLateVideoFrames = true
520
-
521
- if session.canAddOutput(output) {
522
- session.addOutput(output)
523
- }
524
- }
525
-
526
- func captureOutput(
527
- _ output: AVCaptureOutput,
528
- didOutput sampleBuffer: CMSampleBuffer,
529
- from connection: AVCaptureConnection
530
- ) {
531
- Task {
532
- do {
533
- var request = RecognizeTextRequest()
534
- request.recognitionLevel = .fast
535
- let observations = try await request.perform(on: sampleBuffer)
536
- let strings = observations.compactMap {
537
- $0.topCandidates(1).first?.string
538
- }
539
- // Dispatch results to main actor for UI update
540
- await MainActor.run {
541
- // Update UI with recognized strings
542
- }
543
- } catch {
544
- // Handle error
545
- }
546
- }
547
- }
548
- }
549
- ```
550
-
551
- ### Object Tracking Across Video Frames
552
-
553
- #### Modern API (iOS 18+)
554
-
555
- `TrackObjectRequest` is a stateful request that maintains tracking context
556
- internally. No need for a separate sequence handler.
557
-
558
- ```swift
559
- import Vision
560
-
561
- final class ObjectTracker {
562
- private var request: TrackObjectRequest?
563
-
564
- /// Initialize tracking with a bounding box in normalized coordinates
565
- func startTracking(boundingBox: CGRect) {
566
- let observation = DetectedObjectObservation(boundingBox: boundingBox)
567
- var req = TrackObjectRequest(observation: observation)
568
- req.trackingLevel = .accurate
569
- request = req
570
- }
571
-
572
- /// Track object in next video frame
573
- func track(in pixelBuffer: CVPixelBuffer) async throws -> CGRect? {
574
- guard var req = request else { return nil }
575
-
576
- let results = try await req.perform(on: pixelBuffer)
577
- guard let tracked = results.first, tracked.confidence > 0.3 else {
578
- request = nil
579
- return nil
580
- }
581
-
582
- request = req // preserve stateful tracking context
583
- return tracked.boundingBox
584
- }
585
-
586
- func stopTracking() {
587
- request = nil
588
- }
589
- }
590
- ```
591
-
592
- #### Legacy API
593
-
594
- ```swift
595
- final class LegacyObjectTracker {
596
- private var sequenceHandler = VNSequenceRequestHandler()
597
- private var currentObservation: VNDetectedObjectObservation?
598
-
599
- func startTracking(boundingBox: CGRect) {
600
- currentObservation = VNDetectedObjectObservation(boundingBox: boundingBox)
601
- }
602
-
603
- func track(in pixelBuffer: CVPixelBuffer) throws -> CGRect? {
604
- guard let observation = currentObservation else { return nil }
605
-
606
- let trackRequest = VNTrackObjectRequest(detectedObjectObservation: observation)
607
- trackRequest.trackingLevel = .accurate
608
-
609
- try sequenceHandler.perform([trackRequest], on: pixelBuffer)
610
-
611
- guard let result = trackRequest.results?.first as? VNDetectedObjectObservation,
612
- result.confidence > 0.3 else {
613
- currentObservation = nil
614
- return nil
615
- }
616
-
617
- currentObservation = result
618
- return result.boundingBox
619
- }
620
-
621
- func stopTracking() {
622
- currentObservation = nil
623
- }
624
- }
625
- ```
626
-
627
- ## Coordinate Normalization Utilities
628
-
629
- Vision uses normalized coordinates (0...1) with bottom-left origin. These
630
- utilities convert to UIKit/SwiftUI coordinate systems.
631
-
632
- ```swift
633
- import Vision
634
- import UIKit
635
-
636
- enum VisionCoordinateConverter {
637
- /// Convert normalized Vision rect to image-pixel coordinates
638
- static func toImageCoordinates(
639
- _ normalizedRect: CGRect,
640
- imageWidth: Int,
641
- imageHeight: Int
642
- ) -> CGRect {
643
- VNImageRectForNormalizedRect(normalizedRect, imageWidth, imageHeight)
644
- }
645
-
646
- /// Convert normalized Vision point to image-pixel coordinates
647
- static func toImageCoordinates(
648
- _ normalizedPoint: CGPoint,
649
- imageWidth: Int,
650
- imageHeight: Int
651
- ) -> CGPoint {
652
- VNImagePointForNormalizedPoint(normalizedPoint, imageWidth, imageHeight)
653
- }
654
-
655
- /// Convert Vision rect (bottom-left origin) to UIKit rect (top-left origin)
656
- static func toUIKitCoordinates(
657
- _ normalizedRect: CGRect,
658
- viewSize: CGSize
659
- ) -> CGRect {
660
- let imageRect = VNImageRectForNormalizedRect(
661
- normalizedRect,
662
- Int(viewSize.width),
663
- Int(viewSize.height)
664
- )
665
- // Flip Y axis: Vision origin is bottom-left, UIKit is top-left
666
- return CGRect(
667
- x: imageRect.origin.x,
668
- y: viewSize.height - imageRect.origin.y - imageRect.height,
669
- width: imageRect.width,
670
- height: imageRect.height
671
- )
672
- }
673
-
674
- /// Convert an array of normalized points to UIKit points
675
- static func toUIKitPoints(
676
- _ normalizedPoints: [CGPoint],
677
- viewSize: CGSize
678
- ) -> [CGPoint] {
679
- normalizedPoints.map { point in
680
- CGPoint(
681
- x: point.x * viewSize.width,
682
- y: (1.0 - point.y) * viewSize.height // flip Y
683
- )
684
- }
685
- }
686
- }
687
- ```
688
-
689
- ## Performance Considerations
690
-
691
- ### Recognition Level Selection
692
-
693
- | Use Case | Level | Typical Latency |
694
- |---|---|---|
695
- | Live camera preview | `.fast` | ~30ms per frame |
696
- | Photo library scan | `.accurate` | ~200-500ms per image |
697
- | Batch document OCR | `.accurate` | ~200-500ms per page |
698
- | Barcode scanner | n/a (barcode requests have no recognition level; restrict `symbologies` instead) | ~15-50ms per frame |
699
-
700
- ### Memory Management
701
-
702
- - Reuse `VNSequenceRequestHandler` across video frames (do not recreate per frame)
703
- - For batch processing, process one image at a time to avoid memory spikes
704
- - Release `CVPixelBuffer` references promptly after processing
705
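- Use `autoreleasepool` in tight synchronous loops processing many images (it cannot wrap `await` calls, so the async example below relies on processing one image at a time instead)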
706
-
707
- ```swift
708
- func batchProcess(images: [CGImage]) async throws -> [[String]] {
709
- var allResults: [[String]] = []
710
-
711
- for image in images {
712
- var request = RecognizeTextRequest()
713
- request.recognitionLevel = .accurate
714
- let obs = try await request.perform(on: image)
715
- let result = obs.compactMap { $0.topCandidates(1).first?.string }
716
- allResults.append(result)
717
- }
718
- return allResults
719
- }
720
- ```
721
-
722
- ### Threading
723
-
724
- - Modern API (`perform(on:)`) is async and safe to call from any context
725
- - Legacy API: create `VNImageRequestHandler` and call `perform` on a background queue
726
- - Never block the main thread with Vision requests
727
- - `VNSequenceRequestHandler` is not thread-safe -- use from a single serial queue
728
-
729
- ### Request Reuse
730
-
731
- Modern request structs are value types and cheap to create. Do not try to cache
732
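- and reuse them across calls -- just create a fresh one each time. The exception is stateful requests such as `TrackObjectRequest`, which must be kept and reused from frame to frame to preserve tracking context.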
733
-
734
- For the legacy API, `VNImageRequestHandler` is tied to a single image. Create a
735
- new handler for each image you process. `VNSequenceRequestHandler` can be reused
736
- across frames in a sequence.