@insidepics/expo-apple-intelligence 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +178 -0
- package/build/ExpoAppleIntelligence.types.d.ts +177 -0
- package/build/ExpoAppleIntelligence.types.d.ts.map +1 -0
- package/build/ExpoAppleIntelligence.types.js +3 -0
- package/build/ExpoAppleIntelligence.types.js.map +1 -0
- package/build/ExpoAppleIntelligenceModule.d.ts +51 -0
- package/build/ExpoAppleIntelligenceModule.d.ts.map +1 -0
- package/build/ExpoAppleIntelligenceModule.js +3 -0
- package/build/ExpoAppleIntelligenceModule.js.map +1 -0
- package/build/index.d.ts +3 -0
- package/build/index.d.ts.map +1 -0
- package/build/index.js +3 -0
- package/build/index.js.map +1 -0
- package/expo-module.config.json +6 -0
- package/ios/ExpoAppleIntelligence.podspec +38 -0
- package/ios/ExpoAppleIntelligenceModule.swift +352 -0
- package/ios/VisionHelpers.swift +62 -0
- package/ios/VisionModern.swift +239 -0
- package/ios/VisionProcessors.swift +197 -0
- package/package.json +59 -0
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
import ExpoModulesCore
|
|
2
|
+
import Vision
|
|
3
|
+
|
|
4
|
+
#if canImport(UIKit)
|
|
5
|
+
import UIKit
|
|
6
|
+
#endif
|
|
7
|
+
|
|
8
|
+
public class ExpoAppleIntelligenceModule: Module {
|
|
9
|
+
public func definition() -> ModuleDefinition {
|
|
10
|
+
Name("ExpoAppleIntelligence")
|
|
11
|
+
|
|
12
|
+
// MARK: - Batch analysis
|
|
13
|
+
|
|
14
|
+
// Registers `analyzeImage`: runs the whole batch of Vision requests against one image
// and resolves a single dictionary with one entry per analysis.
//
// - Rejects with `ERR_LOAD_IMAGE` when the image cannot be loaded and with
//   `ERR_ANALYSIS` when `VNImageRequestHandler.perform` throws.
// - `lensSmudge` and `document` start out as empty dictionaries and are only filled in
//   when the OS 26 APIs are available and return a value.
AsyncFunction("analyzeImage") { (imagePath: String, promise: Promise) in
    guard let (cgImage, width, height) = VisionHelpers.loadImage(imagePath) else {
        promise.reject("ERR_LOAD_IMAGE", "Could not load image at \(imagePath)")
        return
    }

    DispatchQueue.global(qos: .userInitiated).async {
        let handler = VNImageRequestHandler(cgImage: cgImage, options: [:])

        let faceLandmarksReq = VNDetectFaceLandmarksRequest()
        let faceQualityReq = VNDetectFaceCaptureQualityRequest()
        let textReq = VNRecognizeTextRequest()
        textReq.recognitionLevel = .accurate
        let classifyReq = VNClassifyImageRequest()
        let barcodeReq = VNDetectBarcodesRequest()
        let bodyPoseReq = VNDetectHumanBodyPoseRequest()
        let handPoseReq = VNDetectHumanHandPoseRequest()
        handPoseReq.maximumHandCount = 4
        let featurePrintReq = VNGenerateImageFeaturePrintRequest()
        let attentionReq = VNGenerateAttentionBasedSaliencyImageRequest()
        let objectnessReq = VNGenerateObjectnessBasedSaliencyImageRequest()
        let animalReq = VNRecognizeAnimalsRequest()
        let rectangleReq = VNDetectRectanglesRequest()
        rectangleReq.maximumObservations = 10
        let horizonReq = VNDetectHorizonRequest()

        var requests: [VNRequest] = [
            faceLandmarksReq, faceQualityReq, textReq, classifyReq, barcodeReq,
            bodyPoseReq, handPoseReq, featurePrintReq, attentionReq, objectnessReq,
            animalReq, rectangleReq, horizonReq,
        ]

        // The aesthetics request only exists on iOS 18 / macOS 15 and later, so it is
        // appended conditionally and kept type-erased for the code below.
        var aestheticsReq: VNRequest? = nil
        if #available(iOS 18.0, macOS 15.0, *) {
            let req = VNCalculateImageAestheticsScoresRequest()
            aestheticsReq = req
            requests.append(req)
        }

        do { try handler.perform(requests) } catch {
            promise.reject("ERR_ANALYSIS", error.localizedDescription)
            return
        }

        let faces = VisionProcessors.processFaces(landmarks: faceLandmarksReq.results ?? [], quality: faceQualityReq.results ?? [])
        let text = VisionProcessors.processText(textReq.results ?? [])
        let labels = VisionProcessors.processClassification(classifyReq.results ?? [])
        let barcodes = VisionProcessors.processBarcodes(barcodeReq.results ?? [])
        let bodyPoses = VisionProcessors.processBodyPoses(bodyPoseReq.results ?? [])
        let handPoses = VisionProcessors.processHandPoses(handPoseReq.results ?? [])
        let featurePrint = VisionProcessors.processFeaturePrint(featurePrintReq.results?.first)
        let attentionRegions = VisionProcessors.processSaliency(attentionReq.results ?? [])
        let objectnessRegions = VisionProcessors.processSaliency(objectnessReq.results ?? [])
        let animals = VisionProcessors.processAnimals(animalReq.results ?? [])
        let rectangles = VisionProcessors.processRectangles(rectangleReq.results ?? [])
        let horizon = VisionProcessors.processHorizon(horizonReq.results?.first)

        var aesthetics: [String: Any]? = nil
        if #available(iOS 18.0, macOS 15.0, *) {
            aesthetics = VisionModern.processAesthetics(aestheticsReq?.results)
        }

        #if os(iOS)
        let platform = "ios"
        #else
        let platform = "macos"
        #endif

        // Built as an immutable value so the Task below captures a constant rather
        // than a mutable local.
        let result: [String: Any] = [
            "faces": faces, "text": text, "labels": labels, "barcodes": barcodes,
            "bodyPoses": bodyPoses, "handPoses": handPoses, "rectangles": rectangles,
            "animals": animals,
            "featurePrint": featurePrint ?? [:] as [String: Any],
            "aesthetics": aesthetics ?? [:] as [String: Any],
            "saliency": ["attentionRegions": attentionRegions, "objectnessRegions": objectnessRegions],
            "horizon": horizon ?? [:] as [String: Any],
            "lensSmudge": [:] as [String: Any],
            "document": [:] as [String: Any],
            "imageWidth": Double(width), "imageHeight": Double(height),
            "platform": platform,
        ]

        // A single availability check covers every platform, so the promise is settled
        // on exactly one path no matter which OS the module is compiled for.
        guard #available(iOS 26.0, macOS 26.0, *) else {
            promise.resolve(result)
            return
        }

        Task {
            let lensSmudge = await VisionModern.detectLensSmudge(cgImage: cgImage)
            let document = await VisionModern.recognizeDocument(cgImage: cgImage)
            var enriched = result
            if let lensSmudge = lensSmudge { enriched["lensSmudge"] = lensSmudge }
            if let document = document { enriched["document"] = document }
            promise.resolve(enriched)
        }
    }
}
|
|
123
|
+
|
|
124
|
+
// MARK: - Individual functions
|
|
125
|
+
|
|
126
|
+
// Registers `detectFaces`: performs the face-landmark and capture-quality requests
// together and resolves with `VisionProcessors.processFaces` output.
AsyncFunction("detectFaces") { (imagePath: String, promise: Promise) in
    Self.runVision(imagePath, promise: promise) { requestHandler in
        let landmarksRequest = VNDetectFaceLandmarksRequest()
        let qualityRequest = VNDetectFaceCaptureQualityRequest()
        try requestHandler.perform([landmarksRequest, qualityRequest])

        let landmarkResults = landmarksRequest.results ?? []
        let qualityResults = qualityRequest.results ?? []
        return VisionProcessors.processFaces(landmarks: landmarkResults, quality: qualityResults)
    }
}
|
|
134
|
+
|
|
135
|
+
// Registers `recognizeText`: runs text recognition at the `.accurate` level and
// resolves with `VisionProcessors.processText` output.
AsyncFunction("recognizeText") { (imagePath: String, promise: Promise) in
    Self.runVision(imagePath, promise: promise) { requestHandler in
        let textRequest = VNRecognizeTextRequest()
        textRequest.recognitionLevel = .accurate
        try requestHandler.perform([textRequest])

        let observations = textRequest.results ?? []
        return VisionProcessors.processText(observations)
    }
}
|
|
143
|
+
|
|
144
|
+
// Registers `classifyImage`: runs image classification and resolves with
// `VisionProcessors.processClassification` output.
AsyncFunction("classifyImage") { (imagePath: String, promise: Promise) in
    Self.runVision(imagePath, promise: promise) { requestHandler in
        let classificationRequest = VNClassifyImageRequest()
        try requestHandler.perform([classificationRequest])

        let observations = classificationRequest.results ?? []
        return VisionProcessors.processClassification(observations)
    }
}
|
|
151
|
+
|
|
152
|
+
// Registers `detectBarcodes`: runs barcode detection and resolves with
// `VisionProcessors.processBarcodes` output.
AsyncFunction("detectBarcodes") { (imagePath: String, promise: Promise) in
    Self.runVision(imagePath, promise: promise) { requestHandler in
        let barcodeRequest = VNDetectBarcodesRequest()
        try requestHandler.perform([barcodeRequest])

        let observations = barcodeRequest.results ?? []
        return VisionProcessors.processBarcodes(observations)
    }
}
|
|
159
|
+
|
|
160
|
+
// Registers `detectBodyPoses`: runs human body-pose detection and resolves with
// `VisionProcessors.processBodyPoses` output.
AsyncFunction("detectBodyPoses") { (imagePath: String, promise: Promise) in
    Self.runVision(imagePath, promise: promise) { requestHandler in
        let bodyPoseRequest = VNDetectHumanBodyPoseRequest()
        try requestHandler.perform([bodyPoseRequest])

        let observations = bodyPoseRequest.results ?? []
        return VisionProcessors.processBodyPoses(observations)
    }
}
|
|
167
|
+
|
|
168
|
+
// Registers `detectHandPoses`: runs hand-pose detection for up to four hands and
// resolves with `VisionProcessors.processHandPoses` output.
AsyncFunction("detectHandPoses") { (imagePath: String, promise: Promise) in
    Self.runVision(imagePath, promise: promise) { requestHandler in
        let handPoseRequest = VNDetectHumanHandPoseRequest()
        handPoseRequest.maximumHandCount = 4
        try requestHandler.perform([handPoseRequest])

        let observations = handPoseRequest.results ?? []
        return VisionProcessors.processHandPoses(observations)
    }
}
|
|
176
|
+
|
|
177
|
+
// Registers `generateFeaturePrint`: generates an image feature print and resolves with
// whatever `VisionProcessors.processFeaturePrint` returns for the first observation.
AsyncFunction("generateFeaturePrint") { (imagePath: String, promise: Promise) in
    Self.runVision(imagePath, promise: promise) { requestHandler in
        let featurePrintRequest = VNGenerateImageFeaturePrintRequest()
        try requestHandler.perform([featurePrintRequest])

        let firstObservation = featurePrintRequest.results?.first
        return VisionProcessors.processFeaturePrint(firstObservation) as Any
    }
}
|
|
184
|
+
|
|
185
|
+
// Registers `calculateAesthetics`: on iOS 18 / macOS 15 and later runs the aesthetics
// scores request and resolves with `VisionModern.processAesthetics` output; on older
// systems it resolves with nil.
AsyncFunction("calculateAesthetics") { (imagePath: String, promise: Promise) in
    if #available(iOS 18.0, macOS 15.0, *) {
        Self.runVision(imagePath, promise: promise) { requestHandler in
            let scoresRequest = VNCalculateImageAestheticsScoresRequest()
            try requestHandler.perform([scoresRequest])
            return VisionModern.processAesthetics(scoresRequest.results) as Any
        }
    } else {
        promise.resolve(nil)
    }
}
|
|
196
|
+
|
|
197
|
+
// Registers `detectSaliency`: performs the attention-based and objectness-based
// saliency requests together and resolves with both processed region lists.
AsyncFunction("detectSaliency") { (imagePath: String, promise: Promise) in
    Self.runVision(imagePath, promise: promise) { requestHandler in
        let attentionRequest = VNGenerateAttentionBasedSaliencyImageRequest()
        let objectnessRequest = VNGenerateObjectnessBasedSaliencyImageRequest()
        try requestHandler.perform([attentionRequest, objectnessRequest])

        let attention = VisionProcessors.processSaliency(attentionRequest.results ?? [])
        let objectness = VisionProcessors.processSaliency(objectnessRequest.results ?? [])
        return [
            "attentionRegions": attention,
            "objectnessRegions": objectness,
        ]
    }
}
|
|
208
|
+
|
|
209
|
+
// Registers `detectAnimals`: runs animal recognition and resolves with
// `VisionProcessors.processAnimals` output.
AsyncFunction("detectAnimals") { (imagePath: String, promise: Promise) in
    Self.runVision(imagePath, promise: promise) { requestHandler in
        let animalRequest = VNRecognizeAnimalsRequest()
        try requestHandler.perform([animalRequest])

        let observations = animalRequest.results ?? []
        return VisionProcessors.processAnimals(observations)
    }
}
|
|
216
|
+
|
|
217
|
+
// Registers `detectRectangles`: runs rectangle detection capped at ten observations
// and resolves with `VisionProcessors.processRectangles` output.
AsyncFunction("detectRectangles") { (imagePath: String, promise: Promise) in
    Self.runVision(imagePath, promise: promise) { requestHandler in
        let rectangleRequest = VNDetectRectanglesRequest()
        rectangleRequest.maximumObservations = 10
        try requestHandler.perform([rectangleRequest])

        let observations = rectangleRequest.results ?? []
        return VisionProcessors.processRectangles(observations)
    }
}
|
|
225
|
+
|
|
226
|
+
// Registers `detectHorizon`: runs horizon detection and resolves with whatever
// `VisionProcessors.processHorizon` returns for the first observation.
AsyncFunction("detectHorizon") { (imagePath: String, promise: Promise) in
    Self.runVision(imagePath, promise: promise) { requestHandler in
        let horizonRequest = VNDetectHorizonRequest()
        try requestHandler.perform([horizonRequest])

        let firstObservation = horizonRequest.results?.first
        return VisionProcessors.processHorizon(firstObservation) as Any
    }
}
|
|
233
|
+
|
|
234
|
+
// Registers `detectLensSmudge`: resolves with nil when the OS 26 API is unavailable,
// rejects with `ERR_LOAD_IMAGE` when the image cannot be loaded, and otherwise
// resolves with the result of `VisionModern.detectLensSmudge`.
AsyncFunction("detectLensSmudge") { (imagePath: String, promise: Promise) in
    guard #available(iOS 26.0, macOS 26.0, *) else {
        promise.resolve(nil)
        return
    }
    guard let loaded = VisionHelpers.loadImage(imagePath) else {
        promise.reject("ERR_LOAD_IMAGE", "Could not load image at \(imagePath)")
        return
    }
    let image = loaded.0
    Task {
        let smudge = await VisionModern.detectLensSmudge(cgImage: image)
        promise.resolve(smudge)
    }
}
|
|
248
|
+
|
|
249
|
+
// Registers `recognizeDocument`: resolves with nil when the OS 26 API is unavailable,
// rejects with `ERR_LOAD_IMAGE` when the image cannot be loaded, and otherwise
// resolves with the result of `VisionModern.recognizeDocument`.
AsyncFunction("recognizeDocument") { (imagePath: String, promise: Promise) in
    guard #available(iOS 26.0, macOS 26.0, *) else {
        promise.resolve(nil)
        return
    }
    guard let loaded = VisionHelpers.loadImage(imagePath) else {
        promise.reject("ERR_LOAD_IMAGE", "Could not load image at \(imagePath)")
        return
    }
    let image = loaded.0
    Task {
        let document = await VisionModern.recognizeDocument(cgImage: image)
        promise.resolve(document)
    }
}
|
|
263
|
+
|
|
264
|
+
// MARK: - Image decoding
|
|
265
|
+
|
|
266
|
+
// Registers `decodeImagePixels`: loads the image, decodes it on a background queue and
// resolves with the base64-encoded pixel buffer plus the image width and height.
// Rejects with `ERR_LOAD_IMAGE` or `ERR_DECODE` when the respective step fails.
AsyncFunction("decodeImagePixels") { (imagePath: String, promise: Promise) in
    guard let loaded = VisionHelpers.loadImage(imagePath) else {
        promise.reject("ERR_LOAD_IMAGE", "Could not load image at \(imagePath)")
        return
    }
    let image = loaded.0

    DispatchQueue.global(qos: .userInitiated).async {
        if let rgba = VisionHelpers.decodePixels(image) {
            let payload: [String: Any] = [
                "pixels": rgba.base64EncodedString(),
                "width": Double(image.width),
                "height": Double(image.height),
            ]
            promise.resolve(payload)
        } else {
            promise.reject("ERR_DECODE", "Failed to decode pixels")
        }
    }
}
|
|
283
|
+
|
|
284
|
+
// MARK: - Foundation Models (iOS 26+)
|
|
285
|
+
|
|
286
|
+
// Registers the synchronous `isFoundationModelAvailable` function: false below
// iOS 26, otherwise whatever `VisionModern.isFoundationModelAvailable` reports.
Function("isFoundationModelAvailable") { () -> Bool in
    guard #available(iOS 26.0, *) else { return false }
    return VisionModern.isFoundationModelAvailable()
}
|
|
292
|
+
|
|
293
|
+
// Registers `generateText`: resolves with nil below iOS 26, otherwise with the result
// of `VisionModern.generateText` for the given prompt and optional system prompt.
AsyncFunction("generateText") { (prompt: String, systemPrompt: String?, promise: Promise) in
    guard #available(iOS 26.0, *) else {
        promise.resolve(nil)
        return
    }
    Task {
        let generated = await VisionModern.generateText(prompt: prompt, systemPrompt: systemPrompt)
        promise.resolve(generated)
    }
}
|
|
303
|
+
|
|
304
|
+
// MARK: - Speech Transcription (iOS 26+)
|
|
305
|
+
|
|
306
|
+
// Registers `transcribeAudio`: resolves with nil below iOS 26, otherwise with the
// result of `VisionModern.transcribeAudio` for the given file and optional locale.
AsyncFunction("transcribeAudio") { (audioPath: String, locale: String?, promise: Promise) in
    guard #available(iOS 26.0, *) else {
        promise.resolve(nil)
        return
    }
    Task {
        let transcript = await VisionModern.transcribeAudio(audioPath: audioPath, locale: locale)
        promise.resolve(transcript)
    }
}
|
|
316
|
+
|
|
317
|
+
// MARK: - Image Generation (iOS 18.4+)
|
|
318
|
+
|
|
319
|
+
// Registers `generateImage`: resolves with nil below iOS 18.4, otherwise with the
// result of `VisionModern.generateImage` for the given prompt and optional style.
AsyncFunction("generateImage") { (prompt: String, style: String?, promise: Promise) in
    guard #available(iOS 18.4, *) else {
        promise.resolve(nil)
        return
    }
    Task {
        let generated = await VisionModern.generateImage(prompt: prompt, style: style)
        promise.resolve(generated)
    }
}
|
|
329
|
+
}
|
|
330
|
+
|
|
331
|
+
// MARK: - Helper to reduce boilerplate
|
|
332
|
+
|
|
333
|
+
/// Shared plumbing for the single-purpose Vision functions.
///
/// Loads the image, then on a `.userInitiated` global queue builds a
/// `VNImageRequestHandler` and hands it to `work`. The value `work` returns resolves the
/// promise; an error thrown by `work` rejects it with `ERR_VISION`.
///
/// - Parameters:
///   - imagePath: Path or `file://` string passed to `VisionHelpers.loadImage`.
///   - promise: Promise settled exactly once by this helper.
///   - work: Performs the requests on the handler and returns the serialized result.
private static func runVision(
    _ imagePath: String,
    promise: Promise,
    work: @escaping (VNImageRequestHandler) throws -> Any
) {
    guard let loaded = VisionHelpers.loadImage(imagePath) else {
        promise.reject("ERR_LOAD_IMAGE", "Could not load image at \(imagePath)")
        return
    }
    let image = loaded.0

    let queue = DispatchQueue.global(qos: .userInitiated)
    queue.async {
        let requestHandler = VNImageRequestHandler(cgImage: image, options: [:])
        let outcome = Result { try work(requestHandler) }
        switch outcome {
        case .success(let value):
            promise.resolve(value)
        case .failure(let failure):
            promise.reject("ERR_VISION", failure.localizedDescription)
        }
    }
}
|
|
352
|
+
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import Vision
|
|
2
|
+
|
|
3
|
+
#if canImport(UIKit)
|
|
4
|
+
import UIKit
|
|
5
|
+
#elseif canImport(AppKit)
|
|
6
|
+
import AppKit
|
|
7
|
+
#endif
|
|
8
|
+
|
|
9
|
+
/// Shared helpers for image loading and coordinate serialization.
|
|
10
|
+
enum VisionHelpers {
|
|
11
|
+
/// Loads an image file and returns its `CGImage` together with its pixel width and height.
///
/// - Parameter path: Either a plain file-system path or a `file://` URL string.
/// - Returns: `(image, width, height)`, or `nil` when the file cannot be read or decoded.
static func loadImage(_ path: String) -> (CGImage, CGFloat, CGFloat)? {
    let fileScheme = "file://"
    let url: URL
    if path.hasPrefix(fileScheme) {
        // `URL(string:)` can return nil for strings containing characters that are not
        // percent-encoded (for example spaces). Instead of giving up, treat the
        // remainder of the string as a plain file-system path.
        url = URL(string: path)
            ?? URL(fileURLWithPath: String(path.dropFirst(fileScheme.count)))
    } else {
        url = URL(fileURLWithPath: path)
    }
    guard let data = try? Data(contentsOf: url) else { return nil }

    // NOTE(review): only the backing CGImage is returned, so any orientation the
    // platform image type carries is not propagated to callers; confirm that is intended.
    #if canImport(UIKit)
    guard let uiImage = UIImage(data: data),
          let cgImage = uiImage.cgImage else { return nil }
    #elseif canImport(AppKit)
    guard let nsImage = NSImage(data: data),
          let cgImage = nsImage.cgImage(forProposedRect: nil, context: nil, hints: nil) else { return nil }
    #endif

    return (cgImage, CGFloat(cgImage.width), CGFloat(cgImage.height))
}
|
|
31
|
+
|
|
32
|
+
/// Converts a `CGRect` into a dictionary with `x`, `y`, `width` and `height` entries.
/// Values are copied unchanged from the rect's origin and size (normalized 0-1,
/// bottom-left origin).
static func rawRect(_ rect: CGRect) -> [String: Double] {
    let origin = rect.origin
    let size = rect.size
    return [
        "x": Double(origin.x),
        "y": Double(origin.y),
        "width": Double(size.width),
        "height": Double(size.height),
    ]
}
|
|
37
|
+
|
|
38
|
+
/// Converts a `CGPoint` into a dictionary with `x` and `y` entries, copied unchanged
/// (normalized 0-1, bottom-left origin).
static func rawPoint(_ point: CGPoint) -> [String: Double] {
    let horizontal = Double(point.x)
    let vertical = Double(point.y)
    return ["x": horizontal, "y": vertical]
}
|
|
42
|
+
|
|
43
|
+
/// Decodes a `CGImage` into a tightly packed 8-bit RGBA buffer (alpha last,
/// premultiplied), four bytes per pixel and `width * 4` bytes per row.
///
/// - Parameter cgImage: The image to render.
/// - Returns: `width * height * 4` bytes of pixel data, or `nil` when the image has no
///   pixels or the bitmap context cannot be created.
static func decodePixels(_ cgImage: CGImage) -> Data? {
    let width = cgImage.width
    let height = cgImage.height
    guard width > 0, height > 0 else { return nil }

    let bytesPerRow = width * 4
    var pixels = [UInt8](repeating: 0, count: bytesPerRow * height)

    // The context stores the buffer pointer and writes through it while drawing, so the
    // pointer has to stay valid for the whole draw. Passing `&pixels` only guarantees
    // validity for the initializer call itself; scoping both context creation and
    // drawing inside `withUnsafeMutableBytes` keeps the storage pinned throughout.
    let didDraw = pixels.withUnsafeMutableBytes { buffer -> Bool in
        guard let context = CGContext(
            data: buffer.baseAddress,
            width: width,
            height: height,
            bitsPerComponent: 8,
            bytesPerRow: bytesPerRow,
            space: CGColorSpaceCreateDeviceRGB(),
            bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue
        ) else { return false }

        context.draw(cgImage, in: CGRect(x: 0, y: 0, width: width, height: height))
        return true
    }

    return didDraw ? Data(pixels) : nil
}
|
|
62
|
+
}
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
import Vision
|
|
2
|
+
import CoreImage
|
|
3
|
+
import AVFAudio
|
|
4
|
+
|
|
5
|
+
#if canImport(UIKit)
|
|
6
|
+
import UIKit
|
|
7
|
+
#endif
|
|
8
|
+
|
|
9
|
+
#if canImport(FoundationModels)
|
|
10
|
+
import FoundationModels
|
|
11
|
+
#endif
|
|
12
|
+
|
|
13
|
+
#if canImport(Speech)
|
|
14
|
+
import Speech
|
|
15
|
+
#endif
|
|
16
|
+
|
|
17
|
+
#if canImport(ImagePlayground)
|
|
18
|
+
import ImagePlayground
|
|
19
|
+
#endif
|
|
20
|
+
|
|
21
|
+
/// iOS 18+/macOS 15+ and iOS 26+/macOS 26+ Vision APIs (Swift-native, async/await).
|
|
22
|
+
enum VisionModern {
|
|
23
|
+
|
|
24
|
+
// MARK: - Aesthetics (iOS 18+ / macOS 15+)
|
|
25
|
+
|
|
26
|
+
/// Serializes the first aesthetics observation in `results`.
///
/// - Returns: A dictionary with `overallScore` and `isUtility`, or `nil` when `results`
///   is nil, empty, or its first element is not a `VNImageAestheticsScoresObservation`.
@available(iOS 18.0, macOS 15.0, *)
static func processAesthetics(_ results: [Any]?) -> [String: Any]? {
    guard let first = results?.first,
          let scores = first as? VNImageAestheticsScoresObservation else {
        return nil
    }
    var payload: [String: Any] = [:]
    payload["overallScore"] = Double(scores.overallScore)
    payload["isUtility"] = scores.isUtility
    return payload
}
|
|
31
|
+
|
|
32
|
+
// MARK: - Lens Smudge (iOS 26+ / macOS 26+)
|
|
33
|
+
|
|
34
|
+
/// Runs `DetectLensSmudgeRequest` on the image.
///
/// - Returns: A dictionary holding the observation's `confidence`, or `nil` when the
///   request throws.
@available(iOS 26.0, macOS 26.0, *)
static func detectLensSmudge(cgImage: CGImage) async -> [String: Any]? {
    let input = CIImage(cgImage: cgImage)
    let smudgeRequest = DetectLensSmudgeRequest()
    if let observation = try? await smudgeRequest.perform(on: input) {
        return ["confidence": observation.confidence]
    }
    return nil
}
|
|
41
|
+
|
|
42
|
+
// MARK: - Document Recognition (iOS 26+ / macOS 26+)
|
|
43
|
+
|
|
44
|
+
/// Runs `RecognizeDocumentsRequest` on the image and serializes the first document found.
///
/// - Returns: The `processDocument` dictionary for the first observation, or `nil` when
///   the request throws or yields no observations.
@available(iOS 26.0, macOS 26.0, *)
static func recognizeDocument(cgImage: CGImage) async -> [String: Any]? {
    let input = CIImage(cgImage: cgImage)
    let documentsRequest = RecognizeDocumentsRequest()
    let observations = try? await documentsRequest.perform(on: input)
    guard let firstObservation = observations?.first else { return nil }
    return processDocument(firstObservation.document)
}
|
|
52
|
+
|
|
53
|
+
/// Flattens a recognized document into plain dictionaries.
///
/// The result has four keys: `paragraphs` (text, bounding box and detected data),
/// `tables` (cells with row/column indices, plus bounding box), `lists` (item text and
/// marker, plus bounding box) and `barcodes` (payload, symbology and bounding box).
@available(iOS 26.0, macOS 26.0, *)
private static func processDocument(_ doc: DocumentObservation.Container) -> [String: Any] {
    let paragraphs: [[String: Any]] = doc.paragraphs.map { paragraph in
        let matches: [[String: Any]] = paragraph.detectedData.map { match in
            [
                "type": detectedDataType(match),
                "value": "\(match.match)",
                "boundingBox": VisionHelpers.rawRect(match.boundingRegion.boundingBox.cgRect),
            ]
        }
        return [
            "text": paragraph.transcript,
            "boundingBox": VisionHelpers.rawRect(paragraph.boundingRegion.boundingBox.cgRect),
            "detectedData": matches,
        ]
    }

    let tables: [[String: Any]] = doc.tables.map { table in
        // Cells are emitted row by row, each tagged with its row and column index.
        var flattenedCells: [[String: Any]] = []
        for (rowIndex, row) in table.rows.enumerated() {
            for (columnIndex, cell) in row.enumerated() {
                let cellText = cell.content.paragraphs
                    .map { $0.transcript }
                    .joined(separator: "\n")
                flattenedCells.append(["text": cellText, "row": rowIndex, "column": columnIndex])
            }
        }
        return [
            "cells": flattenedCells,
            "boundingBox": VisionHelpers.rawRect(table.boundingRegion.boundingBox.cgRect),
        ]
    }

    let lists: [[String: Any]] = doc.lists.map { list in
        let entries: [[String: Any]] = list.items.map { entry in
            [
                "text": entry.itemString,
                "marker": entry.markerString,
            ]
        }
        return [
            "items": entries,
            "boundingBox": VisionHelpers.rawRect(list.boundingRegion.boundingBox.cgRect),
        ]
    }

    let barcodes: [[String: Any]] = doc.barcodes.map { code in
        [
            "value": code.payloadString ?? "",
            "symbology": "\(code.symbology)",
            "boundingBox": VisionHelpers.rawRect(code.boundingRegion.boundingBox.cgRect),
        ]
    }

    return [
        "paragraphs": paragraphs,
        "tables": tables,
        "lists": lists,
        "barcodes": barcodes,
    ]
}
|
|
108
|
+
|
|
109
|
+
/// Maps a data-detector match to the string label used in the serialized document.
/// Any kind not listed here is reported as `"unknown"`.
@available(iOS 26.0, macOS 26.0, *)
private static func detectedDataType(_ data: DocumentObservation.Container.DataDetectorMatch) -> String {
    let label: String
    switch data.match.details {
    case .emailAddress:
        label = "email"
    case .phoneNumber:
        label = "phoneNumber"
    case .link:
        label = "url"
    case .calendarEvent:
        label = "calendarEvent"
    case .postalAddress:
        label = "postalAddress"
    case .moneyAmount:
        label = "moneyAmount"
    case .measurement:
        label = "measurement"
    default:
        label = "unknown"
    }
    return label
}
|
|
122
|
+
|
|
123
|
+
// MARK: - Foundation Models (iOS 26+)
|
|
124
|
+
|
|
125
|
+
/// Reports whether the default system language model is currently `.available`.
/// Always false when the FoundationModels framework cannot be imported.
@available(iOS 26.0, *)
static func isFoundationModelAvailable() -> Bool {
    #if canImport(FoundationModels)
    let availability = SystemLanguageModel.default.availability
    if availability == .available {
        return true
    }
    return false
    #else
    return false
    #endif
}
|
|
133
|
+
|
|
134
|
+
/// Asks the default system language model to respond to `prompt`.
///
/// - Parameters:
///   - prompt: The text sent to the session.
///   - systemPrompt: Optional instructions used when creating the session.
/// - Returns: The response content, or `nil` when the model is not available, the
///   request throws, or FoundationModels cannot be imported.
@available(iOS 26.0, *)
static func generateText(prompt: String, systemPrompt: String?) async -> String? {
    #if canImport(FoundationModels)
    guard SystemLanguageModel.default.availability == .available else { return nil }

    let session: LanguageModelSession
    if let instructions = systemPrompt {
        session = LanguageModelSession(instructions: instructions)
    } else {
        session = LanguageModelSession()
    }

    guard let reply = try? await session.respond(to: prompt) else { return nil }
    return reply.content
    #else
    return nil
    #endif
}
|
|
154
|
+
|
|
155
|
+
// MARK: - Speech Transcription (iOS 26+)
|
|
156
|
+
|
|
157
|
+
/// Transcribes an audio file with `SpeechTranscriber`.
///
/// - Parameters:
///   - audioPath: Either a plain file-system path or a `file://` URL string.
///   - locale: Optional locale identifier; the current locale is used when nil.
/// - Returns: `["segments": [...]]` with one `text` entry per non-empty result, or `nil`
///   when the file cannot be opened, transcription throws, nothing was transcribed, or
///   the Speech framework cannot be imported.
@available(iOS 26.0, *)
static func transcribeAudio(audioPath: String, locale: String?) async -> [String: Any]? {
    #if canImport(Speech)
    let fileScheme = "file://"
    let url: URL
    if audioPath.hasPrefix(fileScheme) {
        // `URL(string:)` can return nil for strings that are not fully percent-encoded
        // (for example paths with spaces); fall back to a plain file path in that case.
        url = URL(string: audioPath)
            ?? URL(fileURLWithPath: String(audioPath.dropFirst(fileScheme.count)))
    } else {
        url = URL(fileURLWithPath: audioPath)
    }

    guard let audioFile = try? AVAudioFile(forReading: url) else { return nil }

    // Optional mapping instead of a nil check followed by a force-unwrap.
    let loc: Locale = locale.map { Locale(identifier: $0) } ?? .current

    do {
        let transcriber = SpeechTranscriber(locale: loc, preset: .transcription)
        // NOTE(review): the analyzer instance is discarded right after creation;
        // confirm it does not need to be retained while results are consumed.
        _ = try await SpeechAnalyzer(
            inputAudioFile: audioFile,
            modules: [transcriber],
            finishAfterFile: true
        )

        var segments: [[String: Any]] = []
        for try await result in transcriber.results {
            let text = String(result.text.characters).trimmingCharacters(in: .whitespacesAndNewlines)
            if !text.isEmpty {
                segments.append(["text": text])
            }
        }

        if segments.isEmpty { return nil }
        return ["segments": segments]
    } catch {
        return nil
    }
    #else
    return nil
    #endif
}
|
|
197
|
+
|
|
198
|
+
// MARK: - Image Generation (iOS 18.4+)
|
|
199
|
+
|
|
200
|
+
/// Generates one image for `prompt` with `ImageCreator` and writes it as a JPEG into
/// the temporary directory.
///
/// - Parameters:
///   - prompt: Text concept passed to the image creator.
///   - style: `"illustration"` or `"sketch"`; any other value (including nil) selects
///     the animation style.
/// - Returns: The path of the written file, or `nil` when generation or writing fails,
///   when UIKit is unavailable, or when ImagePlayground cannot be imported.
@available(iOS 18.4, *)
static func generateImage(prompt: String, style: String?) async -> String? {
    #if canImport(ImagePlayground)
    do {
        let creator = try await ImageCreator()

        let chosenStyle: ImagePlaygroundStyle
        if style == "illustration" {
            chosenStyle = .illustration
        } else if style == "sketch" {
            chosenStyle = .sketch
        } else {
            chosenStyle = .animation
        }

        // Only the first generated image is used.
        var generated: CGImage? = nil
        for try await candidate in creator.images(for: [.text(prompt)], style: chosenStyle, limit: 1) {
            generated = candidate.cgImage
            break
        }

        guard let cgImage = generated else { return nil }

        let outputName = UUID().uuidString + ".jpg"
        let outputURL = FileManager.default.temporaryDirectory.appendingPathComponent(outputName)

        #if canImport(UIKit)
        guard let jpeg = UIImage(cgImage: cgImage).jpegData(compressionQuality: 0.9) else { return nil }
        try jpeg.write(to: outputURL)
        #else
        return nil
        #endif

        return outputURL.path
    } catch {
        return nil
    }
    #else
    return nil
    #endif
}
|
|
239
|
+
}
|