@hoshomoh/react-native-document-scanner 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/DocumentScanner.podspec +22 -0
- package/LICENSE +20 -0
- package/README.md +384 -0
- package/android/build.gradle +72 -0
- package/android/gradle.properties +17 -0
- package/android/local.properties +8 -0
- package/android/src/main/AndroidManifest.xml +8 -0
- package/android/src/main/java/com/documentscanner/DocumentScannerModule.kt +217 -0
- package/android/src/main/java/com/documentscanner/DocumentScannerPackage.kt +39 -0
- package/android/src/main/java/com/documentscanner/ImageProcessor.kt +325 -0
- package/android/src/main/java/com/documentscanner/Logger.kt +36 -0
- package/android/src/main/java/com/documentscanner/OCRConfiguration.kt +56 -0
- package/android/src/main/java/com/documentscanner/Options.kt +109 -0
- package/android/src/main/java/com/documentscanner/ScannerError.kt +18 -0
- package/android/src/main/java/com/documentscanner/TextRecognizer.kt +56 -0
- package/android/src/main/java/com/documentscanner/TextRecognizerV1.kt +68 -0
- package/android/src/main/java/com/documentscanner/TextRecognizerV2.kt +244 -0
- package/ios/DocumentScanner.h +5 -0
- package/ios/DocumentScanner.mm +113 -0
- package/ios/DocumentScannerManager.swift +148 -0
- package/ios/Errors.swift +33 -0
- package/ios/ImageProcessor.swift +78 -0
- package/ios/ImageUtil.swift +279 -0
- package/ios/Logger.swift +43 -0
- package/ios/OCRConfiguration.swift +60 -0
- package/ios/Options.swift +109 -0
- package/ios/ResponseUtil.swift +25 -0
- package/ios/ScanModels.swift +84 -0
- package/ios/TextRecognizer.swift +134 -0
- package/ios/TextRecognizerV1.swift +56 -0
- package/ios/TextRecognizerV2.swift +169 -0
- package/lib/module/NativeDocumentScanner.js +51 -0
- package/lib/module/NativeDocumentScanner.js.map +1 -0
- package/lib/module/index.js +40 -0
- package/lib/module/index.js.map +1 -0
- package/lib/module/package.json +1 -0
- package/lib/module/textReconstructor.js +147 -0
- package/lib/module/textReconstructor.js.map +1 -0
- package/lib/typescript/package.json +1 -0
- package/lib/typescript/src/NativeDocumentScanner.d.ts +191 -0
- package/lib/typescript/src/NativeDocumentScanner.d.ts.map +1 -0
- package/lib/typescript/src/index.d.ts +34 -0
- package/lib/typescript/src/index.d.ts.map +1 -0
- package/lib/typescript/src/textReconstructor.d.ts +60 -0
- package/lib/typescript/src/textReconstructor.d.ts.map +1 -0
- package/package.json +137 -0
- package/src/NativeDocumentScanner.ts +205 -0
- package/src/index.ts +61 -0
- package/src/textReconstructor.ts +212 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
import VisionKit
|
|
3
|
+
import React
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
The core native implementation of the Document Scanner.
|
|
7
|
+
|
|
8
|
+
This class handles:
|
|
9
|
+
1. Interfacing with `VNDocumentCameraViewController`.
|
|
10
|
+
2. Managing the scanning session lifecycle.
|
|
11
|
+
3. Delegating processing to `ImageProcessor`.
|
|
12
|
+
*/
|
|
13
|
+
@objc(DocumentScannerManager)
@available(iOS 13.0, *)
public class DocumentScannerManager: NSObject, VNDocumentCameraViewControllerDelegate {

    /// Pending promise callbacks for the in-flight operation; nil while idle.
    private var resolve: RCTPromiseResolveBlock?
    private var reject: RCTPromiseRejectBlock?
    /// Raw JS-side options captured for the active scan session.
    private var scanOptions: [String: Any]?

    /* ----------------------------------------------------------------------- */
    /* Scan Operations */
    /* ----------------------------------------------------------------------- */

    /**
     Presents the VisionKit document camera and resolves with the processed pages.
     - Parameters:
       - options: Optional scan configuration (filter, quality, page limit, OCR flags).
       - resolve: Promise resolver; receives an array of result dictionaries.
       - reject: Promise rejecter; invoked on failure or user cancellation.
     */
    @objc
    public func scanDocuments(_ options: NSDictionary?, resolve: @escaping RCTPromiseResolveBlock, reject: @escaping RCTPromiseRejectBlock) {
        /* Re-entrancy guard: a second call while one is pending would clobber
           the stored promise blocks and leave the first promise unsettled
           forever. Reject the newcomer instead. */
        guard self.resolve == nil, self.reject == nil else {
            let busy = ScannerError.operationFailed("Another operation is already in progress.")
            reject("error", busy.localizedDescription, busy)
            return
        }

        self.resolve = resolve
        self.reject = reject
        self.scanOptions = options as? [String: Any]

        DispatchQueue.main.async { [weak self] in
            guard let self = self else { return }

            guard VNDocumentCameraViewController.isSupported else {
                self.rejectError(.notSupported)
                return
            }

            let scannerViewController = VNDocumentCameraViewController()
            scannerViewController.delegate = self
            scannerViewController.modalPresentationStyle = .fullScreen

            if let topController = self.getTopMostViewController() {
                topController.present(scannerViewController, animated: true, completion: nil)
            } else {
                self.rejectError(.operationFailed("Could not find top view controller."))
            }
        }
    }

    /* ----------------------------------------------------------------------- */
    /* Process Operations */
    /* ----------------------------------------------------------------------- */

    /**
     Processes a batch of already-captured images (file URIs, data URIs, or raw
     base64 strings) through the same pipeline used for camera scans.
     - Parameters:
       - options: Must contain an 'images' array; other keys configure processing.
       - resolve: Promise resolver; receives an array of result dictionaries.
       - reject: Promise rejecter; invoked on configuration or load failures.
     */
    @objc
    public func processDocuments(_ options: NSDictionary, resolve: @escaping RCTPromiseResolveBlock, reject: @escaping RCTPromiseRejectBlock) {
        /* Same re-entrancy guard as scanDocuments — protects the single set
           of stored promise blocks. */
        guard self.resolve == nil, self.reject == nil else {
            let busy = ScannerError.operationFailed("Another operation is already in progress.")
            reject("error", busy.localizedDescription, busy)
            return
        }

        self.resolve = resolve
        self.reject = reject

        Task(priority: .userInitiated) { [weak self] in
            guard let self = self else { return }

            guard let opts = ProcessOptions(from: options as? [String: Any]) else {
                self.rejectError(.configurationError("Missing 'images' array in options"))
                return
            }

            /* Load images from sources; sources that fail to load are skipped. */
            let images = opts.images.compactMap { ImageUtil.loadImage(from: $0) }

            if images.isEmpty {
                self.rejectError(.configurationError("Could not load any valid images"))
                return
            }

            /* Process all images — async, never blocks the UI thread */
            let results = await ImageProcessor.processAll(images, options: opts)

            self.resolve?(results.map { $0.dictionary })
            self.cleanup()
        }
    }

    /* ----------------------------------------------------------------------- */
    /* VNDocumentCameraViewControllerDelegate */
    /* ----------------------------------------------------------------------- */

    /// User finished scanning: dismiss the camera, then process the pages.
    public func documentCameraViewController(_ controller: VNDocumentCameraViewController, didFinishWith scan: VNDocumentCameraScan) {
        controller.dismiss(animated: true) { [weak self] in
            self?.processScan(scan)
        }
    }

    /// User cancelled: dismiss the camera and reject the pending promise.
    public func documentCameraViewControllerDidCancel(_ controller: VNDocumentCameraViewController) {
        controller.dismiss(animated: true) { [weak self] in
            self?.rejectError(.canceled)
        }
    }

    /// Camera failure: dismiss and surface the underlying error message.
    public func documentCameraViewController(_ controller: VNDocumentCameraViewController, didFailWithError error: Error) {
        controller.dismiss(animated: true) { [weak self] in
            self?.rejectError(.operationFailed(error.localizedDescription))
        }
    }

    /* ----------------------------------------------------------------------- */
    /* Private Helpers */
    /* ----------------------------------------------------------------------- */

    /// Extracts pages from a completed scan (honoring maxPageCount) and runs
    /// them through the shared processing pipeline off the main thread.
    private func processScan(_ scan: VNDocumentCameraScan) {
        Task(priority: .userInitiated) { [weak self] in
            guard let self = self else { return }

            let opts = ScanOptions(from: self.scanOptions, fallbackPageCount: scan.pageCount)
            /* maxPageCount == 0 means "no limit" — take every scanned page. */
            let pageLimit = opts.maxPageCount == 0 ? scan.pageCount : min(scan.pageCount, opts.maxPageCount)

            /* Collect images from scan */
            var images: [UIImage] = []
            images.reserveCapacity(pageLimit)
            for i in 0..<pageLimit {
                images.append(scan.imageOfPage(at: i))
            }

            /* Process all images — async, never blocks the UI thread */
            let results = await ImageProcessor.processAll(images, options: opts)

            self.resolve?(results.map { $0.dictionary })
            self.cleanup()
        }
    }

    /// Finds the controller to present the scanner from: the key window's root
    /// controller, then follows the chain of presented controllers so that
    /// presentation does not fail when a modal is already on screen.
    private func getTopMostViewController() -> UIViewController? {
        /* UIApplication.windows is deprecated on iOS 15+; prefer the key
           window of a connected window scene, falling back to the legacy API. */
        let keyWindow = UIApplication.shared.connectedScenes
            .compactMap { $0 as? UIWindowScene }
            .flatMap { $0.windows }
            .first { $0.isKeyWindow }
            ?? UIApplication.shared.windows.first { $0.isKeyWindow }

        var top = keyWindow?.rootViewController
        while let presented = top?.presentedViewController {
            top = presented
        }
        return top
    }

    /// Rejects the pending promise with a structured error, then clears state.
    private func rejectError(_ error: ScannerError) {
        self.reject?("error", error.localizedDescription, error)
        self.cleanup()
    }

    /// Releases promise blocks and options so the manager can accept a new call.
    private func cleanup() {
        self.resolve = nil
        self.reject = nil
        self.scanOptions = nil
    }
}
|
package/ios/Errors.swift
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Custom error types for the Document Scanner.
|
|
5
|
+
* Used for communicating structured failure states back to the React Native bridge.
|
|
6
|
+
*/
|
|
7
|
+
/**
 * Custom error types for the Document Scanner.
 * Used for communicating structured failure states back to the React Native bridge.
 */
enum ScannerError: Error {
    /// Device hardware or OS version is unsupported.
    case notSupported
    /// Invalid configuration or missing options.
    case configurationError(String)
    /// A runtime failure occurred during processing.
    case operationFailed(String)
    /// The user cancelled the scanning process.
    case canceled
}

extension ScannerError: LocalizedError {
    /// Human-readable description surfaced to JavaScript via the bridge.
    var errorDescription: String? {
        let message: String
        switch self {
        case .notSupported:
            message = "Device does not support document scanning."
        case .configurationError(let detail):
            message = "Configuration Error: \(detail)"
        case .operationFailed(let detail):
            message = "Operation Failed: \(detail)"
        case .canceled:
            message = "User canceled the scan."
        }
        return message
    }
}
|
|
32
|
+
|
|
33
|
+
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
import UIKit
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
Centralized image processing pipeline.
|
|
6
|
+
Shared by scanDocuments and processDocuments to avoid code duplication.
|
|
7
|
+
*/
|
|
8
|
+
/**
 Centralized image processing pipeline.
 Shared by scanDocuments and processDocuments to avoid code duplication.
 */
public class ImageProcessor {

    /**
     Processes a single image through the full pipeline.
     - Parameters:
       - image: The UIImage to process.
       - options: Processing configuration (BaseOptions).
     - Returns: A ScanResult containing the processed data.
     */
    public static func process(_ image: UIImage, options: BaseOptions) async -> ScanResult {
        /* 1. Apply the requested filter, falling back to the untouched input. */
        let prepared = ImageUtil.applyFilter(image, filterType: options.filter) ?? image

        /* 2. Persist to a temp file; a save failure is logged, not fatal. */
        var fileUri: String?
        do {
            fileUri = try ImageUtil.saveImage(prepared, quality: options.quality, format: options.format)
        } catch {
            Logger.error("Error saving image: \(error.localizedDescription)")
        }

        /* 3. Base64 payload — only when the caller asked for it. */
        let encoded: String? = options.includeBase64
            ? ImageUtil.base64(from: prepared, format: options.format, quality: options.quality)
            : nil

        /* 4. OCR (if requested) — awaited to keep heavy work off the UI thread. */
        var recognizedText: String?
        var recognizedBlocks: [TextBlock]?
        if options.includeText,
           let ocr = await TextRecognizer.recognizeText(from: prepared, version: options.textVersion) {
            recognizedText = ocr.text
            recognizedBlocks = ocr.blocks
        }

        /* 5. Metadata describing how this page was produced. */
        let engine = options.includeText ? TextRecognizer.engineName(for: options.textVersion) : "none"
        let metadata = ScanMetadata(
            platform: "ios",
            textVersion: options.textVersion,
            filter: options.filter,
            ocrEngine: engine
        )

        /* 6. Assemble the bridge-facing result. */
        return ResponseUtil.buildResult(
            uri: fileUri,
            base64: encoded,
            text: recognizedText,
            blocks: recognizedBlocks,
            metadata: metadata
        )
    }

    /**
     Processes an array of images sequentially, preserving input order.
     - Parameters:
       - images: Array of UIImage objects.
       - options: Processing configuration.
     - Returns: Array of ScanResult, one per input image.
     */
    public static func processAll(_ images: [UIImage], options: BaseOptions) async -> [ScanResult] {
        var collected: [ScanResult] = []
        collected.reserveCapacity(images.count)
        for candidate in images {
            let outcome = await process(candidate, options: options)
            collected.append(outcome)
        }
        return collected
    }
}
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
import UIKit
|
|
3
|
+
import CoreImage
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
Utility class for image operations.
|
|
7
|
+
Handles filtering, encoding, and file system operations.
|
|
8
|
+
*/
|
|
9
|
+
public class ImageUtil {

    /// Shared rendering context. Creating a CIContext is expensive, so a single
    /// instance is reused across every filter invocation instead of per call.
    private static let ciContext = CIContext(options: nil)

    /* ----------------------------------------------------------------------- */
    /* Loading */
    /* ----------------------------------------------------------------------- */

    /**
     Loads a UIImage from a source string.
     - Parameter source: Can be a file URI, data URI, or raw base64 string.
     - Returns: The loaded UIImage, or nil if loading fails.
     */
    public static func loadImage(from source: String) -> UIImage? {
        /* File URL (file:// or absolute path) */
        if source.hasPrefix("file://") || source.hasPrefix("/") {
            let path: String
            if source.hasPrefix("file://") {
                /* Resolve through URL so percent-encoded characters (e.g. a
                   space stored as %20) are decoded; plain prefix-stripping
                   would yield a path that does not exist on disk. */
                path = URL(string: source)?.path ?? String(source.dropFirst(7))
            } else {
                path = source
            }
            return UIImage(contentsOfFile: path)
        }

        /* Data URI (e.g., data:image/png;base64,...) */
        if source.hasPrefix("data:") {
            if let commaIndex = source.firstIndex(of: ",") {
                let base64String = String(source[source.index(after: commaIndex)...])
                /* .ignoreUnknownCharacters tolerates line breaks that some
                   encoders insert into long base64 payloads. */
                if let data = Data(base64Encoded: base64String, options: .ignoreUnknownCharacters) {
                    return UIImage(data: data)
                }
            }
            return nil
        }

        /* Raw base64 string */
        if let data = Data(base64Encoded: source, options: .ignoreUnknownCharacters) {
            return UIImage(data: data)
        }

        Logger.warn("Could not load image from source: \(source.prefix(50))...")
        return nil
    }

    /* ----------------------------------------------------------------------- */
    /* Filtering */
    /* ----------------------------------------------------------------------- */

    /**
     Applies a CoreImage filter to the image.
     - Parameters:
       - image: The input UIImage.
       - filterType: The filter name ('grayscale', 'monochrome', 'denoise', 'sharpen', 'ocrOptimized').
     - Returns: The filtered UIImage; the original image for 'color' or unknown
       filter names; nil only if filter construction or rendering fails.
     */
    public static func applyFilter(_ image: UIImage, filterType: String) -> UIImage? {
        if filterType == "color" { return image }

        guard let ciImage = CIImage(image: image) else { return nil }

        let outputImage: CIImage?

        switch filterType {
        case "grayscale":
            outputImage = applyGrayscale(ciImage)
        case "monochrome":
            outputImage = applyMonochrome(ciImage)
        case "denoise":
            outputImage = applyDenoise(ciImage)
        case "sharpen":
            outputImage = applySharpen(ciImage)
        case "ocrOptimized":
            outputImage = applyOcrOptimizedPipeline(ciImage)
        default:
            /* Unknown filter names fall through to the untouched image. */
            return image
        }

        guard let finalCIImage = outputImage else { return nil }

        if let cgImage = ciContext.createCGImage(finalCIImage, from: finalCIImage.extent) {
            /* Preserve the source scale/orientation so the rendered image
               displays identically to the input. */
            return UIImage(cgImage: cgImage, scale: image.scale, orientation: image.imageOrientation)
        }

        return nil
    }

    /* ----------------------------------------------------------------------- */
    /* Filter Helpers */
    /* ----------------------------------------------------------------------- */

    /// Grayscale via CIPhotoEffectMono: desaturates while preserving luminance.
    /// Returns nil if the filter is unavailable.
    private static func applyGrayscale(_ input: CIImage) -> CIImage? {
        guard let filter = CIFilter(name: "CIPhotoEffectMono") else { return nil }
        filter.setValue(input, forKey: kCIInputImageKey)
        return filter.outputImage
    }

    /// High-contrast monochrome via CIPhotoEffectNoir — a dramatic black & white
    /// effect well suited to document scanning. Returns nil if unavailable.
    private static func applyMonochrome(_ input: CIImage) -> CIImage? {
        guard let filter = CIFilter(name: "CIPhotoEffectNoir") else { return nil }
        filter.setValue(input, forKey: kCIInputImageKey)
        return filter.outputImage
    }

    /// Noise reduction via CINoiseReduction (noiseLevel 0.02, sharpness 0.4)
    /// for balanced speckle removal on low-light / high-ISO captures.
    /// Returns nil if the filter is unavailable.
    private static func applyDenoise(_ input: CIImage) -> CIImage? {
        guard let filter = CIFilter(name: "CINoiseReduction") else { return nil }
        filter.setValue(input, forKey: kCIInputImageKey)
        filter.setValue(0.02, forKey: "inputNoiseLevel")
        filter.setValue(0.4, forKey: "inputSharpness")
        return filter.outputImage
    }

    /// Luminance sharpening via CISharpenLuminance (sharpness 0.8): enhances
    /// edge clarity without amplifying color artifacts. Returns nil if unavailable.
    private static func applySharpen(_ input: CIImage) -> CIImage? {
        guard let filter = CIFilter(name: "CISharpenLuminance") else { return nil }
        filter.setValue(input, forKey: kCIInputImageKey)
        filter.setValue(0.8, forKey: kCIInputSharpnessKey)
        return filter.outputImage
    }

    /**
     Applies the full OCR optimization pipeline: denoise → sharpen → monochrome.

     Stage order matters: denoising first prevents sharpening from amplifying
     noise, and the final monochrome pass yields high-contrast B&W for the OCR
     engine. Each stage is best-effort — if a filter is unavailable the previous
     stage's output (or the original input) is carried forward.
     - Parameter input: The source CIImage to process.
     - Returns: The processed CIImage (the input if every stage fails).
     */
    private static func applyOcrOptimizedPipeline(_ input: CIImage) -> CIImage? {
        var processed = input

        // Step 1: Denoise
        if let denoised = applyDenoise(processed) { processed = denoised }

        // Step 2: Sharpen
        if let sharpened = applySharpen(processed) { processed = sharpened }

        // Step 3: Monochrome (high contrast B&W)
        if let mono = applyMonochrome(processed) { processed = mono }

        return processed
    }

    /* ----------------------------------------------------------------------- */
    /* Encoding */
    /* ----------------------------------------------------------------------- */

    /**
     Converts an image to a Base64 string.
     - Parameters:
       - image: The image to encode.
       - format: "jpg" or "png".
       - quality: Compression quality for JPEG.
     - Returns: Base64 encoded string, or nil if encoding fails.
     */
    public static func base64(from image: UIImage, format: String, quality: CGFloat) -> String? {
        let data = imageData(from: image, format: format, quality: quality)
        return data?.base64EncodedString()
    }

    /* ----------------------------------------------------------------------- */
    /* File Operations */
    /* ----------------------------------------------------------------------- */

    /**
     Generates a unique file path in the temporary directory.
     - Parameter format: The file extension (e.g., "jpg", "png").
     - Returns: A unique URL.
     */
    public static func createTempFileURL(format: String) -> URL {
        let fileManager = FileManager.default
        let tempDir = fileManager.temporaryDirectory
        let uuid = UUID().uuidString
        let fileName = "\(uuid).\(format)"
        return tempDir.appendingPathComponent(fileName)
    }

    /**
     Saves a UIImage to the file system.
     - Parameters:
       - image: The image to save.
       - quality: Compression quality (0.0 - 1.0).
       - format: "jpg" or "png".
     - Returns: The `file://` URL string of the saved file.
     - Throws: `ScannerError.operationFailed` when encoding or writing fails.
     */
    public static func saveImage(_ image: UIImage, quality: CGFloat, format: String) throws -> String {
        let fileURL = createTempFileURL(format: format)

        guard let data = imageData(from: image, format: format, quality: quality) else {
            throw ScannerError.operationFailed("Could not generate data for image.")
        }

        do {
            try data.write(to: fileURL)
            return fileURL.absoluteString
        } catch {
            throw ScannerError.operationFailed(error.localizedDescription)
        }
    }

    /* ----------------------------------------------------------------------- */
    /* Private Helpers */
    /* ----------------------------------------------------------------------- */

    /**
     Converts a UIImage to Data in the specified format.
     - Parameters:
       - image: The image to convert.
       - format: "jpg" or "png" (anything other than "png" encodes as JPEG).
       - quality: Compression quality for JPEG.
     - Returns: Image data, or nil if conversion fails.
     */
    private static func imageData(from image: UIImage, format: String, quality: CGFloat) -> Data? {
        if format == "png" {
            return image.pngData()
        } else {
            /* Strip alpha before JPEG encoding to avoid AlphaPremulLast warnings.
               JPEG has no alpha channel, so composite onto an opaque surface first. */
            return renderOpaque(image).jpegData(compressionQuality: quality)
        }
    }

    /**
     Renders a UIImage into an opaque RGB context.
     Used to strip the alpha channel before JPEG encoding.
     - Parameter image: The source image (may have alpha).
     - Returns: An opaque UIImage with no alpha channel.
     */
    private static func renderOpaque(_ image: UIImage) -> UIImage {
        /* If already opaque, skip the redraw entirely. */
        if let cgImage = image.cgImage {
            let alpha = cgImage.alphaInfo
            if alpha == .none || alpha == .noneSkipFirst || alpha == .noneSkipLast {
                return image
            }
        }
        let rendererFormat = UIGraphicsImageRendererFormat()
        rendererFormat.scale = image.scale
        rendererFormat.opaque = true
        return UIGraphicsImageRenderer(size: image.size, format: rendererFormat).image { _ in
            image.draw(in: CGRect(origin: .zero, size: image.size))
        }
    }
}
|
package/ios/Logger.swift
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
A centralized logging utility for the Document Scanner.
|
|
5
|
+
Use this instead of `print` to ensure consistent formatting and easy disabling of logs in production.
|
|
6
|
+
*/
|
|
7
|
+
/**
 A centralized logging utility for the Document Scanner.
 Use this instead of `print` to ensure consistent formatting and easy disabling of logs in production.
 */
public class Logger {
    private static let prefix = "[DocumentScanner]"

    /// Emits one formatted log line: "<prefix> <tag>: <message>".
    private static func write(_ tag: String, _ message: String) {
        print("\(prefix) \(tag): \(message)")
    }

    /// Logs an informational message.
    /// - Parameter message: The message string.
    public static func info(_ message: String) {
        write("ℹ️ Info", message)
    }

    /// Logs a warning message.
    /// - Parameter message: The message string.
    public static func warn(_ message: String) {
        write("⚠️ Warning", message)
    }

    /// Logs an error message.
    /// - Parameter message: The message string.
    public static func error(_ message: String) {
        write("❌ Error", message)
    }

    /// Logs a debug message; compiled away outside of DEBUG builds.
    /// - Parameter message: The message string.
    public static func debug(_ message: String) {
        #if DEBUG
        write("🐛 Debug", message)
        #endif
    }
}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import Foundation
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
Configuration constants for the OCR engine.
|
|
5
|
+
Tuning these values affects how the `TextRecognizer` reconstructs layout from raw text blocks.
|
|
6
|
+
*/
|
|
7
|
+
/**
 Configuration constants for the OCR engine.
 Tuning these values affects how the `TextRecognizer` reconstructs layout from raw text blocks.
 */
public enum OCRConfiguration {

    /* ----------------------------------------------------------------------- */
    /* V2 Clustering Heuristics */
    /* ----------------------------------------------------------------------- */

    /**
     Height compatibility threshold: two blocks may only share a line cluster
     when their heights are broadly similar, i.e. `minH / maxH >= threshold`.
     */
    public static let heightCompatibilityThreshold: Double = 0.40

    /**
     Overlap ratio threshold: the blocks' vertical intersection divided by the
     smaller block height must reach this value for them to cluster.
     */
    public static let overlapRatioThreshold: Double = 0.50

    /**
     Centerline distance factor: maximum allowed vertical distance between
     block centers, expressed as a fraction of the typical line height.
     */
    public static let centerlineDistanceFactor: Double = 0.70

    /**
     Adaptive cluster growth limits: cap how tall a merged line may become,
     with separate bounds for stacked vs. skewed layouts (per the constant
     names — see TextRecognizerV2 for exact usage).
     */
    public static let stackedGrowthLimit: Double = 1.2
    public static let skewedGrowthLimit: Double = 2.0

    /* ----------------------------------------------------------------------- */
    /* Spacing & Reconstruction */
    /* ----------------------------------------------------------------------- */

    /**
     Adaptive spacing factor: a horizontal gap wider than this multiple of the
     median block height triggers insertion of extra spaces.
     */
    public static let adaptiveSpacingFactor: Double = 1.0

    /**
     Space width factor: the assumed width of a single space character,
     relative to the median block height.
     */
    public static let spaceWidthFactor: Double = 0.3

    /**
     Maximum spaces cap: upper bound on consecutive spaces inserted while
     reconstructing a line.
     */
    public static let maxSpaces: Int = 10
}
|