motion-ocr 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. data/.gitignore +17 -0
  2. data/Gemfile +3 -0
  3. data/Gemfile.lock +10 -0
  4. data/LICENSE +28 -0
  5. data/README.md +22 -0
  6. data/Rakefile +17 -0
  7. data/app/app_delegate.rb +2 -0
  8. data/lib/motion-ocr.rb +26 -0
  9. data/lib/motion-ocr/version.rb +3 -0
  10. data/motion-ocr.gemspec +20 -0
  11. data/resources/tessdata/configs/nodict +3 -0
  12. data/resources/tessdata/eng.traineddata +0 -0
  13. data/spec/motion_ocr_spec.rb +23 -0
  14. data/spec/support/phototest.gif +0 -0
  15. data/vendor/MotionOCR/MotionOCR.bridgesupport +14722 -0
  16. data/vendor/MotionOCR/MotionOCR.xcodeproj/project.pbxproj +390 -0
  17. data/vendor/MotionOCR/MotionOCR.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  18. data/vendor/MotionOCR/MotionOCR.xcodeproj/project.xcworkspace/xcuserdata/fer.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
  19. data/vendor/MotionOCR/MotionOCR.xcodeproj/xcuserdata/fer.xcuserdatad/xcschemes/MotionOCR.xcscheme +59 -0
  20. data/vendor/MotionOCR/MotionOCR.xcodeproj/xcuserdata/fer.xcuserdatad/xcschemes/xcschememanagement.plist +22 -0
  21. data/vendor/MotionOCR/MotionOCR/MotionOCR-Prefix.pch +7 -0
  22. data/vendor/MotionOCR/MotionOCR/MotionOCR.h +22 -0
  23. data/vendor/MotionOCR/MotionOCR/MotionOCR.mm +89 -0
  24. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/allheaders.h +32 -0
  25. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/alltypes.h +49 -0
  26. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/array.h +125 -0
  27. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/arrayaccess.h +194 -0
  28. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/bbuffer.h +46 -0
  29. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/bmf.h +51 -0
  30. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/bmp.h +74 -0
  31. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/ccbord.h +103 -0
  32. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/dewarp.h +57 -0
  33. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/endianness.h +11 -0
  34. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/environ.h +281 -0
  35. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/freetype.h +23 -0
  36. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/gplot.h +77 -0
  37. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/heap.h +73 -0
  38. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/imageio.h +153 -0
  39. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/jbclass.h +122 -0
  40. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/leptprotos.h +2058 -0
  41. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/leptwin.h +34 -0
  42. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/list.h +76 -0
  43. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/morph.h +218 -0
  44. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/pix.h +945 -0
  45. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/ptra.h +80 -0
  46. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/queue.h +63 -0
  47. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/readbarcode.h +220 -0
  48. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/regutils.h +122 -0
  49. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/stack.h +55 -0
  50. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/sudoku.h +62 -0
  51. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/watershed.h +52 -0
  52. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/apitypes.h +31 -0
  53. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/baseapi.h +664 -0
  54. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/errcode.h +104 -0
  55. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/genericvector.h +763 -0
  56. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/helpers.h +139 -0
  57. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/host.h +180 -0
  58. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/ndminx.h +31 -0
  59. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/ocrclass.h +335 -0
  60. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/platform.h +48 -0
  61. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/publictypes.h +202 -0
  62. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/tesscallback.h +1238 -0
  63. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/thresholder.h +170 -0
  64. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/unichar.h +85 -0
  65. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/lib/liblept.a +0 -0
  66. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/lib/libtesseract_all.a +0 -0
  67. metadata +113 -0
@@ -0,0 +1,139 @@
1
+ /* -*-C-*-
2
+ ********************************************************************************
3
+ *
4
+ * File: helpers.h
5
+ * Description: General utility functions
6
+ * Author: Daria Antonova
7
+ * Created: Wed Apr 8 14:37:00 2009
8
+ * Language: C++
9
+ * Package: N/A
10
+ * Status: Reusable Software Component
11
+ *
12
+ * (c) Copyright 2009, Google Inc.
13
+ ** Licensed under the Apache License, Version 2.0 (the "License");
14
+ ** you may not use this file except in compliance with the License.
15
+ ** You may obtain a copy of the License at
16
+ ** http://www.apache.org/licenses/LICENSE-2.0
17
+ ** Unless required by applicable law or agreed to in writing, software
18
+ ** distributed under the License is distributed on an "AS IS" BASIS,
19
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20
+ ** See the License for the specific language governing permissions and
21
+ ** limitations under the License.
22
+ *
23
+ ********************************************************************************/
24
+
25
+ #ifndef TESSERACT_CCUTIL_HELPERS_H_
26
+ #define TESSERACT_CCUTIL_HELPERS_H_
27
+
28
+ #include <stdio.h>
29
+ #include <string.h>
30
+
31
// Removes one trailing newline from str, if there is one.
// str must point to a writable, NUL-terminated string. An empty string and a
// string that does not end in '\n' are left unchanged.
inline void chomp_string(char *str) {
  // Keep the length as size_t: narrowing strlen() to int can overflow for
  // very long strings, and "length - 1" on an empty string would be -1.
  const size_t length = strlen(str);
  if (length > 0 && str[length - 1] == '\n') {
    str[length - 1] = '\0';
  }
}
38
+
39
// Consumes the next character of file if, and only if, it is a newline.
// Any other character is pushed back so the next read returns it again.
// At end of file (or on a read error) nothing was consumed, so nothing is
// pushed back; seeking back one byte there would make the caller re-read the
// final byte of the file.
inline void SkipNewline(FILE *file) {
  const int next_char = fgetc(file);
  if (next_char != '\n' && next_char != EOF) {
    // ungetc() restores exactly the character just read and, unlike
    // fseek(-1), also works on streams that cannot seek.
    ungetc(next_char, file);
  }
}
43
+
44
// Comparison callback for qsort() over an array of float.
// arg1 and arg2 point at the two floats being compared. Returns 1 when the
// first is larger, -1 when it is smaller and 0 otherwise (including when the
// difference is NaN).
inline int sort_floats(const void *arg1, const void *arg2) {
  const float delta = *static_cast<const float *>(arg1) -
                      *static_cast<const float *>(arg2);
  // Each comparison yields 0 or 1, so this is the sign of delta.
  return (delta > 0) - (delta < 0);
}
55
+
56
// Rounds n up to a multiple of block_size: for non-negative n and positive
// block_size the result is the smallest multiple of block_size that is >= n.
// The division truncates toward zero, so negative n is not rounded "up" in
// the same sense.
inline int RoundUp(int n, int block_size) {
  const int num_blocks = (n + block_size - 1) / block_size;
  return block_size * num_blocks;
}
60
+
61
// Returns x limited to the interval [lower_bound, upper_bound]: lower_bound
// when x is below it, otherwise upper_bound when x is above it, otherwise x.
// The lower bound is checked first.
template<typename T>
inline T ClipToRange(const T& x, const T& lower_bound, const T& upper_bound) {
  return x < lower_bound ? lower_bound
                         : (x > upper_bound ? upper_bound : x);
}
70
+
71
// Widens the range [*lower_bound, *upper_bound] so that it contains x.
// Each bound is tested independently and only moved outwards.
template<typename T1, typename T2>
inline void UpdateRange(const T1& x, T2* lower_bound, T2* upper_bound) {
  T2& low = *lower_bound;
  T2& high = *upper_bound;
  if (x < low) {
    low = x;
  }
  if (x > high) {
    high = x;
  }
}
79
+
80
// Widens the range [*lower_bound, *upper_bound] so that it contains the
// interval [x_lo, x_hi]: the lower bound drops to x_lo if x_lo is smaller,
// and the upper bound rises to x_hi if x_hi is larger.
template<typename T1, typename T2>
inline void UpdateRange(const T1& x_lo, const T1& x_hi,
                        T2* lower_bound, T2* upper_bound) {
  T2& low = *lower_bound;
  T2& high = *upper_bound;
  if (x_lo < low) {
    low = x_lo;
  }
  if (x_hi > high) {
    high = x_hi;
  }
}
89
+
90
// Modulo that also works for negative a.
// For any integer a and positive b, returns r with 0 <= r < b such that
// a = n * b + r for some integer n.
inline int Modulo(int a, int b) {
  // a % b has the sign of a; adding b and reducing again makes it non-negative.
  const int signed_remainder = a % b;
  return (signed_remainder + b) % b;
}
96
+
97
// Integer division rounded to the nearest integer, also for negative a,
// without double counting at 0.
// Always computing (a + b / 2) / b would give 1/3 = 0, 0/3 = 0, -1/3 = 0,
// -2/3 = 0, -3/3 = 0 and -4/3 = -1.
// This function gives 1/3 = 0, 0/3 = 0, -1/3 = 0, -2/3 = -1, -3/3 = -1 and
// -4/3 = -1.
// Results with b negative are not defined.
inline int DivRounded(int a, int b) {
  const int half_divisor = b / 2;
  if (a >= 0) {
    return (a + half_divisor) / b;
  }
  // Negative numerator: bias away from zero in the negative direction.
  return (a - half_divisor) / b;
}
106
+
107
// Converts x to int, rounding to the nearest integer. Exact halves round
// away from zero, e.g. 2.5 -> 3 and -2.5 -> -3.
inline int IntCastRounded(double x) {
  if (x >= 0.0) {
    return static_cast<int>(x + 0.5);
  }
  // Round the magnitude, then restore the sign.
  return -static_cast<int>(-x + 0.5);
}
111
+
112
// Reverses, in place, the order of the num_bytes bytes starting at ptr
// (used for big/little-endian switching). For an odd num_bytes the middle
// byte stays where it is; num_bytes <= 1 changes nothing.
inline void ReverseN(void* ptr, int num_bytes) {
  char *bytes = reinterpret_cast<char *>(ptr);
  // Swap the outermost pair and walk both indices towards the middle.
  for (int front = 0, back = num_bytes - 1; front < back; ++front, --back) {
    const char saved = bytes[front];
    bytes[front] = bytes[back];
    bytes[back] = saved;
  }
}
122
+
123
+ // Reverse the order of bytes in a 16 bit quantity for big/little-endian switch.
124
+ inline void Reverse16(void *ptr) {
125
+ ReverseN(ptr, 2);
126
+ }
127
+
128
+ // Reverse the order of bytes in a 32 bit quantity for big/little-endian switch.
129
+ inline void Reverse32(void *ptr) {
130
+ ReverseN(ptr, 4);
131
+ }
132
+
133
+ // Reverse the order of bytes in a 64 bit quantity for big/little-endian switch.
134
+ inline void Reverse64(void* ptr) {
135
+ ReverseN(ptr, 8);
136
+ }
137
+
138
+
139
+ #endif // TESSERACT_CCUTIL_HELPERS_H_
@@ -0,0 +1,180 @@
1
+ /******************************************************************************
2
+ ** Filename: Host.h
3
+ ** Purpose: This is the system independent typedefs and defines
4
+ ** Author: MN, JG, MD
5
+ ** Version: 5.4.1
6
+ ** History: 11/7/94 MCD received the modification that Lennart made
7
+ ** to port to 32 bit world and modify this file so that it
8
+ ** will be shared between platform.
9
+ ** 11/9/94 MCD Make MSW32 subset of MSW. Now MSW means
10
+ ** MicroSoft Window and MSW32 means the 32 bit worlds
11
+ ** of MicroSoft Window. Therefore you want the environment
12
+ ** to be MicroSoft Window and in the 32 bit world -
13
+ ** __MSW__ and __MSW32__ must be uncommented out.
14
+ ** 11/30/94 MCD Incorporated comments received for more
15
+ ** readability and the missing typedef for FLOAT.
16
+ ** 12/1/94 MCD Added PFVOID typedef
17
+ ** 5/1/95 MCD. Made many changes based on the inputs.
18
+ ** Changes:
19
+ ** 1) Rearrange the #ifdef so that there're definitions for
20
+ ** particular platforms.
21
+ ** 2) Took out the #define for computer and environment
22
+ ** that developer can uncomment
23
+ ** 3) Added __OLDCODE__ where the defines will be
24
+ ** obsoleted in the next version and advise not to use.
25
+ ** 4) Added the definitions for the following:
26
+ ** FILE_HANDLE, MEMORY_HANDLE, BOOL8,
27
+ ** MAX_INT8, MAX_INT16, MAX_INT32, MAX_UINT8
28
+ ** MAX_UINT16, MAX_UINT32, MAX_FLOAT32
29
+ ** 06/19/96 MCD. Took out MAX_FLOAT32
30
+ ** 07/15/96 MCD. Fixed the comments error
31
+ ** Add back BOOL8.
32
+ **
33
+ ** (c) Copyright Hewlett-Packard Company, 1988-1996.
34
+ ** Licensed under the Apache License, Version 2.0 (the "License");
35
+ ** you may not use this file except in compliance with the License.
36
+ ** You may obtain a copy of the License at
37
+ ** http://www.apache.org/licenses/LICENSE-2.0
38
+ ** Unless required by applicable law or agreed to in writing, software
39
+ ** distributed under the License is distributed on an "AS IS" BASIS,
40
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
41
+ ** See the License for the specific language governing permissions and
42
+ ** limitations under the License.
43
+ */
44
+
45
+ #ifndef __HOST__
46
+ #define __HOST__
47
+
48
+ /******************************************************************************
49
+ ** IMPORTANT!!! **
50
+ ** **
51
+ ** Define one of __MSW__, __MSW32__, __MAC__, __UNIX__, __OS2__, __PM__ to
52
+ ** use the specified definitions indicated below in the preprocessor settings. **
53
+ ** **
54
+ ** Also define either __FarProc__ or __FarData__ and __MOTO__ to use the
55
+ ** specified definitions indicated below in the preprocessor settings. **
56
+ ** **
57
+ ** If preprocessor settings are not allowed by the compiler that is being used,
58
+ ** then it is recommended that a "platform.h" is created with the definition
59
+ ** of the computer and/or operating system.
60
+ ******************************************************************************/
61
+
62
+ #include "platform.h"
63
+ /* __MSW32__ */
64
+ #ifdef __MSW32__
65
+ #include <windows.h>
66
+ #include <winbase.h> // winbase.h contains windows.h
67
+
68
+ #define DLLIMPORT __declspec( dllimport)
69
+ #define DLLEXPORT __declspec( dllexport)
70
+
71
+ #else
72
+ /********************************************************/
73
+ /* __MSW__ */
74
+ #ifdef __MSW__
75
+ #include <windows.h> // provides standard definitions (like HANDLE)
76
+
77
+ #define DLLIMPORT __import
78
+ #define DLLEXPORT __export
79
+ #endif
80
+ #endif
81
+
82
+ /********************************************************/
83
+ /* __MAC__ */
84
+ #ifdef __MAC__
85
+ #include <Types.h>
86
+ /*----------------------------*/
87
+ /*----------------------------*/
88
+ #define DLLIMPORT
89
+ #define DLLEXPORT
90
+
91
+ #endif
92
+ /********************************************************/
93
+ #if defined(__UNIX__) || defined( __DOS__ ) || defined(__OS2__) || defined(__PM__)
94
+ /*----------------------------*/
95
+ /* FarProc and FarData */
96
+ /*----------------------------*/
97
+ #define DLLIMPORT
98
+ #define DLLEXPORT
99
+ /*----------------------------*/
100
+ #endif
101
+ /*****************************************************************************
102
+ **
103
+ ** Standard GHC Definitions
104
+ **
105
+ *****************************************************************************/
106
+
107
+ #ifdef __MOTO__
108
+ #define __NATIVE__ MOTO
109
+ #else
110
+ #define __NATIVE__ INTEL
111
+ #endif
112
+
113
+ //typedef HANDLE FD* PHANDLE;
114
+
115
+ // Definitions of portable data types (numbers and characters). The names state the intended widths; the underlying built-in types are assumed to have those sizes on the target platform.
116
+ typedef SIGNED char inT8; // SIGNED is not defined in this file; presumably supplied by platform.h - confirm
117
+ typedef unsigned char uinT8;
118
+ typedef short inT16;
119
+ typedef unsigned short uinT16;
120
+ typedef int inT32;
121
+ typedef unsigned int uinT32;
122
+ #if (_MSC_VER >= 1200) // Microsoft compiler, VC 6.0 or later (vkr): INT64/UINT64 are not defined here, presumably from the Windows headers
123
+ typedef INT64 inT64;
124
+ typedef UINT64 uinT64;
125
+ #else
126
+ typedef long long int inT64;
127
+ typedef unsigned long long int uinT64;
128
+ #endif // _MSC_VER >= 1200 (vkr, VC 6.0)
129
+ typedef float FLOAT32;
130
+ typedef double FLOAT64;
131
+ typedef unsigned char BOOL8;
132
+
133
+ #define INT32FORMAT "%d"
134
+ #define INT64FORMAT "%lld"
135
+
136
// Largest values of the portable numeric types.
#define MAX_INT8 0x7f
#define MAX_INT16 0x7fff
#define MAX_INT32 0x7fffffff
#define MAX_UINT8 0xff
#define MAX_UINT16 0xffff
#define MAX_UINT32 0xffffffff
#define MAX_FLOAT32 ((float)3.40282347e+38)

// Smallest values of the portable numeric types.
// The signed minima are written as (-MAX - 1) so that each macro is a negative
// int. The bare bit patterns 0x80, 0x8000 and 0x80000000 are positive values
// (the last one unsigned), which makes comparisons such as "x < MIN_INT32"
// silently wrong.
#define MIN_INT8 (-0x7f - 1)
#define MIN_INT16 (-0x7fff - 1)
#define MIN_INT32 (-0x7fffffff - 1)
#define MIN_UINT8 0x00
#define MIN_UINT16 0x0000
#define MIN_UINT32 0x00000000
// Note: this is the smallest positive normalized float, not the most negative
// float value.
#define MIN_FLOAT32 ((float)1.17549435e-38)
151
+
152
+ // Defines
153
+
154
+ #ifndef OKAY
155
+ #define OKAY 0
156
+ #endif
157
+
158
+ #ifndef HPERR
159
+ #define HPERR -1
160
+ #endif
161
+
162
+ #ifndef TRUE
163
+ #define TRUE 1
164
+ #endif
165
+
166
+ #ifndef FALSE
167
+ #define FALSE 0
168
+ #endif
169
+
170
+ #ifndef NULL
171
+ #define NULL 0L
172
+ #endif
173
+
174
// Returns true if x and y differ by no more than tolerance in either
// direction, i.e. both (x - y) <= tolerance and (y - x) <= tolerance hold.
template<class T> bool NearlyEqual(T x, T y, T tolerance) {
  const T delta = x - y;
  if (!(delta <= tolerance)) {
    return false;  // x exceeds y by more than tolerance
  }
  return -delta <= tolerance;
}
179
+
180
+ #endif
@@ -0,0 +1,31 @@
1
+ /**********************************************************************
2
+ * File: ndminx.h (Formerly ndminmax.h)
3
+ * Description: MIN and MAX macros
4
+ * Author: Phil Cheatle
5
+ * Created: Mon Mar 29 14:46:01 BST 1993
6
+ *
7
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
8
+ ** Licensed under the Apache License, Version 2.0 (the "License");
9
+ ** you may not use this file except in compliance with the License.
10
+ ** You may obtain a copy of the License at
11
+ ** http://www.apache.org/licenses/LICENSE-2.0
12
+ ** Unless required by applicable law or agreed to in writing, software
13
+ ** distributed under the License is distributed on an "AS IS" BASIS,
14
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ ** See the License for the specific language governing permissions and
16
+ ** limitations under the License.
17
+ *
18
+ **********************************************************************/
19
+
20
+ #ifndef NDMINX_H
21
+ #define NDMINX_H
22
+
23
+ #ifndef MAX // keep any MAX that is already defined
24
+ #define MAX(x,y) (((x) >= (y))?(x):(y)) // larger of x and y; the selected argument is evaluated twice, so avoid side effects such as MAX(i++, j)
25
+ #endif
26
+
27
+ #ifndef MIN // keep any MIN that is already defined
28
+ #define MIN(x,y) (((x) <= (y))?(x):(y)) // smaller of x and y; the selected argument is evaluated twice, so avoid side effects such as MIN(i++, j)
29
+ #endif
30
+
31
+ #endif
@@ -0,0 +1,335 @@
1
+ /**********************************************************************
2
+ * File: ocrclass.h
3
+ * Description: Class definitions and constants for the OCR API.
4
+ * Author: Hewlett-Packard Co
5
+ *
6
+ * (C) Copyright 1996, Hewlett-Packard Co.
7
+ ** Licensed under the Apache License, Version 2.0 (the "License");
8
+ ** you may not use this file except in compliance with the License.
9
+ ** You may obtain a copy of the License at
10
+ ** http://www.apache.org/licenses/LICENSE-2.0
11
+ ** Unless required by applicable law or agreed to in writing, software
12
+ ** distributed under the License is distributed on an "AS IS" BASIS,
13
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ ** See the License for the specific language governing permissions and
15
+ ** limitations under the License.
16
+ *
17
+ **********************************************************************/
18
+
19
+ /**********************************************************************
20
+ * This file contains typedefs for all the structures used by
21
+ * the HP OCR interface.
22
+ * The code is designed to be used with either a C or C++ compiler.
23
+ * The structures are designed to allow them to be used with any
24
+ * structure alignment up to 8.
25
+ **********************************************************************/
26
+
27
+ #ifndef CCUTIL_OCRCLASS_H_
28
+ #define CCUTIL_OCRCLASS_H_
29
+
30
+ #ifndef __GNUC__
31
+ #ifdef __MSW32__
32
+ #include <windows.h>
33
+ #include "gettimeofday.h"
34
+ #endif
35
+ #else
36
+ #include <sys/time.h>
37
+ #endif
38
+ #include <time.h>
39
+ #include "host.h"
40
+
41
+ /*Maximum lengths of various strings*/
42
+ #define MAX_FONT_NAME 34 /*name of font */
43
+ #define MAX_OCR_NAME 32 /*name of engine */
44
+ #define MAX_OCR_VERSION 17 /*version code of engine */
45
+
46
+ /*Image parameters*/
47
+ #define MIN_IMAGE_SIZE 64 /*smallest image that will be passed */
48
+ #define IMAGE_ROUNDING 32 /*all sizes are multiple of this */
49
+
50
+ #if defined(__SLOW_TIMES__)
51
+ /*Maximum timeouts of various functions (in secs)*/
52
+ #define STARTUP_TIMEOUT 100 /*start of OCR engine */
53
+ #define SHUTDOWN_TIMEOUT 50 /*end of OCR engine */
54
+ #define SENDIM_TIMEOUT 50 /*send of image */
55
+ #define RELEASE_TIMEOUT 50 /*release of semaphore */
56
+ #define READIM_TIMEOUT 100 /*read of image */
57
+ #define READTEXT_TIMEOUT 50 /*read of text */
58
+ #define PROGRESS_TIMEOUT 30 /*progress every 30 seconds */
59
+ #define BADTIMES_TIMEOUT 7 /*max lack of progress */
60
+ #else
61
+ /*Maximum timeouts of various functions (in secs)*/
62
+ #define STARTUP_TIMEOUT 10 /*start of OCR engine */
63
+ #define SHUTDOWN_TIMEOUT 6 /*end of OCR engine */
64
+ #define SENDIM_TIMEOUT 5 /*send of image */
65
+ #define RELEASE_TIMEOUT 5 /*release of semaphore */
66
+ #define READIM_TIMEOUT 10 /*read of image */
67
+ #define READTEXT_TIMEOUT 5 /*read of text */
68
+ #define PROGRESS_TIMEOUT 3 /*progress every 3 seconds */
69
+ #define BADTIMES_TIMEOUT 7 /*max lack of progress */
70
+ #endif
71
+
72
+ /*language definitions are identical to RTF*/
73
+ #define LANGE_NONE 0x0400 /*no language */
74
+ #define LANGE_ALBANIAN 0x041c /*Albanian */
75
+ #define LANGE_BRITISH 0x0809 /*International English */
76
+ #define LANGE_BULGARIAN 0x0402 /*Bulgarian */
77
+ #define LANGE_CROATIAN 0x041a /*Croatian(latin alphabet) */
78
+ #define LANGE_CZECH 0x0405 /*Czech */
79
+ #define LANGE_DANISH 0x0406 /*Danish */
80
+ #define LANGE_DUTCH 0x0413 /*Dutch */
81
+ #define LANGE_FINNISH 0x040b /*Finnish */
82
+ #define LANGE_FRENCH 0x040c /*French */
83
+ #define LANGE_GERMAN 0x0407 /*German */
84
+ #define LANGE_GREEK 0x0408 /*Greek */
85
+ #define LANGE_HUNGARIAN 0x040e /*Hungarian */
86
+ #define LANGE_ITALIAN 0x0410 /*Italian */
87
+ #define LANGE_JAPANESE 0x0411 /*Japanese */
88
+ #define LANGE_KOREAN 0x0412 /*Korean */
89
+ #define LANGE_NORWEGIAN 0x0414 /*Bokmal */
90
+ #define LANGE_POLISH 0x0415 /*Polish */
91
+ #define LANGE_PORTUGESE 0x0416 /*Brazilian Portuguese */
92
+ #define LANGE_ROMANIAN 0x0418 /*Romanian */
93
+ #define LANGE_RUSSIAN 0x0419 /*Russian */
94
+ #define LANGE_SCHINESE 0x0804 /*Simplified Chinese */
95
+ #define LANGE_SLOVAK 0x041b /*Slovak */
96
+ #define LANGE_SPANISH 0x040a /*Castilian */
97
+ #define LANGE_SWEDISH 0x041d /*Swedish */
98
+ #define LANGE_TCHINESE 0x0404 /*Traditional Chinese */
99
+ #define LANGE_TURKISH 0x041f /*Turkish */
100
+ #define LANGE_USENGLISH 0x0409 /*American */
101
+
102
+ /*font family definitions are identical to RTF*/
103
+ #define FFAM_NONE 0 /*unknown */
104
+ #define FFAM_ROMAN 1 /*serifed prop */
105
+ #define FFAM_SWISS 2 /*sans-serif prop */
106
+ #define FFAM_MODERN 3 /*fixed pitch */
107
+
108
+ /*character set definitions are identical to RTF*/
109
+ #define CHSET_ANSI 0 /*Ansi efigs */
110
+ #define CHSET_SHIFT_JIS 128 /*JIS X 0208-1990 */
111
+ #define CHSET_KOREAN 129 /*KS C 5601-1992 */
112
+ #define CHSET_SCHINESE 134 /*GB 2312-80 */
113
+ #define CHSET_BIG5 136 /*Big Five */
114
+ #define CHSET_CYRILLIC 204 /*Cyrillic */
115
+ #define CHSET_EEUROPE 238 /*Eastern Europe */
116
+
117
+ /*pitch set definitions are identical to RTF*/
118
+ #define PITCH_DEF 0 /*default */
119
+ #define PITCH_FIXED 1 /*fixed pitch */
120
+ #define PITCH_VAR 2 /*variable pitch */
121
+
122
+ /*Bitmasks for character enhancements.
123
+ OR these together for enhancement in ocr_append_char*/
124
+ #define EUC_BOLD 1 /*bold character */
125
+ #define EUC_ITALIC 2 /*italic char */
126
+ #define EUC_UNDERLINE 4 /*underlined char */
127
+ #define EUC_SUBSCRIPT 8 /*subscript char */
128
+ #define EUC_SUPERSCRIPT 16 /*superscript char */
129
+
130
+ /*enum for character rendering direction*/
131
+ enum OCR_CHAR_DIRECTION { /*values are implicit, counting up from 0 in declaration order */
132
+ OCR_CDIR_RIGHT_LEFT, /*right to left horizontal (0) */
133
+ OCR_CDIR_LEFT_RIGHT, /*left to right horizontal (1) */
134
+ OCR_CDIR_TOP_BOTTOM, /*top to bottom vertical (2) */
135
+ OCR_CDIR_BOTTOM_TOP /*bottom to top vertical (3) */
136
+ };
137
+
138
+ /*enum for line rendering direction*/
139
+ enum OCR_LINE_DIRECTION { /*values are implicit, counting up from 0 in declaration order */
140
+ OCR_LDIR_DOWN_RIGHT, /*horizontal lines go down */
141
+ /*vertical lines go right */
142
+ OCR_LDIR_UP_LEFT /*horizontal lines go up; going by the name, vertical lines presumably go left - confirm */
143
+ };
144
+
145
+ /*enum for newline type*/
146
+ enum OCR_NEWLINE_TYPE { /*values are implicit, counting up from 0 in declaration order */
147
+ OCR_NL_NONE, /*not a newline */
148
+ OCR_NL_NEWLINE, /*this is a newline but not a new paragraph */
149
+ OCR_NL_NEWPARA /*this is a newline and a new paragraph */
150
+ };
151
+
152
+ /*error codes that can be returned from the API functions other than OKAY
153
+ and HPERR*/
154
+ #define OCR_API_NO_MEM (-2) /*filled output buffer */
155
+ #define OCR_API_BAD_CHAR (-3) /*whitespace sent to ocr_append_char */
156
+ #define OCR_API_BAD_STATE (-4) /*invalid call sequence */
157
+
158
+ /*error codes used for passing errors back to the HP side*/
159
+ enum OCR_ERR_CODE { /*values are implicit, counting up from 0 in declaration order */
160
+ OCR_ERR_NONE, /*no error */
161
+ OCR_ERR_CLEAN_EXIT, /*no error (going by the name, reported on a clean exit) */
162
+ OCR_ERR_NO_MEM, /*out of memory */
163
+ OCR_ERR_FILE_READ, /*failed to read data file */
164
+ OCR_ERR_TMP_WRITE, /*failed to write temp file */
165
+ OCR_ERR_TMP_READ, /*failed to read temp file */
166
+ OCR_ERR_BAD_DLL, /*missing or invalid dll subcomponent */
167
+ OCR_ERR_BAD_EXE, /*missing or invalid exe subcomponent */
168
+ OCR_ERR_BAD_LOAD, /*failed to load subcomponent */
169
+ OCR_ERR_BAD_LANG, /*unable to recognize requested language */
170
+ OCR_ERR_BAD_STATE, /*engine did call out of sequence */
171
+ OCR_ERR_INTERNAL1, /*internal error type 1 */
172
+ OCR_ERR_INTERNAL2, /*internal error type 2 */
173
+ OCR_ERR_INTERNAL3, /*internal error type 3 */
174
+ OCR_ERR_INTERNAL4, /*internal error type 4 */
175
+ OCR_ERR_INTERNAL5, /*internal error type 5 */
176
+ OCR_ERR_INTERNAL6, /*internal error type 6 */
177
+ OCR_ERR_INTERNAL7, /*internal error type 7 */
178
+ OCR_ERR_INTERNAL8, /*internal error type 8 */
179
+ OCR_ERR_TIMEOUT /*timed out in comms */
180
+ }; /*for calls to ocr_error */
181
+
182
+ /**********************************************************************
183
+ * EFONT_DESC
184
+ * Description of one font.
185
+ * The information required is basically that used by RTF.
186
+ * The name may be either a valid font on the system or the empty string.
187
+ **********************************************************************/
188
+
189
+ typedef struct { /*font description */
190
+ uinT16 language; /*default language (presumably a LANGE_* code - confirm) */
191
+ uinT8 font_family; /*serif/not, fixed/not (presumably an FFAM_* value - confirm) */
192
+ uinT8 char_set; /*character set standard (presumably a CHSET_* value - confirm) */
193
+ uinT8 pitch; /*fixed or prop (presumably a PITCH_* value - confirm) */
194
+ inT8 name[MAX_FONT_NAME + 1]; /*plain ascii name; may be the empty string */
195
+ } EFONT_DESC; /*font description */
196
+
197
+ /**********************************************************************
198
+ * EOCR_DESC
199
+ * Description of the OCR engine provided at startup.
200
+ * The name and version may be reported to the user at some point.
201
+ * The fonts array should indicate the fonts that the OCR system
202
+ * can recognize.
203
+ **********************************************************************/
204
+
205
+ typedef struct { /*startup info */
206
+ inT32 protocol; /*interface version */
207
+ uinT32 font_count; /*number of fonts */
208
+ uinT16 language; /*default language */
209
+ uinT16 name[MAX_OCR_NAME + 1]; /*name of engine (16-bit elements) */
210
+ /*version of engine (16-bit elements) */
211
+ uinT16 version[MAX_OCR_VERSION + 1];
212
+ EFONT_DESC fonts[1]; /*array of fonts; declared with one element, font_count presumably gives the real length - confirm */
213
+ } EOCR_DESC; /*startup info */
214
+
215
+ /**********************************************************************
216
+ * ESTRIP_DESC
217
+ * Description of the image strip as it is passed to the engine.
218
+ * The image is always 1 bit, with 1=black.
219
+ * The width is always a multiple of 32, so padding is always OK.
220
+ * The height of the full image is always a multiple of 32.
221
+ * The top y coordinate is 0, and increases down.
222
+ * The top leftmost pixel is in the most significant bit of the first byte.
223
+ **********************************************************************/
224
+
225
+ typedef struct { /*bitmap strip */
226
+ inT16 x_size; /*width in pixels */
227
+ inT16 y_size; /*height of full image */
228
+ inT16 strip_size; /*size of this strip (presumably its height - confirm) */
229
+ inT16 resolution; /*pixels per inch */
230
+ uinT8 data[8]; /*image data; declared with 8 bytes, presumably only the start of a longer buffer - confirm */
231
+ } ESTRIP_DESC; /*bitmap strip */
232
+
233
+ /**********************************************************************
234
+ * EANYCODE_CHAR
235
+ * Description of a single character. The character code is defined by
236
+ * the character set of the current font.
237
+ * Output text is sent as an array of these structures.
238
+ * Spaces and line endings in the output are represented in the
239
+ * structures of the surrounding characters. They are not directly
240
+ * represented as characters.
241
+ * The first character in a word has a positive value of blanks.
242
+ * Missing information should be set to the defaults in the comments.
243
+ * If word bounds are known, but not character bounds, then the top and
244
+ * bottom of each character should be those of the word. The left of the
245
+ * first and right of the last char in each word should be set. All other
246
+ * lefts and rights should be set to -1.
247
+ * If set, the values of right and bottom are left+width and top+height.
248
+ * Most of the members come directly from the parameters to ocr_append_char.
249
+ * The formatting member uses the enhancement parameter and combines the
250
+ * line direction stuff into the top 3 bits.
251
+ * The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
252
+ * 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
253
+ * the coding is, only that it is backwards compatible with the previous
254
+ * version.
255
+ **********************************************************************/
256
+
257
+ typedef struct { /*single character */
258
+ // It should be noted that the format for char_code for version 2.0 and beyond
259
+ // is UTF8 which means that ASCII characters will come out as one structure but
260
+ // other characters will be returned in two or more instances of this structure
261
+ // with a single byte of the UTF8 code in each, but each will have the same
262
+ // bounding box. Programs which want to handle languages with different
263
+ // character sets will need to handle extended characters appropriately, but
264
+ // *all* code needs to be prepared to receive UTF8 coded characters for
265
+ // characters such as bullet and fancy quotes.
266
+ uinT16 char_code; /*character itself */
267
+ inT16 left; /*of char (-1) */
268
+ inT16 right; /*of char (-1) */
269
+ inT16 top; /*of char (-1) */
270
+ inT16 bottom; /*of char (-1) */
271
+ inT16 font_index; /*what font (0) */
272
+ uinT8 confidence; /*0=perfect, 100=reject (0/100) */
273
+ uinT8 point_size; /*of char, 72=1 inch, (10) */
274
+ inT8 blanks; /*no of spaces before this char (1) */
275
+ uinT8 formatting; /*char formatting (0) */
276
+ } EANYCODE_CHAR; /*single character */
277
+
278
+ /**********************************************************************
279
+ * ETEXT_DESC
280
+ * Description of the output of the OCR engine.
281
+ * This structure is used as both a progress monitor and the final
282
+ * output header, since it needs to be a valid progress monitor while
283
+ * the OCR engine is storing its output to shared memory.
284
+ * During progress, all the buffer info is -1.
285
+ * Progress starts at 0 and increases to 100 during OCR. No other constraint.
286
+ * Every progress callback, the OCR engine must set ocr_alive to 1.
287
+ * The HP side will set ocr_alive to 0. Repeated failure to reset
288
+ * to 1 indicates that the OCR engine is dead.
289
+ * If the cancel function is not null then it is called with the number of
290
+ * user words found. If it returns true then operation is cancelled.
291
+ **********************************************************************/
292
+ typedef bool (*CANCEL_FUNC)(void* cancel_this, int words);
293
+
294
+ class ETEXT_DESC { // output header
295
+ public:
296
+ inT16 count; // chars in this buffer(0)
297
+ inT16 progress; // percent complete increasing (0-100)
298
+ inT8 more_to_come; // true if not last
299
+ volatile inT8 ocr_alive; // ocr sets to 1, HP 0
300
+ inT8 err_code; // for errcode use
301
+ CANCEL_FUNC cancel; // returns true to cancel
302
+ void* cancel_this; // this or other data for cancel
303
+ struct timeval end_time; // time to stop. expected to be set only by call
304
+ // to set_deadline_msecs()
305
+ EANYCODE_CHAR text[1]; // character data
306
+
307
+ ETEXT_DESC() : count(0), progress(0), more_to_come(0), ocr_alive(0),
308
+ err_code(0), cancel(NULL), cancel_this(NULL) {
309
+ end_time.tv_sec = 0;
310
+ end_time.tv_usec = 0;
311
+ }
312
+
313
+ // Sets the end time to be deadline_msecs milliseconds from now.
314
+ void set_deadline_msecs(inT32 deadline_msecs) {
315
+ gettimeofday(&end_time, NULL);
316
+ inT32 deadline_secs = deadline_msecs / 1000;
317
+ end_time.tv_sec += deadline_secs;
318
+ end_time.tv_usec += (deadline_msecs - deadline_secs * 1000) * 1000;
319
+ if (end_time.tv_usec > 1000000) {
320
+ end_time.tv_usec -= 1000000;
321
+ ++end_time.tv_sec;
322
+ }
323
+ }
324
+
325
+ // Returns false if we've not passed the end_time, or have not set a deadline.
326
+ bool deadline_exceeded() const {
327
+ if (end_time.tv_sec == 0 && end_time.tv_usec == 0) return false;
328
+ struct timeval now;
329
+ gettimeofday(&now, NULL);
330
+ return (now.tv_sec > end_time.tv_sec || (now.tv_sec == end_time.tv_sec &&
331
+ now.tv_usec > end_time.tv_usec));
332
+ }
333
+ };
334
+
335
+ #endif // CCUTIL_OCRCLASS_H_