motion-ocr 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. data/.gitignore +17 -0
  2. data/Gemfile +3 -0
  3. data/Gemfile.lock +10 -0
  4. data/LICENSE +28 -0
  5. data/README.md +22 -0
  6. data/Rakefile +17 -0
  7. data/app/app_delegate.rb +2 -0
  8. data/lib/motion-ocr.rb +26 -0
  9. data/lib/motion-ocr/version.rb +3 -0
  10. data/motion-ocr.gemspec +20 -0
  11. data/resources/tessdata/configs/nodict +3 -0
  12. data/resources/tessdata/eng.traineddata +0 -0
  13. data/spec/motion_ocr_spec.rb +23 -0
  14. data/spec/support/phototest.gif +0 -0
  15. data/vendor/MotionOCR/MotionOCR.bridgesupport +14722 -0
  16. data/vendor/MotionOCR/MotionOCR.xcodeproj/project.pbxproj +390 -0
  17. data/vendor/MotionOCR/MotionOCR.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
  18. data/vendor/MotionOCR/MotionOCR.xcodeproj/project.xcworkspace/xcuserdata/fer.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
  19. data/vendor/MotionOCR/MotionOCR.xcodeproj/xcuserdata/fer.xcuserdatad/xcschemes/MotionOCR.xcscheme +59 -0
  20. data/vendor/MotionOCR/MotionOCR.xcodeproj/xcuserdata/fer.xcuserdatad/xcschemes/xcschememanagement.plist +22 -0
  21. data/vendor/MotionOCR/MotionOCR/MotionOCR-Prefix.pch +7 -0
  22. data/vendor/MotionOCR/MotionOCR/MotionOCR.h +22 -0
  23. data/vendor/MotionOCR/MotionOCR/MotionOCR.mm +89 -0
  24. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/allheaders.h +32 -0
  25. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/alltypes.h +49 -0
  26. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/array.h +125 -0
  27. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/arrayaccess.h +194 -0
  28. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/bbuffer.h +46 -0
  29. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/bmf.h +51 -0
  30. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/bmp.h +74 -0
  31. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/ccbord.h +103 -0
  32. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/dewarp.h +57 -0
  33. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/endianness.h +11 -0
  34. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/environ.h +281 -0
  35. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/freetype.h +23 -0
  36. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/gplot.h +77 -0
  37. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/heap.h +73 -0
  38. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/imageio.h +153 -0
  39. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/jbclass.h +122 -0
  40. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/leptprotos.h +2058 -0
  41. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/leptwin.h +34 -0
  42. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/list.h +76 -0
  43. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/morph.h +218 -0
  44. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/pix.h +945 -0
  45. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/ptra.h +80 -0
  46. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/queue.h +63 -0
  47. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/readbarcode.h +220 -0
  48. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/regutils.h +122 -0
  49. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/stack.h +55 -0
  50. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/sudoku.h +62 -0
  51. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/watershed.h +52 -0
  52. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/apitypes.h +31 -0
  53. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/baseapi.h +664 -0
  54. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/errcode.h +104 -0
  55. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/genericvector.h +763 -0
  56. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/helpers.h +139 -0
  57. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/host.h +180 -0
  58. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/ndminx.h +31 -0
  59. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/ocrclass.h +335 -0
  60. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/platform.h +48 -0
  61. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/publictypes.h +202 -0
  62. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/tesscallback.h +1238 -0
  63. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/thresholder.h +170 -0
  64. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/unichar.h +85 -0
  65. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/lib/liblept.a +0 -0
  66. data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/lib/libtesseract_all.a +0 -0
  67. metadata +113 -0
@@ -0,0 +1,139 @@
1
+ /* -*-C-*-
2
+ ********************************************************************************
3
+ *
4
+ * File: helpers.h
5
+ * Description: General utility functions
6
+ * Author: Daria Antonova
7
+ * Created: Wed Apr 8 14:37:00 2009
8
+ * Language: C++
9
+ * Package: N/A
10
+ * Status: Reusable Software Component
11
+ *
12
+ * (c) Copyright 2009, Google Inc.
13
+ ** Licensed under the Apache License, Version 2.0 (the "License");
14
+ ** you may not use this file except in compliance with the License.
15
+ ** You may obtain a copy of the License at
16
+ ** http://www.apache.org/licenses/LICENSE-2.0
17
+ ** Unless required by applicable law or agreed to in writing, software
18
+ ** distributed under the License is distributed on an "AS IS" BASIS,
19
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20
+ ** See the License for the specific language governing permissions and
21
+ ** limitations under the License.
22
+ *
23
+ ********************************************************************************/
24
+
25
+ #ifndef TESSERACT_CCUTIL_HELPERS_H_
26
+ #define TESSERACT_CCUTIL_HELPERS_H_
27
+
28
+ #include <stdio.h>
29
+ #include <string.h>
30
+
31
// Strips one trailing '\n' from the NUL-terminated string `str`, in place.
// A string that is empty or does not end in '\n' is left untouched; only a
// single newline is removed even if the string ends in several.
// The length is kept as size_t so that it is never narrowed to int.
inline void chomp_string(char *str) {
  const size_t length = strlen(str);
  if (length > 0 && str[length - 1] == '\n') {
    str[length - 1] = '\0';
  }
}
38
+
39
// Consumes the next character of `file` only if it is a newline.
// Any other character is "put back" by seeking one byte backwards, so the
// next read returns it again. When fgetc() reports EOF (end of file or a read
// error) no character was consumed, so the position is left alone; seeking
// back in that case would make the caller re-read the last byte of the file.
inline void SkipNewline(FILE *file) {
  const int next = fgetc(file);
  if (next != '\n' && next != EOF) {
    fseek(file, -1, SEEK_CUR);
  }
}
43
+
44
// Comparison callback for qsort() over float elements.
// arg1 and arg2 point at the two floats being compared. The result is the
// sign of (*arg1 - *arg2): 1 if positive, -1 if negative, 0 otherwise.
inline int sort_floats(const void *arg1, const void *arg2) {
  const float lhs = *((const float *) arg1);
  const float rhs = *((const float *) arg2);
  const float delta = lhs - rhs;
  if (delta > 0) return 1;
  return (delta < 0) ? -1 : 0;
}
55
+
56
// Rounds n up to a whole number of blocks and returns the size in units.
// For non-negative n and positive block_size this is the smallest multiple of
// block_size that is greater than or equal to n.
inline int RoundUp(int n, int block_size) {
  const int num_blocks = (n + block_size - 1) / block_size;
  return num_blocks * block_size;
}
60
+
61
// Clamps x to the closed interval [lower_bound, upper_bound] and returns the
// clamped value by copy. The lower bound is tested first, so it takes
// precedence if x is below it.
template<typename T>
inline T ClipToRange(const T& x, const T& lower_bound, const T& upper_bound) {
  if (x < lower_bound) {
    return lower_bound;
  }
  return (x > upper_bound) ? upper_bound : x;
}
70
+
71
// Grows the range [*lower_bound, *upper_bound] just enough to contain x.
// The two ends are tested independently of each other, so a single call may
// move the lower end, the upper end, both, or neither.
template<typename T1, typename T2>
inline void UpdateRange(const T1& x, T2* lower_bound, T2* upper_bound) {
  T2& low = *lower_bound;
  T2& high = *upper_bound;
  if (x < low) {
    low = x;
  }
  if (x > high) {
    high = x;
  }
}
79
+
80
// Grows the range [*lower_bound, *upper_bound] to cover [x_lo, x_hi]:
// *lower_bound is lowered to x_lo if x_lo is smaller, and *upper_bound is
// raised to x_hi if x_hi is larger. Each end is handled independently.
template<typename T1, typename T2>
inline void UpdateRange(const T1& x_lo, const T1& x_hi,
                        T2* lower_bound, T2* upper_bound) {
  T2& low = *lower_bound;
  T2& high = *upper_bound;
  if (x_lo < low) {
    low = x_lo;
  }
  if (x_hi > high) {
    high = x_hi;
  }
}
89
+
90
// Mathematical modulo that also works for negative a.
// For any integer a and positive b, returns r with 0 <= r < b such that
// a == n * b + r for some integer n.
inline int Modulo(int a, int b) {
  const int remainder = a % b;  // may be negative when a is negative
  return (remainder + b) % b;
}
96
+
97
// Integer division of a by b, rounded to the nearest integer, that treats
// negative a symmetrically instead of rounding twice towards zero.
// Always adding b/2 before dividing gives 1/3 = 0, 0/3 = 0, -1/3 = 0,
// -2/3 = 0, -3/3 = 0 and -4/3 = -1. Here half of b is subtracted for negative
// a, which gives 1/3 = 0, 0/3 = 0, -1/3 = 0, -2/3 = -1, -3/3 = -1 and
// -4/3 = -1. Results with b negative are not defined.
inline int DivRounded(int a, int b) {
  const int half = b / 2;
  if (a >= 0) {
    return (a + half) / b;
  }
  return (a - half) / b;
}
106
+
107
// Converts x to int, rounding to the nearest integer. Halves are rounded away
// from zero: the magnitude is rounded and the sign re-applied for negative x.
inline int IntCastRounded(double x) {
  if (x >= 0.0) {
    return static_cast<int>(x + 0.5);
  }
  return -static_cast<int>(-x + 0.5);
}
111
+
112
// Reverses, in place, the order of the num_bytes bytes starting at ptr, for
// switching a value between big-endian and little-endian representation.
// Byte i is exchanged with byte (num_bytes - 1 - i); the middle byte of an
// odd-sized quantity stays where it is.
inline void ReverseN(void* ptr, int num_bytes) {
  char *bytes = reinterpret_cast<char *>(ptr);
  const int num_swaps = num_bytes / 2;
  for (int front = 0; front < num_swaps; ++front) {
    const int back = num_bytes - 1 - front;
    const char saved = bytes[front];
    bytes[front] = bytes[back];
    bytes[back] = saved;
  }
}
122
+
123
+ // Reverse the order of bytes in a 16 bit quantity for big/little-endian switch.
124
+ inline void Reverse16(void *ptr) {
125
+ ReverseN(ptr, 2);
126
+ }
127
+
128
+ // Reverse the order of bytes in a 32 bit quantity for big/little-endian switch.
129
+ inline void Reverse32(void *ptr) {
130
+ ReverseN(ptr, 4);
131
+ }
132
+
133
+ // Reverse the order of bytes in a 64 bit quantity for big/little-endian switch.
134
+ inline void Reverse64(void* ptr) {
135
+ ReverseN(ptr, 8);
136
+ }
137
+
138
+
139
+ #endif // TESSERACT_CCUTIL_HELPERS_H_
@@ -0,0 +1,180 @@
1
+ /******************************************************************************
2
+ ** Filename: Host.h
3
+ ** Purpose: This is the system independent typedefs and defines
4
+ ** Author: MN, JG, MD
5
+ ** Version: 5.4.1
6
+ ** History: 11/7/94 MCD received the modification that Lennart made
7
+ ** to port to 32 bit world and modify this file so that it
8
+ ** will be shared between platform.
9
+ ** 11/9/94 MCD Make MSW32 subset of MSW. Now MSW means
10
+ ** MicroSoft Window and MSW32 means the 32 bit worlds
11
+ ** of MicroSoft Window. Therefore you want the environment
12
+ ** to be MicroSoft Window and in the 32 bit world -
13
+ ** __MSW__ and __MSW32__ must be uncommented out.
14
+ ** 11/30/94 MCD Incorporated comments received for more
15
+ ** readability and the missing typedef for FLOAT.
16
+ ** 12/1/94 MCD Added PFVOID typedef
17
+ ** 5/1/95 MCD. Made many changes based on the inputs.
18
+ ** Changes:
19
+ ** 1) Rearrange the #ifdef so that there're definitions for
20
+ ** particular platforms.
21
+ ** 2) Took out the #define for computer and environment
22
+ ** that developer can uncomment
23
+ ** 3) Added __OLDCODE__ where the defines will be
24
+ ** obsoleted in the next version and advise not to use.
25
+ ** 4) Added the definitions for the following:
26
+ ** FILE_HANDLE, MEMORY_HANDLE, BOOL8,
27
+ ** MAX_INT8, MAX_INT16, MAX_INT32, MAX_UINT8
28
+ ** MAX_UINT16, MAX_UINT32, MAX_FLOAT32
29
+ ** 06/19/96 MCD. Took out MAX_FLOAT32
30
+ ** 07/15/96 MCD. Fixed the comments error
31
+ ** Add back BOOL8.
32
+ **
33
+ ** (c) Copyright Hewlett-Packard Company, 1988-1996.
34
+ ** Licensed under the Apache License, Version 2.0 (the "License");
35
+ ** you may not use this file except in compliance with the License.
36
+ ** You may obtain a copy of the License at
37
+ ** http://www.apache.org/licenses/LICENSE-2.0
38
+ ** Unless required by applicable law or agreed to in writing, software
39
+ ** distributed under the License is distributed on an "AS IS" BASIS,
40
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
41
+ ** See the License for the specific language governing permissions and
42
+ ** limitations under the License.
43
+ */
44
+
45
+ #ifndef __HOST__
46
+ #define __HOST__
47
+
48
+ /******************************************************************************
49
+ ** IMPORTANT!!! **
50
+ ** **
51
+ ** Define one of __MSW__, __MSW32__, __MAC__, __UNIX__, __OS2__, __PM__ to
52
+ ** use the specified definitions indicated below in the preprocessor settings. **
53
+ ** **
54
+ ** Also define either __FarProc__ or __FarData__ and __MOTO__ to use the
55
+ ** specified definitions indicated below in the preprocessor settings. **
56
+ ** **
57
+ ** If preprocessor settings are not allowed in the compiler that is being used,
58
+ ** then it is recommended that a "platform.h" is created with the definition
59
+ ** of the computer and/or operating system.
60
+ ******************************************************************************/
61
+
62
+ #include "platform.h"
63
+ /* __MSW32__ */
64
+ #ifdef __MSW32__
65
+ #include <windows.h>
66
+ #include <winbase.h> // winbase.h contains windows.h
67
+
68
+ #define DLLIMPORT __declspec( dllimport)
69
+ #define DLLEXPORT __declspec( dllexport)
70
+
71
+ #else
72
+ /********************************************************/
73
+ /* __MSW__ */
74
+ #ifdef __MSW__
75
+ #include <windows.h> // provides standard definitions (like HANDLE)
76
+
77
+ #define DLLIMPORT __import
78
+ #define DLLEXPORT __export
79
+ #endif
80
+ #endif
81
+
82
+ /********************************************************/
83
+ /* __MAC__ */
84
+ #ifdef __MAC__
85
+ #include <Types.h>
86
+ /*----------------------------*/
87
+ /*----------------------------*/
88
+ #define DLLIMPORT
89
+ #define DLLEXPORT
90
+
91
+ #endif
92
+ /********************************************************/
93
+ #if defined(__UNIX__) || defined( __DOS__ ) || defined(__OS2__) || defined(__PM__)
94
+ /*----------------------------*/
95
+ /* FarProc and FarData */
96
+ /*----------------------------*/
97
+ #define DLLIMPORT
98
+ #define DLLEXPORT
99
+ /*----------------------------*/
100
+ #endif
101
+ /*****************************************************************************
102
+ **
103
+ ** Standard GHC Definitions
104
+ **
105
+ *****************************************************************************/
106
+
107
+ #ifdef __MOTO__
108
+ #define __NATIVE__ MOTO
109
+ #else
110
+ #define __NATIVE__ INTEL
111
+ #endif
112
+
113
+ //typedef HANDLE FD* PHANDLE;
114
+
115
+ // definitions of portable data types (numbers and characters)
116
+ typedef SIGNED char inT8;
117
+ typedef unsigned char uinT8;
118
+ typedef short inT16;
119
+ typedef unsigned short uinT16;
120
+ typedef int inT32;
121
+ typedef unsigned int uinT32;
122
+ #if (_MSC_VER >= 1200) //%%% vkr for VC 6.0
123
+ typedef INT64 inT64;
124
+ typedef UINT64 uinT64;
125
+ #else
126
+ typedef long long int inT64;
127
+ typedef unsigned long long int uinT64;
128
+ #endif //%%% vkr for VC 6.0
129
+ typedef float FLOAT32;
130
+ typedef double FLOAT64;
131
+ typedef unsigned char BOOL8;
132
+
133
+ #define INT32FORMAT "%d"
134
+ #define INT64FORMAT "%lld"
135
+
136
+ #define MAX_INT8 0x7f
137
+ #define MAX_INT16 0x7fff
138
+ #define MAX_INT32 0x7fffffff
139
+ #define MAX_UINT8 0xff
140
+ #define MAX_UINT16 0xffff
141
+ #define MAX_UINT32 0xffffffff
142
+ #define MAX_FLOAT32 ((float)3.40282347e+38)
143
+
144
+ #define MIN_INT8 0x80
145
+ #define MIN_INT16 0x8000
146
+ #define MIN_INT32 0x80000000
147
+ #define MIN_UINT8 0x00
148
+ #define MIN_UINT16 0x0000
149
+ #define MIN_UINT32 0x00000000
150
+ #define MIN_FLOAT32 ((float)1.17549435e-38)
151
+
152
+ // Defines
153
+
154
+ #ifndef OKAY
155
+ #define OKAY 0
156
+ #endif
157
+
158
+ #ifndef HPERR
159
+ #define HPERR -1
160
+ #endif
161
+
162
+ #ifndef TRUE
163
+ #define TRUE 1
164
+ #endif
165
+
166
+ #ifndef FALSE
167
+ #define FALSE 0
168
+ #endif
169
+
170
+ #ifndef NULL
171
+ #define NULL 0L
172
+ #endif
173
+
174
// Returns true if x and y differ by no more than tolerance in either
// direction, i.e. both (x - y) <= tolerance and (y - x) <= tolerance hold.
template<class T> bool NearlyEqual(T x, T y, T tolerance) {
  const T delta = x - y;
  if (!(delta <= tolerance)) {
    return false;
  }
  return -delta <= tolerance;
}
179
+
180
+ #endif
@@ -0,0 +1,31 @@
1
+ /**********************************************************************
2
+ * File: ndminx.h (Formerly ndminmax.h)
3
+ * Description: Extended ascii chars
4
+ * Author: Phil Cheatle
5
+ * Created: Mon Mar 29 14:46:01 BST 1993
6
+ *
7
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
8
+ ** Licensed under the Apache License, Version 2.0 (the "License");
9
+ ** you may not use this file except in compliance with the License.
10
+ ** You may obtain a copy of the License at
11
+ ** http://www.apache.org/licenses/LICENSE-2.0
12
+ ** Unless required by applicable law or agreed to in writing, software
13
+ ** distributed under the License is distributed on an "AS IS" BASIS,
14
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ ** See the License for the specific language governing permissions and
16
+ ** limitations under the License.
17
+ *
18
+ **********************************************************************/
19
+
20
+ #ifndef NDMINX_H
21
+ #define NDMINX_H
22
+
23
+ #ifndef MAX
24
+ #define MAX(x,y) (((x) >= (y))?(x):(y))
25
+ #endif
26
+
27
+ #ifndef MIN
28
+ #define MIN(x,y) (((x) <= (y))?(x):(y))
29
+ #endif
30
+
31
+ #endif
@@ -0,0 +1,335 @@
1
+ /**********************************************************************
2
+ * File: ocrclass.h
3
+ * Description: Class definitions and constants for the OCR API.
4
+ * Author: Hewlett-Packard Co
5
+ *
6
+ * (C) Copyright 1996, Hewlett-Packard Co.
7
+ ** Licensed under the Apache License, Version 2.0 (the "License");
8
+ ** you may not use this file except in compliance with the License.
9
+ ** You may obtain a copy of the License at
10
+ ** http://www.apache.org/licenses/LICENSE-2.0
11
+ ** Unless required by applicable law or agreed to in writing, software
12
+ ** distributed under the License is distributed on an "AS IS" BASIS,
13
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ ** See the License for the specific language governing permissions and
15
+ ** limitations under the License.
16
+ *
17
+ **********************************************************************/
18
+
19
+ /**********************************************************************
20
+ * This file contains typedefs for all the structures used by
21
+ * the HP OCR interface.
22
+ * The code is designed to be used with either a C or C++ compiler.
23
+ * The structures are designed to allow them to be used with any
24
+ * structure alignment upto 8.
25
+ **********************************************************************/
26
+
27
+ #ifndef CCUTIL_OCRCLASS_H_
28
+ #define CCUTIL_OCRCLASS_H_
29
+
30
+ #ifndef __GNUC__
31
+ #ifdef __MSW32__
32
+ #include <windows.h>
33
+ #include "gettimeofday.h"
34
+ #endif
35
+ #else
36
+ #include <sys/time.h>
37
+ #endif
38
+ #include <time.h>
39
+ #include "host.h"
40
+
41
+ /*Maximum lengths of various strings*/
42
+ #define MAX_FONT_NAME 34 /*name of font */
43
+ #define MAX_OCR_NAME 32 /*name of engine */
44
+ #define MAX_OCR_VERSION 17 /*version code of engine */
45
+
46
+ /*Image parameters*/
47
+ #define MIN_IMAGE_SIZE 64 /*smallest image that will be passed */
48
+ #define IMAGE_ROUNDING 32 /*all sizes are multiple of this */
49
+
50
+ #if defined(__SLOW_TIMES__)
51
+ /*Maximum timeouts of various functions (in secs)*/
52
+ #define STARTUP_TIMEOUT 100 /*start of OCR engine */
53
+ #define SHUTDOWN_TIMEOUT 50 /*end of OCR engine */
54
+ #define SENDIM_TIMEOUT 50 /*send of image */
55
+ #define RELEASE_TIMEOUT 50 /*release of semaphore */
56
+ #define READIM_TIMEOUT 100 /*read of image */
57
+ #define READTEXT_TIMEOUT 50 /*read of text */
58
+ #define PROGRESS_TIMEOUT 30 /*progress every 30 seconds */
59
+ #define BADTIMES_TIMEOUT 7 /*max lack of progress */
60
+ #else
61
+ /*Maximum timeouts of various functions (in secs)*/
62
+ #define STARTUP_TIMEOUT 10 /*start of OCR engine */
63
+ #define SHUTDOWN_TIMEOUT 6 /*end of OCR engine */
64
+ #define SENDIM_TIMEOUT 5 /*send of image */
65
+ #define RELEASE_TIMEOUT 5 /*release of semaphore */
66
+ #define READIM_TIMEOUT 10 /*read of image */
67
+ #define READTEXT_TIMEOUT 5 /*read of text */
68
+ #define PROGRESS_TIMEOUT 3 /*progress every 3 seconds */
69
+ #define BADTIMES_TIMEOUT 7 /*max lack of progress */
70
+ #endif
71
+
72
+ /*language definitions are identical to RTF*/
73
+ #define LANGE_NONE 0x0400 /*no language */
74
+ #define LANGE_ALBANIAN 0x041c /*Albanian */
75
+ #define LANGE_BRITISH 0x0809 /*International English */
76
+ #define LANGE_BULGARIAN 0x0402 /*Bulgarian */
77
+ #define LANGE_CROATIAN 0x041a /*Croatian(latin alphabet) */
78
+ #define LANGE_CZECH 0x0405 /*Czech */
79
+ #define LANGE_DANISH 0x0406 /*Danish */
80
+ #define LANGE_DUTCH 0x0413 /*Dutch */
81
+ #define LANGE_FINNISH 0x040b /*Finnish */
82
+ #define LANGE_FRENCH 0x040c /*French */
83
+ #define LANGE_GERMAN 0x0407 /*German */
84
+ #define LANGE_GREEK 0x0408 /*Greek */
85
+ #define LANGE_HUNGARIAN 0x040e /*Hungarian */
86
+ #define LANGE_ITALIAN 0x0410 /*Italian */
87
+ #define LANGE_JAPANESE 0x0411 /*Japanese */
88
+ #define LANGE_KOREAN 0x0412 /*Korean */
89
+ #define LANGE_NORWEGIAN 0x0414 /*Bokmal */
90
+ #define LANGE_POLISH 0x0415 /*Polish */
91
+ #define LANGE_PORTUGESE 0x0416 /*Brazilian Portugese */
92
+ #define LANGE_ROMANIAN 0x0418 /*Romanian */
93
+ #define LANGE_RUSSIAN 0x0419 /*Russian */
94
+ #define LANGE_SCHINESE 0x0804 /*Simplified Chinese */
95
+ #define LANGE_SLOVAK 0x041b /*Slovak */
96
+ #define LANGE_SPANISH 0x040a /*Castilian */
97
+ #define LANGE_SWEDISH 0x041d /*Swedish */
98
+ #define LANGE_TCHINESE 0x0404 /*Traditional Chinese */
99
+ #define LANGE_TURKISH 0x041f /*Turkish */
100
+ #define LANGE_USENGLISH 0x0409 /*American */
101
+
102
+ /*font family definitions are identical to RTF*/
103
+ #define FFAM_NONE 0 /*unknown */
104
+ #define FFAM_ROMAN 1 /*serifed prop */
105
+ #define FFAM_SWISS 2 /*sans-serif prop */
106
+ #define FFAM_MODERN 3 /*fixed pitch */
107
+
108
+ /*character set definitions are identical to RTF*/
109
+ #define CHSET_ANSI 0 /*Ansi efigs */
110
+ #define CHSET_SHIFT_JIS 128 /*JIS X 0208-1990 */
111
+ #define CHSET_KOREAN 129 /*KS C 5601-1992 */
112
+ #define CHSET_SCHINESE 134 /*GB 2312-80 */
113
+ #define CHSET_BIG5 136 /*Big Five */
114
+ #define CHSET_CYRILLIC 204 /*Cyrillic */
115
+ #define CHSET_EEUROPE 238 /*Eastern Europe */
116
+
117
+ /*pitch set definitions are identical to RTF*/
118
+ #define PITCH_DEF 0 /*default */
119
+ #define PITCH_FIXED 1 /*fixed pitch */
120
+ #define PITCH_VAR 2 /*variable pitch */
121
+
122
+ /*Bitmasks for character enhancements.
123
+ OR these together for enhancement in ocr_append_char*/
124
+ #define EUC_BOLD 1 /*bold character */
125
+ #define EUC_ITALIC 2 /*italic char */
126
+ #define EUC_UNDERLINE 4 /*underlined char */
127
+ #define EUC_SUBSCRIPT 8 /*subscript char */
128
+ #define EUC_SUPERSCRIPT 16 /*superscript char */
129
+
130
+ /*enum for character rendering direction*/
131
+ enum OCR_CHAR_DIRECTION {
132
+ OCR_CDIR_RIGHT_LEFT, /*right to left horizontal */
133
+ OCR_CDIR_LEFT_RIGHT, /*left to right horizontal */
134
+ OCR_CDIR_TOP_BOTTOM, /*top to bottom vertical */
135
+ OCR_CDIR_BOTTOM_TOP /*bottom to top vertical */
136
+ };
137
+
138
+ /*enum for line rendering direction*/
139
+ enum OCR_LINE_DIRECTION {
140
+ OCR_LDIR_DOWN_RIGHT, /*horizontal lines go down */
141
+ /*vertical lines go right */
142
+ OCR_LDIR_UP_LEFT /*horizontal lines go up */
143
+ };
144
+
145
+ /*enum for newline type*/
146
+ enum OCR_NEWLINE_TYPE {
147
+ OCR_NL_NONE, /*not a newline */
148
+ OCR_NL_NEWLINE, /*this is a newline but not new para */
149
+ OCR_NL_NEWPARA /*this is a newline and a new para */
150
+ };
151
+
152
+ /*error codes that can be returned from the API functions other than OKAY
153
+ and HPERR*/
154
+ #define OCR_API_NO_MEM (-2) /*filled output buffer */
155
+ #define OCR_API_BAD_CHAR (-3) /*whitespace sent to ocr_append_char */
156
+ #define OCR_API_BAD_STATE (-4) /*invalid call sequence */
157
+
158
+ /*error codes used for passing errors back to the HP side*/
159
+ enum OCR_ERR_CODE {
160
+ OCR_ERR_NONE, /*no error */
161
+ OCR_ERR_CLEAN_EXIT, /*no error */
162
+ OCR_ERR_NO_MEM, /*out of memory */
163
+ OCR_ERR_FILE_READ, /*failed to read data file */
164
+ OCR_ERR_TMP_WRITE, /*failed to write temp file */
165
+ OCR_ERR_TMP_READ, /*failed to read temp file */
166
+ OCR_ERR_BAD_DLL, /*missing or invalid dll subcomponent */
167
+ OCR_ERR_BAD_EXE, /*missing or invalid exe subcomponent */
168
+ OCR_ERR_BAD_LOAD, /*failed to load subcomponent */
169
+ OCR_ERR_BAD_LANG, /*unable to recognize requested language */
170
+ OCR_ERR_BAD_STATE, /*engine did call out of sequence */
171
+ OCR_ERR_INTERNAL1, /*internal error type 1 */
172
+ OCR_ERR_INTERNAL2, /*internal error type 2 */
173
+ OCR_ERR_INTERNAL3, /*internal error type 3 */
174
+ OCR_ERR_INTERNAL4, /*internal error type 4 */
175
+ OCR_ERR_INTERNAL5, /*internal error type 5 */
176
+ OCR_ERR_INTERNAL6, /*internal error type 6 */
177
+ OCR_ERR_INTERNAL7, /*internal error type 7 */
178
+ OCR_ERR_INTERNAL8, /*internal error type 8 */
179
+ OCR_ERR_TIMEOUT /*timed out in comms */
180
+ }; /*for calls to ocr_error */
181
+
182
+ /**********************************************************************
183
+ * EFONT_DESC
184
+ * Description of one font.
185
+ * The information required is basically that used by RTF.
186
+ * The name may be either a valid font on the system or the empty string.
187
+ **********************************************************************/
188
+
189
+ typedef struct { /*font description */
190
+ uinT16 language; /*default language */
191
+ uinT8 font_family; /*serif/not, fixed/not */
192
+ uinT8 char_set; /*character set standard */
193
+ uinT8 pitch; /*fixed or prop */
194
+ inT8 name[MAX_FONT_NAME + 1]; /*plain ascii name */
195
+ } EFONT_DESC; /*font description */
196
+
197
+ /**********************************************************************
198
+ * EOCR_DESC
199
+ * Description of the OCR engine provided at startup.
200
+ * The name and version may be reported to the user at some point.
201
+ * The fonts array should indicate the fonts that the OCR system
202
+ * can recognize.
203
+ **********************************************************************/
204
+
205
+ typedef struct { /*startup info */
206
+ inT32 protocol; /*interface version */
207
+ uinT32 font_count; /*number of fonts */
208
+ uinT16 language; /*default language */
209
+ uinT16 name[MAX_OCR_NAME + 1]; /*name of engine */
210
+ /*version of engine */
211
+ uinT16 version[MAX_OCR_VERSION + 1];
212
+ EFONT_DESC fonts[1]; /*array of fonts */
213
+ } EOCR_DESC; /*startup info */
214
+
215
+ /**********************************************************************
216
+ * ESTRIP_DESC
217
+ * Description of the image strip as it is passed to the engine.
218
+ * The image is always 1 bit, with 1=black.
219
+ * The width is always a multiple of 32, so padding is always OK.
220
+ * The height of the full image is always a multiple of 32.
221
+ * The top y coordinate is 0, and increases down.
222
+ * The top leftmost pixel is in the most significant bit of the first byte.
223
+ **********************************************************************/
224
+
225
+ typedef struct { /*bitmap strip */
226
+ inT16 x_size; /*width in pixels */
227
+ inT16 y_size; /*of full image */
228
+ inT16 strip_size; /*of this strip */
229
+ inT16 resolution; /*pixels per inch */
230
+ uinT8 data[8]; /*image data */
231
+ } ESTRIP_DESC; /*bitmap strip */
232
+
233
+ /**********************************************************************
234
+ * EANYCODE_CHAR
235
+ * Description of a single character. The character code is defined by
236
+ * the character set of the current font.
237
+ * Output text is sent as an array of these structures.
238
+ * Spaces and line endings in the output are represented in the
239
+ * structures of the surrounding characters. They are not directly
240
+ * represented as characters.
241
+ * The first character in a word has a positive value of blanks.
242
+ * Missing information should be set to the defaults in the comments.
243
+ * If word bounds are known, but not character bounds, then the top and
244
+ * bottom of each character should be those of the word. The left of the
245
+ * first and right of the last char in each word should be set. All other
246
+ * lefts and rights should be set to -1.
247
+ * If set, the values of right and bottom are left+width and top+height.
248
+ * Most of the members come directly from the parameters to ocr_append_char.
249
+ * The formatting member uses the enhancement parameter and combines the
250
+ * line direction stuff into the top 3 bits.
251
+ * The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
252
+ * 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
253
+ * the coding is, only that it is backwards compatible with the previous
254
+ * version.
255
+ **********************************************************************/
256
+
257
+ typedef struct { /*single character */
258
+ // It should be noted that the format for char_code for version 2.0 and beyond
259
+ // is UTF8 which means that ASCII characters will come out as one structure but
260
+ // other characters will be returned in two or more instances of this structure
261
+ // with a single byte of the UTF8 code in each, but each will have the same
262
+ // bounding box. Programs which want to handle languages with different
263
+ // character sets will need to handle extended characters appropriately, but
264
+ // *all* code needs to be prepared to receive UTF8 coded characters for
265
+ // characters such as bullet and fancy quotes.
266
+ uinT16 char_code; /*character itself */
267
+ inT16 left; /*of char (-1) */
268
+ inT16 right; /*of char (-1) */
269
+ inT16 top; /*of char (-1) */
270
+ inT16 bottom; /*of char (-1) */
271
+ inT16 font_index; /*what font (0) */
272
+ uinT8 confidence; /*0=perfect, 100=reject (0/100) */
273
+ uinT8 point_size; /*of char, 72=1 inch, (10) */
274
+ inT8 blanks; /*no of spaces before this char (1) */
275
+ uinT8 formatting; /*char formatting (0) */
276
+ } EANYCODE_CHAR; /*single character */
277
+
278
+ /**********************************************************************
279
+ * ETEXT_DESC
280
+ * Description of the output of the OCR engine.
281
+ * This structure is used as both a progress monitor and the final
282
+ * output header, since it needs to be a valid progress monitor while
283
+ * the OCR engine is storing its output to shared memory.
284
+ * During progress, all the buffer info is -1.
285
+ * Progress starts at 0 and increases to 100 during OCR. No other constraint.
286
+ * Every progress callback, the OCR engine must set ocr_alive to 1.
287
+ * The HP side will set ocr_alive to 0. Repeated failure to reset
288
+ * to 1 indicates that the OCR engine is dead.
289
+ * If the cancel function is not null then it is called with the number of
290
+ * user words found. If it returns true then operation is cancelled.
291
+ **********************************************************************/
292
+ typedef bool (*CANCEL_FUNC)(void* cancel_this, int words);
293
+
294
+ class ETEXT_DESC { // output header
295
+ public:
296
+ inT16 count; // chars in this buffer(0)
297
+ inT16 progress; // percent complete increasing (0-100)
298
+ inT8 more_to_come; // true if not last
299
+ volatile inT8 ocr_alive; // ocr sets to 1, HP 0
300
+ inT8 err_code; // for errcode use
301
+ CANCEL_FUNC cancel; // returns true to cancel
302
+ void* cancel_this; // this or other data for cancel
303
+ struct timeval end_time; // time to stop. expected to be set only by call
304
+ // to set_deadline_msecs()
305
+ EANYCODE_CHAR text[1]; // character data
306
+
307
+ ETEXT_DESC() : count(0), progress(0), more_to_come(0), ocr_alive(0),
308
+ err_code(0), cancel(NULL), cancel_this(NULL) {
309
+ end_time.tv_sec = 0;
310
+ end_time.tv_usec = 0;
311
+ }
312
+
313
+ // Sets the end time to be deadline_msecs milliseconds from now.
314
+ void set_deadline_msecs(inT32 deadline_msecs) {
315
+ gettimeofday(&end_time, NULL);
316
+ inT32 deadline_secs = deadline_msecs / 1000;
317
+ end_time.tv_sec += deadline_secs;
318
+ end_time.tv_usec += (deadline_msecs - deadline_secs * 1000) * 1000;
319
+ if (end_time.tv_usec > 1000000) {
320
+ end_time.tv_usec -= 1000000;
321
+ ++end_time.tv_sec;
322
+ }
323
+ }
324
+
325
+ // Returns false if we've not passed the end_time, or have not set a deadline.
326
+ bool deadline_exceeded() const {
327
+ if (end_time.tv_sec == 0 && end_time.tv_usec == 0) return false;
328
+ struct timeval now;
329
+ gettimeofday(&now, NULL);
330
+ return (now.tv_sec > end_time.tv_sec || (now.tv_sec == end_time.tv_sec &&
331
+ now.tv_usec > end_time.tv_usec));
332
+ }
333
+ };
334
+
335
+ #endif // CCUTIL_OCRCLASS_H_