motion-ocr 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +17 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +10 -0
- data/LICENSE +28 -0
- data/README.md +22 -0
- data/Rakefile +17 -0
- data/app/app_delegate.rb +2 -0
- data/lib/motion-ocr.rb +26 -0
- data/lib/motion-ocr/version.rb +3 -0
- data/motion-ocr.gemspec +20 -0
- data/resources/tessdata/configs/nodict +3 -0
- data/resources/tessdata/eng.traineddata +0 -0
- data/spec/motion_ocr_spec.rb +23 -0
- data/spec/support/phototest.gif +0 -0
- data/vendor/MotionOCR/MotionOCR.bridgesupport +14722 -0
- data/vendor/MotionOCR/MotionOCR.xcodeproj/project.pbxproj +390 -0
- data/vendor/MotionOCR/MotionOCR.xcodeproj/project.xcworkspace/contents.xcworkspacedata +7 -0
- data/vendor/MotionOCR/MotionOCR.xcodeproj/project.xcworkspace/xcuserdata/fer.xcuserdatad/UserInterfaceState.xcuserstate +0 -0
- data/vendor/MotionOCR/MotionOCR.xcodeproj/xcuserdata/fer.xcuserdatad/xcschemes/MotionOCR.xcscheme +59 -0
- data/vendor/MotionOCR/MotionOCR.xcodeproj/xcuserdata/fer.xcuserdatad/xcschemes/xcschememanagement.plist +22 -0
- data/vendor/MotionOCR/MotionOCR/MotionOCR-Prefix.pch +7 -0
- data/vendor/MotionOCR/MotionOCR/MotionOCR.h +22 -0
- data/vendor/MotionOCR/MotionOCR/MotionOCR.mm +89 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/allheaders.h +32 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/alltypes.h +49 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/array.h +125 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/arrayaccess.h +194 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/bbuffer.h +46 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/bmf.h +51 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/bmp.h +74 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/ccbord.h +103 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/dewarp.h +57 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/endianness.h +11 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/environ.h +281 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/freetype.h +23 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/gplot.h +77 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/heap.h +73 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/imageio.h +153 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/jbclass.h +122 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/leptprotos.h +2058 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/leptwin.h +34 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/list.h +76 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/morph.h +218 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/pix.h +945 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/ptra.h +80 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/queue.h +63 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/readbarcode.h +220 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/regutils.h +122 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/stack.h +55 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/sudoku.h +62 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/leptonica/watershed.h +52 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/apitypes.h +31 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/baseapi.h +664 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/errcode.h +104 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/genericvector.h +763 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/helpers.h +139 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/host.h +180 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/ndminx.h +31 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/ocrclass.h +335 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/platform.h +48 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/publictypes.h +202 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/tesscallback.h +1238 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/thresholder.h +170 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/include/tesseract/unichar.h +85 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/lib/liblept.a +0 -0
- data/vendor/MotionOCR/MotionOCR/build_dependencies/dependencies/lib/libtesseract_all.a +0 -0
- metadata +113 -0
@@ -0,0 +1,139 @@
|
|
1
|
+
/* -*-C-*-
|
2
|
+
********************************************************************************
|
3
|
+
*
|
4
|
+
* File: helpers.h
|
5
|
+
* Description: General utility functions
|
6
|
+
* Author: Daria Antonova
|
7
|
+
* Created: Wed Apr 8 14:37:00 2009
|
8
|
+
* Language: C++
|
9
|
+
* Package: N/A
|
10
|
+
* Status: Reusable Software Component
|
11
|
+
*
|
12
|
+
* (c) Copyright 2009, Google Inc.
|
13
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
14
|
+
** you may not use this file except in compliance with the License.
|
15
|
+
** You may obtain a copy of the License at
|
16
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
17
|
+
** Unless required by applicable law or agreed to in writing, software
|
18
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
19
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
20
|
+
** See the License for the specific language governing permissions and
|
21
|
+
** limitations under the License.
|
22
|
+
*
|
23
|
+
********************************************************************************/
|
24
|
+
|
25
|
+
#ifndef TESSERACT_CCUTIL_HELPERS_H_
|
26
|
+
#define TESSERACT_CCUTIL_HELPERS_H_
|
27
|
+
|
28
|
+
#include <stdio.h>
|
29
|
+
#include <string.h>
|
30
|
+
|
31
|
+
// Remove a single trailing newline (if any) from the end of the string.
// str must be a writable, NUL-terminated string; a NULL pointer is ignored.
// Only one '\n' is removed and '\r' is left untouched.
inline void chomp_string(char *str) {
  if (str == NULL) return;
  // Keep the length as size_t: narrowing strlen() to int can misbehave for
  // very long strings and relies on wrap-around for the empty string.
  const size_t length = strlen(str);
  if (length > 0 && str[length - 1] == '\n') {
    str[length - 1] = '\0';
  }
}
|
38
|
+
|
39
|
+
// Advance the current pointer of the file if it points to a newline character.
// If the next character is anything else, the file position is restored so
// that the character can be read again. At end-of-file (or on a read error)
// nothing was consumed, so the position is left alone.
inline void SkipNewline(FILE *file) {
  const int next_char = fgetc(file);
  // Only step back over a character that was really read; seeking back after
  // EOF would make the last byte of the file readable a second time.
  if (next_char != EOF && next_char != '\n') {
    fseek(file, -1, SEEK_CUR);
  }
}
|
43
|
+
|
44
|
+
// Comparison callback for qsort over an array of floats.
// Returns -1, 0 or 1 when *arg1 is less than, equal to, or greater than
// *arg2 respectively (ascending order).
inline int sort_floats(const void *arg1, const void *arg2) {
  const float delta = *(const float *) arg1 - *(const float *) arg2;
  if (delta > 0) return 1;
  return (delta < 0) ? -1 : 0;
}
|
55
|
+
|
56
|
+
// Round n up to a whole number of blocks: returns the smallest multiple of
// block_size that is greater than or equal to n.
inline int RoundUp(int n, int block_size) {
  const int num_blocks = (n + block_size - 1) / block_size;
  return num_blocks * block_size;
}
|
60
|
+
|
61
|
+
// Clamp x into the closed interval [lower_bound, upper_bound] and return the
// result by value. The lower bound is checked first.
template<typename T>
inline T ClipToRange(const T& x, const T& lower_bound, const T& upper_bound) {
  if (x < lower_bound) {
    return lower_bound;
  }
  return (x > upper_bound) ? upper_bound : x;
}
|
70
|
+
|
71
|
+
// Grow the range [*lower_bound, *upper_bound] just enough to contain x.
// Bounds that already include x are left unchanged.
template<typename T1, typename T2>
inline void UpdateRange(const T1& x, T2* lower_bound, T2* upper_bound) {
  if (x < *lower_bound) {
    *lower_bound = x;
  }
  if (x > *upper_bound) {
    *upper_bound = x;
  }
}
|
79
|
+
|
80
|
+
// Grow the range [*lower_bound, *upper_bound] so that it contains the range
// [x_lo, x_hi]: the lower bound is lowered to x_lo if needed and the upper
// bound is raised to x_hi if needed.
template<typename T1, typename T2>
inline void UpdateRange(const T1& x_lo, const T1& x_hi,
                        T2* lower_bound, T2* upper_bound) {
  if (x_lo < *lower_bound) {
    *lower_bound = x_lo;
  }
  if (x_hi > *upper_bound) {
    *upper_bound = x_hi;
  }
}
|
89
|
+
|
90
|
+
// Proper modulo arithmetic operator. Returns a mod b that works for -ve a.
// For any integer a and positive b, returns r : 0<=r<b and a=n*b + r for
// some integer n.
// Results for b <= 0 are not defined (b == 0 divides by zero).
inline int Modulo(int a, int b) {
  // For positive b, a % b lies strictly between -b and b, so one conditional
  // add is enough to normalise it. Adding b unconditionally before a second
  // % would overflow int whenever b is larger than INT_MAX / 2.
  const int remainder = a % b;
  return remainder < 0 ? remainder + b : remainder;
}
|
96
|
+
|
97
|
+
// Integer division with rounding to the nearest integer that also behaves
// symmetrically for negative a (no double counting around 0).
// Adding b/2 regardless of sign would give
//   1/3 = 0, 0/3 = 0, -1/3 = 0, -2/3 = 0, -3/3 = 0 and -4/3 = -1,
// whereas this function gives
//   1/3 = 0, 0/3 = 0, -1/3 = 0, -2/3 = -1, -3/3 = -1 and -4/3 = -1.
// Results with b negative are not defined.
inline int DivRounded(int a, int b) {
  const int half = b / 2;
  // Bias away from zero so that truncating division rounds to nearest.
  const int biased = (a >= 0) ? a + half : a - half;
  return biased / b;
}
|
106
|
+
|
107
|
+
// Convert a double to the nearest int. Halves round away from zero; negative
// inputs are handled by rounding the magnitude and negating the result.
inline int IntCastRounded(double x) {
  if (x >= 0.0) {
    return static_cast<int>(x + 0.5);
  }
  return -static_cast<int>(-x + 0.5);
}
|
111
|
+
|
112
|
+
// Reverse, in place, the order of the num_bytes bytes starting at ptr.
// Used to switch a value between big-endian and little-endian layouts.
// Nothing is changed when num_bytes is less than 2.
inline void ReverseN(void* ptr, int num_bytes) {
  char *bytes = reinterpret_cast<char *>(ptr);
  const int num_swaps = num_bytes / 2;
  int front = 0;
  while (front < num_swaps) {
    const int back = num_bytes - 1 - front;
    const char saved = bytes[front];
    bytes[front] = bytes[back];
    bytes[back] = saved;
    ++front;
  }
}
|
122
|
+
|
123
|
+
// Reverse the order of bytes in a 16 bit quantity for big/little-endian switch.
|
124
|
+
inline void Reverse16(void *ptr) {
|
125
|
+
ReverseN(ptr, 2);
|
126
|
+
}
|
127
|
+
|
128
|
+
// Reverse the order of bytes in a 32 bit quantity for big/little-endian switch.
|
129
|
+
inline void Reverse32(void *ptr) {
|
130
|
+
ReverseN(ptr, 4);
|
131
|
+
}
|
132
|
+
|
133
|
+
// Reverse the order of bytes in a 64 bit quantity for big/little-endian switch.
|
134
|
+
inline void Reverse64(void* ptr) {
|
135
|
+
ReverseN(ptr, 8);
|
136
|
+
}
|
137
|
+
|
138
|
+
|
139
|
+
#endif // TESSERACT_CCUTIL_HELPERS_H_
|
@@ -0,0 +1,180 @@
|
|
1
|
+
/******************************************************************************
|
2
|
+
** Filename: Host.h
|
3
|
+
** Purpose: This is the system independent typedefs and defines
|
4
|
+
** Author: MN, JG, MD
|
5
|
+
** Version: 5.4.1
|
6
|
+
** History: 11/7/94 MCD received the modification that Lennart made
|
7
|
+
** to port to 32 bit world and modify this file so that it
|
8
|
+
** will be shared between platform.
|
9
|
+
** 11/9/94 MCD Make MSW32 subset of MSW. Now MSW means
|
10
|
+
** MicroSoft Window and MSW32 means the 32 bit worlds
|
11
|
+
** of MicroSoft Window. Therefore you want the environment
|
12
|
+
** to be MicroSoft Window and in the 32 bit world -
|
13
|
+
** __MSW__ and __MSW32__ must be uncommented out.
|
14
|
+
** 11/30/94 MCD Incorporated comments received for more
|
15
|
+
** readability and the missing typedef for FLOAT.
|
16
|
+
** 12/1/94 MCD Added PFVOID typedef
|
17
|
+
** 5/1/95 MCD. Made many changes based on the inputs.
|
18
|
+
** Changes:
|
19
|
+
** 1) Rearrange the #ifdef so that there're definitions for
|
20
|
+
** particular platforms.
|
21
|
+
** 2) Took out the #define for computer and environment
|
22
|
+
** that developer can uncomment
|
23
|
+
** 3) Added __OLDCODE__ where the defines will be
|
24
|
+
** obsoleted in the next version and advise not to use.
|
25
|
+
** 4) Added the definitions for the following:
|
26
|
+
** FILE_HANDLE, MEMORY_HANDLE, BOOL8,
|
27
|
+
** MAX_INT8, MAX_INT16, MAX_INT32, MAX_UINT8
|
28
|
+
** MAX_UINT16, MAX_UINT32, MAX_FLOAT32
|
29
|
+
** 06/19/96 MCD. Took out MAX_FLOAT32
|
30
|
+
** 07/15/96 MCD. Fixed the comments error
|
31
|
+
** Add back BOOL8.
|
32
|
+
**
|
33
|
+
** (c) Copyright Hewlett-Packard Company, 1988-1996.
|
34
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
35
|
+
** you may not use this file except in compliance with the License.
|
36
|
+
** You may obtain a copy of the License at
|
37
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
38
|
+
** Unless required by applicable law or agreed to in writing, software
|
39
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
40
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
41
|
+
** See the License for the specific language governing permissions and
|
42
|
+
** limitations under the License.
|
43
|
+
*/
|
44
|
+
|
45
|
+
#ifndef __HOST__
|
46
|
+
#define __HOST__
|
47
|
+
|
48
|
+
/******************************************************************************
|
49
|
+
** IMPORTANT!!! **
|
50
|
+
** **
|
51
|
+
** Defines either __MSW__, __MSW32__, __MAC__, __UNIX__, __OS2__, __PM__ to
|
52
|
+
** use the specified definitions indicated below in the preprocessor settings. **
|
53
|
+
** **
|
54
|
+
** Also define either __FarProc__ or __FarData__ and __MOTO__ to use the
|
55
|
+
** specified definitions indicated below in the preprocessor settings. **
|
56
|
+
** **
|
57
|
+
** If preprocessor settings are not allowed by the compiler that is being used,
|
58
|
+
** then it is recommended that a "platform.h" is created with the definition
|
59
|
+
** of the computer and/or operating system.
|
60
|
+
******************************************************************************/
|
61
|
+
|
62
|
+
#include "platform.h"
|
63
|
+
/********************************************************/
/* MicroSoft Windows: __MSW32__ takes precedence over __MSW__ */
#if defined(__MSW32__)
#include <windows.h>
#include <winbase.h>             // winbase.h contains windows.h

#define DLLIMPORT __declspec( dllimport)
#define DLLEXPORT __declspec( dllexport)

#elif defined(__MSW__)
#include <windows.h>             // provides standard definitions (like HANDLE)

#define DLLIMPORT __import
#define DLLEXPORT __export

#endif

/********************************************************/
/* __MAC__: the import/export decorations expand to nothing */
#if defined(__MAC__)
#include <Types.h>

#define DLLIMPORT
#define DLLEXPORT

#endif

/********************************************************/
/* __UNIX__, __DOS__, __OS2__, __PM__: the import/export decorations */
/* expand to nothing */
#if defined(__UNIX__) || defined(__DOS__) || defined(__OS2__) || defined(__PM__)
#define DLLIMPORT
#define DLLEXPORT
#endif

/*****************************************************************************
 **
 **                      Standard GHC Definitions
 **
 *****************************************************************************/

/* __NATIVE__ is MOTO when __MOTO__ is defined and INTEL otherwise. */
#if defined(__MOTO__)
#define __NATIVE__ MOTO
#else
#define __NATIVE__ INTEL
#endif
|
112
|
+
|
113
|
+
//typedef HANDLE FD* PHANDLE;
|
114
|
+
|
115
|
+
// definitions of portable data types (numbers and characters)
|
116
|
+
typedef SIGNED char inT8;
|
117
|
+
typedef unsigned char uinT8;
|
118
|
+
typedef short inT16;
|
119
|
+
typedef unsigned short uinT16;
|
120
|
+
typedef int inT32;
|
121
|
+
typedef unsigned int uinT32;
|
122
|
+
#if (_MSC_VER >= 1200) //%%% vkr for VC 6.0
|
123
|
+
typedef INT64 inT64;
|
124
|
+
typedef UINT64 uinT64;
|
125
|
+
#else
|
126
|
+
typedef long long int inT64;
|
127
|
+
typedef unsigned long long int uinT64;
|
128
|
+
#endif //%%% vkr for VC 6.0
|
129
|
+
typedef float FLOAT32;
|
130
|
+
typedef double FLOAT64;
|
131
|
+
typedef unsigned char BOOL8;
|
132
|
+
|
133
|
+
// printf-style conversion specifications for inT32 and inT64 values.
#define INT32FORMAT "%d"
#define INT64FORMAT "%lld"

// Largest values of the portable numeric types.
#define MAX_INT8    0x7f
#define MAX_INT16   0x7fff
#define MAX_INT32   0x7fffffff
#define MAX_UINT8   0xff
#define MAX_UINT16  0xffff
#define MAX_UINT32  0xffffffff
#define MAX_FLOAT32 ((float)3.40282347e+38)
|
143
|
+
|
144
|
+
// Smallest values of the portable numeric types.
// The signed minima are written as negative int expressions. A bare hex
// literal such as 0x80 or 0x8000 is a positive int (128, 32768), and
// 0x80000000 does not fit in a 32 bit int so it takes an unsigned type; none
// of those compare correctly against signed values.
#define MIN_INT8    (-0x7f - 1)
#define MIN_INT16   (-0x7fff - 1)
#define MIN_INT32   (-0x7fffffff - 1)
#define MIN_UINT8   0x00
#define MIN_UINT16  0x0000
#define MIN_UINT32  0x00000000
// Note: this is a tiny positive value, not the most negative float.
#define MIN_FLOAT32 ((float)1.17549435e-38)
|
151
|
+
|
152
|
+
// Status codes and boolean/null constants. Each one is only defined here if
// nothing included earlier has already defined it.

#if !defined(OKAY)
#define OKAY 0
#endif

#if !defined(HPERR)
#define HPERR -1
#endif

#if !defined(TRUE)
#define TRUE 1
#endif

#if !defined(FALSE)
#define FALSE 0
#endif

#if !defined(NULL)
#define NULL 0L
#endif
|
173
|
+
|
174
|
+
// Return true if x and y differ by no more than tolerance in either
// direction, i.e. -tolerance <= x - y <= tolerance.
template<class T> bool NearlyEqual(T x, T y, T tolerance) {
  const T delta = x - y;
  if (!(delta <= tolerance)) {
    return false;
  }
  return -delta <= tolerance;
}
|
179
|
+
|
180
|
+
#endif
|
@@ -0,0 +1,31 @@
|
|
1
|
+
/**********************************************************************
|
2
|
+
* File: ndminx.h (Formerly ndminmax.h)
|
3
|
+
* Description: Extended ascii chars
|
4
|
+
* Author: Phil Cheatle
|
5
|
+
* Created: Mon Mar 29 14:46:01 BST 1993
|
6
|
+
*
|
7
|
+
* (C) Copyright 1991, Hewlett-Packard Ltd.
|
8
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
9
|
+
** you may not use this file except in compliance with the License.
|
10
|
+
** You may obtain a copy of the License at
|
11
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
12
|
+
** Unless required by applicable law or agreed to in writing, software
|
13
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
14
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
15
|
+
** See the License for the specific language governing permissions and
|
16
|
+
** limitations under the License.
|
17
|
+
*
|
18
|
+
**********************************************************************/
|
19
|
+
|
20
|
+
#ifndef NDMINX_H
|
21
|
+
#define NDMINX_H
|
22
|
+
|
23
|
+
// MAX(x,y) / MIN(x,y): larger / smaller of two values. These are plain
// macros, so each argument may be evaluated more than once; do not pass
// expressions with side effects. Existing definitions are left in place.
#if !defined(MAX)
#define MAX(x,y) (((x) >= (y))?(x):(y))
#endif

#if !defined(MIN)
#define MIN(x,y) (((x) <= (y))?(x):(y))
#endif
|
30
|
+
|
31
|
+
#endif
|
@@ -0,0 +1,335 @@
|
|
1
|
+
/**********************************************************************
|
2
|
+
* File: ocrclass.h
|
3
|
+
* Description: Class definitions and constants for the OCR API.
|
4
|
+
* Author: Hewlett-Packard Co
|
5
|
+
*
|
6
|
+
* (C) Copyright 1996, Hewlett-Packard Co.
|
7
|
+
** Licensed under the Apache License, Version 2.0 (the "License");
|
8
|
+
** you may not use this file except in compliance with the License.
|
9
|
+
** You may obtain a copy of the License at
|
10
|
+
** http://www.apache.org/licenses/LICENSE-2.0
|
11
|
+
** Unless required by applicable law or agreed to in writing, software
|
12
|
+
** distributed under the License is distributed on an "AS IS" BASIS,
|
13
|
+
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
14
|
+
** See the License for the specific language governing permissions and
|
15
|
+
** limitations under the License.
|
16
|
+
*
|
17
|
+
**********************************************************************/
|
18
|
+
|
19
|
+
/**********************************************************************
|
20
|
+
* This file contains typedefs for all the structures used by
|
21
|
+
* the HP OCR interface.
|
22
|
+
* The code is designed to be used with either a C or C++ compiler.
|
23
|
+
* The structures are designed to allow them to be used with any
|
24
|
+
* structure alignment up to 8.
|
25
|
+
**********************************************************************/
|
26
|
+
|
27
|
+
#ifndef CCUTIL_OCRCLASS_H_
|
28
|
+
#define CCUTIL_OCRCLASS_H_
|
29
|
+
|
30
|
+
#ifndef __GNUC__
|
31
|
+
#ifdef __MSW32__
|
32
|
+
#include <windows.h>
|
33
|
+
#include "gettimeofday.h"
|
34
|
+
#endif
|
35
|
+
#else
|
36
|
+
#include <sys/time.h>
|
37
|
+
#endif
|
38
|
+
#include <time.h>
|
39
|
+
#include "host.h"
|
40
|
+
|
41
|
+
/* Maximum lengths of various strings */
#define MAX_FONT_NAME    34      /* name of font */
#define MAX_OCR_NAME     32      /* name of engine */
#define MAX_OCR_VERSION  17      /* version code of engine */

/* Image parameters */
#define MIN_IMAGE_SIZE   64      /* smallest image that will be passed */
#define IMAGE_ROUNDING   32      /* all sizes are multiple of this */
|
49
|
+
|
50
|
+
/* Maximum timeouts of various functions (in secs). */
/* Defining __SLOW_TIMES__ selects the longer set of values. */
#ifdef __SLOW_TIMES__
#define STARTUP_TIMEOUT   100    /* start of OCR engine */
#define SHUTDOWN_TIMEOUT  50     /* end of OCR engine */
#define SENDIM_TIMEOUT    50     /* send of image */
#define RELEASE_TIMEOUT   50     /* release of semaphore */
#define READIM_TIMEOUT    100    /* read of image */
#define READTEXT_TIMEOUT  50     /* read of text */
#define PROGRESS_TIMEOUT  30     /* progress every 30 seconds */
#define BADTIMES_TIMEOUT  7      /* max lack of progress */
#else
#define STARTUP_TIMEOUT   10     /* start of OCR engine */
#define SHUTDOWN_TIMEOUT  6      /* end of OCR engine */
#define SENDIM_TIMEOUT    5      /* send of image */
#define RELEASE_TIMEOUT   5      /* release of semaphore */
#define READIM_TIMEOUT    10     /* read of image */
#define READTEXT_TIMEOUT  5      /* read of text */
#define PROGRESS_TIMEOUT  3      /* progress every 3 seconds */
#define BADTIMES_TIMEOUT  7      /* max lack of progress */
#endif
|
71
|
+
|
72
|
+
/* Language codes; the definitions are identical to RTF. */
#define LANGE_NONE       0x0400  /* no language */
#define LANGE_ALBANIAN   0x041c  /* Albanian */
#define LANGE_BRITISH    0x0809  /* International English */
#define LANGE_BULGARIAN  0x0402  /* Bulgarian */
#define LANGE_CROATIAN   0x041a  /* Croatian (latin alphabet) */
#define LANGE_CZECH      0x0405  /* Czech */
#define LANGE_DANISH     0x0406  /* Danish */
#define LANGE_DUTCH      0x0413  /* Dutch */
#define LANGE_FINNISH    0x040b  /* Finnish */
#define LANGE_FRENCH     0x040c  /* French */
#define LANGE_GERMAN     0x0407  /* German */
#define LANGE_GREEK      0x0408  /* Greek */
#define LANGE_HUNGARIAN  0x040e  /* Hungarian */
#define LANGE_ITALIAN    0x0410  /* Italian */
#define LANGE_JAPANESE   0x0411  /* Japanese */
#define LANGE_KOREAN     0x0412  /* Korean */
#define LANGE_NORWEGIAN  0x0414  /* Bokmal */
#define LANGE_POLISH     0x0415  /* Polish */
#define LANGE_PORTUGESE  0x0416  /* Brazilian Portuguese */
#define LANGE_ROMANIAN   0x0418  /* Romanian */
#define LANGE_RUSSIAN    0x0419  /* Russian */
#define LANGE_SCHINESE   0x0804  /* Simplified Chinese */
#define LANGE_SLOVAK     0x041b  /* Slovak */
#define LANGE_SPANISH    0x040a  /* Castilian */
#define LANGE_SWEDISH    0x041d  /* Swedish */
#define LANGE_TCHINESE   0x0404  /* Traditional Chinese */
#define LANGE_TURKISH    0x041f  /* Turkish */
#define LANGE_USENGLISH  0x0409  /* American */
|
101
|
+
|
102
|
+
/* Font family codes; the definitions are identical to RTF. */
#define FFAM_NONE        0       /* unknown */
#define FFAM_ROMAN       1       /* serifed prop */
#define FFAM_SWISS       2       /* sans-serif prop */
#define FFAM_MODERN      3       /* fixed pitch */

/* Character set codes; the definitions are identical to RTF. */
#define CHSET_ANSI       0       /* Ansi efigs */
#define CHSET_SHIFT_JIS  128     /* JIS X 0208-1990 */
#define CHSET_KOREAN     129     /* KS C 5601-1992 */
#define CHSET_SCHINESE   134     /* GB 2312-80 */
#define CHSET_BIG5       136     /* Big Five */
#define CHSET_CYRILLIC   204     /* Cyrillic */
#define CHSET_EEUROPE    238     /* Eastern Europe */

/* Pitch codes; the definitions are identical to RTF. */
#define PITCH_DEF        0       /* default */
#define PITCH_FIXED      1       /* fixed pitch */
#define PITCH_VAR        2       /* variable pitch */
|
121
|
+
|
122
|
+
/* Bitmasks for character enhancements. */
/* OR these together to build the enhancement passed to ocr_append_char. */
#define EUC_BOLD         1       /* bold character */
#define EUC_ITALIC       2       /* italic char */
#define EUC_UNDERLINE    4       /* underlined char */
#define EUC_SUBSCRIPT    8       /* subscript char */
#define EUC_SUPERSCRIPT  16      /* superscript char */
|
129
|
+
|
130
|
+
/* Direction in which characters are rendered. */
/* The numeric values are spelled out so they cannot shift by accident. */
enum OCR_CHAR_DIRECTION {
  OCR_CDIR_RIGHT_LEFT = 0,       /* right to left horizontal */
  OCR_CDIR_LEFT_RIGHT = 1,       /* left to right horizontal */
  OCR_CDIR_TOP_BOTTOM = 2,       /* top to bottom vertical */
  OCR_CDIR_BOTTOM_TOP = 3        /* bottom to top vertical */
};
|
137
|
+
|
138
|
+
/* Direction in which successive lines are rendered. */
enum OCR_LINE_DIRECTION {
  /* horizontal lines go down, vertical lines go right */
  OCR_LDIR_DOWN_RIGHT = 0,
  /* horizontal lines go up */
  OCR_LDIR_UP_LEFT = 1
};
|
144
|
+
|
145
|
+
/* Kind of line break. */
enum OCR_NEWLINE_TYPE {
  OCR_NL_NONE = 0,               /* not a newline */
  OCR_NL_NEWLINE = 1,            /* this is a newline but not new para */
  OCR_NL_NEWPARA = 2             /* this is a newline and a new para */
};
|
151
|
+
|
152
|
+
/* Error codes that the API functions can return in addition to OKAY and */
/* HPERR. */
#define OCR_API_NO_MEM     (-2)  /* filled output buffer */
#define OCR_API_BAD_CHAR   (-3)  /* whitespace sent to ocr_append_char */
#define OCR_API_BAD_STATE  (-4)  /* invalid call sequence */
|
157
|
+
|
158
|
+
/* Error codes used for passing errors back to the HP side */
/* (for calls to ocr_error). Values are spelled out explicitly. */
enum OCR_ERR_CODE {
  OCR_ERR_NONE = 0,              /* no error */
  OCR_ERR_CLEAN_EXIT = 1,        /* no error */
  OCR_ERR_NO_MEM = 2,            /* out of memory */
  OCR_ERR_FILE_READ = 3,         /* failed to read data file */
  OCR_ERR_TMP_WRITE = 4,         /* failed to write temp file */
  OCR_ERR_TMP_READ = 5,          /* failed to read temp file */
  OCR_ERR_BAD_DLL = 6,           /* missing or invalid dll subcomponent */
  OCR_ERR_BAD_EXE = 7,           /* missing or invalid exe subcomponent */
  OCR_ERR_BAD_LOAD = 8,          /* failed to load subcomponent */
  OCR_ERR_BAD_LANG = 9,          /* unable to recognize requested language */
  OCR_ERR_BAD_STATE = 10,        /* engine did call out of sequence */
  OCR_ERR_INTERNAL1 = 11,        /* internal error type 1 */
  OCR_ERR_INTERNAL2 = 12,        /* internal error type 2 */
  OCR_ERR_INTERNAL3 = 13,        /* internal error type 3 */
  OCR_ERR_INTERNAL4 = 14,        /* internal error type 4 */
  OCR_ERR_INTERNAL5 = 15,        /* internal error type 5 */
  OCR_ERR_INTERNAL6 = 16,        /* internal error type 6 */
  OCR_ERR_INTERNAL7 = 17,        /* internal error type 7 */
  OCR_ERR_INTERNAL8 = 18,        /* internal error type 8 */
  OCR_ERR_TIMEOUT = 19           /* timed out in comms */
};
|
181
|
+
|
182
|
+
/**********************************************************************
|
183
|
+
* EFONT_DESC
|
184
|
+
* Description of one font.
|
185
|
+
* The information required is basically that used by RTF.
|
186
|
+
* The name may be either a valid font on the system or the empty string.
|
187
|
+
**********************************************************************/
|
188
|
+
|
189
|
+
typedef struct { /*font description */
|
190
|
+
uinT16 language; /*default language */
|
191
|
+
uinT8 font_family; /*serif/not, fixed/not */
|
192
|
+
uinT8 char_set; /*character set standard */
|
193
|
+
uinT8 pitch; /*fixed or prop */
|
194
|
+
inT8 name[MAX_FONT_NAME + 1]; /*plain ascii name */
|
195
|
+
} EFONT_DESC; /*font description */
|
196
|
+
|
197
|
+
/**********************************************************************
|
198
|
+
* EOCR_DESC
|
199
|
+
* Description of the OCR engine provided at startup.
|
200
|
+
* The name and version may be reported to the user at some point.
|
201
|
+
* The fonts array should indicate the fonts that the OCR system
|
202
|
+
* can recognize.
|
203
|
+
**********************************************************************/
|
204
|
+
|
205
|
+
typedef struct { /*startup info */
|
206
|
+
inT32 protocol; /*interface version */
|
207
|
+
uinT32 font_count; /*number of fonts */
|
208
|
+
uinT16 language; /*default language */
|
209
|
+
uinT16 name[MAX_OCR_NAME + 1]; /*name of engine */
|
210
|
+
/*version of engine */
|
211
|
+
uinT16 version[MAX_OCR_VERSION + 1];
|
212
|
+
EFONT_DESC fonts[1]; /*array of fonts */
|
213
|
+
} EOCR_DESC; /*startup info */
|
214
|
+
|
215
|
+
/**********************************************************************
|
216
|
+
* ESTRIP_DESC
|
217
|
+
* Description of the image strip as it is passed to the engine.
|
218
|
+
* The image is always 1 bit, with 1=black.
|
219
|
+
* The width is always a multiple of 32, so padding is always OK.
|
220
|
+
* The height of the full image is always a multiple of 32.
|
221
|
+
* The top y coordinate is 0, and increases down.
|
222
|
+
* The top leftmost pixel is in the most significant bit of the first byte.
|
223
|
+
**********************************************************************/
|
224
|
+
|
225
|
+
typedef struct { /*bitmap strip */
|
226
|
+
inT16 x_size; /*width in pixels */
|
227
|
+
inT16 y_size; /*of full image */
|
228
|
+
inT16 strip_size; /*of this strip */
|
229
|
+
inT16 resolution; /*pixels per inch */
|
230
|
+
uinT8 data[8]; /*image data */
|
231
|
+
} ESTRIP_DESC; /*bitmap strip */
|
232
|
+
|
233
|
+
/**********************************************************************
|
234
|
+
* EANYCODE_CHAR
|
235
|
+
* Description of a single character. The character code is defined by
|
236
|
+
* the character set of the current font.
|
237
|
+
* Output text is sent as an array of these structures.
|
238
|
+
* Spaces and line endings in the output are represented in the
|
239
|
+
* structures of the surrounding characters. They are not directly
|
240
|
+
* represented as characters.
|
241
|
+
* The first character in a word has a positive value of blanks.
|
242
|
+
* Missing information should be set to the defaults in the comments.
|
243
|
+
* If word bounds are known, but not character bounds, then the top and
|
244
|
+
* bottom of each character should be those of the word. The left of the
|
245
|
+
* first and right of the last char in each word should be set. All other
|
246
|
+
* lefts and rights should be set to -1.
|
247
|
+
* If set, the values of right and bottom are left+width and top+height.
|
248
|
+
* Most of the members come directly from the parameters to ocr_append_char.
|
249
|
+
* The formatting member uses the enhancement parameter and combines the
|
250
|
+
* line direction stuff into the top 3 bits.
|
251
|
+
* The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
|
252
|
+
* 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
|
253
|
+
* the coding is, only that it is backwards compatible with the previous
|
254
|
+
* version.
|
255
|
+
**********************************************************************/
|
256
|
+
|
257
|
+
typedef struct { /*single character */
|
258
|
+
// It should be noted that the format for char_code for version 2.0 and beyond
|
259
|
+
// is UTF8 which means that ASCII characters will come out as one structure but
|
260
|
+
// other characters will be returned in two or more instances of this structure
|
261
|
+
// with a single byte of the UTF8 code in each, but each will have the same
|
262
|
+
// bounding box. Programs which want to handle languagues with different
|
263
|
+
// characters sets will need to handle extended characters appropriately, but
|
264
|
+
// *all* code needs to be prepared to receive UTF8 coded characters for
|
265
|
+
// characters such as bullet and fancy quotes.
|
266
|
+
uinT16 char_code; /*character itself */
|
267
|
+
inT16 left; /*of char (-1) */
|
268
|
+
inT16 right; /*of char (-1) */
|
269
|
+
inT16 top; /*of char (-1) */
|
270
|
+
inT16 bottom; /*of char (-1) */
|
271
|
+
inT16 font_index; /*what font (0) */
|
272
|
+
uinT8 confidence; /*0=perfect, 100=reject (0/100) */
|
273
|
+
uinT8 point_size; /*of char, 72=i inch, (10) */
|
274
|
+
inT8 blanks; /*no of spaces before this char (1) */
|
275
|
+
uinT8 formatting; /*char formatting (0) */
|
276
|
+
} EANYCODE_CHAR; /*single character */
|
277
|
+
|
278
|
+
/**********************************************************************
|
279
|
+
* ETEXT_DESC
|
280
|
+
* Description of the output of the OCR engine.
|
281
|
+
* This structure is used as both a progress monitor and the final
|
282
|
+
* output header, since it needs to be a valid progress monitor while
|
283
|
+
* the OCR engine is storing its output to shared memory.
|
284
|
+
* During progress, all the buffer info is -1.
|
285
|
+
* Progress starts at 0 and increases to 100 during OCR. No other constraint.
|
286
|
+
* Every progress callback, the OCR engine must set ocr_alive to 1.
|
287
|
+
* The HP side will set ocr_alive to 0. Repeated failure to reset
|
288
|
+
* to 1 indicates that the OCR engine is dead.
|
289
|
+
* If the cancel function is not null then it is called with the number of
|
290
|
+
* user words found. If it returns true then operation is cancelled.
|
291
|
+
**********************************************************************/
|
292
|
+
// Cancellation callback type. It is called with an opaque cancel_this pointer
// and the number of user words found; returning true cancels the operation.
typedef bool (*CANCEL_FUNC)(void* cancel_this, int words);
|
293
|
+
|
294
|
+
class ETEXT_DESC { // output header
|
295
|
+
public:
|
296
|
+
inT16 count; // chars in this buffer(0)
|
297
|
+
inT16 progress; // percent complete increasing (0-100)
|
298
|
+
inT8 more_to_come; // true if not last
|
299
|
+
volatile inT8 ocr_alive; // ocr sets to 1, HP 0
|
300
|
+
inT8 err_code; // for errcode use
|
301
|
+
CANCEL_FUNC cancel; // returns true to cancel
|
302
|
+
void* cancel_this; // this or other data for cancel
|
303
|
+
struct timeval end_time; // time to stop. expected to be set only by call
|
304
|
+
// to set_deadline_msecs()
|
305
|
+
EANYCODE_CHAR text[1]; // character data
|
306
|
+
|
307
|
+
ETEXT_DESC() : count(0), progress(0), more_to_come(0), ocr_alive(0),
|
308
|
+
err_code(0), cancel(NULL), cancel_this(NULL) {
|
309
|
+
end_time.tv_sec = 0;
|
310
|
+
end_time.tv_usec = 0;
|
311
|
+
}
|
312
|
+
|
313
|
+
// Sets the end time to be deadline_msecs milliseconds from now.
|
314
|
+
void set_deadline_msecs(inT32 deadline_msecs) {
|
315
|
+
gettimeofday(&end_time, NULL);
|
316
|
+
inT32 deadline_secs = deadline_msecs / 1000;
|
317
|
+
end_time.tv_sec += deadline_secs;
|
318
|
+
end_time.tv_usec += (deadline_msecs - deadline_secs * 1000) * 1000;
|
319
|
+
if (end_time.tv_usec > 1000000) {
|
320
|
+
end_time.tv_usec -= 1000000;
|
321
|
+
++end_time.tv_sec;
|
322
|
+
}
|
323
|
+
}
|
324
|
+
|
325
|
+
// Returns false if we've not passed the end_time, or have not set a deadline.
|
326
|
+
bool deadline_exceeded() const {
|
327
|
+
if (end_time.tv_sec == 0 && end_time.tv_usec == 0) return false;
|
328
|
+
struct timeval now;
|
329
|
+
gettimeofday(&now, NULL);
|
330
|
+
return (now.tv_sec > end_time.tv_sec || (now.tv_sec == end_time.tv_sec &&
|
331
|
+
now.tv_usec > end_time.tv_usec));
|
332
|
+
}
|
333
|
+
};
|
334
|
+
|
335
|
+
#endif // CCUTIL_OCRCLASS_H_
|