node-native-win-utils 1.3.3 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,235 @@
1
+ // SPDX-License-Identifier: Apache-2.0
2
+ // File: ltrresultiterator.h
3
+ // Description: Iterator for tesseract results in strict left-to-right
4
+ // order that avoids using tesseract internal data structures.
5
+ // Author: Ray Smith
6
+ //
7
+ // (C) Copyright 2010, Google Inc.
8
+ // Licensed under the Apache License, Version 2.0 (the "License");
9
+ // you may not use this file except in compliance with the License.
10
+ // You may obtain a copy of the License at
11
+ // http://www.apache.org/licenses/LICENSE-2.0
12
+ // Unless required by applicable law or agreed to in writing, software
13
+ // distributed under the License is distributed on an "AS IS" BASIS,
14
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ // See the License for the specific language governing permissions and
16
+ // limitations under the License.
17
+
18
+ #ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
19
+ #define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
20
+
21
+ #include "export.h" // for TESS_API
22
+ #include "pageiterator.h" // for PageIterator
23
+ #include "publictypes.h" // for PageIteratorLevel
24
+ #include "unichar.h" // for StrongScriptDirection
25
+
26
+ namespace tesseract {
27
+
28
+ class BLOB_CHOICE_IT;
29
+ class PAGE_RES;
30
+ class WERD_RES;
31
+
32
+ class Tesseract;
33
+
34
+ // Class to iterate over tesseract results, providing access to all levels
35
+ // of the page hierarchy, without including any tesseract headers or having
36
+ // to handle any tesseract structures.
37
+ // WARNING! This class points to data held within the TessBaseAPI class, and
38
+ // therefore can only be used while the TessBaseAPI class still exists and
39
+ // has not been subjected to a call of Init, SetImage, Recognize, Clear, End
40
+ // DetectOS, or anything else that changes the internal PAGE_RES.
41
+ // See tesseract/publictypes.h for the definition of PageIteratorLevel.
42
+ // See also base class PageIterator, which contains the bulk of the interface.
43
+ // LTRResultIterator adds text-specific methods for access to OCR output.
44
+
45
+ class TESS_API LTRResultIterator : public PageIterator {
46
+ friend class ChoiceIterator;
47
+
48
+ public:
49
+ // page_res and tesseract come directly from the BaseAPI.
50
+ // The rectangle parameters are copied indirectly from the Thresholder,
51
+ // via the BaseAPI. They represent the coordinates of some rectangle in an
52
+ // original image (in top-left-origin coordinates) and therefore the top-left
53
+ // needs to be added to any output boxes in order to specify coordinates
54
+ // in the original image. See TessBaseAPI::SetRectangle.
55
+ // The scale and scaled_yres are in case the Thresholder scaled the image
56
+ // rectangle prior to thresholding. Any coordinates in tesseract's image
57
+ // must be divided by scale before adding (rect_left, rect_top).
58
+ // The scaled_yres indicates the effective resolution of the binary image
59
+ // that tesseract has been given by the Thresholder.
60
+ // After the constructor, Begin has already been called.
61
+ LTRResultIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale,
62
+ int scaled_yres, int rect_left, int rect_top,
63
+ int rect_width, int rect_height);
64
+
65
+ ~LTRResultIterator() override;
66
+
67
+ // LTRResultIterators may be copied! This makes it possible to iterate over
68
+ // all the objects at a lower level, while maintaining an iterator to
69
+ // objects at a higher level. These constructors DO NOT CALL Begin, so
70
+ // iterations will continue from the location of src.
71
+ // TODO: For now the copy constructor and operator= only need the base class
72
+ // versions, but if new data members are added, don't forget to add them!
73
+
74
+ // ============= Moving around within the page ============.
75
+
76
+ // See PageIterator.
77
+
78
+ // ============= Accessing data ==============.
79
+
80
+ // Returns the null terminated UTF-8 encoded text string for the current
81
+ // object at the given level. Use delete [] to free after use.
82
+ char *GetUTF8Text(PageIteratorLevel level) const;
83
+
84
+ // Set the string inserted at the end of each text line. "\n" by default.
85
+ void SetLineSeparator(const char *new_line);
86
+
87
+ // Set the string inserted at the end of each paragraph. "\n" by default.
88
+ void SetParagraphSeparator(const char *new_para);
89
+
90
+ // Returns the mean confidence of the current object at the given level.
91
+ // The number should be interpreted as a percent probability. (0.0f-100.0f)
92
+ float Confidence(PageIteratorLevel level) const;
93
+
94
+ // ============= Functions that refer to words only ============.
95
+
96
+ // Returns the font attributes of the current word. If iterating at a higher
97
+ // level object than words, eg textlines, then this will return the
98
+ // attributes of the first word in that textline.
99
+ // The actual return value is a string representing a font name. It points
100
+ // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as
101
+ // the iterator itself, ie rendered invalid by various members of
102
+ // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI.
103
+ // Pointsize is returned in printers points (1/72 inch.)
104
+ const char *WordFontAttributes(bool *is_bold, bool *is_italic,
105
+ bool *is_underlined, bool *is_monospace,
106
+ bool *is_serif, bool *is_smallcaps,
107
+ int *pointsize, int *font_id) const;
108
+
109
+ // Return the name of the language used to recognize this word.
110
+ // On error, nullptr. Do not delete this pointer.
111
+ const char *WordRecognitionLanguage() const;
112
+
113
+ // Return the overall directionality of this word.
114
+ StrongScriptDirection WordDirection() const;
115
+
116
+ // Returns true if the current word was found in a dictionary.
117
+ bool WordIsFromDictionary() const;
118
+
119
+ // Returns the number of blanks before the current word.
120
+ int BlanksBeforeWord() const;
121
+
122
+ // Returns true if the current word is numeric.
123
+ bool WordIsNumeric() const;
124
+
125
+ // Returns true if the word contains blamer information.
126
+ bool HasBlamerInfo() const;
127
+
128
+ // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle
129
+ // of the current word.
130
+ const void *GetParamsTrainingBundle() const;
131
+
132
+ // Returns a pointer to the string with blamer information for this word.
133
+ // Assumes that the word's blamer_bundle is not nullptr.
134
+ const char *GetBlamerDebug() const;
135
+
136
+ // Returns a pointer to the string with misadaption information for this word.
137
+ // Assumes that the word's blamer_bundle is not nullptr.
138
+ const char *GetBlamerMisadaptionDebug() const;
139
+
140
+ // Returns true if a truth string was recorded for the current word.
141
+ bool HasTruthString() const;
142
+
143
+ // Returns true if the given string is equivalent to the truth string for
144
+ // the current word.
145
+ bool EquivalentToTruth(const char *str) const;
146
+
147
+ // Returns a null terminated UTF-8 encoded truth string for the current word.
148
+ // Use delete [] to free after use.
149
+ char *WordTruthUTF8Text() const;
150
+
151
+ // Returns a null terminated UTF-8 encoded normalized OCR string for the
152
+ // current word. Use delete [] to free after use.
153
+ char *WordNormedUTF8Text() const;
154
+
155
+ // Returns a pointer to serialized choice lattice.
156
+ // Fills lattice_size with the number of bytes in lattice data.
157
+ const char *WordLattice(int *lattice_size) const;
158
+
159
+ // ============= Functions that refer to symbols only ============.
160
+
161
+ // Returns true if the current symbol is a superscript.
162
+ // If iterating at a higher level object than symbols, eg words, then
163
+ // this will return the attributes of the first symbol in that word.
164
+ bool SymbolIsSuperscript() const;
165
+ // Returns true if the current symbol is a subscript.
166
+ // If iterating at a higher level object than symbols, eg words, then
167
+ // this will return the attributes of the first symbol in that word.
168
+ bool SymbolIsSubscript() const;
169
+ // Returns true if the current symbol is a dropcap.
170
+ // If iterating at a higher level object than symbols, eg words, then
171
+ // this will return the attributes of the first symbol in that word.
172
+ bool SymbolIsDropcap() const;
173
+
174
+ protected:
175
+ const char *line_separator_;
176
+ const char *paragraph_separator_;
177
+ };
178
+
179
+ // Class to iterate over the classifier choices for a single RIL_SYMBOL.
180
+ class TESS_API ChoiceIterator {
181
+ public:
182
+ // Construction is from a LTRResultIterator that points to the symbol of
183
+ // interest. The ChoiceIterator allows a one-shot iteration over the
184
+ // choices for this symbol and after that it is useless.
185
+ explicit ChoiceIterator(const LTRResultIterator &result_it);
186
+ ~ChoiceIterator();
187
+
188
+ // Moves to the next choice for the symbol and returns false if there
189
+ // are none left.
190
+ bool Next();
191
+
192
+ // ============= Accessing data ==============.
193
+
194
+ // Returns the null terminated UTF-8 encoded text string for the current
195
+ // choice.
196
+ // NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an
197
+ // internal structure and should NOT be delete[]ed to free after use.
198
+ const char *GetUTF8Text() const;
199
+
200
+ // Returns the confidence of the current choice depending on the used language
201
+ // data. If only LSTM traineddata is used the value range is 0.0f - 1.0f. All
202
+ // choices for one symbol should roughly add up to 1.0f.
203
+ // If only traineddata of the legacy engine is used, the number should be
204
+ // interpreted as a percent probability. (0.0f-100.0f) In this case
205
+ // probabilities won't add up to 100. Each one stands on its own.
206
+ float Confidence() const;
207
+
208
+ // Returns a vector containing all timesteps, which belong to the currently
209
+ // selected symbol. A timestep is a vector containing pairs of symbols and
210
+ // floating point numbers. The number states the probability for the
211
+ // corresponding symbol.
212
+ std::vector<std::vector<std::pair<const char *, float>>> *Timesteps() const;
213
+
214
+ private:
215
+ // clears the remaining spaces out of the results and adapt the probabilities
216
+ void filterSpaces();
217
+ // Pointer to the WERD_RES object owned by the API.
218
+ WERD_RES *word_res_;
219
+ // Iterator over the blob choices.
220
+ BLOB_CHOICE_IT *choice_it_;
221
+ std::vector<std::pair<const char *, float>> *LSTM_choices_ = nullptr;
222
+ std::vector<std::pair<const char *, float>>::iterator LSTM_choice_it_;
223
+
224
+ const int *tstep_index_;
225
+ // regulates the rating granularity
226
+ double rating_coefficient_;
227
+ // leading blanks
228
+ int blanks_before_word_;
229
+ // true when there is lstm engine related trained data
230
+ bool oemLSTM_;
231
+ };
232
+
233
+ } // namespace tesseract.
234
+
235
+ #endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H_
@@ -0,0 +1,158 @@
1
+ // SPDX-License-Identifier: Apache-2.0
2
+ /**********************************************************************
3
+ * File: ocrclass.h
4
+ * Description: Class definitions and constants for the OCR API.
5
+ * Author: Hewlett-Packard Co
6
+ *
7
+ * (C) Copyright 1996, Hewlett-Packard Co.
8
+ ** Licensed under the Apache License, Version 2.0 (the "License");
9
+ ** you may not use this file except in compliance with the License.
10
+ ** You may obtain a copy of the License at
11
+ ** http://www.apache.org/licenses/LICENSE-2.0
12
+ ** Unless required by applicable law or agreed to in writing, software
13
+ ** distributed under the License is distributed on an "AS IS" BASIS,
14
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ ** See the License for the specific language governing permissions and
16
+ ** limitations under the License.
17
+ *
18
+ **********************************************************************/
19
+
20
+ /**********************************************************************
21
+ * This file contains typedefs for all the structures used by
22
+ * the HP OCR interface.
23
+ * The structures are designed to allow them to be used with any
24
+ * structure alignment up to 8.
25
+ **********************************************************************/
26
+
27
+ #ifndef CCUTIL_OCRCLASS_H_
28
+ #define CCUTIL_OCRCLASS_H_
29
+
30
#include <chrono>
#include <cstdint>
#include <ctime>
32
+
33
+ namespace tesseract {
34
+
35
+ /**********************************************************************
36
+ * EANYCODE_CHAR
37
+ * Description of a single character. The character code is defined by
38
+ * the character set of the current font.
39
+ * Output text is sent as an array of these structures.
40
+ * Spaces and line endings in the output are represented in the
41
+ * structures of the surrounding characters. They are not directly
42
+ * represented as characters.
43
+ * The first character in a word has a positive value of blanks.
44
+ * Missing information should be set to the defaults in the comments.
45
+ * If word bounds are known, but not character bounds, then the top and
46
+ * bottom of each character should be those of the word. The left of the
47
+ * first and right of the last char in each word should be set. All other
48
+ * lefts and rights should be set to -1.
49
+ * If set, the values of right and bottom are left+width and top+height.
50
+ * Most of the members come directly from the parameters to ocr_append_char.
51
+ * The formatting member uses the enhancement parameter and combines the
52
+ * line direction stuff into the top 3 bits.
53
+ * The coding is 0=RL char, 1=LR char, 2=DR NL, 3=UL NL, 4=DR Para,
54
+ * 5=UL Para, 6=TB char, 7=BT char. API users do not need to know what
55
+ * the coding is, only that it is backwards compatible with the previous
56
+ * version.
57
+ **********************************************************************/
58
+
59
struct EANYCODE_CHAR { // single character
  // From format version 2.0 onwards char_code is UTF-8. An ASCII character
  // comes out as one structure; any other character is returned as two or
  // more instances of this structure, each holding a single byte of the UTF-8
  // code and all sharing the same bounding box. Programs that want to handle
  // languages with different character sets need to handle extended
  // characters appropriately, but *all* code must be prepared to receive
  // UTF-8 coded characters for characters such as bullet and fancy quotes.
  //
  // The value in parentheses after each member is the default to use when the
  // information is missing.
  uint16_t char_code; ///< character itself
  int16_t left;       ///< left of char (-1)
  int16_t right;      ///< right of char (-1)
  int16_t top;        ///< top of char (-1)
  int16_t bottom;     ///< bottom of char (-1)
  int16_t font_index; ///< what font (0)
  uint8_t confidence; ///< 0=perfect, 100=reject (0/100)
  uint8_t point_size; ///< of char, 72 = 1 inch (10)
  int8_t blanks;      ///< number of spaces before this char (1)
  uint8_t formatting; ///< char formatting (0)
};

/**********************************************************************
 * ETEXT_DESC
 * Description of the output of the OCR engine.
 * This structure is used as both a progress monitor and the final
 * output header, since it needs to be a valid progress monitor while
 * the OCR engine is storing its output to shared memory.
 * During progress, all the buffer info is -1.
 * Progress starts at 0 and increases to 100 during OCR. No other constraint.
 * Additionally the progress callback contains the bounding box of the word
 * that is currently being processed.
 * Every progress callback, the OCR engine must set ocr_alive to 1.
 * The HP side will set ocr_alive to 0. Repeated failure to reset
 * to 1 indicates that the OCR engine is dead.
 * If the cancel function is not null then it is called with the number of
 * user words found. If it returns true then operation is cancelled.
 **********************************************************************/
class ETEXT_DESC;

// Callback signatures stored in ETEXT_DESC.
using CANCEL_FUNC = bool (*)(void *, int);
using PROGRESS_FUNC = bool (*)(int, int, int, int, int);
using PROGRESS_FUNC2 = bool (*)(ETEXT_DESC *, int, int, int, int);

class ETEXT_DESC { // output header
public:
  int16_t count{0}; ///< chars in this buffer (0)
  /// Percent complete, increasing (0-100).
  /// The progress monitor covers word recognition and it does not cover
  /// layout analysis.
  /// See Ray comment in https://github.com/tesseract-ocr/tesseract/pull/27
  int16_t progress{0};
  int8_t more_to_come{0};       ///< true if not last
  volatile int8_t ocr_alive{0}; ///< ocr sets to 1, HP 0
  int8_t err_code{0};           ///< for errcode use
  CANCEL_FUNC cancel{nullptr};  ///< returns true to cancel
  PROGRESS_FUNC progress_callback{
      nullptr};                      ///< called whenever progress increases
  PROGRESS_FUNC2 progress_callback2; ///< monitor-aware progress callback
  void *cancel_this{nullptr};        ///< this or other data for cancel
  /// Time to stop. Expected to be set only by call to set_deadline_msecs().
  /// A value equal to the clock's epoch means "no deadline".
  std::chrono::steady_clock::time_point end_time;
  EANYCODE_CHAR text[1]{}; ///< character data

  /// Installs the default monitor-aware callback, which forwards to
  /// progress_callback, and leaves end_time at the clock's epoch so that no
  /// deadline is in force.
  ETEXT_DESC() : progress_callback2(&default_progress_func), end_time() {}

  /// Sets the end time to be deadline_msecs milliseconds from now.
  /// Values that are zero or negative leave end_time untouched.
  void set_deadline_msecs(int32_t deadline_msecs) {
    if (deadline_msecs > 0) {
      end_time = std::chrono::steady_clock::now() +
                 std::chrono::milliseconds(deadline_msecs);
    }
  }

  /// Returns false if we've not passed the end_time, or have not set a
  /// deadline.
  bool deadline_exceeded() const {
    if (end_time.time_since_epoch() ==
        std::chrono::steady_clock::duration::zero()) {
      return false; // end_time still at the epoch: no deadline was set
    }
    auto now = std::chrono::steady_clock::now();
    return (now > end_time);
  }

private:
  /// Default value of progress_callback2: forwards the monitor's current
  /// progress and the given box to progress_callback and returns its result.
  /// Returns true when there is nothing to forward to, i.e. when ths is null
  /// or no progress_callback has been installed.
  static bool default_progress_func(ETEXT_DESC *ths, int left, int right,
                                    int top, int bottom) {
    if (ths == nullptr || ths->progress_callback == nullptr) {
      return true;
    }
    return (*(ths->progress_callback))(ths->progress, left, right, top,
                                       bottom);
  }
};
155
+
156
+ } // namespace tesseract
157
+
158
+ #endif // CCUTIL_OCRCLASS_H_
@@ -0,0 +1,139 @@
1
+ // SPDX-License-Identifier: Apache-2.0
2
+ // File: osdetect.h
3
+ // Description: Orientation and script detection.
4
+ // Author: Samuel Charron
5
+ // Ranjith Unnikrishnan
6
+ //
7
+ // (C) Copyright 2008, Google Inc.
8
+ // Licensed under the Apache License, Version 2.0 (the "License");
9
+ // you may not use this file except in compliance with the License.
10
+ // You may obtain a copy of the License at
11
+ // http://www.apache.org/licenses/LICENSE-2.0
12
+ // Unless required by applicable law or agreed to in writing, software
13
+ // distributed under the License is distributed on an "AS IS" BASIS,
14
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
+ // See the License for the specific language governing permissions and
16
+ // limitations under the License.
17
+
18
+ #ifndef TESSERACT_CCMAIN_OSDETECT_H_
19
+ #define TESSERACT_CCMAIN_OSDETECT_H_
20
+
21
+ #include "export.h" // for TESS_API
22
+
23
+ #include <vector> // for std::vector
24
+
25
+ namespace tesseract {
26
+
27
+ class BLOBNBOX;
28
+ class BLOBNBOX_CLIST;
29
+ class BLOB_CHOICE_LIST;
30
+ class TO_BLOCK_LIST;
31
+ class UNICHARSET;
32
+
33
+ class Tesseract;
34
+
35
// Upper bound on the number of scripts: the scripts in ICU, plus "NULL", plus
// Japanese and Korean, plus Fraktur.
constexpr int kMaxNumberOfScripts = 116 + 1 + 2 + 1;
37
+
38
// Record of two ids and two confidences; every field starts at zero.
struct OSBestResult {
  OSBestResult() {}

  int orientation_id = 0;
  int script_id = 0;
  float sconfidence = 0.0f;
  float oconfidence = 0.0f;
};
46
+
47
+ struct OSResults {
48
+ OSResults() : unicharset(nullptr) {
49
+ for (int i = 0; i < 4; ++i) {
50
+ for (int j = 0; j < kMaxNumberOfScripts; ++j) {
51
+ scripts_na[i][j] = 0;
52
+ }
53
+ orientations[i] = 0;
54
+ }
55
+ }
56
+ void update_best_orientation();
57
+ // Set the estimate of the orientation to the given id.
58
+ void set_best_orientation(int orientation_id);
59
+ // Update/Compute the best estimate of the script assuming the given
60
+ // orientation id.
61
+ void update_best_script(int orientation_id);
62
+ // Return the index of the script with the highest score for this orientation.
63
+ TESS_API int get_best_script(int orientation_id) const;
64
+ // Accumulate scores with given OSResults instance and update the best script.
65
+ void accumulate(const OSResults &osr);
66
+
67
+ // Print statistics.
68
+ void print_scores(void) const;
69
+ void print_scores(int orientation_id) const;
70
+
71
+ // Array holding scores for each orientation id [0,3].
72
+ // Orientation ids [0..3] map to [0, 270, 180, 90] degree orientations of the
73
+ // page respectively, where the values refer to the amount of clockwise
74
+ // rotation to be applied to the page for the text to be upright and readable.
75
+ float orientations[4];
76
+ // Script confidence scores for each of 4 possible orientations.
77
+ float scripts_na[4][kMaxNumberOfScripts];
78
+
79
+ UNICHARSET *unicharset;
80
+ OSBestResult best_result;
81
+ };
82
+
83
+ class OrientationDetector {
84
+ public:
85
+ OrientationDetector(const std::vector<int> *allowed_scripts,
86
+ OSResults *results);
87
+ bool detect_blob(BLOB_CHOICE_LIST *scores);
88
+ int get_orientation();
89
+
90
+ private:
91
+ OSResults *osr_;
92
+ const std::vector<int> *allowed_scripts_;
93
+ };
94
+
95
+ class ScriptDetector {
96
+ public:
97
+ ScriptDetector(const std::vector<int> *allowed_scripts, OSResults *osr,
98
+ tesseract::Tesseract *tess);
99
+ void detect_blob(BLOB_CHOICE_LIST *scores);
100
+ bool must_stop(int orientation) const;
101
+
102
+ private:
103
+ OSResults *osr_;
104
+ static const char *korean_script_;
105
+ static const char *japanese_script_;
106
+ static const char *fraktur_script_;
107
+ int korean_id_;
108
+ int japanese_id_;
109
+ int katakana_id_;
110
+ int hiragana_id_;
111
+ int han_id_;
112
+ int hangul_id_;
113
+ int latin_id_;
114
+ int fraktur_id_;
115
+ tesseract::Tesseract *tess_;
116
+ const std::vector<int> *allowed_scripts_;
117
+ };
118
+
119
+ int orientation_and_script_detection(const char *filename, OSResults *,
120
+ tesseract::Tesseract *);
121
+
122
+ int os_detect(TO_BLOCK_LIST *port_blocks, OSResults *osr,
123
+ tesseract::Tesseract *tess);
124
+
125
+ int os_detect_blobs(const std::vector<int> *allowed_scripts,
126
+ BLOBNBOX_CLIST *blob_list, OSResults *osr,
127
+ tesseract::Tesseract *tess);
128
+
129
+ bool os_detect_blob(BLOBNBOX *bbox, OrientationDetector *o, ScriptDetector *s,
130
+ OSResults *, tesseract::Tesseract *tess);
131
+
132
+ // Helper method to convert an orientation index to its value in degrees.
133
+ // The value represents the amount of clockwise rotation in degrees that must be
134
+ // applied for the text to be upright (readable).
135
+ TESS_API int OrientationIdToValue(const int &id);
136
+
137
+ } // namespace tesseract
138
+
139
+ #endif // TESSERACT_CCMAIN_OSDETECT_H_