node-native-win-utils 1.3.3 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,174 @@
1
+ // SPDX-License-Identifier: Apache-2.0
2
+ // File: unichar.h
3
+ // Description: Unicode character/ligature class.
4
+ // Author: Ray Smith
5
+ //
6
+ // (C) Copyright 2006, Google Inc.
7
+ // Licensed under the Apache License, Version 2.0 (the "License");
8
+ // you may not use this file except in compliance with the License.
9
+ // You may obtain a copy of the License at
10
+ // http://www.apache.org/licenses/LICENSE-2.0
11
+ // Unless required by applicable law or agreed to in writing, software
12
+ // distributed under the License is distributed on an "AS IS" BASIS,
13
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ // See the License for the specific language governing permissions and
15
+ // limitations under the License.
16
+
17
+ #ifndef TESSERACT_CCUTIL_UNICHAR_H_
18
+ #define TESSERACT_CCUTIL_UNICHAR_H_
19
+
20
+ #include "export.h"
21
+
22
+ #include <memory.h>
23
+ #include <cstring>
24
+ #include <string>
25
+ #include <vector>
26
+
27
+ namespace tesseract {
28
+
29
+ // Maximum number of characters that can be stored in a UNICHAR. Must be
30
+ // at least 4. Must not exceed 31 without changing the coding of length.
31
+ #define UNICHAR_LEN 30
32
+
33
+ // A UNICHAR_ID is the unique id of a unichar.
34
+ using UNICHAR_ID = int;
35
+
36
+ // A variable to indicate an invalid or uninitialized unichar id.
37
+ static const int INVALID_UNICHAR_ID = -1;
38
+ // A special unichar that corresponds to INVALID_UNICHAR_ID.
39
+ static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
40
+
41
// Classifies a piece of text by the directionality of the characters it
// contains.
enum StrongScriptDirection {
  // Text contains only neutral characters.
  DIR_NEUTRAL = 0,
  // Text contains no Right-to-Left characters.
  DIR_LEFT_TO_RIGHT = 1,
  // Text contains no Left-to-Right characters.
  DIR_RIGHT_TO_LEFT = 2,
  // Text contains a mixture of left-to-right and right-to-left characters.
  DIR_MIX = 3,
};
48
+
49
+ using char32 = signed int;
50
+
51
+ // The UNICHAR class holds a single classification result. This may be
52
+ // a single Unicode character (stored as between 1 and 4 utf8 bytes) or
53
+ // multiple Unicode characters representing the NFKC expansion of a ligature
54
+ // such as fi, ffl etc. These are also stored as utf8.
55
+ class TESS_API UNICHAR {
56
+ public:
57
+ UNICHAR() {
58
+ memset(chars, 0, UNICHAR_LEN);
59
+ }
60
+
61
+ // Construct from a utf8 string. If len<0 then the string is null terminated.
62
+ // If the string is too long to fit in the UNICHAR then it takes only what
63
+ // will fit.
64
+ UNICHAR(const char *utf8_str, int len);
65
+
66
+ // Construct from a single UCS4 character.
67
+ explicit UNICHAR(int unicode);
68
+
69
+ // Default copy constructor and operator= are OK.
70
+
71
+ // Get the first character as UCS-4.
72
+ int first_uni() const;
73
+
74
+ // Get the length of the UTF8 string.
75
+ int utf8_len() const {
76
+ int len = chars[UNICHAR_LEN - 1];
77
+ return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN;
78
+ }
79
+
80
+ // Get a UTF8 string, but NOT nullptr terminated.
81
+ const char *utf8() const {
82
+ return chars;
83
+ }
84
+
85
+ // Get a terminated UTF8 string: Must delete[] it after use.
86
+ char *utf8_str() const;
87
+
88
+ // Get the number of bytes in the first character of the given utf8 string.
89
+ static int utf8_step(const char *utf8_str);
90
+
91
+ // A class to simplify iterating over and accessing elements of a UTF8
92
+ // string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
93
+ // take ownership of the underlying byte array. It also does not permit
94
+ // modification of the array (as the name suggests).
95
+ //
96
+ // Example:
97
+ // for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
98
+ // it != UNICHAR::end(str, len);
99
+ // ++it) {
100
+ // printf("UCS-4 symbol code = %d\n", *it);
101
+ // char buf[5];
102
+ // int char_len = it.get_utf8(buf); buf[char_len] = '\0';
103
+ // printf("Char = %s\n", buf);
104
+ // }
105
+ class TESS_API const_iterator {
106
+ using CI = const_iterator;
107
+
108
+ public:
109
+ // Step to the next UTF8 character.
110
+ // If the current position is at an illegal UTF8 character, then print an
111
+ // error message and step by one byte. If the current position is at a
112
+ // nullptr value, don't step past it.
113
+ const_iterator &operator++();
114
+
115
+ // Return the UCS-4 value at the current position.
116
+ // If the current position is at an illegal UTF8 value, return a single
117
+ // space character.
118
+ int operator*() const;
119
+
120
+ // Store the UTF-8 encoding of the current codepoint into buf, which must be
121
+ // at least 4 bytes long. Return the number of bytes written.
122
+ // If the current position is at an illegal UTF8 value, writes a single
123
+ // space character and returns 1.
124
+ // Note that this method does not null-terminate the buffer.
125
+ int get_utf8(char *buf) const;
126
+ // Returns the number of bytes of the current codepoint. Returns 1 if the
127
+ // current position is at an illegal UTF8 value.
128
+ int utf8_len() const;
129
+ // Returns true if the UTF-8 encoding at the current position is legal.
130
+ bool is_legal() const;
131
+
132
+ // Return the pointer into the string at the current position.
133
+ const char *utf8_data() const {
134
+ return it_;
135
+ }
136
+
137
+ // Iterator equality operators.
138
+ friend bool operator==(const CI &lhs, const CI &rhs) {
139
+ return lhs.it_ == rhs.it_;
140
+ }
141
+ friend bool operator!=(const CI &lhs, const CI &rhs) {
142
+ return !(lhs == rhs);
143
+ }
144
+
145
+ private:
146
+ friend class UNICHAR;
147
+ explicit const_iterator(const char *it) : it_(it) {}
148
+
149
+ const char *it_; // Pointer into the string.
150
+ };
151
+
152
+ // Create a start/end iterator pointing to a string. Note that these methods
153
+ // are static and do NOT create a copy or take ownership of the underlying
154
+ // array.
155
+ static const_iterator begin(const char *utf8_str, int byte_length);
156
+ static const_iterator end(const char *utf8_str, int byte_length);
157
+
158
+ // Converts a utf-8 string to a vector of unicodes.
159
+ // Returns an empty vector if the input contains invalid UTF-8.
160
+ static std::vector<char32> UTF8ToUTF32(const char *utf8_str);
161
+ // Converts a vector of unicodes to a utf8 string.
162
+ // Returns an empty string if the input contains an invalid unicode.
163
+ static std::string UTF32ToUTF8(const std::vector<char32> &str32);
164
+
165
+ private:
166
+ // A UTF-8 representation of 1 or more Unicode characters.
167
+ // The last element (chars[UNICHAR_LEN - 1]) is a length if
168
+ // its value < UNICHAR_LEN, otherwise it is a genuine character.
169
+ char chars[UNICHAR_LEN]{};
170
+ };
171
+
172
+ } // namespace tesseract
173
+
174
+ #endif // TESSERACT_CCUTIL_UNICHAR_H_
@@ -0,0 +1,34 @@
1
+ // SPDX-License-Identifier: Apache-2.0
2
+ // File: version.h
3
+ // Description: Version information
4
+ //
5
+ // (C) Copyright 2018, Google Inc.
6
+ // Licensed under the Apache License, Version 2.0 (the "License");
7
+ // you may not use this file except in compliance with the License.
8
+ // You may obtain a copy of the License at
9
+ // http://www.apache.org/licenses/LICENSE-2.0
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #ifndef TESSERACT_API_VERSION_H_
17
+ #define TESSERACT_API_VERSION_H_
18
+
19
+ // clang-format off
20
+
21
+ #define TESSERACT_MAJOR_VERSION 5
22
+ #define TESSERACT_MINOR_VERSION 5
23
+ #define TESSERACT_MICRO_VERSION 0
24
+
25
+ #define TESSERACT_VERSION \
26
+ (TESSERACT_MAJOR_VERSION << 16 | \
27
+ TESSERACT_MINOR_VERSION << 8 | \
28
+ TESSERACT_MICRO_VERSION)
29
+
30
+ #define TESSERACT_VERSION_STR "5.5.0-48-gf96c"
31
+
32
+ // clang-format on
33
+
34
+ #endif // TESSERACT_API_VERSION_H_
@@ -0,0 +1,34 @@
1
+ // SPDX-License-Identifier: Apache-2.0
2
+ // File: version.h
3
+ // Description: Version information
4
+ //
5
+ // (C) Copyright 2018, Google Inc.
6
+ // Licensed under the Apache License, Version 2.0 (the "License");
7
+ // you may not use this file except in compliance with the License.
8
+ // You may obtain a copy of the License at
9
+ // http://www.apache.org/licenses/LICENSE-2.0
10
+ // Unless required by applicable law or agreed to in writing, software
11
+ // distributed under the License is distributed on an "AS IS" BASIS,
12
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ // See the License for the specific language governing permissions and
14
+ // limitations under the License.
15
+
16
+ #ifndef TESSERACT_API_VERSION_H_
17
+ #define TESSERACT_API_VERSION_H_
18
+
19
+ // clang-format off
20
+
21
+ #define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
22
+ #define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
23
+ #define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
24
+
25
+ #define TESSERACT_VERSION \
26
+ (TESSERACT_MAJOR_VERSION << 16 | \
27
+ TESSERACT_MINOR_VERSION << 8 | \
28
+ TESSERACT_MICRO_VERSION)
29
+
30
+ #define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
31
+
32
+ // clang-format on
33
+
34
+ #endif // TESSERACT_API_VERSION_H_
Binary file
package/package.json CHANGED
@@ -1,9 +1,9 @@
1
1
  {
2
2
  "name": "node-native-win-utils",
3
- "version": "1.3.3",
4
- "author": "RynerNO",
3
+ "version": "1.4.0",
4
+ "author": "Andrew K.",
5
5
  "license": "MIT",
6
- "repository": "https://github.com/RynerNO/node-native-win-utils.git",
6
+ "repository": "https://github.com/T-Rumibul/node-native-win-utils.git",
7
7
  "description": "Native addon for Node.js providing utility operations on Windows systems",
8
8
  "keywords": [
9
9
  "node.js",
@@ -25,14 +25,16 @@
25
25
  "template matching",
26
26
  "image",
27
27
  "blur",
28
- "grayscale"
28
+ "grayscale",
29
+ "text recognition",
30
+ "OCR"
29
31
  ],
30
32
  "main": "dist/index.js",
31
33
  "types": "dist/index.d.ts",
32
34
  "gypfile": true,
33
35
  "scripts": {
34
36
  "install": "node-gyp-build",
35
- "build": "node-gyp clean && prebuildify --napi && tsc",
37
+ "build": "node-gyp clean && npx prebuildify --napi && npx tsc && node ./dllCopy.js",
36
38
  "test": "jest"
37
39
  },
38
40
  "dependencies": {
@@ -42,6 +44,7 @@
42
44
  },
43
45
  "devDependencies": {
44
46
  "@types/node": "^20.2.5",
45
- "jest": "^29.7.0"
47
+ "jest": "^29.7.0",
48
+ "node-api-headers": "^1.5.0"
46
49
  }
47
50
  }
Binary file
package/src/cpp/main.cpp CHANGED
@@ -5,6 +5,7 @@
5
5
  #include <keyboard.cpp>
6
6
  #include <mouse.cpp>
7
7
  #include <opencv.cpp>
8
+ #include <tesseract.cpp>
8
9
 
9
10
  Napi::Object Init(Napi::Env env, Napi::Object exports)
10
11
  {
@@ -24,6 +25,7 @@ Napi::Object Init(Napi::Env env, Napi::Object exports)
24
25
  exports.Set("bgrToGray", Napi::Function::New(env, BgrToGray));
25
26
  exports.Set("drawRectangle", Napi::Function::New(env, DrawRectangle));
26
27
  exports.Set("getRegion", Napi::Function::New(env, GetRegion));
28
+ exports.Set("textRecognition", Napi::Function::New(env, TextRecognition));
27
29
  return exports;
28
30
  }
29
31
 
@@ -0,0 +1,49 @@
1
+ #include "tesseract/tesseract.h"
2
+ #include <napi.h>
3
+ #include <opencv2/core.hpp>
4
+ #include <opencv2/imgcodecs.hpp>
5
+ #include <opencv2/imgproc.hpp>
6
+
7
+ Napi::Value TextRecognition(const Napi::CallbackInfo &info)
8
+ {
9
+ Napi::Env env = info.Env();
10
+ if (info.Length() < 3 || !info[0].IsString() || !info[1].IsString() || !info[2].IsString())
11
+ {
12
+ Napi::TypeError::New(env, "Missing argument or wront type").ThrowAsJavaScriptException();
13
+ return env.Null();
14
+ }
15
+ std::string trainedDataPath = info[0].ToString().Utf8Value();
16
+ std::string dataLang = info[1].ToString().Utf8Value();
17
+ std::string imagePath = info[2].ToString().Utf8Value();
18
+
19
+ cv::Mat im = cv::imread(imagePath, cv::IMREAD_COLOR);
20
+
21
+ // Create an instance of Tesseract API
22
+ tesseract::TessBaseAPI *tess = Tesseract_Create();
23
+
24
+ // Initialize with language and data path
25
+ if (Tesseract_Init(tess, trainedDataPath.c_str(), dataLang.c_str()) != 0)
26
+ {
27
+ Napi::TypeError::New(env, "Could not initialize Tesseract!").ThrowAsJavaScriptException();
28
+ return env.Null();
29
+ }
30
+
31
+ // Set OCR mode
32
+ Tesseract_SetPageSegMode(tess, 3);
33
+
34
+ // Do OCR processing
35
+ Tesseract_SetImage(tess, im.data, im.cols, im.rows, 3, im.step);
36
+
37
+ // Get OCR result
38
+ int len;
39
+ const char *text = Tesseract_GetUTF8Text(tess, &len);
40
+ std::string outText = text;
41
+
42
+ // Free text memory
43
+ Tesseract_FreeUTF8Text((char *&)text);
44
+
45
+ // Clean up
46
+ Tesseract_End(tess);
47
+ Tesseract_Delete(tess);
48
+ return Napi::String::New(env, outText);
49
+ }