npm - node-native-win-utils - Versions diffs - 1.3.3 → 1.4.0 - Mend

node-native-win-utils 1.3.3 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/LICENSE +1 -1
package/README.md +3 -6
package/binding.gyp +1 -0
package/dist/index.d.ts +3 -2
package/dist/index.js +5 -4
package/dllCopy.js +14 -0
package/include/tesseract/baseapi.h +820 -0
package/include/tesseract/capi.h +485 -0
package/include/tesseract/export.h +37 -0
package/include/tesseract/ltrresultiterator.h +235 -0
package/include/tesseract/ocrclass.h +158 -0
package/include/tesseract/osdetect.h +139 -0
package/include/tesseract/pageiterator.h +364 -0
package/include/tesseract/publictypes.h +281 -0
package/include/tesseract/renderer.h +334 -0
package/include/tesseract/resultiterator.h +250 -0
package/include/tesseract/tesseract.h +40 -0
package/include/tesseract/unichar.h +174 -0
package/include/tesseract/version.h +34 -0
package/include/tesseract/version.h.in +34 -0
package/libs/tesseract.lib +0 -0
package/package.json +9 -6
package/prebuilds/win32-x64/node-native-win-utils.node +0 -0
package/prebuilds/win32-x64/tesseract.dll +0 -0
package/prebuilds/win32-x64/tiff.dll +0 -0
package/src/cpp/main.cpp +2 -0
package/src/cpp/tesseract.cpp +49 -0

package/include/tesseract/unichar.h ADDED Viewed

@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: Apache-2.0
+// File:        unichar.h
+// Description: Unicode character/ligature class.
+// Author:      Ray Smith
+//
+// (C) Copyright 2006, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef TESSERACT_CCUTIL_UNICHAR_H_
+#define TESSERACT_CCUTIL_UNICHAR_H_
+#include "export.h"
+#include <memory.h>
+#include <cstring>
+#include <string>
+#include <vector>
+namespace tesseract {
+// Maximum number of characters that can be stored in a UNICHAR. Must be
+// at least 4. Must not exceed 31 without changing the coding of length.
+#define UNICHAR_LEN 30
+// A UNICHAR_ID is the unique id of a unichar.
+using UNICHAR_ID = int;
+// A variable to indicate an invalid or uninitialized unichar id.
+static const int INVALID_UNICHAR_ID = -1;
+// A special unichar that corresponds to INVALID_UNICHAR_ID.
+static const char INVALID_UNICHAR[] = "__INVALID_UNICHAR__";
+enum StrongScriptDirection {
+  DIR_NEUTRAL = 0,       // Text contains only neutral characters.
+  DIR_LEFT_TO_RIGHT = 1, // Text contains no Right-to-Left characters.
+  DIR_RIGHT_TO_LEFT = 2, // Text contains no Left-to-Right characters.
+  DIR_MIX = 3,           // Text contains a mixture of left-to-right
+                         // and right-to-left characters.
+};
+using char32 = signed int;
+// The UNICHAR class holds a single classification result. This may be
+// a single Unicode character (stored as between 1 and 4 utf8 bytes) or
+// multiple Unicode characters representing the NFKC expansion of a ligature
+// such as fi, ffl etc. These are also stored as utf8.
+class TESS_API UNICHAR {
+public:
+  UNICHAR() {
+    memset(chars, 0, UNICHAR_LEN); // Zero every byte, including the trailing length byte.
+  }
+  // Construct from a utf8 string. If len<0 then the string is null terminated.
+  // If the string is too long to fit in the UNICHAR then it takes only what
+  // will fit.
+  UNICHAR(const char *utf8_str, int len);
+  // Construct from a single UCS4 character.
+  explicit UNICHAR(int unicode);
+  // Default copy constructor and operator= are OK.
+  // Get the first character as UCS-4.
+  int first_uni() const;
+  // Get the length of the UTF8 string.
+  int utf8_len() const {
+    int len = chars[UNICHAR_LEN - 1]; // The last byte doubles as the length field.
+    return len >= 0 && len < UNICHAR_LEN ? len : UNICHAR_LEN; // Out-of-range value: buffer is full.
+  }
+  // Get a UTF8 string, but NOT null-terminated; use utf8_len() for its length.
+  const char *utf8() const {
+    return chars;
+  }
+  // Get a terminated UTF8 string: Must delete[] it after use.
+  char *utf8_str() const;
+  // Get the number of bytes in the first character of the given utf8 string.
+  static int utf8_step(const char *utf8_str);
+  // A class to simplify iterating over and accessing elements of a UTF8
+  // string. Note that unlike the UNICHAR class, const_iterator does NOT COPY or
+  // take ownership of the underlying byte array. It also does not permit
+  // modification of the array (as the name suggests).
+  //
+  // Example:
+  //   for (UNICHAR::const_iterator it = UNICHAR::begin(str, str_len);
+  //        it != UNICHAR::end(str, str_len);
+  //        ++it) {
+  //     printf("UCS-4 symbol code = %d\n", *it);
+  //     char buf[5];
+  //     int char_len = it.get_utf8(buf); buf[char_len] = '\0';
+  //     printf("Char = %s\n", buf);
+  //   }
+  class TESS_API const_iterator {
+    using CI = const_iterator;
+  public:
+    // Step to the next UTF8 character.
+    // If the current position is at an illegal UTF8 character, then print an
+    // error message and step by one byte. If the current position is at a
+    // null (zero) byte, don't step past it.
+    const_iterator &operator++();
+    // Return the UCS-4 value at the current position.
+    // If the current position is at an illegal UTF8 value, return a single
+    // space character.
+    int operator*() const;
+    // Store the UTF-8 encoding of the current codepoint into buf, which must be
+    // at least 4 bytes long. Return the number of bytes written.
+    // If the current position is at an illegal UTF8 value, writes a single
+    // space character and returns 1.
+    // Note that this method does not null-terminate the buffer.
+    int get_utf8(char *buf) const;
+    // Returns the number of bytes of the current codepoint. Returns 1 if the
+    // current position is at an illegal UTF8 value.
+    int utf8_len() const;
+    // Returns true if the UTF-8 encoding at the current position is legal.
+    bool is_legal() const;
+    // Return the pointer into the string at the current position.
+    const char *utf8_data() const {
+      return it_;
+    }
+    // Iterator equality operators.
+    friend bool operator==(const CI &lhs, const CI &rhs) {
+      return lhs.it_ == rhs.it_;
+    }
+    friend bool operator!=(const CI &lhs, const CI &rhs) {
+      return !(lhs == rhs);
+    }
+  private:
+    friend class UNICHAR;
+    explicit const_iterator(const char *it) : it_(it) {}
+    const char *it_; // Pointer into the string.
+  };
+  // Create a start/end iterator pointing to a string. Note that these methods
+  // are static and do NOT create a copy or take ownership of the underlying
+  // array.
+  static const_iterator begin(const char *utf8_str, int byte_length);
+  static const_iterator end(const char *utf8_str, int byte_length);
+  // Converts a utf-8 string to a vector of unicodes.
+  // Returns an empty vector if the input contains invalid UTF-8.
+  static std::vector<char32> UTF8ToUTF32(const char *utf8_str);
+  // Converts a vector of unicodes to a utf8 string.
+  // Returns an empty string if the input contains an invalid unicode.
+  static std::string UTF32ToUTF8(const std::vector<char32> &str32);
+private:
+  // A UTF-8 representation of 1 or more Unicode characters.
+  // The last element (chars[UNICHAR_LEN - 1]) is a length if
+  // its value < UNICHAR_LEN, otherwise it is a genuine character.
+  char chars[UNICHAR_LEN]{};
+};
+} // namespace tesseract
+#endif // TESSERACT_CCUTIL_UNICHAR_H_

package/include/tesseract/version.h ADDED Viewed

@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: Apache-2.0
+// File:        version.h
+// Description: Version information
+//
+// (C) Copyright 2018, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef TESSERACT_API_VERSION_H_
+#define TESSERACT_API_VERSION_H_
+// clang-format off
+#define TESSERACT_MAJOR_VERSION 5
+#define TESSERACT_MINOR_VERSION 5
+#define TESSERACT_MICRO_VERSION 0
+#define TESSERACT_VERSION          \
+  (TESSERACT_MAJOR_VERSION << 16 | \
+   TESSERACT_MINOR_VERSION <<  8 | \
+   TESSERACT_MICRO_VERSION)
+#define TESSERACT_VERSION_STR "5.5.0-48-gf96c"
+// clang-format on
+#endif // TESSERACT_API_VERSION_H_

package/include/tesseract/version.h.in ADDED Viewed

@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: Apache-2.0
+// File:        version.h
+// Description: Version information
+//
+// (C) Copyright 2018, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#ifndef TESSERACT_API_VERSION_H_
+#define TESSERACT_API_VERSION_H_
+// clang-format off
+#define TESSERACT_MAJOR_VERSION @GENERIC_MAJOR_VERSION@
+#define TESSERACT_MINOR_VERSION @GENERIC_MINOR_VERSION@
+#define TESSERACT_MICRO_VERSION @GENERIC_MICRO_VERSION@
+#define TESSERACT_VERSION          \
+  (TESSERACT_MAJOR_VERSION << 16 | \
+   TESSERACT_MINOR_VERSION <<  8 | \
+   TESSERACT_MICRO_VERSION)
+#define TESSERACT_VERSION_STR "@PACKAGE_VERSION@"
+// clang-format on
+#endif // TESSERACT_API_VERSION_H_

package/libs/tesseract.lib ADDED Viewed

Binary file

package/package.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "name": "node-native-win-utils",
-  "version": "1.3.3",
-  "author": "RynerNO",
+  "version": "1.4.0",
+  "author": "Andrew K.",
   "license": "MIT",
-  "repository": "https://github.com/RynerNO/node-native-win-utils.git",
+  "repository": "https://github.com/T-Rumibul/node-native-win-utils.git",
   "description": "Native addon for Node.js providing utility operations on Windows systems",
   "keywords": [
     "node.js",
@@ -25,14 +25,16 @@
     "template matching",
     "image",
     "blur",
-    "grayscale"
+    "grayscale",
+    "text recognition",
+    "OCR"
   ],
   "main": "dist/index.js",
   "types": "dist/index.d.ts",
   "gypfile": true,
   "scripts": {
     "install": "node-gyp-build",
-    "build": "node-gyp clean && prebuildify --napi && tsc",
+    "build": "node-gyp clean && npx prebuildify --napi && npx tsc && node ./dllCopy.js",
     "test": "jest"
   },
   "dependencies": {
@@ -42,6 +44,7 @@
   },
   "devDependencies": {
     "@types/node": "^20.2.5",
-    "jest": "^29.7.0"
+    "jest": "^29.7.0",
+    "node-api-headers": "^1.5.0"
   }
 }

package/prebuilds/win32-x64/node-native-win-utils.node CHANGED Viewed

Binary file

package/prebuilds/win32-x64/tesseract.dll ADDED Viewed

Binary file

package/prebuilds/win32-x64/tiff.dll ADDED Viewed

Binary file

package/src/cpp/main.cpp CHANGED Viewed

@@ -5,6 +5,7 @@
 #include <keyboard.cpp>
 #include <mouse.cpp>
 #include <opencv.cpp>
+#include <tesseract.cpp>
 Napi::Object Init(Napi::Env env, Napi::Object exports)
 {
@@ -24,6 +25,7 @@ Napi::Object Init(Napi::Env env, Napi::Object exports)
     exports.Set("bgrToGray", Napi::Function::New(env, BgrToGray));
     exports.Set("drawRectangle", Napi::Function::New(env, DrawRectangle));
     exports.Set("getRegion", Napi::Function::New(env, GetRegion));
+    exports.Set("textRecognition", Napi::Function::New(env, TextRecognition));
     return exports;
 }

package/src/cpp/tesseract.cpp ADDED Viewed

@@ -0,0 +1,49 @@
+#include "tesseract/tesseract.h"
+#include <napi.h>
+#include <opencv2/core.hpp>
+#include <opencv2/imgcodecs.hpp>
+#include <opencv2/imgproc.hpp>
+/**
+ * Runs Tesseract OCR on an image file and returns the recognized text.
+ *
+ * JavaScript arguments (all must be strings):
+ *   info[0] - path passed to Tesseract_Init as the trained-data location
+ *   info[1] - language identifier passed to Tesseract_Init
+ *   info[2] - path of the image file, loaded with cv::imread
+ *
+ * Returns a JavaScript string holding the recognized UTF-8 text. On any
+ * failure a JavaScript exception is scheduled and null is returned:
+ *   - TypeError when an argument is missing or not a string
+ *   - Error when the image cannot be read or recognition yields no text
+ *   - TypeError when Tesseract cannot be initialized
+ */
+Napi::Value TextRecognition(const Napi::CallbackInfo &info)
+{
+    Napi::Env env = info.Env();
+    if (info.Length() < 3 || !info[0].IsString() || !info[1].IsString() || !info[2].IsString())
+    {
+        Napi::TypeError::New(env, "Missing argument or wrong type").ThrowAsJavaScriptException();
+        return env.Null();
+    }
+    const std::string trainedDataPath = info[0].ToString().Utf8Value();
+    const std::string dataLang = info[1].ToString().Utf8Value();
+    const std::string imagePath = info[2].ToString().Utf8Value();
+    // Load the image first: cv::imread returns an empty matrix when the file is
+    // missing or cannot be decoded, and its data pointer must not reach Tesseract.
+    cv::Mat im = cv::imread(imagePath, cv::IMREAD_COLOR);
+    if (im.empty())
+    {
+        Napi::Error::New(env, "Could not read image: " + imagePath).ThrowAsJavaScriptException();
+        return env.Null();
+    }
+    // Create an instance of Tesseract API
+    tesseract::TessBaseAPI *tess = Tesseract_Create();
+    // Initialize with language and data path
+    if (Tesseract_Init(tess, trainedDataPath.c_str(), dataLang.c_str()) != 0)
+    {
+        // Release the API object before bailing out so a failed init does not leak it.
+        Tesseract_Delete(tess);
+        Napi::TypeError::New(env, "Could not initialize Tesseract!").ThrowAsJavaScriptException();
+        return env.Null();
+    }
+    // Set OCR mode
+    Tesseract_SetPageSegMode(tess, 3);
+    // Hand the pixel buffer to Tesseract; IMREAD_COLOR always yields 3 bytes per pixel.
+    Tesseract_SetImage(tess, im.data, im.cols, im.rows, 3, im.step);
+    // Get OCR result
+    int len = 0;
+    const char *text = Tesseract_GetUTF8Text(tess, &len);
+    // Guard against a null result: building a std::string from nullptr is undefined.
+    const bool recognized = (text != nullptr);
+    std::string outText;
+    if (recognized)
+    {
+        outText = text;
+        // Free text memory
+        Tesseract_FreeUTF8Text(const_cast<char *&>(text));
+    }
+    // Clean up
+    Tesseract_End(tess);
+    Tesseract_Delete(tess);
+    if (!recognized)
+    {
+        Napi::Error::New(env, "Text recognition failed").ThrowAsJavaScriptException();
+        return env.Null();
+    }
+    return Napi::String::New(env, outText);
+}