RubyGems - ocr-file - Versions diffs - 0.0.7 → 0.0.10 - Mend

ocr-file 0.0.7 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

checksums.yaml +4 -4
data/Gemfile.lock +1 -1
data/README.md +5 -3
data/lib/ocr-file/document.rb +2 -1
data/lib/ocr-file/image_engines/image_magick.rb +12 -1
data/lib/ocr-file/image_engines/pdf_engine.rb +20 -3
data/lib/ocr-file/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 8b87806d21622a72c6166c35fe4367f5b07135e5e7fab4e8be8b8941f75439dc
-  data.tar.gz: d342a91e9b23f8677784553327ba1cc1c00e1599415512b28226f8e9f6bc55b4
+  metadata.gz: e77eefa085a14282b42584bd4bf6796a99e10589552b52858cb8f5dd75c84b97
+  data.tar.gz: 9fde9adae0c252ecc56937a314c676903a6b2a6ababe51030cbcd9ab3ee1ba81
 SHA512:
-  metadata.gz: ecadeeb21a358274bce4ed3d7fce66e53d31ff3abe940ff1b9d77893f12b73bfd41e9ac35324e3a98f004638f9d1906760ef962a3637fbaf48973faeec9a17cb
-  data.tar.gz: 5d4a149dd6d0da1feb723b08c327edab414b75f0b633cea53aaee00d43313d26b84659956957acec7550a822998b76a760b3888770a606d8b4a1f9bb14f807c2
+  metadata.gz: f6c9cf596d6a78ccea7e1fb45543826e1b95dab74449700eb0f0bed4bace802fbc15fba118a982fbf7daed9ebb188e417876f1e6a9cb6f620eb3630a4aaed7af
+  data.tar.gz: 9aab569d476170d8c7b405f65a1629d8ec789f008f734a7cc7f49c5716c89c7a7b5010ae3096ed7e4b3358fe650c221da658801a893cd76f6183f23b19696349

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    ocr-file (0.0.6)
+    ocr-file (0.0.9)
       active_attr (~> 0.15.4)
       console-style (~> 0.0.1)
       hexapdf (~> 0.23.0)

data/README.md CHANGED Viewed

@@ -45,6 +45,7 @@ You will need to install `tesseract` with your desired language on your system,
     image_preprocess: true,
     effects: ['despeckle', 'deskew', 'enhance', 'sharpen', 'remove_shadow', 'bw'], # Applies effects as listed. 'norm' is also available
     automatic_reprocess: true, # Will possibly do double + the operations but can produce better results automatically
+    dimensions: [width, height], # Can be nil but will lock the images
     # PDF to Image Processing
     optimise_pdf: true,
     extract_pdf_images: true, # if false will screenshot each PDF page
@@ -79,9 +80,10 @@ You will need to install `tesseract` with your desired language on your system,
   # How to merge files into a single PDF:
   # The files can be images or other PDFs
-  filepaths = []
-  documents = file_paths.map { |path| OcrFile::ImageEngines::PdfEngine.open_pdf(path, password: '') }
-  merged_document = OcrFile::ImageEngines::PdfEngine.merge(documents)
+  file_paths = []
+  merged_document = ::HexaPDF::Document.new
+  dimensions = [width, height] # or nil to maintain dimensions
+  documents = file_paths.map { |path| OcrFile::ImageEngines::PdfEngine.insert_image(merged_document, path, dimensions: dimensions) }
   OcrFile::ImageEngines::PdfEngine.save_pdf(merged_document, save_file_path, optimise: true)
 ```

data/lib/ocr-file/document.rb CHANGED Viewed

@@ -25,6 +25,7 @@ module OcrFile
       image_preprocess: true,
       effects: ['despeckle', 'deskew', 'enhance', 'sharpen', 'remove_shadow', 'bw'],
       automatic_reprocess: true,
+      dimensions: nil, # width, height. Will lock images to these dimensions
       # PDF to Image Processing
       optimise_pdf: true,
       extract_pdf_images: true, # if false will screenshot each PDF page
@@ -66,7 +67,7 @@ module OcrFile
     def image?
       return false if pdf?
-      ACCEPTED_IMAGE_TYPES.any? { |type| @original_file_path.downcase.include?(".#{type}")}
+      ACCEPTED_IMAGE_TYPES.any? { |type| @original_file_path.downcase.include?(".#{type}") }
     end
     # Treat anything which isnt a PDF or image as text

data/lib/ocr-file/image_engines/image_magick.rb CHANGED Viewed

@@ -5,7 +5,7 @@ module OcrFile
       # Conversion of image types
       # Rotation and detection of skew
-      attr_reader :image_path, :image, :temp_path, :save_file_path, :config
+      attr_reader :image_path, :image, :temp_path, :save_file_path, :config, :width, :height
       def initialize(image_path:, temp_path:, save_file_path:, config:)
         @image_path = image_path
@@ -22,11 +22,18 @@ module OcrFile
         # end
         @image = MiniMagick::Image.open(image_path)
+        @width = @image[:width]
+        @height = @image[:height]
       end
       def convert!
         return @image_path unless @config[:image_preprocess]
+        if @config[:dimensions].is_a?(Array) && @config[:dimensions].size == 2
+          resize(width, height)
+        end
         @config[:effects].each do |effect|
           self.send(effect.to_sym)
         end
@@ -39,6 +46,10 @@ module OcrFile
         @save_file_path
       end
+      def resize(width, height)
+        @image.resize("#{width}x#{height}")
+      end
       # Effects
       # http://www.imagemagick.org/script/command-line-options.php
       def bw

data/lib/ocr-file/image_engines/pdf_engine.rb CHANGED Viewed

@@ -61,9 +61,26 @@ module OcrFile
         image_paths
       end
-      def insert_image(document, image_path)
-        canvas = document.pages.add.canvas
-        canvas.image(image_path, at: [0, 0], height: 700)
+      def insert_image(document, image_path, dimensions: nil)
+        image_processor = OcrFile::ImageEngines::ImageMagick.new(
+          image_path: image_path,
+          temp_path: @temp_folder_path,
+          save_file_path: '',
+          config: @config
+        )
+        if dimensions
+          width = dimensions[0]
+          height = dimensions[1]
+        else
+          width = image_processor.width
+          height = image_processor.height
+        end
+        page = document.pages.add([0, 0, width, height])
+        page.canvas.image(@image || image_path, at: [0, 0], width: width, height: height)
+        document
       end
       def combine(text, pdf_of_images)

data/lib/ocr-file/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module OcrFile
-  VERSION = "0.0.7"
+  VERSION = "0.0.10"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: ocr-file
 version: !ruby/object:Gem::Version
-  version: 0.0.7
+  version: 0.0.10
 platform: ruby
 authors:
 - trex22
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2022-07-22 00:00:00.000000000 Z
+date: 2023-07-23 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: console-style