ocr-file 0.0.7 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8b87806d21622a72c6166c35fe4367f5b07135e5e7fab4e8be8b8941f75439dc
4
- data.tar.gz: d342a91e9b23f8677784553327ba1cc1c00e1599415512b28226f8e9f6bc55b4
3
+ metadata.gz: e77eefa085a14282b42584bd4bf6796a99e10589552b52858cb8f5dd75c84b97
4
+ data.tar.gz: 9fde9adae0c252ecc56937a314c676903a6b2a6ababe51030cbcd9ab3ee1ba81
5
5
  SHA512:
6
- metadata.gz: ecadeeb21a358274bce4ed3d7fce66e53d31ff3abe940ff1b9d77893f12b73bfd41e9ac35324e3a98f004638f9d1906760ef962a3637fbaf48973faeec9a17cb
7
- data.tar.gz: 5d4a149dd6d0da1feb723b08c327edab414b75f0b633cea53aaee00d43313d26b84659956957acec7550a822998b76a760b3888770a606d8b4a1f9bb14f807c2
6
+ metadata.gz: f6c9cf596d6a78ccea7e1fb45543826e1b95dab74449700eb0f0bed4bace802fbc15fba118a982fbf7daed9ebb188e417876f1e6a9cb6f620eb3630a4aaed7af
7
+ data.tar.gz: 9aab569d476170d8c7b405f65a1629d8ec789f008f734a7cc7f49c5716c89c7a7b5010ae3096ed7e4b3358fe650c221da658801a893cd76f6183f23b19696349
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ocr-file (0.0.6)
4
+ ocr-file (0.0.9)
5
5
  active_attr (~> 0.15.4)
6
6
  console-style (~> 0.0.1)
7
7
  hexapdf (~> 0.23.0)
data/README.md CHANGED
@@ -45,6 +45,7 @@ You will need to install `tesseract` with your desired language on your system,
45
45
  image_preprocess: true,
46
46
  effects: ['despeckle', 'deskew', 'enhance', 'sharpen', 'remove_shadow', 'bw'], # Applies effects as listed. 'norm' is also available
47
47
  automatic_reprocess: true, # Will possibly do double + the operations but can produce better results automatically
48
+ dimensions: [width, height], # Can be nil; when set, images are locked to these dimensions
48
49
  # PDF to Image Processing
49
50
  optimise_pdf: true,
50
51
  extract_pdf_images: true, # if false will screenshot each PDF page
@@ -79,9 +80,10 @@ You will need to install `tesseract` with your desired language on your system,
79
80
 
80
81
  # How to merge files into a single PDF:
81
82
  # The files can be images or other PDFs
82
- filepaths = []
83
- documents = file_paths.map { |path| OcrFile::ImageEngines::PdfEngine.open_pdf(path, password: '') }
84
- merged_document = OcrFile::ImageEngines::PdfEngine.merge(documents)
83
+ file_paths = []
84
+ merged_document = ::HexaPDF::Document.new
85
+ dimensions = [width, height] # or nil to maintain dimensions
86
+ documents = file_paths.map { |path| OcrFile::ImageEngines::PdfEngine.insert_image(merged_document, path, dimensions: dimensions) }
85
87
  OcrFile::ImageEngines::PdfEngine.save_pdf(merged_document, save_file_path, optimise: true)
86
88
  ```
87
89
 
@@ -25,6 +25,7 @@ module OcrFile
25
25
  image_preprocess: true,
26
26
  effects: ['despeckle', 'deskew', 'enhance', 'sharpen', 'remove_shadow', 'bw'],
27
27
  automatic_reprocess: true,
28
+ dimensions: nil, # width, height. Will lock images to these dimensions
28
29
  # PDF to Image Processing
29
30
  optimise_pdf: true,
30
31
  extract_pdf_images: true, # if false will screenshot each PDF page
@@ -66,7 +67,7 @@ module OcrFile
66
67
 
67
68
  def image?
68
69
  return false if pdf?
69
- ACCEPTED_IMAGE_TYPES.any? { |type| @original_file_path.downcase.include?(".#{type}")}
70
+ ACCEPTED_IMAGE_TYPES.any? { |type| @original_file_path.downcase.include?(".#{type}") }
70
71
  end
71
72
 
72
73
  # Treat anything which isn't a PDF or image as text
@@ -5,7 +5,7 @@ module OcrFile
5
5
  # Conversion of image types
6
6
  # Rotation and detection of skew
7
7
 
8
- attr_reader :image_path, :image, :temp_path, :save_file_path, :config
8
+ attr_reader :image_path, :image, :temp_path, :save_file_path, :config, :width, :height
9
9
 
10
10
  def initialize(image_path:, temp_path:, save_file_path:, config:)
11
11
  @image_path = image_path
@@ -22,11 +22,18 @@ module OcrFile
22
22
  # end
23
23
 
24
24
  @image = MiniMagick::Image.open(image_path)
25
+
26
+ @width = @image[:width]
27
+ @height = @image[:height]
25
28
  end
26
29
 
27
30
  def convert!
28
31
  return @image_path unless @config[:image_preprocess]
29
32
 
33
+ if @config[:dimensions].is_a?(Array) && @config[:dimensions].size == 2
34
+ resize(width, height)
35
+ end
36
+
30
37
  @config[:effects].each do |effect|
31
38
  self.send(effect.to_sym)
32
39
  end
@@ -39,6 +46,10 @@ module OcrFile
39
46
  @save_file_path
40
47
  end
41
48
 
49
+ def resize(width, height)
50
+ @image.resize("#{width}x#{height}")
51
+ end
52
+
42
53
  # Effects
43
54
  # http://www.imagemagick.org/script/command-line-options.php
44
55
  def bw
@@ -61,9 +61,26 @@ module OcrFile
61
61
  image_paths
62
62
  end
63
63
 
64
- def insert_image(document, image_path)
65
- canvas = document.pages.add.canvas
66
- canvas.image(image_path, at: [0, 0], height: 700)
64
+ def insert_image(document, image_path, dimensions: nil)
65
+ image_processor = OcrFile::ImageEngines::ImageMagick.new(
66
+ image_path: image_path,
67
+ temp_path: @temp_folder_path,
68
+ save_file_path: '',
69
+ config: @config
70
+ )
71
+
72
+ if dimensions
73
+ width = dimensions[0]
74
+ height = dimensions[1]
75
+ else
76
+ width = image_processor.width
77
+ height = image_processor.height
78
+ end
79
+
80
+ page = document.pages.add([0, 0, width, height])
81
+ page.canvas.image(@image || image_path, at: [0, 0], width: width, height: height)
82
+
83
+ document
67
84
  end
68
85
 
69
86
  def combine(text, pdf_of_images)
@@ -1,3 +1,3 @@
1
1
  module OcrFile
2
- VERSION = "0.0.7"
2
+ VERSION = "0.0.10"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ocr-file
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-22 00:00:00.000000000 Z
11
+ date: 2023-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: console-style