ocr-file 0.0.8 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 9ae0f4940b34df3280221cf8b26d86ba3498f8344ef5f0e27ea335ca651a8906
4
- data.tar.gz: 5e790899721d25bb0f4dc0e8e276b39b62bbb2803549fdbc8ba148804885bec0
3
+ metadata.gz: e77eefa085a14282b42584bd4bf6796a99e10589552b52858cb8f5dd75c84b97
4
+ data.tar.gz: 9fde9adae0c252ecc56937a314c676903a6b2a6ababe51030cbcd9ab3ee1ba81
5
5
  SHA512:
6
- metadata.gz: 6cd016ca7bba37866579cad59f01f41d190c0a191cd1ce27fa7037646da7bf4962923664c7b6295655936aed8714fac01b08301be65fdfef68403c8dd12c075b
7
- data.tar.gz: f1581713a76e19f1b24d43f030cccbfb32b206bea8d1a5f07fed26fe4e0cfaa3f991c0c35b98bf1f222ca36b143e83700638ecf3b0520b9663d2fe4336cc5da2
6
+ metadata.gz: f6c9cf596d6a78ccea7e1fb45543826e1b95dab74449700eb0f0bed4bace802fbc15fba118a982fbf7daed9ebb188e417876f1e6a9cb6f620eb3630a4aaed7af
7
+ data.tar.gz: 9aab569d476170d8c7b405f65a1629d8ec789f008f734a7cc7f49c5716c89c7a7b5010ae3096ed7e4b3358fe650c221da658801a893cd76f6183f23b19696349
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ocr-file (0.0.8)
4
+ ocr-file (0.0.9)
5
5
  active_attr (~> 0.15.4)
6
6
  console-style (~> 0.0.1)
7
7
  hexapdf (~> 0.23.0)
data/README.md CHANGED
@@ -45,6 +45,7 @@ You will need to install `tesseract` with your desired language on your system,
45
45
  image_preprocess: true,
46
46
  effects: ['despeckle', 'deskew', 'enhance', 'sharpen', 'remove_shadow', 'bw'], # Applies effects as listed. 'norm' is also available
47
47
  automatic_reprocess: true, # Will possibly do double + the operations but can produce better results automatically
48
+ dimensions: [width, height], # Can be nil but will lock the images
48
49
  # PDF to Image Processing
49
50
  optimise_pdf: true,
50
51
  extract_pdf_images: true, # if false will screenshot each PDF page
@@ -81,7 +82,8 @@ You will need to install `tesseract` with your desired language on your system,
81
82
  # The files can be images or other PDFs
82
83
  file_paths = []
83
84
  merged_document = ::HexaPDF::Document.new
84
- documents = file_paths.map { |path| OcrFile::ImageEngines::PdfEngine.insert_image(merged_document, path) }
85
+ dimensions = [width, height] # or nil to maintain dimensions
86
+ documents = file_paths.map { |path| OcrFile::ImageEngines::PdfEngine.insert_image(merged_document, path, dimensions: dimensions) }
85
87
  OcrFile::ImageEngines::PdfEngine.save_pdf(merged_document, save_file_path, optimise: true)
86
88
  ```
87
89
 
@@ -25,6 +25,7 @@ module OcrFile
25
25
  image_preprocess: true,
26
26
  effects: ['despeckle', 'deskew', 'enhance', 'sharpen', 'remove_shadow', 'bw'],
27
27
  automatic_reprocess: true,
28
+ dimensions: nil, # width, height. Will lock images to these dimensions
28
29
  # PDF to Image Processing
29
30
  optimise_pdf: true,
30
31
  extract_pdf_images: true, # if false will screenshot each PDF page
@@ -30,6 +30,10 @@ module OcrFile
30
30
  def convert!
31
31
  return @image_path unless @config[:image_preprocess]
32
32
 
33
+ if @config[:dimensions].is_a?(Array) && @config[:dimensions].size == 2
34
+ resize(width, height)
35
+ end
36
+
33
37
  @config[:effects].each do |effect|
34
38
  self.send(effect.to_sym)
35
39
  end
@@ -79,6 +79,8 @@ module OcrFile
79
79
 
80
80
  page = document.pages.add([0, 0, width, height])
81
81
  page.canvas.image(@image || image_path, at: [0, 0], width: width, height: height)
82
+
83
+ document
82
84
  end
83
85
 
84
86
  def combine(text, pdf_of_images)
@@ -1,3 +1,3 @@
1
1
  module OcrFile
2
- VERSION = "0.0.8"
2
+ VERSION = "0.0.10"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ocr-file
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-23 00:00:00.000000000 Z
11
+ date: 2023-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: console-style