ocr-file 0.0.7 → 0.0.8

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8b87806d21622a72c6166c35fe4367f5b07135e5e7fab4e8be8b8941f75439dc
4
- data.tar.gz: d342a91e9b23f8677784553327ba1cc1c00e1599415512b28226f8e9f6bc55b4
3
+ metadata.gz: 9ae0f4940b34df3280221cf8b26d86ba3498f8344ef5f0e27ea335ca651a8906
4
+ data.tar.gz: 5e790899721d25bb0f4dc0e8e276b39b62bbb2803549fdbc8ba148804885bec0
5
5
  SHA512:
6
- metadata.gz: ecadeeb21a358274bce4ed3d7fce66e53d31ff3abe940ff1b9d77893f12b73bfd41e9ac35324e3a98f004638f9d1906760ef962a3637fbaf48973faeec9a17cb
7
- data.tar.gz: 5d4a149dd6d0da1feb723b08c327edab414b75f0b633cea53aaee00d43313d26b84659956957acec7550a822998b76a760b3888770a606d8b4a1f9bb14f807c2
6
+ metadata.gz: 6cd016ca7bba37866579cad59f01f41d190c0a191cd1ce27fa7037646da7bf4962923664c7b6295655936aed8714fac01b08301be65fdfef68403c8dd12c075b
7
+ data.tar.gz: f1581713a76e19f1b24d43f030cccbfb32b206bea8d1a5f07fed26fe4e0cfaa3f991c0c35b98bf1f222ca36b143e83700638ecf3b0520b9663d2fe4336cc5da2
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ocr-file (0.0.6)
4
+ ocr-file (0.0.8)
5
5
  active_attr (~> 0.15.4)
6
6
  console-style (~> 0.0.1)
7
7
  hexapdf (~> 0.23.0)
data/README.md CHANGED
@@ -79,9 +79,9 @@ You will need to install `tesseract` with your desired language on your system,
79
79
 
80
80
  # How to merge files into a single PDF:
81
81
  # The files can be images or other PDFs
82
- filepaths = []
83
- documents = file_paths.map { |path| OcrFile::ImageEngines::PdfEngine.open_pdf(path, password: '') }
84
- merged_document = OcrFile::ImageEngines::PdfEngine.merge(documents)
82
+ file_paths = []
83
+ merged_document = ::HexaPDF::Document.new
84
+ documents = file_paths.map { |path| OcrFile::ImageEngines::PdfEngine.insert_image(merged_document, path) }
85
85
  OcrFile::ImageEngines::PdfEngine.save_pdf(merged_document, save_file_path, optimise: true)
86
86
  ```
87
87
 
@@ -66,7 +66,7 @@ module OcrFile
66
66
 
67
67
  def image?
68
68
  return false if pdf?
69
- ACCEPTED_IMAGE_TYPES.any? { |type| @original_file_path.downcase.include?(".#{type}")}
69
+ ACCEPTED_IMAGE_TYPES.any? { |type| @original_file_path.downcase.include?(".#{type}") }
70
70
  end
71
71
 
72
72
  # Treat anything which isnt a PDF or image as text
@@ -5,7 +5,7 @@ module OcrFile
5
5
  # Conversion of image types
6
6
  # Rotation and detection of skew
7
7
 
8
- attr_reader :image_path, :image, :temp_path, :save_file_path, :config
8
+ attr_reader :image_path, :image, :temp_path, :save_file_path, :config, :width, :height
9
9
 
10
10
  def initialize(image_path:, temp_path:, save_file_path:, config:)
11
11
  @image_path = image_path
@@ -22,6 +22,9 @@ module OcrFile
22
22
  # end
23
23
 
24
24
  @image = MiniMagick::Image.open(image_path)
25
+
26
+ @width = @image[:width]
27
+ @height = @image[:height]
25
28
  end
26
29
 
27
30
  def convert!
@@ -39,6 +42,10 @@ module OcrFile
39
42
  @save_file_path
40
43
  end
41
44
 
45
+ def resize(width, height)
46
+ @image.resize("#{width}x#{height}")
47
+ end
48
+
42
49
  # Effects
43
50
  # http://www.imagemagick.org/script/command-line-options.php
44
51
  def bw
@@ -61,9 +61,24 @@ module OcrFile
61
61
  image_paths
62
62
  end
63
63
 
64
- def insert_image(document, image_path)
65
- canvas = document.pages.add.canvas
66
- canvas.image(image_path, at: [0, 0], height: 700)
64
+ def insert_image(document, image_path, dimensions: nil)
65
+ image_processor = OcrFile::ImageEngines::ImageMagick.new(
66
+ image_path: image_path,
67
+ temp_path: @temp_folder_path,
68
+ save_file_path: '',
69
+ config: @config
70
+ )
71
+
72
+ if dimensions
73
+ width = dimensions[0]
74
+ height = dimensions[1]
75
+ else
76
+ width = image_processor.width
77
+ height = image_processor.height
78
+ end
79
+
80
+ page = document.pages.add([0, 0, width, height])
81
+ page.canvas.image(@image || image_path, at: [0, 0], width: width, height: height)
67
82
  end
68
83
 
69
84
  def combine(text, pdf_of_images)
@@ -1,3 +1,3 @@
1
1
  module OcrFile
2
- VERSION = "0.0.7"
2
+ VERSION = "0.0.8"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ocr-file
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-22 00:00:00.000000000 Z
11
+ date: 2022-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: console-style