ocr-file 0.0.7 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8b87806d21622a72c6166c35fe4367f5b07135e5e7fab4e8be8b8941f75439dc
4
- data.tar.gz: d342a91e9b23f8677784553327ba1cc1c00e1599415512b28226f8e9f6bc55b4
3
+ metadata.gz: 9ae0f4940b34df3280221cf8b26d86ba3498f8344ef5f0e27ea335ca651a8906
4
+ data.tar.gz: 5e790899721d25bb0f4dc0e8e276b39b62bbb2803549fdbc8ba148804885bec0
5
5
  SHA512:
6
- metadata.gz: ecadeeb21a358274bce4ed3d7fce66e53d31ff3abe940ff1b9d77893f12b73bfd41e9ac35324e3a98f004638f9d1906760ef962a3637fbaf48973faeec9a17cb
7
- data.tar.gz: 5d4a149dd6d0da1feb723b08c327edab414b75f0b633cea53aaee00d43313d26b84659956957acec7550a822998b76a760b3888770a606d8b4a1f9bb14f807c2
6
+ metadata.gz: 6cd016ca7bba37866579cad59f01f41d190c0a191cd1ce27fa7037646da7bf4962923664c7b6295655936aed8714fac01b08301be65fdfef68403c8dd12c075b
7
+ data.tar.gz: f1581713a76e19f1b24d43f030cccbfb32b206bea8d1a5f07fed26fe4e0cfaa3f991c0c35b98bf1f222ca36b143e83700638ecf3b0520b9663d2fe4336cc5da2
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ocr-file (0.0.6)
4
+ ocr-file (0.0.8)
5
5
  active_attr (~> 0.15.4)
6
6
  console-style (~> 0.0.1)
7
7
  hexapdf (~> 0.23.0)
data/README.md CHANGED
@@ -79,9 +79,9 @@ You will need to install `tesseract` with your desired language on your system,
79
79
 
80
80
  # How to merge files into a single PDF:
81
81
  # The files can be images or other PDFs
82
- filepaths = []
83
- documents = file_paths.map { |path| OcrFile::ImageEngines::PdfEngine.open_pdf(path, password: '') }
84
- merged_document = OcrFile::ImageEngines::PdfEngine.merge(documents)
82
+ file_paths = []
83
+ merged_document = ::HexaPDF::Document.new
84
+ documents = file_paths.map { |path| OcrFile::ImageEngines::PdfEngine.insert_image(merged_document, path) }
85
85
  OcrFile::ImageEngines::PdfEngine.save_pdf(merged_document, save_file_path, optimise: true)
86
86
  ```
87
87
 
@@ -66,7 +66,7 @@ module OcrFile
66
66
 
67
67
  def image?
68
68
  return false if pdf?
69
- ACCEPTED_IMAGE_TYPES.any? { |type| @original_file_path.downcase.include?(".#{type}")}
69
+ ACCEPTED_IMAGE_TYPES.any? { |type| @original_file_path.downcase.include?(".#{type}") }
70
70
  end
71
71
 
72
72
  # Treat anything which isnt a PDF or image as text
@@ -5,7 +5,7 @@ module OcrFile
5
5
  # Conversion of image types
6
6
  # Rotation and detection of skew
7
7
 
8
- attr_reader :image_path, :image, :temp_path, :save_file_path, :config
8
+ attr_reader :image_path, :image, :temp_path, :save_file_path, :config, :width, :height
9
9
 
10
10
  def initialize(image_path:, temp_path:, save_file_path:, config:)
11
11
  @image_path = image_path
@@ -22,6 +22,9 @@ module OcrFile
22
22
  # end
23
23
 
24
24
  @image = MiniMagick::Image.open(image_path)
25
+
26
+ @width = @image[:width]
27
+ @height = @image[:height]
25
28
  end
26
29
 
27
30
  def convert!
@@ -39,6 +42,10 @@ module OcrFile
39
42
  @save_file_path
40
43
  end
41
44
 
45
+ def resize(width, height)
46
+ @image.resize("#{width}x#{height}")
47
+ end
48
+
42
49
  # Effects
43
50
  # http://www.imagemagick.org/script/command-line-options.php
44
51
  def bw
@@ -61,9 +61,24 @@ module OcrFile
61
61
  image_paths
62
62
  end
63
63
 
64
- def insert_image(document, image_path)
65
- canvas = document.pages.add.canvas
66
- canvas.image(image_path, at: [0, 0], height: 700)
64
+ def insert_image(document, image_path, dimensions: nil)
65
+ image_processor = OcrFile::ImageEngines::ImageMagick.new(
66
+ image_path: image_path,
67
+ temp_path: @temp_folder_path,
68
+ save_file_path: '',
69
+ config: @config
70
+ )
71
+
72
+ if dimensions
73
+ width = dimensions[0]
74
+ height = dimensions[1]
75
+ else
76
+ width = image_processor.width
77
+ height = image_processor.height
78
+ end
79
+
80
+ page = document.pages.add([0, 0, width, height])
81
+ page.canvas.image(@image || image_path, at: [0, 0], width: width, height: height)
67
82
  end
68
83
 
69
84
  def combine(text, pdf_of_images)
@@ -1,3 +1,3 @@
1
1
  module OcrFile
2
- VERSION = "0.0.7"
2
+ VERSION = "0.0.8"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ocr-file
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - trex22
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-07-22 00:00:00.000000000 Z
11
+ date: 2022-07-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: console-style