RubyGems - toc_extract - Versions diffs - 0.1.0 → 0.2.0 - Mend

toc_extract 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 8166372166f4ca2fc3242398b6c9ff620bdc0760260a882e6373fec870361a84
-  data.tar.gz: 8a10993154383702ed1629e8f20026a817f3115ed076c06cbe4e8ccce561fcbd
+  metadata.gz: 6b4ad0c8374da7bd65d180846d2cdc3753de5882414e2bd8c9938cf5c6576f69
+  data.tar.gz: 10576f74ffbe6849cd18de5448dcc4cbfd86c77b058b3aa854bdbfe4f0961597
 SHA512:
-  metadata.gz: dc2c0915f1c84cda13741dcc674c4878c1731e275c012293dcbc55dc366f50f27a80cda053c6ee00c8522017355651c625b6b81d477c90a84b54899aff64cad5
-  data.tar.gz: 647c126a09844ed72603180b6f2d9569bf83537240d5bdea89dbfdca06a2478dd39800b6a981502ddfaced54f6e662bfb0e6d434a4c97d7da1124e85e7a85655
+  metadata.gz: 6ac11b25e4d645093870437716cb1532e439fc0224e90d5312781a730081333cb7cc9e2221aa53c1938b81b647ac603a9a49d08cbceec7a7604a214a8474d7e4
+  data.tar.gz: 9786a77f1068b8489243c067ea5e95b77921c3689bdc28115768748e65515ff13cc38ded2bf6c3247d18a75bf3ad54bd4d62e9296d49cfca91a88cf2256073e0

data/lib/toc_extract/extractor.rb CHANGED Viewed

@@ -11,6 +11,14 @@ class TocExtract
   require "pdf/reader"
   require "pdf/reader/find_text"
+  def self.extract(pdf_file, template, toc_start_page, toc_end_page)
+    lines = TocExtract.toc_lines(pdf_file, template, toc_start_page, toc_end_page)
+    sections = TocExtract.sections_from_toc_lines(lines, template)
+    TocExtract.fill_bounding_boxes(pdf_file, sections, toc_end_page)
+    sections
+  end
   def self.toc_lines(pdf_file, template, toc_start_page, toc_end_page)
     # To detect the lines, we assume that all the elements on a line have the same y value.
@@ -98,7 +106,7 @@ class TocExtract
           break
         end
       end
-      page = page.reverse.to_i
+      page = page.reverse.to_i - 1 # Since TOC pages are 1-based
       # title
       title = line[title_start..title_end-1].sub(/\.+$/, '')

data/lib/toc_extract/preview.rb ADDED Viewed

@@ -0,0 +1,54 @@
+class TocExtract
+  def self.preview(pdf_file, section, crop_width, crop_height)
+    require "rmagick"
+    require "pdf/reader"
+    require "pdf/reader/find_text"
+    result = ""
+    PDF::Reader.open(pdf_file) do |reader|
+      page_num = section.page_number
+      target_page = reader.pages[page_num]
+      page_width = target_page.width
+      page_height = target_page.height
+      images = Magick::Image.read("#{pdf_file}[#{page_num}]") do |info|
+        info.density = 150
+      end
+      img = images.first
+      img_width = img.columns
+      img_height = img.rows
+      scale_x = img_width.to_f / page_width
+      scale_y = img_height.to_f / page_height
+      # In pdf, x,y are the bottom left coordinates, converting them to top left
+      # Convert pdf-reader coordinates to RMagick coordinates
+      # pdf-reader: origin at bottom-left, Y increases upward
+      # RMagick: origin at top-left, Y increases downward
+      # Add some padding as well
+      pdf_x = section.bounding_box["x"] - 30
+      pdf_y = section.bounding_box["endy"] + 10
+      img_x = (pdf_x * scale_x).round
+      img_y = ((page_height - pdf_y) * scale_y).round  # Flip Y coordinate
+      img_width_pixels = (crop_width * scale_x).round
+      img_height_pixels = (crop_height * scale_y).round
+      # Ensure coordinates are within image bounds
+      img_x = [ img_x, 0 ].max
+      img_y = [ img_y, 0 ].max
+      img_width_pixels = [ img_width_pixels, img_width ].min
+      img_height_pixels = [ img_height_pixels, img_height ].min
+      cropped_img = img.crop(img_x, img_y, img_width_pixels, img_height_pixels)
+      cropped_img.resize_to_fit!(crop_width, crop_height)
+      result = cropped_img.to_blob { |info| info.format = "PNG" }
+    end
+    result
+  end
+end

data/lib/toc_extract.rb CHANGED Viewed

@@ -6,3 +6,4 @@ class TocExtract
 end
 require_relative 'toc_extract/extractor'
+require_relative 'toc_extract/preview'

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: toc_extract
 version: !ruby/object:Gem::Version
-  version: 0.1.0
+  version: 0.2.0
 platform: ruby
 authors:
 - Arash Afshar
@@ -37,6 +37,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '1.0'
+- !ruby/object:Gem::Dependency
+  name: rmagick
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '6.1'
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: '6.1'
 - !ruby/object:Gem::Dependency
   name: rake
   requirement: !ruby/object:Gem::Requirement
@@ -119,6 +133,7 @@ files:
 - README.md
 - lib/toc_extract.rb
 - lib/toc_extract/extractor.rb
+- lib/toc_extract/preview.rb
 homepage: https://github.com/Arash-Afshar/toc_extract
 licenses:
 - MIT