pdf-reader 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +3 -0
- data/README.rdoc +3 -9
- data/lib/pdf/reader/page.rb +4 -0
- data/lib/pdf/reader/page_layout.rb +4 -2
- data/lib/pdf/reader/page_state.rb +14 -5
- data/lib/pdf/reader/page_text_receiver.rb +8 -9
- data/lib/pdf/reader/width_calculator/built_in.rb +8 -1
- metadata +2 -2
    
        data/CHANGELOG
    CHANGED
    
    
    
        data/README.rdoc
    CHANGED
    
    | @@ -77,17 +77,11 @@ of PDF::Reader::Page. | |
| 77 77 | 
             
                receiver = RedGreenBlue.new
         | 
| 78 78 | 
             
                page.walk(receiver)
         | 
| 79 79 |  | 
| 80 | 
            -
            For low level access to the objects in a PDF file, use the ObjectHash class | 
| 81 | 
            -
             | 
| 82 | 
            -
             | 
| 83 | 
            -
                puts PDF::Reader::ObjectHash.new("somefile.pdf")
         | 
| 84 | 
            -
             | 
| 85 | 
            -
            or via a PDF::Reader instance:
         | 
| 80 | 
            +
            For low level access to the objects in a PDF file, use the ObjectHash class like
         | 
| 81 | 
            +
            so:
         | 
| 86 82 |  | 
| 87 83 | 
             
                reader  = PDF::Reader.new("somefile.pdf")
         | 
| 88 | 
            -
                puts reader.objects
         | 
| 89 | 
            -
             | 
| 90 | 
            -
            The second method is preferred to increase the effectiveness of internal caching.
         | 
| 84 | 
            +
                puts reader.objects.inspect
         | 
| 91 85 |  | 
| 92 86 | 
             
            = Text Encoding
         | 
| 93 87 |  | 
    
        data/lib/pdf/reader/page.rb
    CHANGED
    
    | @@ -61,6 +61,10 @@ module PDF | |
| 61 61 | 
             
                        hash.merge!(@objects.deref(obj))
         | 
| 62 62 | 
             
                      end
         | 
| 63 63 | 
             
                    }
         | 
| 64 | 
            +
                    # This shouldn't be necessary, but some non-compliant PDFs leave MediaBox
         | 
| 65 | 
            +
                    # out. Assuming 8.5" x 11" is what Acrobat does, so we do it too.
         | 
| 66 | 
            +
                    @attributes[:MediaBox] ||= [0,0,612,792]
         | 
| 67 | 
            +
                    @attributes
         | 
| 64 68 | 
             
                  end
         | 
| 65 69 |  | 
| 66 70 | 
             
                  # returns the plain text content of this page encoded as UTF-8. Any
         | 
| @@ -9,6 +9,8 @@ class PDF::Reader | |
| 9 9 | 
             
              # page to be rendered as described by the page's MediaBox attribute
         | 
| 10 10 | 
             
              class PageLayout
         | 
| 11 11 | 
             
                def initialize(runs, mediabox)
         | 
| 12 | 
            +
                  raise ArgumentError, "a mediabox must be provided" if mediabox.nil?
         | 
| 13 | 
            +
             | 
| 12 14 | 
             
                  @runs    = merge_runs(runs)
         | 
| 13 15 | 
             
                  @mean_font_size   = mean(@runs.map(&:font_size)) || 0
         | 
| 14 16 | 
             
                  @mean_glyph_width = mean(@runs.map(&:mean_character_width)) || 0
         | 
| @@ -58,11 +60,11 @@ class PDF::Reader | |
| 58 60 | 
             
                end
         | 
| 59 61 |  | 
| 60 62 | 
             
                def row_multiplier
         | 
| 61 | 
            -
                  @row_multiplier ||= @page_height / row_count
         | 
| 63 | 
            +
                  @row_multiplier ||= @page_height.to_f / row_count.to_f
         | 
| 62 64 | 
             
                end
         | 
| 63 65 |  | 
| 64 66 | 
             
                def col_multiplier
         | 
| 65 | 
            -
                  @col_multiplier ||= @page_width / col_count
         | 
| 67 | 
            +
                  @col_multiplier ||= @page_width.to_f / col_count.to_f
         | 
| 66 68 | 
             
                end
         | 
| 67 69 |  | 
| 68 70 | 
             
                def mean(collection)
         | 
| @@ -63,7 +63,12 @@ class PDF::Reader | |
| 63 63 | 
             
                  #
         | 
| 64 64 | 
             
                  def concatenate_matrix(a, b, c, d, e, f)
         | 
| 65 65 | 
             
                    if state[:ctm]
         | 
| 66 | 
            -
                      state[:ctm] | 
| 66 | 
            +
                      ctm = state[:ctm]
         | 
| 67 | 
            +
                      state[:ctm] = TransformationMatrix.new(a,b,c,d,e,f).multiply!(
         | 
| 68 | 
            +
                        ctm.a, ctm.b,
         | 
| 69 | 
            +
                        ctm.c, ctm.d,
         | 
| 70 | 
            +
                        ctm.e, ctm.f
         | 
| 71 | 
            +
                      )
         | 
| 67 72 | 
             
                    else
         | 
| 68 73 | 
             
                      state[:ctm] = TransformationMatrix.new(a,b,c,d,e,f)
         | 
| 69 74 | 
             
                    end
         | 
| @@ -102,7 +107,11 @@ class PDF::Reader | |
| 102 107 | 
             
                  end
         | 
| 103 108 |  | 
| 104 109 | 
             
                  def font_size
         | 
| 105 | 
            -
                    @font_size ||=  | 
| 110 | 
            +
                    @font_size ||= begin
         | 
| 111 | 
            +
                                     _, zero = trm_transform(0,0)
         | 
| 112 | 
            +
                                     _, one  = trm_transform(1,1)
         | 
| 113 | 
            +
                                     (zero - one).abs
         | 
| 114 | 
            +
                                   end
         | 
| 106 115 | 
             
                  end
         | 
| 107 116 |  | 
| 108 117 | 
             
                  def set_text_leading(leading)
         | 
| @@ -324,7 +333,7 @@ class PDF::Reader | |
| 324 333 | 
             
                    #       ctm[0] here, but this gets my tests green and I'm out of
         | 
| 325 334 | 
             
                    #       ideas for now
         | 
| 326 335 | 
             
                    # TODO: support ty > 0
         | 
| 327 | 
            -
                    if ctm.a == 1
         | 
| 336 | 
            +
                    if ctm.a == 1 || ctm.a == 0
         | 
| 328 337 | 
             
                      @text_matrix.horizontal_displacement_multiply!(tx)
         | 
| 329 338 | 
             
                    else
         | 
| 330 339 | 
             
                      @text_matrix.horizontal_displacement_multiply!(tx/ctm.a)
         | 
| @@ -341,8 +350,8 @@ class PDF::Reader | |
| 341 350 | 
             
                  def text_rendering_matrix
         | 
| 342 351 | 
             
                    @text_rendering_matrix ||= begin
         | 
| 343 352 | 
             
                      state_matrix = TransformationMatrix.new(
         | 
| 344 | 
            -
                         | 
| 345 | 
            -
                        0,  | 
| 353 | 
            +
                        state[:text_font_size] * state[:h_scaling], 0,
         | 
| 354 | 
            +
                        0, state[:text_font_size],
         | 
| 346 355 | 
             
                        0, state[:text_rise]
         | 
| 347 356 | 
             
                      )
         | 
| 348 357 | 
             
                      state_matrix.multiply!(
         | 
| @@ -58,8 +58,12 @@ module PDF | |
| 58 58 | 
             
                  end
         | 
| 59 59 |  | 
| 60 60 | 
             
                  def show_text_with_positioning(params) # TJ [(A) 120 (WA) 20 (Y)]
         | 
| 61 | 
            -
                    params. | 
| 62 | 
            -
                       | 
| 61 | 
            +
                    params.each do |arg|
         | 
| 62 | 
            +
                      if arg.is_a?(String)
         | 
| 63 | 
            +
                        internal_show_text(arg)
         | 
| 64 | 
            +
                      else
         | 
| 65 | 
            +
                        @state.process_glyph_displacement(0, arg, false)
         | 
| 66 | 
            +
                      end
         | 
| 63 67 | 
             
                    end
         | 
| 64 68 | 
             
                  end
         | 
| 65 69 |  | 
| @@ -88,7 +92,7 @@ module PDF | |
| 88 92 |  | 
| 89 93 | 
             
                  private
         | 
| 90 94 |  | 
| 91 | 
            -
                  def internal_show_text(string | 
| 95 | 
            +
                  def internal_show_text(string)
         | 
| 92 96 | 
             
                    if @state.current_font.nil?
         | 
| 93 97 | 
             
                      raise PDF::Reader::MalformedPDFError, "current font is invalid"
         | 
| 94 98 | 
             
                    end
         | 
| @@ -102,16 +106,11 @@ module PDF | |
| 102 106 | 
             
                      # glyph will appear in the correct position
         | 
| 103 107 | 
             
                      glyph_width = @state.current_font.glyph_width(glyph_code) / 1000.0
         | 
| 104 108 | 
             
                      th = 1
         | 
| 105 | 
            -
                      if kerning != 0 && index == glyphs.size - 1
         | 
| 106 | 
            -
                        tj = kerning
         | 
| 107 | 
            -
                      else
         | 
| 108 | 
            -
                        tj = 0
         | 
| 109 | 
            -
                      end
         | 
| 110 109 | 
             
                      scaled_glyph_width = glyph_width * @state.font_size * th
         | 
| 111 110 | 
             
                      unless utf8_chars == SPACE
         | 
| 112 111 | 
             
                        @characters << TextRun.new(newx, newy, scaled_glyph_width, @state.font_size, utf8_chars)
         | 
| 113 112 | 
             
                      end
         | 
| 114 | 
            -
                      @state.process_glyph_displacement(glyph_width,  | 
| 113 | 
            +
                      @state.process_glyph_displacement(glyph_width, 0, utf8_chars == SPACE)
         | 
| 115 114 | 
             
                    end
         | 
| 116 115 | 
             
                  end
         | 
| 117 116 |  | 
| @@ -42,7 +42,14 @@ class PDF::Reader | |
| 42 42 | 
             
                      name = @font.encoding.int_to_name(code_point)
         | 
| 43 43 | 
             
                      m = @metrics.metrics_for_name(name)
         | 
| 44 44 | 
             
                    end
         | 
| 45 | 
            -
             | 
| 45 | 
            +
             | 
| 46 | 
            +
                    if m
         | 
| 47 | 
            +
                      m[:wx]
         | 
| 48 | 
            +
                    elsif @font.widths[code_point - 1]
         | 
| 49 | 
            +
                      @font.widths[code_point - 1]
         | 
| 50 | 
            +
                    else
         | 
| 51 | 
            +
                      raise ArgumentError, "Unknown glyph width for #{codepoint}"
         | 
| 52 | 
            +
                    end
         | 
| 46 53 | 
             
                  end
         | 
| 47 54 |  | 
| 48 55 | 
             
                end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: pdf-reader
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 1.3. | 
| 4 | 
            +
              version: 1.3.1
         | 
| 5 5 | 
             
              prerelease: 
         | 
| 6 6 | 
             
            platform: ruby
         | 
| 7 7 | 
             
            authors:
         | 
| @@ -9,7 +9,7 @@ authors: | |
| 9 9 | 
             
            autorequire: 
         | 
| 10 10 | 
             
            bindir: bin
         | 
| 11 11 | 
             
            cert_chain: []
         | 
| 12 | 
            -
            date:  | 
| 12 | 
            +
            date: 2013-02-12 00:00:00.000000000 Z
         | 
| 13 13 | 
             
            dependencies:
         | 
| 14 14 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 15 15 | 
             
              name: rake
         |