pdftdx 1.1.8 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/pdftdx/parser.rb +24 -2
- data/lib/pdftdx/version.rb +1 -1
- metadata +1 -1
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 369947ea208604dcb52e6a9f329df412709d0f0f
         | 
| 4 | 
            +
              data.tar.gz: 14d1803d485efed81cfc14e89d720f8da9e0d5bc
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 71965bd9e8648e4be72546718891ea87d3d521080711396adaf50325359a0d90d445b0a56e0c210307698561f49f6c93379b47c4416d6a0fb05ad0211886b9db
         | 
| 7 | 
            +
              data.tar.gz: bb9f966e063c2a8d3e83b60c9e8af61635ed8e7e7d9e98508ebc456bf2cca3b40e2a243bafb1d3036fc5eb5fab30ea1ce8a4b0437e80119e08432a3416d8aadc
         | 
    
        data/lib/pdftdx/parser.rb
    CHANGED
    
    | @@ -152,6 +152,28 @@ module PDFTDX | |
| 152 152 | 
             
            			Hash[*(r.to_a.sort { |a, b| ((a[0] == b[0]) ? 0 : (a[0] > b[0] ? 1 : -1)) }.flatten)]
         | 
| 153 153 | 
             
            		end
         | 
| 154 154 |  | 
| 155 | 
            +
            		# Fix Dupes
         | 
| 156 | 
            +
            		# Shifts Duplicate Cells (Cells which share their x-offset with others) to the right (so they don't get overwritten)
         | 
| 157 | 
            +
            		# @param [Array] r A row of data in the form [[xoffset, cell]] (Example: [[120, 'cell 0'], [200, 'cell 1'], [280, 'cell 2']])
         | 
| 158 | 
            +
            		# @return [Array] The same row of data, but with duplicate cells shifted so that no x-offset collisions occur
         | 
| 159 | 
            +
            		def self.fix_dupes r
         | 
| 160 | 
            +
             | 
| 161 | 
            +
            			# Deep-Duplicate Row
         | 
| 162 | 
            +
            			nr = r.collect { |e| e.clone }
         | 
| 163 | 
            +
             | 
| 164 | 
            +
            			# Run through Cells
         | 
| 165 | 
            +
            			nr.length.times do |i|
         | 
| 166 | 
            +
             | 
| 167 | 
            +
            				# Acquire Duplicate Length
         | 
| 168 | 
            +
            				dupes = nr.slice(i + 1, nr.length).inject(0) { |a, c| a + (c[0] == nr[i][0] ? 1 : 0) }
         | 
| 169 | 
            +
             | 
| 170 | 
            +
            				# Fix Dupes
         | 
| 171 | 
            +
            				dupes.times { |j| nr[i + j + 1][0] = nr[i + j + 1][0] + 1 }
         | 
| 172 | 
            +
            			end
         | 
| 173 | 
            +
             | 
| 174 | 
            +
            			nr
         | 
| 175 | 
            +
            		end
         | 
| 176 | 
            +
             | 
| 155 177 | 
             
            		# Touch up Table
         | 
| 156 178 | 
             
            		# Splits Table into multiple headered tables.
         | 
| 157 179 | 
             
            		# Also, strips Left Offset info from Table Cells.
         | 
| @@ -177,8 +199,8 @@ module PDFTDX | |
| 177 199 | 
             
            				# Compute Row Base (Default Columns)
         | 
| 178 200 | 
             
            				row_base = Hash[*(cols.collect { |c| [c, ''] }.flatten)]
         | 
| 179 201 |  | 
| 180 | 
            -
            				#  | 
| 181 | 
            -
            				{ head: t[:head], data: t[:data].collect { |r| sort_row row_base.merge(Hash[*(r.collect { |o, c| [(cols.reverse.find { |co| co <= o }) || o, c] }.flatten)]) } }
         | 
| 202 | 
            +
            				# Re-Build Table
         | 
| 203 | 
            +
            				{ head: t[:head], data: t[:data].collect { |r| sort_row row_base.merge(Hash[*((fix_dupes r.collect { |o, c| [(cols.reverse.find { |co| co <= o }) || o, c] }).flatten)]) } }
         | 
| 182 204 | 
             
            			end
         | 
| 183 205 |  | 
| 184 206 | 
             
            			# Drop Offsets
         | 
    
        data/lib/pdftdx/version.rb
    CHANGED