pdftdx 1.1.8 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/pdftdx/parser.rb +24 -2
- data/lib/pdftdx/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 369947ea208604dcb52e6a9f329df412709d0f0f
|
4
|
+
data.tar.gz: 14d1803d485efed81cfc14e89d720f8da9e0d5bc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 71965bd9e8648e4be72546718891ea87d3d521080711396adaf50325359a0d90d445b0a56e0c210307698561f49f6c93379b47c4416d6a0fb05ad0211886b9db
|
7
|
+
data.tar.gz: bb9f966e063c2a8d3e83b60c9e8af61635ed8e7e7d9e98508ebc456bf2cca3b40e2a243bafb1d3036fc5eb5fab30ea1ce8a4b0437e80119e08432a3416d8aadc
|
data/lib/pdftdx/parser.rb
CHANGED
@@ -152,6 +152,28 @@ module PDFTDX
|
|
152
152
|
Hash[*(r.to_a.sort { |a, b| ((a[0] == b[0]) ? 0 : (a[0] > b[0] ? 1 : -1)) }.flatten)]
|
153
153
|
end
|
154
154
|
|
155
|
+
# Fix Dupes
|
156
|
+
# Shifts Duplicate Cells (Cells which share their x-offset with others) to the right (so they don't get overwritten)
|
157
|
+
# @param [Array] r A row of data in the form [[xoffset, cell]] (Example: [[120, 'cell 0'], [200, 'cell 1'], [280, 'cell 2']])
|
158
|
+
# @return [Array] The same row of data, but with duplicate cells shifted so that no x-offset-collisions occur
|
159
|
+
def self.fix_dupes r
|
160
|
+
|
161
|
+
# Deep-Duplicate Row
|
162
|
+
nr = r.collect { |e| e.clone }
|
163
|
+
|
164
|
+
# Run through Cells
|
165
|
+
nr.length.times do |i|
|
166
|
+
|
167
|
+
# Acquire Duplicate Length
|
168
|
+
dupes = nr.slice(i + 1, nr.length).inject(0) { |a, c| a + (c[0] == nr[i][0] ? 1 : 0) }
|
169
|
+
|
170
|
+
# Fix Dupes
|
171
|
+
dupes.times { |j| nr[i + j + 1][0] = nr[i + j + 1][0] + 1 }
|
172
|
+
end
|
173
|
+
|
174
|
+
nr
|
175
|
+
end
|
176
|
+
|
155
177
|
# Touch up Table
|
156
178
|
# Splits Table into multiple headered tables.
|
157
179
|
# Also, strips Left Offset info from Table Cells.
|
@@ -177,8 +199,8 @@ module PDFTDX
|
|
177
199
|
# Compute Row Base (Default Columns)
|
178
200
|
row_base = Hash[*(cols.collect { |c| [c, ''] }.flatten)]
|
179
201
|
|
180
|
-
#
|
181
|
-
{ head: t[:head], data: t[:data].collect { |r| sort_row row_base.merge(Hash[*(r.collect { |o, c| [(cols.reverse.find { |co| co <= o }) || o, c] }.flatten)]) } }
|
202
|
+
# Re-Build Table
|
203
|
+
{ head: t[:head], data: t[:data].collect { |r| sort_row row_base.merge(Hash[*((fix_dupes r.collect { |o, c| [(cols.reverse.find { |co| co <= o }) || o, c] }).flatten)]) } }
|
182
204
|
end
|
183
205
|
|
184
206
|
# Drop Offsets
|
data/lib/pdftdx/version.rb
CHANGED