pdf-extract 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/s6.mask.pdf +0 -0
- data/lib/analysis/columns.rb +3 -0
- data/lib/font_metrics.rb +3 -0
- metadata +4 -3
data/bin/s6.mask.pdf
ADDED
Binary file
|
data/lib/analysis/columns.rb
CHANGED
@@ -47,6 +47,9 @@ module PdfExtract
|
|
47
47
|
y = body[:y] + (body[:height] * i * step)
|
48
48
|
column_ranges << columns_at(y, body_regions)
|
49
49
|
end
|
50
|
+
|
51
|
+
# Discard those with a coverage of 0.
|
52
|
+
column_ranges.reject! { |r| r.covered.zero? }
|
50
53
|
|
51
54
|
# Discard those with more than x columns. They've probably hit a table.
|
52
55
|
column_ranges.reject! { |r| r.count > pdf.settings[:max_column_count] }
|
data/lib/font_metrics.rb
CHANGED
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 0
|
7
7
|
- 0
|
8
|
-
- 1
|
9
|
-
version: 0.0.1
|
8
|
+
- 2
|
9
|
+
version: 0.0.2
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Karl Jonathan Ward
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-10-
|
17
|
+
date: 2011-10-24 00:00:00 +01:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -140,6 +140,7 @@ files:
|
|
140
140
|
- bin/one-column.mask.pdf
|
141
141
|
- bin/pdf-extract
|
142
142
|
- bin/s002040050107_Arch_Toxicol_1994_68_8.mask.pdf
|
143
|
+
- bin/s6.mask.pdf
|
143
144
|
- bin/some3.mask.pdf
|
144
145
|
- bin/some5.mask.pdf
|
145
146
|
- bin/some6.mask.pdf
|