pdf-extract 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/assign.rb +72 -0
- data/bin/config.json +4 -0
- data/bin/fac_v19n11_s5.mask.pdf +0 -0
- data/bin/margins.mask.pdf +0 -0
- data/bin/one-column.mask.pdf +24110 -39
- data/bin/pdf-extract +146 -0
- data/bin/s002040050107_Arch_Toxicol_1994_68_8.mask.pdf +0 -0
- data/bin/some3.mask.pdf +0 -0
- data/bin/some5.mask.pdf +0 -0
- data/bin/some6.mask.pdf +0 -0
- data/bin/train.rb +48 -0
- data/bin/two-column.mask.pdf +0 -0
- data/data/familynames.db +0 -0
- data/data/stopwords.txt +1 -0
- data/lib/analysis/columns.rb +75 -0
- data/lib/analysis/margins.rb +84 -0
- data/lib/analysis/sections.rb +156 -0
- data/lib/analysis/titles.rb +53 -0
- data/lib/analysis/zones.rb +128 -0
- data/lib/font_metrics.rb +240 -0
- data/lib/kmeans.rb +114 -0
- data/lib/language.rb +58 -0
- data/lib/model/characters.rb +320 -0
- data/lib/model/chunks.rb +103 -0
- data/lib/model/regions.rb +112 -0
- data/lib/multi_range.rb +69 -0
- data/lib/names.rb +85 -0
- data/lib/pdf-extract.rb +77 -0
- data/lib/pdf.rb +255 -0
- data/lib/references/references.rb +184 -0
- data/lib/references/resolve.rb +113 -0
- data/lib/references/resolved_references.rb +37 -0
- data/lib/spatial.rb +188 -0
- data/lib/view/abstract_view.rb +32 -0
- data/lib/view/pdf_view.rb +43 -0
- data/lib/view/png_view.rb +30 -0
- data/lib/view/xml_view.rb +113 -0
- metadata +208 -0
@@ -0,0 +1,128 @@
|
|
1
|
+
module PdfExtract
  module Zones

    # TODO Headers/footers examine margins. Check header and footer
    # distance from margins. Should be within a factor of the body
    # area.

    # Ratio of marginless page height to minimum body height.
    Settings.default :body_ratio, 0.9

    # Registers the paged :zones spatial on +pdf+, followed by the
    # :headers, :footers and :bodies spatials that depend on it.
    #
    # The :zones parser collects the four page margins and the vertical
    # extent of every region, then emits one to three zone hashes per
    # invocation of its +after+ callback. Each hash carries :group
    # (:headers, :bodies or :footers), :x, :y, :width, :height, merged
    # over the :page, :page_width and :page_height of the top margin.
    def self.include_in(pdf)
      zone_deps = [:top_margins, :left_margins, :right_margins, :bottom_margins, :regions]

      pdf.spatials :zones, :paged => true, :depends_on => zone_deps do |parser|
        y_mask = MultiRange.new
        top_margin = nil
        bottom_margin = nil
        left_x = 0
        right_x = 0

        # NOTE(review): only y_mask is reset here; the margin values keep
        # whatever the previous run assigned -- confirm this is intended.
        parser.before { y_mask = MultiRange.new }

        # The zone area starts at the right edge of the left margin...
        parser.objects(:left_margins) { |lm| left_x = lm[:x] + lm[:width] }

        # ...and ends at the left edge of the right margin.
        parser.objects(:right_margins) { |rm| right_x = rm[:x] }

        parser.objects(:top_margins) { |m| top_margin = m }

        parser.objects(:bottom_margins) { |m| bottom_margin = m }

        # Record the vertical span covered by each region.
        parser.objects(:regions) do |r|
          y_mask.append r[:y]..(r[:y] + r[:height])
        end

        parser.after do
          ceiling = top_margin[:y]
          floor = bottom_margin[:y] + bottom_margin[:height]

          # Mask out a middle chunk of the document: a band centred between
          # the margins whose height is body_ratio times the marginless height.
          marginless_height = ceiling - floor
          inset = (marginless_height - (marginless_height * pdf.settings[:body_ratio])) / 2
          y_mask.append((floor + inset)..(ceiling - inset))

          width = right_x - left_x

          # Presumably MultiRange#count is the number of disjoint masked
          # bands and min_excluded/max_excluded are the edges of the gaps
          # between them -- TODO confirm against multi_range.rb.
          band_count = y_mask.count

          zones =
            if band_count < 2
              # One band: the whole marginless area is body.
              [{
                :group => :bodies,
                :x => left_x,
                :y => floor,
                :width => width,
                :height => marginless_height
              }]
            elsif band_count < 3
              # Two bands: split once; the taller part is the body and the
              # other is a header (if above) or a footer (if below).
              split_y = y_mask.max_excluded

              top = {
                :x => left_x,
                :y => split_y,
                :width => width,
                :height => ceiling - split_y
              }

              bottom = {
                :x => left_x,
                :y => floor,
                :width => width,
                :height => top[:y] - floor
              }

              top_group, bottom_group =
                top[:height] > bottom[:height] ? [:bodies, :footers] : [:headers, :bodies]
              top[:group] = top_group
              bottom[:group] = bottom_group

              [top, bottom]
            else
              # Three or more bands: header on top, footer at the bottom,
              # body is everything in between.
              header_y = y_mask.max_excluded

              header = {
                :group => :headers,
                :x => left_x,
                :y => header_y,
                :width => width,
                :height => ceiling - header_y
              }

              footer = {
                :group => :footers,
                :x => left_x,
                :y => floor,
                :width => width,
                :height => y_mask.min_excluded - floor
              }

              body_y = footer[:y] + footer[:height]

              body = {
                :group => :bodies,
                :x => left_x,
                :y => body_y,
                :width => width,
                :height => header[:y] - body_y
              }

              [header, body, footer]
            end

          # Page identity is copied from the top margin object.
          page_info = {
            :page => top_margin[:page],
            :page_width => top_margin[:page_width],
            :page_height => top_margin[:page_height]
          }

          zones.map { |zone| page_info.merge zone }
        end
      end

      pdf.spatials :headers, :depends_on => [:zones]
      pdf.spatials :footers, :depends_on => [:zones]
      pdf.spatials :bodies, :depends_on => [:zones]
    end

  end
end
|
data/lib/font_metrics.rb
ADDED
@@ -0,0 +1,240 @@
|
|
1
|
+
# Taken from pdfminer, in turn extracted from AFM files at:
|
2
|
+
#
|
3
|
+
# http://www.ctan.org/tex-archive/fonts/adobe/afm/
|
4
|
+
#
|
5
|
+
|
6
|
+
### BEGIN Verbatim copy of the license part
|
7
|
+
|
8
|
+
#
|
9
|
+
# Adobe Core 35 AFM Files with 229 Glyph Entries - ReadMe
|
10
|
+
#
|
11
|
+
# This file and the 35 PostScript(R) AFM files it accompanies may be
|
12
|
+
# used, copied, and distributed for any purpose and without charge,
|
13
|
+
# with or without modification, provided that all copyright notices
|
14
|
+
# are retained; that the AFM files are not distributed without this
|
15
|
+
# file; that all modifications to this file or any of the AFM files
|
16
|
+
# are prominently noted in the modified file(s); and that this
|
17
|
+
# paragraph is not modified. Adobe Systems has no responsibility or
|
18
|
+
# obligation to support the use of the AFM files.
|
19
|
+
#
|
20
|
+
|
21
|
+
### END Verbatim copy of the license part
|
22
|
+
|
23
|
+
module PdfExtract
|
24
|
+
class FontMetrics
|
25
|
+
|
26
|
+
attr_accessor :ascent, :descent, :bbox
|
27
|
+
|
28
|
+
# Builds the metrics for +font+.
#
# When the font's basefont name is a key of the built-in @@base_fonts
# table, ascent, descent, bounding box and glyph widths come from that
# table; otherwise they are read from +font+ itself (#ascent, #descent,
# #bbox, #glyph_width).
#
# font - object responding to #basefont and, for fonts not in the table,
#        to #ascent, #descent, #bbox and #glyph_width.
def initialize font
  base_font = font.basefont.to_s

  if @@base_fonts.key? base_font
    metrics = @@base_fonts[base_font]
    @bbox = metrics[:FontBBox]

    # The 'Symbol' and 'ZapfDingbats' entries have no :Ascent/:Descent
    # keys. Fall back to the bounding box's vertical extent (index 3 is
    # the top, index 1 the bottom) so these are not silently nil.
    @ascent = metrics.fetch(:Ascent) { @bbox && @bbox[3] }
    @descent = metrics.fetch(:Descent) { @bbox && @bbox[1] }

    # Widths are keyed by the character's first codepoint; codepoints
    # missing from the table get a width of 0.
    @glyph_width_lookup = proc { |c|
      @@base_fonts[base_font][:Widths].fetch(c.codepoints.first, 0)
    }
  else
    @ascent = font.ascent
    @descent = font.descent
    @bbox = font.bbox
    @glyph_width_lookup = proc { |c| font.glyph_width c }
  end
end
|
44
|
+
|
45
|
+
# Returns the width of glyph +c+ by calling the lookup proc held in
# @glyph_width_lookup (assigned in #initialize).
def glyph_width(c)
  @glyph_width_lookup.call(c)
end
|
48
|
+
|
49
|
+
@@base_fonts = {
|
50
|
+
'Courier-Oblique' => {
|
51
|
+
:FontName => 'Courier-Oblique',
|
52
|
+
:Descent => -194.0,
|
53
|
+
:FontBBox => [-49.0, -249.0, 749.0, 803.0],
|
54
|
+
:FontWeight => 'Medium',
|
55
|
+
:CapHeight => 572.0,
|
56
|
+
:FontFamily => 'Courier',
|
57
|
+
:Flags => 64,
|
58
|
+
:XHeight => 434.0,
|
59
|
+
:ItalicAngle => -11.0,
|
60
|
+
:Ascent => 627.0,
|
61
|
+
:Widths => {32 => 600, 33 => 600, 34 => 600, 35 => 600, 36 => 600, 37 => 600, 38 => 600, 39 => 600, 40 => 600, 41 => 600, 42 => 600, 43 => 600, 44 => 600, 45 => 600, 46 => 600, 47 => 600, 48 => 600, 49 => 600, 50 => 600, 51 => 600, 52 => 600, 53 => 600, 54 => 600, 55 => 600, 56 => 600, 57 => 600, 58 => 600, 59 => 600, 60 => 600, 61 => 600, 62 => 600, 63 => 600, 64 => 600, 65 => 600, 66 => 600, 67 => 600, 68 => 600, 69 => 600, 70 => 600, 71 => 600, 72 => 600, 73 => 600, 74 => 600, 75 => 600, 76 => 600, 77 => 600, 78 => 600, 79 => 600, 80 => 600, 81 => 600, 82 => 600, 83 => 600, 84 => 600, 85 => 600, 86 => 600, 87 => 600, 88 => 600, 89 => 600, 90 => 600, 91 => 600, 92 => 600, 93 => 600, 94 => 600, 95 => 600, 96 => 600, 97 => 600, 98 => 600, 99 => 600, 100 => 600, 101 => 600, 102 => 600, 103 => 600, 104 => 600, 105 => 600, 106 => 600, 107 => 600, 108 => 600, 109 => 600, 110 => 600, 111 => 600, 112 => 600, 113 => 600, 114 => 600, 115 => 600, 116 => 600, 117 => 600, 118 => 600, 119 => 600, 120 => 600, 121 => 600, 122 => 600, 123 => 600, 124 => 600, 125 => 600, 126 => 600, 161 => 600, 162 => 600, 163 => 600, 164 => 600, 165 => 600, 166 => 600, 167 => 600, 168 => 600, 169 => 600, 170 => 600, 171 => 600, 172 => 600, 173 => 600, 174 => 600, 175 => 600, 177 => 600, 178 => 600, 179 => 600, 180 => 600, 182 => 600, 183 => 600, 184 => 600, 185 => 600, 186 => 600, 187 => 600, 188 => 600, 189 => 600, 191 => 600, 193 => 600, 194 => 600, 195 => 600, 196 => 600, 197 => 600, 198 => 600, 199 => 600, 200 => 600, 202 => 600, 203 => 600, 205 => 600, 206 => 600, 207 => 600, 208 => 600, 225 => 600, 227 => 600, 232 => 600, 233 => 600, 234 => 600, 235 => 600, 241 => 600, 245 => 600, 248 => 600, 249 => 600, 250 => 600, 251 => 600}
|
62
|
+
},
|
63
|
+
|
64
|
+
'Times-BoldItalic' => {
|
65
|
+
:FontName => 'Times-BoldItalic',
|
66
|
+
:Descent => -217.0,
|
67
|
+
:FontBBox => [-200.0, -218.0, 996.0, 921.0],
|
68
|
+
:FontWeight => 'Bold',
|
69
|
+
:CapHeight => 669.0,
|
70
|
+
:FontFamily =>'Times',
|
71
|
+
:Flags => 0,
|
72
|
+
:XHeight => 462.0,
|
73
|
+
:ItalicAngle => -15.0,
|
74
|
+
:Ascent => 683.0,
|
75
|
+
:Widths => {32 => 250, 33 => 389, 34 => 555, 35 => 500, 36 => 500, 37 => 833, 38 => 778, 39 => 333, 40 => 333, 41 => 333, 42 => 500, 43 => 570, 44 => 250, 45 => 333, 46 => 250, 47 => 278, 48 => 500, 49 => 500, 50 => 500, 51 => 500, 52 => 500, 53 => 500, 54 => 500, 55 => 500, 56 => 500, 57 => 500, 58 => 333, 59 => 333, 60 => 570, 61 => 570, 62 => 570, 63 => 500, 64 => 832, 65 => 667, 66 => 667, 67 => 667, 68 => 722, 69 => 667, 70 => 667, 71 => 722, 72 => 778, 73 => 389, 74 => 500, 75 => 667, 76 => 611, 77 => 889, 78 => 722, 79 => 722, 80 => 611, 81 => 722, 82 => 667, 83 => 556, 84 => 611, 85 => 722, 86 => 667, 87 => 889, 88 => 667, 89 => 611, 90 => 611, 91 => 333, 92 => 278, 93 => 333, 94 => 570, 95 => 500, 96 => 333, 97 => 500, 98 => 500, 99 => 444, 100 => 500, 101 => 444, 102 => 333, 103 => 500, 104 => 556, 105 => 278, 106 => 278, 107 => 500, 108 => 278, 109 => 778, 110 => 556, 111 => 500, 112 => 500, 113 => 500, 114 => 389, 115 => 389, 116 => 278, 117 => 556, 118 => 444, 119 => 667, 120 => 500, 121 => 444, 122 => 389, 123 => 348, 124 => 220, 125 => 348, 126 => 570, 161 => 389, 162 => 500, 163 => 500, 164 => 167, 165 => 500, 166 => 500, 167 => 500, 168 => 500, 169 => 278, 170 => 500, 171 => 500, 172 => 333, 173 => 333, 174 => 556, 175 => 556, 177 => 500, 178 => 500, 179 => 500, 180 => 250, 182 => 500, 183 => 350, 184 => 333, 185 => 500, 186 => 500, 187 => 500, 188 => 1000, 189 => 1000, 191 => 500, 193 => 333, 194 => 333, 195 => 333, 196 => 333, 197 => 333, 198 => 333, 199 => 333, 200 => 333, 202 => 333, 203 => 333, 205 => 333, 206 => 333, 207 => 333, 208 => 1000, 225 => 944, 227 => 266, 232 => 611, 233 => 722, 234 => 944, 235 => 300, 241 => 722, 245 => 278, 248 => 278, 249 => 500, 250 => 722, 251 => 500}
|
76
|
+
},
|
77
|
+
|
78
|
+
'Helvetica-Bold' => {
|
79
|
+
:FontName => 'Helvetica-Bold',
|
80
|
+
:Descent => -207.0,
|
81
|
+
:FontBBox => [-170.0, -228.0, 1003.0, 962.0],
|
82
|
+
:FontWeight => 'Bold',
|
83
|
+
:CapHeight => 718.0,
|
84
|
+
:FontFamily => 'Helvetica',
|
85
|
+
:Flags => 0,
|
86
|
+
:XHeight => 532.0,
|
87
|
+
:ItalicAngle => 0.0,
|
88
|
+
:Ascent => 718.0,
|
89
|
+
:Widths => {32 => 278, 33 => 333, 34 => 474, 35 => 556, 36 => 556, 37 => 889, 38 => 722, 39 => 278, 40 => 333, 41 => 333, 42 => 389, 43 => 584, 44 => 278, 45 => 333, 46 => 278, 47 => 278, 48 => 556, 49 => 556, 50 => 556, 51 => 556, 52 => 556, 53 => 556, 54 => 556, 55 => 556, 56 => 556, 57 => 556, 58 => 333, 59 => 333, 60 => 584, 61 => 584, 62 => 584, 63 => 611, 64 => 975, 65 => 722, 66 => 722, 67 => 722, 68 => 722, 69 => 667, 70 => 611, 71 => 778, 72 => 722, 73 => 278, 74 => 556, 75 => 722, 76 => 611, 77 => 833, 78 => 722, 79 => 778, 80 => 667, 81 => 778, 82 => 722, 83 => 667, 84 => 611, 85 => 722, 86 => 667, 87 => 944, 88 => 667, 89 => 667, 90 => 611, 91 => 333, 92 => 278, 93 => 333, 94 => 584, 95 => 556, 96 => 278, 97 => 556, 98 => 611, 99 => 556, 100 => 611, 101 => 556, 102 => 333, 103 => 611, 104 => 611, 105 => 278, 106 => 278, 107 => 556, 108 => 278, 109 => 889, 110 => 611, 111 => 611, 112 => 611, 113 => 611, 114 => 389, 115 => 556, 116 => 333, 117 => 611, 118 => 556, 119 => 778, 120 => 556, 121 => 556, 122 => 500, 123 => 389, 124 => 280, 125 => 389, 126 => 584, 161 => 333, 162 => 556, 163 => 556, 164 => 167, 165 => 556, 166 => 556, 167 => 556, 168 => 556, 169 => 238, 170 => 500, 171 => 556, 172 => 333, 173 => 333, 174 => 611, 175 => 611, 177 => 556, 178 => 556, 179 => 556, 180 => 278, 182 => 556, 183 => 350, 184 => 278, 185 => 500, 186 => 500, 187 => 556, 188 => 1000, 189 => 1000, 191 => 611, 193 => 333, 194 => 333, 195 => 333, 196 => 333, 197 => 333, 198 => 333, 199 => 333, 200 => 333, 202 => 333, 203 => 333, 205 => 333, 206 => 333, 207 => 333, 208 => 1000, 225 => 1000, 227 => 370, 232 => 611, 233 => 778, 234 => 1000, 235 => 365, 241 => 889, 245 => 278, 248 => 278, 249 => 611, 250 => 944, 251 => 611}
|
90
|
+
},
|
91
|
+
|
92
|
+
'Courier' => {
|
93
|
+
:FontName => 'Courier',
|
94
|
+
:Descent => -194.0,
|
95
|
+
:FontBBox => [-6.0, -249.0, 639.0, 803.0],
|
96
|
+
:FontWeight => 'Medium',
|
97
|
+
:CapHeight => 572.0,
|
98
|
+
:FontFamily => 'Courier',
|
99
|
+
:Flags => 64,
|
100
|
+
:XHeight => 434.0,
|
101
|
+
:ItalicAngle => 0.0,
|
102
|
+
:Ascent => 627.0,
|
103
|
+
:Widths => {32 => 600, 33 => 600, 34 => 600, 35 => 600, 36 => 600, 37 => 600, 38 => 600, 39 => 600, 40 => 600, 41 => 600, 42 => 600, 43 => 600, 44 => 600, 45 => 600, 46 => 600, 47 => 600, 48 => 600, 49 => 600, 50 => 600, 51 => 600, 52 => 600, 53 => 600, 54 => 600, 55 => 600, 56 => 600, 57 => 600, 58 => 600, 59 => 600, 60 => 600, 61 => 600, 62 => 600, 63 => 600, 64 => 600, 65 => 600, 66 => 600, 67 => 600, 68 => 600, 69 => 600, 70 => 600, 71 => 600, 72 => 600, 73 => 600, 74 => 600, 75 => 600, 76 => 600, 77 => 600, 78 => 600, 79 => 600, 80 => 600, 81 => 600, 82 => 600, 83 => 600, 84 => 600, 85 => 600, 86 => 600, 87 => 600, 88 => 600, 89 => 600, 90 => 600, 91 => 600, 92 => 600, 93 => 600, 94 => 600, 95 => 600, 96 => 600, 97 => 600, 98 => 600, 99 => 600, 100 => 600, 101 => 600, 102 => 600, 103 => 600, 104 => 600, 105 => 600, 106 => 600, 107 => 600, 108 => 600, 109 => 600, 110 => 600, 111 => 600, 112 => 600, 113 => 600, 114 => 600, 115 => 600, 116 => 600, 117 => 600, 118 => 600, 119 => 600, 120 => 600, 121 => 600, 122 => 600, 123 => 600, 124 => 600, 125 => 600, 126 => 600, 161 => 600, 162 => 600, 163 => 600, 164 => 600, 165 => 600, 166 => 600, 167 => 600, 168 => 600, 169 => 600, 170 => 600, 171 => 600, 172 => 600, 173 => 600, 174 => 600, 175 => 600, 177 => 600, 178 => 600, 179 => 600, 180 => 600, 182 => 600, 183 => 600, 184 => 600, 185 => 600, 186 => 600, 187 => 600, 188 => 600, 189 => 600, 191 => 600, 193 => 600, 194 => 600, 195 => 600, 196 => 600, 197 => 600, 198 => 600, 199 => 600, 200 => 600, 202 => 600, 203 => 600, 205 => 600, 206 => 600, 207 => 600, 208 => 600, 225 => 600, 227 => 600, 232 => 600, 233 => 600, 234 => 600, 235 => 600, 241 => 600, 245 => 600, 248 => 600, 249 => 600, 250 => 600, 251 => 600}
|
104
|
+
},
|
105
|
+
|
106
|
+
'Courier-BoldOblique' => {
|
107
|
+
:FontName => 'Courier-BoldOblique',
|
108
|
+
:Descent => -194.0,
|
109
|
+
:FontBBox => [-49.0, -249.0, 758.0, 811.0],
|
110
|
+
:FontWeight => 'Bold',
|
111
|
+
:CapHeight => 572.0,
|
112
|
+
:FontFamily => 'Courier',
|
113
|
+
:Flags => 64,
|
114
|
+
:XHeight => 434.0,
|
115
|
+
:ItalicAngle => -11.0,
|
116
|
+
:Ascent => 627.0,
|
117
|
+
:Widths => {32 => 600, 33 => 600, 34 => 600, 35 => 600, 36 => 600, 37 => 600, 38 => 600, 39 => 600, 40 => 600, 41 => 600, 42 => 600, 43 => 600, 44 => 600, 45 => 600, 46 => 600, 47 => 600, 48 => 600, 49 => 600, 50 => 600, 51 => 600, 52 => 600, 53 => 600, 54 => 600, 55 => 600, 56 => 600, 57 => 600, 58 => 600, 59 => 600, 60 => 600, 61 => 600, 62 => 600, 63 => 600, 64 => 600, 65 => 600, 66 => 600, 67 => 600, 68 => 600, 69 => 600, 70 => 600, 71 => 600, 72 => 600, 73 => 600, 74 => 600, 75 => 600, 76 => 600, 77 => 600, 78 => 600, 79 => 600, 80 => 600, 81 => 600, 82 => 600, 83 => 600, 84 => 600, 85 => 600, 86 => 600, 87 => 600, 88 => 600, 89 => 600, 90 => 600, 91 => 600, 92 => 600, 93 => 600, 94 => 600, 95 => 600, 96 => 600, 97 => 600, 98 => 600, 99 => 600, 100 => 600, 101 => 600, 102 => 600, 103 => 600, 104 => 600, 105 => 600, 106 => 600, 107 => 600, 108 => 600, 109 => 600, 110 => 600, 111 => 600, 112 => 600, 113 => 600, 114 => 600, 115 => 600, 116 => 600, 117 => 600, 118 => 600, 119 => 600, 120 => 600, 121 => 600, 122 => 600, 123 => 600, 124 => 600, 125 => 600, 126 => 600, 161 => 600, 162 => 600, 163 => 600, 164 => 600, 165 => 600, 166 => 600, 167 => 600, 168 => 600, 169 => 600, 170 => 600, 171 => 600, 172 => 600, 173 => 600, 174 => 600, 175 => 600, 177 => 600, 178 => 600, 179 => 600, 180 => 600, 182 => 600, 183 => 600, 184 => 600, 185 => 600, 186 => 600, 187 => 600, 188 => 600, 189 => 600, 191 => 600, 193 => 600, 194 => 600, 195 => 600, 196 => 600, 197 => 600, 198 => 600, 199 => 600, 200 => 600, 202 => 600, 203 => 600, 205 => 600, 206 => 600, 207 => 600, 208 => 600, 225 => 600, 227 => 600, 232 => 600, 233 => 600, 234 => 600, 235 => 600, 241 => 600, 245 => 600, 248 => 600, 249 => 600, 250 => 600, 251 => 600}
|
118
|
+
},
|
119
|
+
|
120
|
+
'Times-Bold' => {
|
121
|
+
:FontName => 'Times-Bold',
|
122
|
+
:Descent => -217.0,
|
123
|
+
:FontBBox => [-168.0, -218.0, 1000.0, 935.0],
|
124
|
+
:FontWeight => 'Bold',
|
125
|
+
:CapHeight => 676.0,
|
126
|
+
:FontFamily => 'Times',
|
127
|
+
:Flags => 0,
|
128
|
+
:XHeight => 461.0,
|
129
|
+
:ItalicAngle => 0.0,
|
130
|
+
:Ascent => 683.0,
|
131
|
+
:Widths => {32 => 250, 33 => 333, 34 => 555, 35 => 500, 36 => 500, 37 => 1000, 38 => 833, 39 => 333, 40 => 333, 41 => 333, 42 => 500, 43 => 570, 44 => 250, 45 => 333, 46 => 250, 47 => 278, 48 => 500, 49 => 500, 50 => 500, 51 => 500, 52 => 500, 53 => 500, 54 => 500, 55 => 500, 56 => 500, 57 => 500, 58 => 333, 59 => 333, 60 => 570, 61 => 570, 62 => 570, 63 => 500, 64 => 930, 65 => 722, 66 => 667, 67 => 722, 68 => 722, 69 => 667, 70 => 611, 71 => 778, 72 => 778, 73 => 389, 74 => 500, 75 => 778, 76 => 667, 77 => 944, 78 => 722, 79 => 778, 80 => 611, 81 => 778, 82 => 722, 83 => 556, 84 => 667, 85 => 722, 86 => 722, 87 => 1000, 88 => 722, 89 => 722, 90 => 667, 91 => 333, 92 => 278, 93 => 333, 94 => 581, 95 => 500, 96 => 333, 97 => 500, 98 => 556, 99 => 444, 100 => 556, 101 => 444, 102 => 333, 103 => 500, 104 => 556, 105 => 278, 106 => 333, 107 => 556, 108 => 278, 109 => 833, 110 => 556, 111 => 500, 112 => 556, 113 => 556, 114 => 444, 115 => 389, 116 => 333, 117 => 556, 118 => 500, 119 => 722, 120 => 500, 121 => 500, 122 => 444, 123 => 394, 124 => 220, 125 => 394, 126 => 520, 161 => 333, 162 => 500, 163 => 500, 164 => 167, 165 => 500, 166 => 500, 167 => 500, 168 => 500, 169 => 278, 170 => 500, 171 => 500, 172 => 333, 173 => 333, 174 => 556, 175 => 556, 177 => 500, 178 => 500, 179 => 500, 180 => 250, 182 => 540, 183 => 350, 184 => 333, 185 => 500, 186 => 500, 187 => 500, 188 => 1000, 189 => 1000, 191 => 500, 193 => 333, 194 => 333, 195 => 333, 196 => 333, 197 => 333, 198 => 333, 199 => 333, 200 => 333, 202 => 333, 203 => 333, 205 => 333, 206 => 333, 207 => 333, 208 => 1000, 225 => 1000, 227 => 300, 232 => 667, 233 => 778, 234 => 1000, 235 => 330, 241 => 722, 245 => 278, 248 => 278, 249 => 500, 250 => 722, 251 => 556}
|
132
|
+
},
|
133
|
+
|
134
|
+
'Symbol' => {
|
135
|
+
:FontName => 'Symbol',
|
136
|
+
:FontBBox => [-180.0, -293.0, 1090.0, 1010.0],
|
137
|
+
:FontWeight => 'Medium',
|
138
|
+
:FontFamily => 'Symbol',
|
139
|
+
:Flags => 0,
|
140
|
+
:ItalicAngle => 0.0,
|
141
|
+
:Widths => {32 => 250, 33 => 333, 34 => 713, 35 => 500, 36 => 549, 37 => 833, 38 => 778, 39 => 439, 40 => 333, 41 => 333, 42 => 500, 43 => 549, 44 => 250, 45 => 549, 46 => 250, 47 => 278, 48 => 500, 49 => 500, 50 => 500, 51 => 500, 52 => 500, 53 => 500, 54 => 500, 55 => 500, 56 => 500, 57 => 500, 58 => 278, 59 => 278, 60 => 549, 61 => 549, 62 => 549, 63 => 444, 64 => 549, 65 => 722, 66 => 667, 67 => 722, 68 => 612, 69 => 611, 70 => 763, 71 => 603, 72 => 722, 73 => 333, 74 => 631, 75 => 722, 76 => 686, 77 => 889, 78 => 722, 79 => 722, 80 => 768, 81 => 741, 82 => 556, 83 => 592, 84 => 611, 85 => 690, 86 => 439, 87 => 768, 88 => 645, 89 => 795, 90 => 611, 91 => 333, 92 => 863, 93 => 333, 94 => 658, 95 => 500, 96 => 500, 97 => 631, 98 => 549, 99 => 549, 100 => 494, 101 => 439, 102 => 521, 103 => 411, 104 => 603, 105 => 329, 106 => 603, 107 => 549, 108 => 549, 109 => 576, 110 => 521, 111 => 549, 112 => 549, 113 => 521, 114 => 549, 115 => 603, 116 => 439, 117 => 576, 118 => 713, 119 => 686, 120 => 493, 121 => 686, 122 => 494, 123 => 480, 124 => 200, 125 => 480, 126 => 549, 160 => 750, 161 => 620, 162 => 247, 163 => 549, 164 => 167, 165 => 713, 166 => 500, 167 => 753, 168 => 753, 169 => 753, 170 => 753, 171 => 1042, 172 => 987, 173 => 603, 174 => 987, 175 => 603, 176 => 400, 177 => 549, 178 => 411, 179 => 549, 180 => 549, 181 => 713, 182 => 494, 183 => 460, 184 => 549, 185 => 549, 186 => 549, 187 => 549, 188 => 1000, 189 => 603, 190 => 1000, 191 => 658, 192 => 823, 193 => 686, 194 => 795, 195 => 987, 196 => 768, 197 => 768, 198 => 823, 199 => 768, 200 => 768, 201 => 713, 202 => 713, 203 => 713, 204 => 713, 205 => 713, 206 => 713, 207 => 713, 208 => 768, 209 => 713, 210 => 790, 211 => 790, 212 => 890, 213 => 823, 214 => 549, 215 => 250, 216 => 713, 217 => 603, 218 => 603, 219 => 1042, 220 => 987, 221 => 603, 222 => 987, 223 => 603, 224 => 494, 225 => 329, 226 => 790, 227 => 790, 228 => 786, 229 => 713, 230 => 384, 231 => 384, 232 => 384, 233 => 384, 234 => 384, 235 => 384, 
236 => 494, 237 => 494, 238 => 494, 239 => 494, 241 => 329, 242 => 274, 243 => 686, 244 => 686, 245 => 686, 246 => 384, 247 => 384, 248 => 384, 249 => 384, 250 => 384, 251 => 384, 252 => 494, 253 => 494, 254 => 494}
|
142
|
+
},
|
143
|
+
|
144
|
+
'Helvetica' => {
|
145
|
+
:FontName => 'Helvetica',
|
146
|
+
:Descent => -207.0,
|
147
|
+
:FontBBox => [-166.0, -225.0, 1000.0, 931.0],
|
148
|
+
:FontWeight => 'Medium',
|
149
|
+
:CapHeight => 718.0,
|
150
|
+
:FontFamily => 'Helvetica',
|
151
|
+
:Flags => 0,
|
152
|
+
:XHeight => 523.0,
|
153
|
+
:ItalicAngle => 0.0,
|
154
|
+
:Ascent => 718.0,
|
155
|
+
:Widths => {32 => 278, 33 => 278, 34 => 355, 35 => 556, 36 => 556, 37 => 889, 38 => 667, 39 => 222, 40 => 333, 41 => 333, 42 => 389, 43 => 584, 44 => 278, 45 => 333, 46 => 278, 47 => 278, 48 => 556, 49 => 556, 50 => 556, 51 => 556, 52 => 556, 53 => 556, 54 => 556, 55 => 556, 56 => 556, 57 => 556, 58 => 278, 59 => 278, 60 => 584, 61 => 584, 62 => 584, 63 => 556, 64 => 1015, 65 => 667, 66 => 667, 67 => 722, 68 => 722, 69 => 667, 70 => 611, 71 => 778, 72 => 722, 73 => 278, 74 => 500, 75 => 667, 76 => 556, 77 => 833, 78 => 722, 79 => 778, 80 => 667, 81 => 778, 82 => 722, 83 => 667, 84 => 611, 85 => 722, 86 => 667, 87 => 944, 88 => 667, 89 => 667, 90 => 611, 91 => 278, 92 => 278, 93 => 278, 94 => 469, 95 => 556, 96 => 222, 97 => 556, 98 => 556, 99 => 500, 100 => 556, 101 => 556, 102 => 278, 103 => 556, 104 => 556, 105 => 222, 106 => 222, 107 => 500, 108 => 222, 109 => 833, 110 => 556, 111 => 556, 112 => 556, 113 => 556, 114 => 333, 115 => 500, 116 => 278, 117 => 556, 118 => 500, 119 => 722, 120 => 500, 121 => 500, 122 => 500, 123 => 334, 124 => 260, 125 => 334, 126 => 584, 161 => 333, 162 => 556, 163 => 556, 164 => 167, 165 => 556, 166 => 556, 167 => 556, 168 => 556, 169 => 191, 170 => 333, 171 => 556, 172 => 333, 173 => 333, 174 => 500, 175 => 500, 177 => 556, 178 => 556, 179 => 556, 180 => 278, 182 => 537, 183 => 350, 184 => 222, 185 => 333, 186 => 333, 187 => 556, 188 => 1000, 189 => 1000, 191 => 611, 193 => 333, 194 => 333, 195 => 333, 196 => 333, 197 => 333, 198 => 333, 199 => 333, 200 => 333, 202 => 333, 203 => 333, 205 => 333, 206 => 333, 207 => 333, 208 => 1000, 225 => 1000, 227 => 370, 232 => 556, 233 => 778, 234 => 1000, 235 => 365, 241 => 889, 245 => 278, 248 => 222, 249 => 611, 250 => 944, 251 => 611}
|
156
|
+
},
|
157
|
+
|
158
|
+
'Helvetica-BoldOblique' => {
|
159
|
+
:FontName => 'Helvetica-BoldOblique',
|
160
|
+
:Descent => -207.0,
|
161
|
+
:FontBBox => [-175.0, -228.0, 1114.0, 962.0],
|
162
|
+
:FontWeight => 'Bold',
|
163
|
+
:CapHeight => 718.0,
|
164
|
+
:FontFamily => 'Helvetica',
|
165
|
+
:Flags => 0,
|
166
|
+
:XHeight => 532.0,
|
167
|
+
:ItalicAngle => -12.0,
|
168
|
+
:Ascent => 718.0,
|
169
|
+
:Widths => {32 => 278, 33 => 333, 34 => 474, 35 => 556, 36 => 556, 37 => 889, 38 => 722, 39 => 278, 40 => 333, 41 => 333, 42 => 389, 43 => 584, 44 => 278, 45 => 333, 46 => 278, 47 => 278, 48 => 556, 49 => 556, 50 => 556, 51 => 556, 52 => 556, 53 => 556, 54 => 556, 55 => 556, 56 => 556, 57 => 556, 58 => 333, 59 => 333, 60 => 584, 61 => 584, 62 => 584, 63 => 611, 64 => 975, 65 => 722, 66 => 722, 67 => 722, 68 => 722, 69 => 667, 70 => 611, 71 => 778, 72 => 722, 73 => 278, 74 => 556, 75 => 722, 76 => 611, 77 => 833, 78 => 722, 79 => 778, 80 => 667, 81 => 778, 82 => 722, 83 => 667, 84 => 611, 85 => 722, 86 => 667, 87 => 944, 88 => 667, 89 => 667, 90 => 611, 91 => 333, 92 => 278, 93 => 333, 94 => 584, 95 => 556, 96 => 278, 97 => 556, 98 => 611, 99 => 556, 100 => 611, 101 => 556, 102 => 333, 103 => 611, 104 => 611, 105 => 278, 106 => 278, 107 => 556, 108 => 278, 109 => 889, 110 => 611, 111 => 611, 112 => 611, 113 => 611, 114 => 389, 115 => 556, 116 => 333, 117 => 611, 118 => 556, 119 => 778, 120 => 556, 121 => 556, 122 => 500, 123 => 389, 124 => 280, 125 => 389, 126 => 584, 161 => 333, 162 => 556, 163 => 556, 164 => 167, 165 => 556, 166 => 556, 167 => 556, 168 => 556, 169 => 238, 170 => 500, 171 => 556, 172 => 333, 173 => 333, 174 => 611, 175 => 611, 177 => 556, 178 => 556, 179 => 556, 180 => 278, 182 => 556, 183 => 350, 184 => 278, 185 => 500, 186 => 500, 187 => 556, 188 => 1000, 189 => 1000, 191 => 611, 193 => 333, 194 => 333, 195 => 333, 196 => 333, 197 => 333, 198 => 333, 199 => 333, 200 => 333, 202 => 333, 203 => 333, 205 => 333, 206 => 333, 207 => 333, 208 => 1000, 225 => 1000, 227 => 370, 232 => 611, 233 => 778, 234 => 1000, 235 => 365, 241 => 889, 245 => 278, 248 => 278, 249 => 611, 250 => 944, 251 => 611}
|
170
|
+
},
|
171
|
+
|
172
|
+
'ZapfDingbats' => {
|
173
|
+
:FontName => 'ZapfDingbats',
|
174
|
+
:FontBBox => [-1.0, -143.0, 981.0, 820.0],
|
175
|
+
:FontWeight => 'Medium',
|
176
|
+
:FontFamily => 'ITC',
|
177
|
+
:Flags => 0,
|
178
|
+
:ItalicAngle => 0.0,
|
179
|
+
:Widths => {32 => 278, 33 => 974, 34 => 961, 35 => 974, 36 => 980, 37 => 719, 38 => 789, 39 => 790, 40 => 791, 41 => 690, 42 => 960, 43 => 939, 44 => 549, 45 => 855, 46 => 911, 47 => 933, 48 => 911, 49 => 945, 50 => 974, 51 => 755, 52 => 846, 53 => 762, 54 => 761, 55 => 571, 56 => 677, 57 => 763, 58 => 760, 59 => 759, 60 => 754, 61 => 494, 62 => 552, 63 => 537, 64 => 577, 65 => 692, 66 => 786, 67 => 788, 68 => 788, 69 => 790, 70 => 793, 71 => 794, 72 => 816, 73 => 823, 74 => 789, 75 => 841, 76 => 823, 77 => 833, 78 => 816, 79 => 831, 80 => 923, 81 => 744, 82 => 723, 83 => 749, 84 => 790, 85 => 792, 86 => 695, 87 => 776, 88 => 768, 89 => 792, 90 => 759, 91 => 707, 92 => 708, 93 => 682, 94 => 701, 95 => 826, 96 => 815, 97 => 789, 98 => 789, 99 => 707, 100 => 687, 101 => 696, 102 => 689, 103 => 786, 104 => 787, 105 => 713, 106 => 791, 107 => 785, 108 => 791, 109 => 873, 110 => 761, 111 => 762, 112 => 762, 113 => 759, 114 => 759, 115 => 892, 116 => 892, 117 => 788, 118 => 784, 119 => 438, 120 => 138, 121 => 277, 122 => 415, 123 => 392, 124 => 392, 125 => 668, 126 => 668, 128 => 390, 129 => 390, 130 => 317, 131 => 317, 132 => 276, 133 => 276, 134 => 509, 135 => 509, 136 => 410, 137 => 410, 138 => 234, 139 => 234, 140 => 334, 141 => 334, 161 => 732, 162 => 544, 163 => 544, 164 => 910, 165 => 667, 166 => 760, 167 => 760, 168 => 776, 169 => 595, 170 => 694, 171 => 626, 172 => 788, 173 => 788, 174 => 788, 175 => 788, 176 => 788, 177 => 788, 178 => 788, 179 => 788, 180 => 788, 181 => 788, 182 => 788, 183 => 788, 184 => 788, 185 => 788, 186 => 788, 187 => 788, 188 => 788, 189 => 788, 190 => 788, 191 => 788, 192 => 788, 193 => 788, 194 => 788, 195 => 788, 196 => 788, 197 => 788, 198 => 788, 199 => 788, 200 => 788, 201 => 788, 202 => 788, 203 => 788, 204 => 788, 205 => 788, 206 => 788, 207 => 788, 208 => 788, 209 => 788, 210 => 788, 211 => 788, 212 => 894, 213 => 838, 214 => 1016, 215 => 458, 216 => 748, 217 => 924, 218 => 748, 219 => 918, 220 => 927, 221 => 928, 222 => 928, 
223 => 834, 224 => 873, 225 => 828, 226 => 924, 227 => 924, 228 => 917, 229 => 930, 230 => 931, 231 => 463, 232 => 883, 233 => 836, 234 => 836, 235 => 867, 236 => 867, 237 => 696, 238 => 696, 239 => 874, 241 => 874, 242 => 760, 243 => 946, 244 => 771, 245 => 865, 246 => 771, 247 => 888, 248 => 967, 249 => 888, 250 => 831, 251 => 873, 252 => 927, 253 => 970, 254 => 918}
|
180
|
+
},
|
181
|
+
|
182
|
+
'Courier-Bold' => {
|
183
|
+
:FontName => 'Courier-Bold',
|
184
|
+
:Descent => -194.0,
|
185
|
+
:FontBBox => [-88.0, -249.0, 697.0, 811.0],
|
186
|
+
:FontWeight => 'Bold',
|
187
|
+
:CapHeight => 572.0,
|
188
|
+
:FontFamily => 'Courier',
|
189
|
+
:Flags => 64,
|
190
|
+
:XHeight => 434.0,
|
191
|
+
:ItalicAngle => 0.0,
|
192
|
+
:Ascent => 627.0,
|
193
|
+
:Widths => {32 => 600, 33 => 600, 34 => 600, 35 => 600, 36 => 600, 37 => 600, 38 => 600, 39 => 600, 40 => 600, 41 => 600, 42 => 600, 43 => 600, 44 => 600, 45 => 600, 46 => 600, 47 => 600, 48 => 600, 49 => 600, 50 => 600, 51 => 600, 52 => 600, 53 => 600, 54 => 600, 55 => 600, 56 => 600, 57 => 600, 58 => 600, 59 => 600, 60 => 600, 61 => 600, 62 => 600, 63 => 600, 64 => 600, 65 => 600, 66 => 600, 67 => 600, 68 => 600, 69 => 600, 70 => 600, 71 => 600, 72 => 600, 73 => 600, 74 => 600, 75 => 600, 76 => 600, 77 => 600, 78 => 600, 79 => 600, 80 => 600, 81 => 600, 82 => 600, 83 => 600, 84 => 600, 85 => 600, 86 => 600, 87 => 600, 88 => 600, 89 => 600, 90 => 600, 91 => 600, 92 => 600, 93 => 600, 94 => 600, 95 => 600, 96 => 600, 97 => 600, 98 => 600, 99 => 600, 100 => 600, 101 => 600, 102 => 600, 103 => 600, 104 => 600, 105 => 600, 106 => 600, 107 => 600, 108 => 600, 109 => 600, 110 => 600, 111 => 600, 112 => 600, 113 => 600, 114 => 600, 115 => 600, 116 => 600, 117 => 600, 118 => 600, 119 => 600, 120 => 600, 121 => 600, 122 => 600, 123 => 600, 124 => 600, 125 => 600, 126 => 600, 161 => 600, 162 => 600, 163 => 600, 164 => 600, 165 => 600, 166 => 600, 167 => 600, 168 => 600, 169 => 600, 170 => 600, 171 => 600, 172 => 600, 173 => 600, 174 => 600, 175 => 600, 177 => 600, 178 => 600, 179 => 600, 180 => 600, 182 => 600, 183 => 600, 184 => 600, 185 => 600, 186 => 600, 187 => 600, 188 => 600, 189 => 600, 191 => 600, 193 => 600, 194 => 600, 195 => 600, 196 => 600, 197 => 600, 198 => 600, 199 => 600, 200 => 600, 202 => 600, 203 => 600, 205 => 600, 206 => 600, 207 => 600, 208 => 600, 225 => 600, 227 => 600, 232 => 600, 233 => 600, 234 => 600, 235 => 600, 241 => 600, 245 => 600, 248 => 600, 249 => 600, 250 => 600, 251 => 600}
|
194
|
+
},
|
195
|
+
|
196
|
+
'Times-Italic' => {
|
197
|
+
:FontName => 'Times-Italic',
|
198
|
+
:Descent => -217.0,
|
199
|
+
:FontBBox => [-169.0, -217.0, 1010.0, 883.0],
|
200
|
+
:FontWeight => 'Medium',
|
201
|
+
:CapHeight => 653.0,
|
202
|
+
:FontFamily => 'Times',
|
203
|
+
:Flags => 0,
|
204
|
+
:XHeight => 441.0,
|
205
|
+
:ItalicAngle => -15.5,
|
206
|
+
:Ascent => 683.0,
|
207
|
+
:Widths => {32 => 250, 33 => 333, 34 => 420, 35 => 500, 36 => 500, 37 => 833, 38 => 778, 39 => 333, 40 => 333, 41 => 333, 42 => 500, 43 => 675, 44 => 250, 45 => 333, 46 => 250, 47 => 278, 48 => 500, 49 => 500, 50 => 500, 51 => 500, 52 => 500, 53 => 500, 54 => 500, 55 => 500, 56 => 500, 57 => 500, 58 => 333, 59 => 333, 60 => 675, 61 => 675, 62 => 675, 63 => 500, 64 => 920, 65 => 611, 66 => 611, 67 => 667, 68 => 722, 69 => 611, 70 => 611, 71 => 722, 72 => 722, 73 => 333, 74 => 444, 75 => 667, 76 => 556, 77 => 833, 78 => 667, 79 => 722, 80 => 611, 81 => 722, 82 => 611, 83 => 500, 84 => 556, 85 => 722, 86 => 611, 87 => 833, 88 => 611, 89 => 556, 90 => 556, 91 => 389, 92 => 278, 93 => 389, 94 => 422, 95 => 500, 96 => 333, 97 => 500, 98 => 500, 99 => 444, 100 => 500, 101 => 444, 102 => 278, 103 => 500, 104 => 500, 105 => 278, 106 => 278, 107 => 444, 108 => 278, 109 => 722, 110 => 500, 111 => 500, 112 => 500, 113 => 500, 114 => 389, 115 => 389, 116 => 278, 117 => 500, 118 => 444, 119 => 667, 120 => 444, 121 => 444, 122 => 389, 123 => 400, 124 => 275, 125 => 400, 126 => 541, 161 => 389, 162 => 500, 163 => 500, 164 => 167, 165 => 500, 166 => 500, 167 => 500, 168 => 500, 169 => 214, 170 => 556, 171 => 500, 172 => 333, 173 => 333, 174 => 500, 175 => 500, 177 => 500, 178 => 500, 179 => 500, 180 => 250, 182 => 523, 183 => 350, 184 => 333, 185 => 556, 186 => 556, 187 => 500, 188 => 889, 189 => 1000, 191 => 500, 193 => 333, 194 => 333, 195 => 333, 196 => 333, 197 => 333, 198 => 333, 199 => 333, 200 => 333, 202 => 333, 203 => 333, 205 => 333, 206 => 333, 207 => 333, 208 => 889, 225 => 889, 227 => 276, 232 => 556, 233 => 722, 234 => 944, 235 => 310, 241 => 667, 245 => 278, 248 => 278, 249 => 500, 250 => 667, 251 => 500}
|
208
|
+
},
|
209
|
+
|
210
|
+
'Times-Roman' => {
|
211
|
+
:FontName => 'Times-Roman',
|
212
|
+
:Descent => -217.0,
|
213
|
+
:FontBBox => [-168.0, -218.0, 1000.0, 898.0],
|
214
|
+
:FontWeight => 'Roman',
|
215
|
+
:CapHeight => 662.0,
|
216
|
+
:FontFamily => 'Times',
|
217
|
+
:Flags => 0,
|
218
|
+
:XHeight => 450.0,
|
219
|
+
:ItalicAngle => 0.0,
|
220
|
+
:Ascent => 683.0,
|
221
|
+
:Widths => {32 => 250, 33 => 333, 34 => 408, 35 => 500, 36 => 500, 37 => 833, 38 => 778, 39 => 333, 40 => 333, 41 => 333, 42 => 500, 43 => 564, 44 => 250, 45 => 333, 46 => 250, 47 => 278, 48 => 500, 49 => 500, 50 => 500, 51 => 500, 52 => 500, 53 => 500, 54 => 500, 55 => 500, 56 => 500, 57 => 500, 58 => 278, 59 => 278, 60 => 564, 61 => 564, 62 => 564, 63 => 444, 64 => 921, 65 => 722, 66 => 667, 67 => 667, 68 => 722, 69 => 611, 70 => 556, 71 => 722, 72 => 722, 73 => 333, 74 => 389, 75 => 722, 76 => 611, 77 => 889, 78 => 722, 79 => 722, 80 => 556, 81 => 722, 82 => 667, 83 => 556, 84 => 611, 85 => 722, 86 => 722, 87 => 944, 88 => 722, 89 => 722, 90 => 611, 91 => 333, 92 => 278, 93 => 333, 94 => 469, 95 => 500, 96 => 333, 97 => 444, 98 => 500, 99 => 444, 100 => 500, 101 => 444, 102 => 333, 103 => 500, 104 => 500, 105 => 278, 106 => 278, 107 => 500, 108 => 278, 109 => 778, 110 => 500, 111 => 500, 112 => 500, 113 => 500, 114 => 333, 115 => 389, 116 => 278, 117 => 500, 118 => 500, 119 => 722, 120 => 500, 121 => 500, 122 => 444, 123 => 480, 124 => 200, 125 => 480, 126 => 541, 161 => 333, 162 => 500, 163 => 500, 164 => 167, 165 => 500, 166 => 500, 167 => 500, 168 => 500, 169 => 180, 170 => 444, 171 => 500, 172 => 333, 173 => 333, 174 => 556, 175 => 556, 177 => 500, 178 => 500, 179 => 500, 180 => 250, 182 => 453, 183 => 350, 184 => 333, 185 => 444, 186 => 444, 187 => 500, 188 => 1000, 189 => 1000, 191 => 444, 193 => 333, 194 => 333, 195 => 333, 196 => 333, 197 => 333, 198 => 333, 199 => 333, 200 => 333, 202 => 333, 203 => 333, 205 => 333, 206 => 333, 207 => 333, 208 => 1000, 225 => 889, 227 => 276, 232 => 611, 233 => 722, 234 => 889, 235 => 310, 241 => 667, 245 => 278, 248 => 278, 249 => 500, 250 => 722, 251 => 500}
|
222
|
+
},
|
223
|
+
|
224
|
+
'Helvetica-Oblique' => {
|
225
|
+
:FontName => 'Helvetica-Oblique',
|
226
|
+
:Descent => -207.0,
|
227
|
+
:FontBBox => [-171.0, -225.0, 1116.0, 931.0],
|
228
|
+
:FontWeight => 'Medium',
|
229
|
+
:CapHeight => 718.0,
|
230
|
+
:FontFamily => 'Helvetica',
|
231
|
+
:Flags => 0,
|
232
|
+
:XHeight => 523.0,
|
233
|
+
:ItalicAngle => -12.0,
|
234
|
+
:Ascent => 718.0,
|
235
|
+
:Widths => {32 => 278, 33 => 278, 34 => 355, 35 => 556, 36 => 556, 37 => 889, 38 => 667, 39 => 222, 40 => 333, 41 => 333, 42 => 389, 43 => 584, 44 => 278, 45 => 333, 46 => 278, 47 => 278, 48 => 556, 49 => 556, 50 => 556, 51 => 556, 52 => 556, 53 => 556, 54 => 556, 55 => 556, 56 => 556, 57 => 556, 58 => 278, 59 => 278, 60 => 584, 61 => 584, 62 => 584, 63 => 556, 64 => 1015, 65 => 667, 66 => 667, 67 => 722, 68 => 722, 69 => 667, 70 => 611, 71 => 778, 72 => 722, 73 => 278, 74 => 500, 75 => 667, 76 => 556, 77 => 833, 78 => 722, 79 => 778, 80 => 667, 81 => 778, 82 => 722, 83 => 667, 84 => 611, 85 => 722, 86 => 667, 87 => 944, 88 => 667, 89 => 667, 90 => 611, 91 => 278, 92 => 278, 93 => 278, 94 => 469, 95 => 556, 96 => 222, 97 => 556, 98 => 556, 99 => 500, 100 => 556, 101 => 556, 102 => 278, 103 => 556, 104 => 556, 105 => 222, 106 => 222, 107 => 500, 108 => 222, 109 => 833, 110 => 556, 111 => 556, 112 => 556, 113 => 556, 114 => 333, 115 => 500, 116 => 278, 117 => 556, 118 => 500, 119 => 722, 120 => 500, 121 => 500, 122 => 500, 123 => 334, 124 => 260, 125 => 334, 126 => 584, 161 => 333, 162 => 556, 163 => 556, 164 => 167, 165 => 556, 166 => 556, 167 => 556, 168 => 556, 169 => 191, 170 => 333, 171 => 556, 172 => 333, 173 => 333, 174 => 500, 175 => 500, 177 => 556, 178 => 556, 179 => 556, 180 => 278, 182 => 537, 183 => 350, 184 => 222, 185 => 333, 186 => 333, 187 => 556, 188 => 1000, 189 => 1000, 191 => 611, 193 => 333, 194 => 333, 195 => 333, 196 => 333, 197 => 333, 198 => 333, 199 => 333, 200 => 333, 202 => 333, 203 => 333, 205 => 333, 206 => 333, 207 => 333, 208 => 1000, 225 => 1000, 227 => 370, 232 => 556, 233 => 778, 234 => 1000, 235 => 365, 241 => 889, 245 => 278, 248 => 222, 249 => 611, 250 => 944, 251 => 611}
|
236
|
+
}
|
237
|
+
}
|
238
|
+
|
239
|
+
end
|
240
|
+
end
|
data/lib/kmeans.rb
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
module PdfExtract
  # A small k-means implementation working on hash-like items.
  #
  # Items are objects that respond to #[] and yield a numeric value for
  # every key in the +keys+ list. A cluster is represented as a hash of
  # the form {:centre => {key => number}, :items => [item, ...]}.
  module Kmeans

    # Builds a new hash holding only the given +keys+ of +item+.
    #
    # item - object responding to #[].
    # keys - list of keys to copy.
    #
    # Returns a Hash mapping each key to item[key].
    def self.take_keys(item, keys)
      keys.each_with_object({}) do |key, picked|
        picked[key] = item[key]
      end
    end

    # Euclidean distance between +a+ and +b+ over the dimensions named
    # by +keys+.
    #
    # Returns a Float (0.0 when +keys+ is empty).
    def self.ndist(a, b, keys)
      squares = keys.inject(0) do |sum, key|
        sum + (a[key] - b[key]) ** 2
      end

      Math.sqrt squares
    end

    # Mean position of the items currently assigned to +cluster+.
    #
    # The dimensions averaged are the keys of the cluster's current
    # :centre. Returns a Hash of key => Float, or an empty Hash when the
    # cluster holds no items (there is nothing to average).
    def self.cluster_centre(cluster)
      members = cluster[:items]
      return {} if members.empty?

      count = members.length.to_f

      cluster[:centre].keys.each_with_object({}) do |key, centre|
        total = members.inject(0) { |sum, item| sum + item[key] }
        centre[key] = total / count
      end
    end

    # Groups +items+ into clusters using k-means.
    #
    # items   - list of items to cluster.
    # keys    - dimensions used for distance and centre calculations.
    # options - Hash of:
    #           :k      - number of randomly seeded clusters (default 10).
    #           :delta  - iteration stops once no centre moved further
    #                     than this distance (default 0.001).
    #           :init   - list of explicit starting centres (default []).
    #           :random - when true, also seed :k centres taken from
    #                     randomly chosen items (default true).
    #
    # Returns a list of clusters. Clusters that end up with no items are
    # dropped, so the result may hold fewer than the requested number.
    # Returns an empty list when there are no items or no starting
    # centres.
    def self.clusters(items, keys, options = {})
      options = {
        :k => 10,
        :delta => 0.001,
        :init => [],
        :random => true
      }.merge options

      # Without items there is nothing to seed from and nothing to assign.
      return [] if items.empty?

      cs = options[:init].map do |centre|
        {:centre => centre, :items => []}
      end

      # Make k clusters with random centre points. The index expression is
      # kept as-is so that results under a fixed srand seed do not change.
      if options[:random]
        options[:k].times do
          idx = (items.length * rand).to_i
          cs << {:centre => take_keys(items[idx], keys), :items => []}
        end
      end

      # No centres means no cluster could ever receive an item.
      return [] if cs.empty?

      loop do
        # Add each item to its nearest cluster
        items.each do |item|
          nearest_cluster(item, cs, keys)[:items] << item
        end

        # Drop clusters with no items (often because of duplicate
        # initial centre points)
        cs = cs.reject { |cluster| cluster[:items].empty? }

        # Distances are never negative, so 0.0 is the correct floor.
        # (Float::MIN is the smallest *positive* float and would make a
        # :delta of 0 unreachable.)
        max_delta = 0.0

        # Recalculate centre points and max delta
        cs.each do |cluster|
          centre = cluster_centre cluster
          moved = ndist(cluster[:centre], centre, keys)
          cluster[:centre] = centre
          max_delta = [moved, max_delta].max
        end

        break if max_delta <= options[:delta]

        # Not converged yet: empty every cluster for the next pass.
        cs.each { |cluster| cluster[:items] = [] }
      end

      cs
    end

    # Finds the cluster whose centre is closest to +item+. On a tie the
    # earliest cluster in +clusters+ wins.
    def self.nearest_cluster(item, clusters, keys)
      best = nil
      best_distance = nil

      clusters.each do |cluster|
        distance = ndist(item, cluster[:centre], keys)
        if best_distance.nil? || distance < best_distance
          best = cluster
          best_distance = distance
        end
      end

      best
    end
    private_class_method :nearest_cluster

  end
end
|
data/lib/language.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
require_relative "names"
|
2
|
+
|
3
|
+
module PdfExtract
  # Text heuristics: ligature/quote clean-up and simple per-string ratios.
  module Language

    # Typographic characters and the plain ASCII text that replaces them.
    TRANSLITERATIONS = {
      "\ufb01" => "fi",   # fi ligature
      "\ufb02" => "fl",   # fl ligature
      "\u2018" => "'",    # left single quote
      "\u2019" => "'",    # right single quote
      "\u2013" => "-",    # en dash
      "\u201c" => "\"",   # left double quote
      "\u201d" => "\""    # right double quote
    }.freeze

    # Matches any single character listed in TRANSLITERATIONS.
    TRANSLITERATION_PATTERN = /[\ufb01\ufb02\u2018\u2019\u2013\u201c\u201d]/

    # String#count selector used by letter_ratio: A-Z, 0-9 and the
    # punctuation - [ ] , . " ' ( ). The hyphen is escaped so it is
    # unambiguously a literal rather than part of a range.
    LETTER_RATIO_CHARS = "A-Z0-9\\-[],.\"'()".freeze

    # Returns a copy of +s+ with ligatures, curly quotes and en dashes
    # replaced by their ASCII equivalents. +s+ itself is not modified.
    def self.transliterate(s)
      s.gsub(TRANSLITERATION_PATTERN, TRANSLITERATIONS)
    end

    # Fraction of the characters in +s+ that are upper-case ASCII
    # letters, digits or one of - [ ] , . " ' ( ).
    #
    # NOTE(review): lower-case letters are not counted even though the
    # name suggests they might be - confirm this is intended before
    # changing it, since it alters the computed ratio.
    #
    # Returns a Float; 0.0 for an empty string (instead of NaN).
    def self.letter_ratio(s)
      return 0.0 if s.empty?

      s.count(LETTER_RATIO_CHARS) / s.length.to_f
    end

    # Number of mid-sentence capital letters in +s+ divided by its
    # number of whitespace-separated words.
    #
    # A capital is skipped when it is the first non-whitespace character
    # of the string or the first one after a full stop.
    #
    # Returns a Float; 0.0 when +s+ contains no words (instead of NaN).
    #
    # TODO Ignore caps in middle of words
    def self.cap_ratio(s)
      word_total = s.split.length
      return 0.0 if word_total.zero?

      sentence_end = true
      cap_count = 0

      s.each_char do |c|
        case c
        when "."
          sentence_end = true
        when /[A-Z]/
          cap_count += 1 unless sentence_end
          sentence_end = false
        when /\S/
          sentence_end = false
        end
        # Whitespace leaves sentence_end untouched.
      end

      cap_count / word_total.to_f
    end

    # Fraction of whitespace-separated words in +s+ that contain a run
    # of four consecutive digits.
    #
    # The pattern is equivalent to the former /\.*\d{4}\.*/, whose
    # optional dots never affected whether a word matched.
    #
    # Returns a Float; 0.0 when +s+ contains no words (instead of NaN).
    def self.year_ratio(s)
      words = s.split
      return 0.0 if words.empty?

      words.count { |word| word =~ /\d{4}/ } / words.length.to_f
    end

    # Returns the :name_frequency entry of the result of
    # PdfExtract::Names.detect_names for +content+.
    def self.name_ratio(content)
      PdfExtract::Names.detect_names(content)[:name_frequency]
    end

    # Number of whitespace-separated words in +s+.
    def self.word_count(s)
      s.split.length
    end

  end
end
|
58
|
+
|