alphasights-prawn 0.10.0 → 0.10.1
Sign up to get free protection for your applications and to get access to all the features.
- data/vendor/pdf-inspector/README +18 -0
- data/vendor/pdf-inspector/lib/pdf/inspector.rb +26 -0
- data/vendor/pdf-inspector/lib/pdf/inspector/extgstate.rb +18 -0
- data/vendor/pdf-inspector/lib/pdf/inspector/graphics.rb +131 -0
- data/vendor/pdf-inspector/lib/pdf/inspector/page.rb +25 -0
- data/vendor/pdf-inspector/lib/pdf/inspector/text.rb +46 -0
- data/vendor/pdf-inspector/lib/pdf/inspector/xobject.rb +19 -0
- data/vendor/ttfunk/data/fonts/DejaVuSans.ttf +0 -0
- data/vendor/ttfunk/data/fonts/comicsans.ttf +0 -0
- data/vendor/ttfunk/example.rb +45 -0
- data/vendor/ttfunk/lib/ttfunk.rb +102 -0
- data/vendor/ttfunk/lib/ttfunk/directory.rb +17 -0
- data/vendor/ttfunk/lib/ttfunk/encoding/mac_roman.rb +88 -0
- data/vendor/ttfunk/lib/ttfunk/encoding/windows_1252.rb +69 -0
- data/vendor/ttfunk/lib/ttfunk/reader.rb +44 -0
- data/vendor/ttfunk/lib/ttfunk/resource_file.rb +78 -0
- data/vendor/ttfunk/lib/ttfunk/subset.rb +18 -0
- data/vendor/ttfunk/lib/ttfunk/subset/base.rb +141 -0
- data/vendor/ttfunk/lib/ttfunk/subset/mac_roman.rb +50 -0
- data/vendor/ttfunk/lib/ttfunk/subset/unicode.rb +48 -0
- data/vendor/ttfunk/lib/ttfunk/subset/unicode_8bit.rb +63 -0
- data/vendor/ttfunk/lib/ttfunk/subset/windows_1252.rb +55 -0
- data/vendor/ttfunk/lib/ttfunk/subset_collection.rb +72 -0
- data/vendor/ttfunk/lib/ttfunk/table.rb +46 -0
- data/vendor/ttfunk/lib/ttfunk/table/cmap.rb +34 -0
- data/vendor/ttfunk/lib/ttfunk/table/cmap/format00.rb +54 -0
- data/vendor/ttfunk/lib/ttfunk/table/cmap/format04.rb +126 -0
- data/vendor/ttfunk/lib/ttfunk/table/cmap/subtable.rb +79 -0
- data/vendor/ttfunk/lib/ttfunk/table/glyf.rb +64 -0
- data/vendor/ttfunk/lib/ttfunk/table/glyf/compound.rb +81 -0
- data/vendor/ttfunk/lib/ttfunk/table/glyf/simple.rb +37 -0
- data/vendor/ttfunk/lib/ttfunk/table/head.rb +44 -0
- data/vendor/ttfunk/lib/ttfunk/table/hhea.rb +41 -0
- data/vendor/ttfunk/lib/ttfunk/table/hmtx.rb +47 -0
- data/vendor/ttfunk/lib/ttfunk/table/kern.rb +79 -0
- data/vendor/ttfunk/lib/ttfunk/table/kern/format0.rb +62 -0
- data/vendor/ttfunk/lib/ttfunk/table/loca.rb +43 -0
- data/vendor/ttfunk/lib/ttfunk/table/maxp.rb +40 -0
- data/vendor/ttfunk/lib/ttfunk/table/name.rb +125 -0
- data/vendor/ttfunk/lib/ttfunk/table/os2.rb +78 -0
- data/vendor/ttfunk/lib/ttfunk/table/post.rb +91 -0
- data/vendor/ttfunk/lib/ttfunk/table/post/format10.rb +43 -0
- data/vendor/ttfunk/lib/ttfunk/table/post/format20.rb +35 -0
- data/vendor/ttfunk/lib/ttfunk/table/post/format25.rb +23 -0
- data/vendor/ttfunk/lib/ttfunk/table/post/format30.rb +17 -0
- data/vendor/ttfunk/lib/ttfunk/table/post/format40.rb +17 -0
- data/vendor/ttfunk/lib/ttfunk/table/simple.rb +14 -0
- metadata +50 -3
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'ttfunk/subset/base'
|
3
|
+
|
4
|
+
module TTFunk
  module Subset
    # A font subset whose character codes are the Unicode code points
    # themselves: characters registered through #use are collected in a Set
    # and are never re-coded (see #from_unicode).
    class Unicode < Base
      # +original+ is handed to Base#initialize unchanged.
      def initialize(original)
        super
        @subset = Set.new
      end

      # This subset is addressed by Unicode code points.
      def unicode?
        true
      end

      # Returns a Hash mapping every registered code point to itself.
      def to_unicode_map
        @subset.inject({}) { |map, code| map[code] = code; map }
      end

      # Registers +character+ (a code point) as a member of this subset.
      def use(character)
        @subset << character
      end

      # Always true: this subset accepts any character.
      def covers?(character)
        true
      end

      # Returns true if +character+ has already been registered via #use.
      def includes?(character)
        # Set defines #include? (there is no Set#includes), so the previous
        # spelling raised NoMethodError on every call.
        @subset.include?(character)
      end

      # Codes in this subset are the Unicode code points, so the character
      # is returned untouched.
      def from_unicode(character)
        character
      end

      protected

      # Builds the cmap for the subset from a code point => original glyph id
      # mapping. +unicode_cmap+ is not defined in this class; presumably it
      # is supplied by Base.
      def new_cmap_table(options)
        mapping = @subset.inject({}) { |map, code| map[code] = unicode_cmap[code]; map }
        TTFunk::Table::Cmap.encode(mapping, :unicode)
      end

      # Sorted, de-duplicated glyph ids (from the original font) needed by
      # the registered characters; glyph 0 is always included.
      def original_glyph_ids
        ([0] + @subset.map { |code| unicode_cmap[code] }).uniq.sort
      end
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'ttfunk/subset/base'
|
3
|
+
|
4
|
+
module TTFunk
  module Subset
    # A subset that squeezes arbitrary Unicode characters into single-byte
    # codes, handing out the next free code each time a new character is
    # registered.
    class Unicode8Bit < Base
      def initialize(original)
        super
        # Two directions of the same table: code => unicode and
        # unicode => code. The space character is pre-assigned to 0x20.
        @subset = { 0x20 => 0x20 }
        @unicodes = { 0x20 => 0x20 }
        @next = 0x21 # apparently, PDF's don't like to use chars between 0-31
      end

      def unicode?
        true
      end

      # Returns a copy of the code => unicode table.
      def to_unicode_map
        @subset.dup
      end

      # Assigns the next free code to +character+ unless it already has one.
      def use(character)
        return if @unicodes.key?(character)

        @subset[@next] = character
        @unicodes[character] = @next
        @next += 1
      end

      # True when +character+ already has a code, or when a code below 256
      # is still available for it.
      def covers?(character)
        @next < 256 || @unicodes.key?(character)
      end

      # True when +character+ has been assigned a code.
      def includes?(character)
        @unicodes.key?(character)
      end

      # Returns the code assigned to +character+ (nil if none).
      def from_unicode(character)
        @unicodes[character]
      end

      protected

      def new_cmap_table(options)
        mapping = {}
        @subset.each do |code, unicode|
          mapping[code] = unicode_cmap[unicode]
        end

        # since we're mapping a subset of the unicode glyphs into an
        # arbitrary 256-character space, the actual encoding we're
        # using is irrelevant. We choose MacRoman because it's a 256-character
        # encoding that happens to be well-supported in both TTF and
        # PDF formats.
        TTFunk::Table::Cmap.encode(mapping, :mac_roman)
      end

      # Sorted, de-duplicated original glyph ids for every assigned
      # character; glyph 0 is always included.
      def original_glyph_ids
        glyph_ids = @unicodes.keys.map { |unicode| unicode_cmap[unicode] }
        glyph_ids.unshift(0).uniq.sort
      end
    end
  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'set'
|
2
|
+
require 'ttfunk/subset/base'
|
3
|
+
require 'ttfunk/encoding/windows_1252'
|
4
|
+
|
5
|
+
module TTFunk
  module Subset
    # A subset addressed by Windows-1252 character codes. @subset is a
    # 256-slot Array indexed by cp1252 code; a slot holds the Unicode
    # character registered for that code, or nil when the code is unused.
    class Windows1252 < Base
      def initialize(original)
        super
        @subset = Array.new(256)
      end

      # Returns the Encoding::Windows1252::TO_UNICODE table.
      def to_unicode_map
        Encoding::Windows1252::TO_UNICODE
      end

      # Records +character+ in the slot given by FROM_UNICODE. Callers are
      # expected to have checked #covers? first: an unmapped character would
      # yield a nil index here.
      def use(character)
        @subset[Encoding::Windows1252::FROM_UNICODE[character]] = character
      end

      # Delegates to Encoding::Windows1252.covers?.
      def covers?(character)
        Encoding::Windows1252.covers?(character)
      end

      # Truthy when +character+ has a cp1252 code and that slot is in use.
      def includes?(character)
        code = Encoding::Windows1252::FROM_UNICODE[character]
        code && @subset[code]
      end

      # Returns the cp1252 code for +character+ as given by FROM_UNICODE.
      def from_unicode(character)
        Encoding::Windows1252::FROM_UNICODE[character]
      end

      protected

      def new_cmap_table(options)
        mapping = {}
        @subset.each_with_index do |unicode, cp1252|
          # Skip unused slots. The guard used to test the index (cp1252),
          # which is always truthy in Ruby -- even 0 -- so every empty slot
          # was looked up as unicode_cmap[nil].
          mapping[cp1252] = unicode_cmap[unicode] if unicode
        end

        # yes, I really mean "mac roman". TTF has no cp1252 encoding, and the
        # alternative would be to encode it using a format 4 unicode table, which
        # is overkill. for our purposes, mac-roman suffices. (If we were building
        # a _real_ font, instead of a PDF-embeddable subset, things would probably
        # be different.)
        TTFunk::Table::Cmap.encode(mapping, :mac_roman)
      end

      # Sorted, de-duplicated original glyph ids for the used slots; glyph 0
      # is always included and unused (nil) slots are dropped.
      def original_glyph_ids
        ([0] + @subset.map { |unicode| unicode && unicode_cmap[unicode] }).compact.uniq.sort
      end
    end
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'ttfunk/subset'
|
2
|
+
|
3
|
+
module TTFunk
  # Manages an ordered list of font subsets for one original font. It starts
  # with a single :mac_roman subset and appends :unicode_8bit subsets
  # whenever a character is not covered by any existing subset.
  class SubsetCollection
    def initialize(original)
      @original = original
      @subsets = [Subset.for(@original, :mac_roman)]
    end

    # Returns the subset stored at index +subset+.
    def [](subset)
      @subsets[subset]
    end

    # +characters+ should be an array of UTF-16 characters. Each one is
    # registered with the first subset that covers it; when none does, a
    # fresh :unicode_8bit subset is appended and used instead.
    def use(characters)
      characters.each do |char|
        target = @subsets.find { |candidate| candidate.covers?(char) }

        unless target
          target = Subset.for(@original, :unicode_8bit)
          @subsets << target
        end

        target.use(char)
      end
    end

    # +characters+ should be an array of UTF-16 characters. Returns
    # an array of subset chunks, where each chunk is another array of
    # two elements. The first element is the subset number, and the
    # second element is the string of characters to render with that
    # font subset. The strings will be encoded for their subset font,
    # and so may not look (in the raw) like what was passed in, but
    # they will render correctly with the indicated subset font.
    def encode(characters)
      return [] if characters.empty?

      # TODO: probably would be more optimal to nix the #use method,
      # and merge it into this one, so it can be done in a single
      # pass instead of two passes.
      use(characters)

      chunks = []
      subset_index = 0
      position = 0

      loop do
        subset = @subsets[subset_index]

        # Consume as many consecutive characters as this subset holds.
        while subset.includes?(characters[position])
          encoded = subset.from_unicode(characters[position])
          tail = chunks.last

          if tail && tail[0] == subset_index
            tail[1] << encoded
          else
            chunks << [subset_index, encoded.chr]
          end

          position += 1
          return chunks if position >= characters.length
        end

        # Try the next subset, wrapping back to the first after the last.
        subset_index = (subset_index + 1) % @subsets.length
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'ttfunk/reader'
|
2
|
+
|
3
|
+
module TTFunk
  # Base class for font tables. On construction it looks up its own
  # directory entry in +file+ (keyed by #tag) and, when an entry exists,
  # records the offset and length and runs the parse! hook at that offset.
  class Table
    include Reader

    attr_reader :file, :offset, :length

    def initialize(file)
      @file = file

      info = file.directory_info(tag)
      return unless info

      @offset = info[:offset]
      @length = info[:length]

      parse_from(@offset) { parse! }
    end

    # True when a directory entry was found for this table.
    def exists?
      !@offset.nil?
    end

    # Returns +length+ bytes read at +offset+, or nil when the table does
    # not exist. (parse_from and io are not defined here; presumably they
    # come from Reader.)
    def raw
      return nil unless exists?

      parse_from(offset) { io.read(length) }
    end

    # The directory tag: the last segment of the class name, lower-cased.
    def tag
      self.class.name.split(/::/)[-1].downcase
    end

    private

    # Hook for subclasses.
    def parse!
      # do nothing, by default
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module TTFunk
  class Table
    # The cmap table: a version number plus a list of subtables.
    class Cmap < Table
      attr_reader :version, :tables

      # Encodes +charmap+ with Cmap::Subtable.encode and wraps the encoded
      # subtable in a cmap header. The returned hash is the subtable result
      # with its :subtable entry replaced by a :table entry.
      def self.encode(charmap, encoding)
        encoded = Cmap::Subtable.encode(charmap, encoding)
        subtable = encoded.delete(:subtable)

        # pack 'version' and 'table-count'
        encoded[:table] = [0, 1, subtable].pack("nnA*")
        encoded
      end

      # The subtables that report themselves as unicode (memoized).
      def unicode
        @unicode ||= @tables.select(&:unicode?)
      end

      private

      # Reads the version and subtable count, then builds one Subtable per
      # entry.
      def parse!
        @version, table_count = read(4, "nn")
        @tables = []

        table_count.times { @tables.push(Cmap::Subtable.new(file, offset)) }
      end
    end

  end
end
|
33
|
+
|
34
|
+
require 'ttfunk/table/cmap/subtable'
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'ttfunk/encoding/mac_roman'
|
2
|
+
require 'ttfunk/encoding/windows_1252'
|
3
|
+
|
4
|
+
module TTFunk
  class Table
    class Cmap

      # Reader/encoder for format 0 cmap subtables (a flat array of 256
      # one-byte glyph indexes).
      module Format00
        attr_reader :language
        attr_reader :code_map

        # Expects a hash mapping character codes to glyph ids (where the
        # glyph ids are from the original font). Returns a hash including
        # a new map (:charmap) that maps the characters in charmap to a
        # another hash containing both the old (:old) and new (:new) glyph
        # ids. The returned hash also includes a :subtable key, which contains
        # the encoded subtable for the given charmap.
        def self.encode(charmap)
          last_id = 0
          old_to_new = { 0 => 0 } # glyph 0 always stays glyph 0
          indexes = Array.new(256, 0)
          renumbered = {}

          # New glyph ids are handed out in ascending character-code order.
          charmap.keys.sort.each do |code|
            old_id = charmap[code]
            old_to_new[old_id] ||= (last_id += 1)
            new_id = old_to_new[old_id]

            renumbered[code] = { :old => old_id, :new => new_id }
            indexes[code] = new_id
          end

          # format, length, language, indices
          subtable = [0, 262, 0].pack("nnn") + indexes.pack("C*")

          { :charmap => renumbered, :subtable => subtable, :max_glyph_id => last_id + 1 }
        end

        # Glyph id for +code+, or 0 when the code has no entry.
        def [](code)
          @code_map[code] || 0
        end

        def supported?
          true
        end

        private

        # Reads the length/language header (the length is discarded) and
        # then the 256 glyph indexes.
        def parse_cmap!
          _length, @language = read(4, "nn")
          @code_map = read(256, "C*")
        end
      end

    end
  end
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
module TTFunk
  class Table
    class Cmap

      # Reader/encoder for format 4 cmap subtables (segment mapping to
      # delta values).
      module Format04
        attr_reader :language
        attr_reader :code_map

        # Expects a hash mapping character codes to glyph ids (where the
        # glyph ids are from the original font). Returns a hash including
        # a new map (:charmap) that maps the characters in charmap to a
        # another hash containing both the old (:old) and new (:new) glyph
        # ids. The returned hash also includes a :subtable key, which contains
        # the encoded subtable for the given charmap.
        def self.encode(charmap)
          end_codes = []
          start_codes = []
          next_id = 0
          last = difference = nil

          # Renumber glyphs in ascending code order and, in the same pass,
          # start a new segment whenever (new glyph id - code) changes.
          glyph_map = { 0 => 0 }
          new_map = charmap.keys.sort.inject({}) do |map, code|
            old = charmap[code]
            glyph_map[old] ||= next_id += 1
            map[code] = { :old => old, :new => glyph_map[old] }

            delta = glyph_map[old] - code
            if last.nil? || delta != difference
              end_codes << last if last
              start_codes << code
              difference = delta
            end
            last = code

            map
          end

          # Close the final real segment, then append the 0xFFFF terminator
          # segment.
          end_codes << last if last
          end_codes << 0xFFFF
          start_codes << 0xFFFF
          segcount = start_codes.length

          # build the conversion tables
          deltas = []
          range_offsets = []
          glyph_indices = []

          start_codes.zip(end_codes).each_with_index do |(a, b), segment|
            if a == 0xFFFF
              deltas << 0
              range_offsets << 0
              break
            end

            start_glyph_id = new_map[a][:new]
            if a - start_glyph_id >= 0x8000
              # Delta is not used for this segment: list its glyph ids
              # explicitly and store the byte distance from this
              # range-offset slot to the first of them.
              deltas << 0
              range_offsets << 2 * (glyph_indices.length + segcount - segment)
              a.upto(b) { |code| glyph_indices << new_map[code][:new] }
            else
              deltas << -a + start_glyph_id
              range_offsets << 0
            end
          end

          # format, length, language
          subtable = [4, 16 + 8 * segcount + 2 * glyph_indices.length, 0].pack("nnn")

          # entry_selector = floor(log2(segcount)) and
          # search_range = 2 * 2**entry_selector, computed with integer
          # shifts so the result does not depend on floating-point rounding
          # of Math.log.
          entry_selector = 0
          entry_selector += 1 while (2 << entry_selector) <= segcount
          search_range = 2 << entry_selector
          range_shift = (2 * segcount) - search_range
          subtable << [segcount * 2, search_range, entry_selector, range_shift].pack("nnnn")

          # "\0\0" is the reserved word between the end-code and start-code
          # arrays.
          subtable << end_codes.pack("n*") << "\0\0" << start_codes.pack("n*")
          subtable << deltas.pack("n*") << range_offsets.pack("n*") << glyph_indices.pack("n*")

          { :charmap => new_map, :subtable => subtable, :max_glyph_id => next_id+1 }
        end

        # Glyph id for +code+, or 0 when the code has no entry.
        def [](code)
          @code_map[code] || 0
        end

        def supported?
          true
        end

        private

        # Reads the subtable and expands every segment into @code_map, a
        # Hash of character code => glyph id.
        def parse_cmap!
          length, @language, segcount_x2 = read(6, "nnn")
          segcount = segcount_x2 / 2

          io.read(6) # skip searching hints

          end_code = read(segcount_x2, "n*")
          io.read(2) # skip reserved value
          start_code = read(segcount_x2, "n*")
          id_delta = read_signed(segcount)
          id_range_offset = read(segcount_x2, "n*")

          # Whatever remains of the subtable is read as the glyph id array.
          glyph_ids = read(length - io.pos + @offset, "n*")

          @code_map = {}

          end_code.each_with_index do |tail, i|
            start_code[i].upto(tail) do |code|
              if id_range_offset[i].zero?
                glyph_id = code + id_delta[i]
              else
                # The range offset is in bytes, counted from its own slot;
                # convert it to an index into glyph_ids.
                index = id_range_offset[i] / 2 + (code - start_code[i]) - (segcount - i)
                glyph_id = glyph_ids[index] || 0 # because some TTF fonts are broken
                glyph_id += id_delta[i] if glyph_id != 0
              end

              # Keep only the low 16 bits of the result.
              @code_map[code] = glyph_id & 0xFFFF
            end
          end
        end
      end

    end
  end
end
|