ucode 0.2.3 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ucode/code_chart/extractor.rb +1 -9
- data/lib/ucode/code_chart/writer.rb +1 -1
- data/lib/ucode/commands/canonical_build.rb +4 -4
- data/lib/ucode/commands/universal_set.rb +5 -3
- data/lib/ucode/coordinator/enrichment/bidi.rb +35 -0
- data/lib/ucode/coordinator/enrichment/binary.rb +38 -0
- data/lib/ucode/coordinator/enrichment/casing.rb +55 -0
- data/lib/ucode/coordinator/enrichment/cjk.rb +49 -0
- data/lib/ucode/coordinator/enrichment/display.rb +36 -0
- data/lib/ucode/coordinator/enrichment/emoji.rb +36 -0
- data/lib/ucode/coordinator/enrichment/identity.rb +42 -0
- data/lib/ucode/coordinator/enrichment/indic.rb +32 -0
- data/lib/ucode/coordinator/enrichment/names.rb +63 -0
- data/lib/ucode/coordinator/enrichment/segmentation.rb +34 -0
- data/lib/ucode/coordinator/enrichment.rb +51 -0
- data/lib/ucode/coordinator/range_lookup.rb +65 -0
- data/lib/ucode/coordinator.rb +4 -276
- data/lib/ucode/glyphs/embedded_fonts/catalog.rb +32 -376
- data/lib/ucode/glyphs/embedded_fonts/codepoint_mapper.rb +130 -0
- data/lib/ucode/glyphs/embedded_fonts/content_stream_correlator.rb +25 -124
- data/lib/ucode/glyphs/embedded_fonts/font_entry.rb +0 -1
- data/lib/ucode/glyphs/embedded_fonts/pdf_indexer.rb +236 -0
- data/lib/ucode/glyphs/embedded_fonts/{source.rb → pdf_location.rb} +5 -5
- data/lib/ucode/glyphs/embedded_fonts/positional_matcher.rb +162 -0
- data/lib/ucode/glyphs/embedded_fonts/raw_font_descriptor.rb +24 -0
- data/lib/ucode/glyphs/embedded_fonts/renderer.rb +0 -2
- data/lib/ucode/glyphs/embedded_fonts/trace_correlator.rb +54 -168
- data/lib/ucode/glyphs/embedded_fonts/writer.rb +0 -4
- data/lib/ucode/glyphs/embedded_fonts.rb +5 -1
- data/lib/ucode/glyphs/resolver_factory.rb +45 -0
- data/lib/ucode/glyphs/sources/pillar1_embedded_tounicode.rb +1 -1
- data/lib/ucode/glyphs.rb +1 -0
- data/lib/ucode/version.rb +1 -1
- metadata +20 -3
|
@@ -7,106 +7,91 @@ module Ucode
|
|
|
7
7
|
# their Unicode codepoints via positional matching against hex
|
|
8
8
|
# codepoint labels on the same chart page.
|
|
9
9
|
#
|
|
10
|
-
#
|
|
10
|
+
# Adapter for the `mutool trace` XML format: parses {TraceGlyph}
|
|
11
|
+
# arrays, partitions into specimens and labels, auto-detects the
|
|
12
|
+
# label font by proximity, then delegates matching to
|
|
13
|
+
# {PositionalMatcher}.
|
|
11
14
|
#
|
|
12
|
-
#
|
|
13
|
-
#
|
|
14
|
-
#
|
|
15
|
-
#
|
|
16
|
-
# 2. **Grid layout** (summary pages): the hex codepoint label is
|
|
17
|
-
# printed directly ABOVE the specimen glyph (~12 pt higher on
|
|
18
|
-
# Y, same X).
|
|
19
|
-
#
|
|
20
|
-
# Both layouts are handled by matching each specimen to the
|
|
21
|
-
# nearest valid label cluster by Euclidean distance, with a
|
|
22
|
-
# maximum match radius that excludes far-away header/footer text.
|
|
23
|
-
#
|
|
24
|
-
# The codepoint labels in every Unicode Code Charts PDF are set
|
|
25
|
-
# in a single dedicated label font (typically ArialNarrow).
|
|
26
|
-
# Character names, headers, and footers use other fonts. To avoid
|
|
27
|
-
# false matches from hex chars in those texts, the correlator
|
|
28
|
-
# auto-detects the label font as the non-specimen font that
|
|
29
|
-
# contributes the most hex-char glyphs.
|
|
30
|
-
#
|
|
31
|
-
# Matching is greedy one-to-one: each GID and each codepoint is
|
|
32
|
-
# assigned at most once, so a specimen that sits between two
|
|
33
|
-
# labels only claims the closer one.
|
|
34
|
-
#
|
|
35
|
-
# Pure logic — no I/O. The caller passes pre-parsed TraceGlyph
|
|
36
|
-
# arrays (typically from {TraceRunner} + {TraceParser}).
|
|
15
|
+
# The label font auto-detection is the only piece of "intelligence"
|
|
16
|
+
# in this adapter — everything else is format translation. The
|
|
17
|
+
# matching algorithm lives in {PositionalMatcher} and is shared
|
|
18
|
+
# with {ContentStreamCorrelator}.
|
|
37
19
|
class TraceCorrelator
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
#
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
X_GAP_THRESHOLD = 10.0
|
|
45
|
-
private_constant :X_GAP_THRESHOLD
|
|
46
|
-
|
|
47
|
-
# Maximum valid Unicode codepoint. Filters out false labels
|
|
48
|
-
# that form hex strings from character-name fragments.
|
|
49
|
-
UNICODE_MAX = 0x10FFFF
|
|
50
|
-
private_constant :UNICODE_MAX
|
|
51
|
-
|
|
52
|
-
# Maximum Euclidean distance from a specimen to its matching
|
|
53
|
-
# label cluster. List-layout labels are ~21 pt to the left;
|
|
54
|
-
# grid-layout labels are ~12 pt above. Header/footer text is
|
|
55
|
-
# always > 30 pt away from any specimen.
|
|
56
|
-
MAX_MATCH_DISTANCE = 30.0
|
|
57
|
-
private_constant :MAX_MATCH_DISTANCE
|
|
20
|
+
# Proximity radius (in PDF points) for counting how often each
|
|
21
|
+
# non-specimen font's hex-char glyphs appear near a specimen.
|
|
22
|
+
# Code Charts dedicate one small font to the codepoint labels;
|
|
23
|
+
# body text and headers are farther away.
|
|
24
|
+
LABEL_PROXIMITY_RADIUS = 50.0
|
|
25
|
+
private_constant :LABEL_PROXIMITY_RADIUS
|
|
58
26
|
|
|
59
27
|
# @param specimen_font_name [String] the BaseFont name of the
|
|
60
28
|
# CID font whose glyphs need correlation
|
|
61
29
|
def initialize(specimen_font_name:)
|
|
62
30
|
@specimen_font_name = specimen_font_name
|
|
63
|
-
@y_bucket = DEFAULT_Y_BUCKET
|
|
64
31
|
end
|
|
65
32
|
|
|
66
33
|
# @param trace_glyphs [Array<TraceGlyph>]
|
|
67
34
|
# @return [Hash{Integer=>Integer}] codepoint => gid
|
|
68
35
|
def correlate(trace_glyphs)
|
|
69
|
-
specimens = trace_glyphs
|
|
36
|
+
specimens = select_specimens(trace_glyphs)
|
|
70
37
|
return {} if specimens.empty?
|
|
71
38
|
|
|
72
|
-
|
|
73
|
-
return {} unless label_font
|
|
74
|
-
|
|
75
|
-
labels = trace_glyphs.select { |g| label_glyph?(g, label_font) }
|
|
39
|
+
labels = select_labels(trace_glyphs)
|
|
76
40
|
return {} if labels.empty?
|
|
77
41
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
42
|
+
PositionalMatcher.match(
|
|
43
|
+
specimens.map { |g| to_position(g) },
|
|
44
|
+
labels.map { |g| to_position(g) },
|
|
45
|
+
)
|
|
82
46
|
end
|
|
83
47
|
|
|
84
48
|
private
|
|
85
49
|
|
|
50
|
+
def select_specimens(trace_glyphs)
|
|
51
|
+
trace_glyphs.select { |g| g.font_name == @specimen_font_name }
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
def select_labels(trace_glyphs)
|
|
55
|
+
label_font = detect_label_font(trace_glyphs)
|
|
56
|
+
return [] unless label_font
|
|
57
|
+
|
|
58
|
+
trace_glyphs.select { |g| hex_char_from?(g, label_font) }
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
def hex_char_from?(glyph, font_name)
|
|
62
|
+
glyph.font_name == font_name && glyph.unicode&.match?(/\A[0-9A-Fa-f]\z/)
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def to_position(glyph)
|
|
66
|
+
PositionalMatcher::Position.new(
|
|
67
|
+
x: glyph.x,
|
|
68
|
+
y: glyph.y,
|
|
69
|
+
font_ref: glyph.font_name,
|
|
70
|
+
glyph_id: glyph.gid,
|
|
71
|
+
text: glyph.unicode,
|
|
72
|
+
)
|
|
73
|
+
end
|
|
74
|
+
|
|
86
75
|
# The label font is the non-specimen font whose hex-char glyphs
|
|
87
76
|
# appear most often in close proximity to specimen glyphs.
|
|
88
77
|
# Code Charts dedicate one small font to the codepoint labels;
|
|
89
78
|
# body text, headers, and character names use other fonts that
|
|
90
79
|
# may also contain hex chars but are not co-located with
|
|
91
|
-
# specimens
|
|
92
|
-
# MyriadPro-Light but zero specimens).
|
|
93
|
-
LABEL_PROXIMITY_RADIUS = 50.0
|
|
94
|
-
private_constant :LABEL_PROXIMITY_RADIUS
|
|
95
|
-
|
|
80
|
+
# specimens.
|
|
96
81
|
def detect_label_font(trace_glyphs)
|
|
97
|
-
specimens = trace_glyphs
|
|
82
|
+
specimens = select_specimens(trace_glyphs)
|
|
98
83
|
return nil if specimens.empty?
|
|
99
84
|
|
|
100
|
-
|
|
101
|
-
return nil if
|
|
85
|
+
candidates = select_hex_candidates(trace_glyphs)
|
|
86
|
+
return nil if candidates.empty?
|
|
102
87
|
|
|
103
|
-
counts = proximity_counts(specimens,
|
|
88
|
+
counts = proximity_counts(specimens, candidates)
|
|
104
89
|
return nil if counts.empty?
|
|
105
90
|
|
|
106
91
|
counts.max_by { |_, n| n }.first
|
|
107
92
|
end
|
|
108
93
|
|
|
109
|
-
def
|
|
94
|
+
def select_hex_candidates(trace_glyphs)
|
|
110
95
|
trace_glyphs.select do |g|
|
|
111
96
|
g.font_name != @specimen_font_name &&
|
|
112
97
|
g.unicode&.match?(/\A[0-9A-Fa-f]\z/)
|
|
@@ -118,112 +103,13 @@ module Ucode
|
|
|
118
103
|
radius_sq = LABEL_PROXIMITY_RADIUS * LABEL_PROXIMITY_RADIUS
|
|
119
104
|
specimens.each do |spec|
|
|
120
105
|
candidates.each do |g|
|
|
121
|
-
|
|
106
|
+
dx = spec.x - g.x
|
|
107
|
+
dy = spec.y - g.y
|
|
108
|
+
counts[g.font_name] += 1 if dx * dx + dy * dy < radius_sq
|
|
122
109
|
end
|
|
123
110
|
end
|
|
124
111
|
counts
|
|
125
112
|
end
|
|
126
|
-
|
|
127
|
-
def within_radius?(spec, glyph, radius_sq)
|
|
128
|
-
dx = spec.x - glyph.x
|
|
129
|
-
dy = spec.y - glyph.y
|
|
130
|
-
dx * dx + dy * dy < radius_sq
|
|
131
|
-
end
|
|
132
|
-
|
|
133
|
-
def label_glyph?(glyph, label_font)
|
|
134
|
-
glyph.font_name == label_font &&
|
|
135
|
-
glyph.unicode&.match?(/\A[0-9A-Fa-f]\z/)
|
|
136
|
-
end
|
|
137
|
-
|
|
138
|
-
# Cluster labels by Y (row), then by X gap (column within row).
|
|
139
|
-
# Returns a flat array of {x:, y:, codepoint:} clusters.
|
|
140
|
-
def build_label_clusters(labels)
|
|
141
|
-
by_y = labels.group_by { |g| quantize(g.y, @y_bucket) }
|
|
142
|
-
by_y.flat_map { |(_, glyphs)| clusters_from_row(glyphs) }
|
|
143
|
-
end
|
|
144
|
-
|
|
145
|
-
def clusters_from_row(glyphs)
|
|
146
|
-
cluster_by_x_gap(glyphs.sort_by(&:x)).filter_map { |cluster| build_cluster(cluster) }
|
|
147
|
-
end
|
|
148
|
-
|
|
149
|
-
def build_cluster(cluster)
|
|
150
|
-
hex = cluster.map(&:unicode).join
|
|
151
|
-
return nil unless hex.match?(/\A[0-9A-Fa-f]{4,6}\z/)
|
|
152
|
-
|
|
153
|
-
cp = hex.to_i(16)
|
|
154
|
-
return nil unless cp <= UNICODE_MAX
|
|
155
|
-
|
|
156
|
-
{
|
|
157
|
-
x: cluster.sum(&:x) / cluster.size,
|
|
158
|
-
y: cluster.first.y,
|
|
159
|
-
codepoint: cp,
|
|
160
|
-
}
|
|
161
|
-
end
|
|
162
|
-
|
|
163
|
-
def cluster_by_x_gap(sorted_glyphs)
|
|
164
|
-
clusters = []
|
|
165
|
-
current = []
|
|
166
|
-
|
|
167
|
-
sorted_glyphs.each do |g|
|
|
168
|
-
if current.empty? || (g.x - current.last.x).abs < X_GAP_THRESHOLD
|
|
169
|
-
current << g
|
|
170
|
-
else
|
|
171
|
-
clusters << current if current.size > 1
|
|
172
|
-
current = [g]
|
|
173
|
-
end
|
|
174
|
-
end
|
|
175
|
-
clusters << current if current.size > 1
|
|
176
|
-
clusters
|
|
177
|
-
end
|
|
178
|
-
|
|
179
|
-
# Greedy one-to-one matching: each GID and each codepoint is
|
|
180
|
-
# assigned at most once. Candidate pairs are sorted by distance
|
|
181
|
-
# so the closest specimen-label pair always wins.
|
|
182
|
-
def build_mapping(specimens, clusters)
|
|
183
|
-
candidates = Array.new(clusters.size) { |ci| specimen_distances(specimens, clusters, ci) }
|
|
184
|
-
|
|
185
|
-
assigned_gids = Set.new
|
|
186
|
-
assigned_cps = Set.new
|
|
187
|
-
mapping = {}
|
|
188
|
-
|
|
189
|
-
pairs_by_distance(candidates).each do |spec_idx, cluster_idx, dist|
|
|
190
|
-
next if dist > MAX_MATCH_DISTANCE
|
|
191
|
-
|
|
192
|
-
spec = specimens[spec_idx]
|
|
193
|
-
cluster = clusters[cluster_idx]
|
|
194
|
-
next if assigned_gids.include?(spec.gid)
|
|
195
|
-
next if assigned_cps.include?(cluster[:codepoint])
|
|
196
|
-
|
|
197
|
-
assigned_gids << spec.gid
|
|
198
|
-
assigned_cps << cluster[:codepoint]
|
|
199
|
-
mapping[cluster[:codepoint]] = spec.gid
|
|
200
|
-
end
|
|
201
|
-
|
|
202
|
-
mapping
|
|
203
|
-
end
|
|
204
|
-
|
|
205
|
-
def specimen_distances(specimens, clusters, cluster_idx)
|
|
206
|
-
cluster = clusters[cluster_idx]
|
|
207
|
-
specimens.each_with_index.map do |spec, spec_idx|
|
|
208
|
-
[spec_idx, cluster_idx, distance(spec, cluster)]
|
|
209
|
-
end
|
|
210
|
-
end
|
|
211
|
-
|
|
212
|
-
def pairs_by_distance(candidates)
|
|
213
|
-
candidates.flatten(1).sort_by { |_, _, dist| dist }
|
|
214
|
-
end
|
|
215
|
-
|
|
216
|
-
def distance(spec, cluster)
|
|
217
|
-
dx = spec.x - cluster[:x]
|
|
218
|
-
dy = spec.y - cluster[:y]
|
|
219
|
-
Math.sqrt(dx * dx + dy * dy)
|
|
220
|
-
end
|
|
221
|
-
|
|
222
|
-
def quantize(value, bucket_size)
|
|
223
|
-
return nil if value.nil?
|
|
224
|
-
|
|
225
|
-
(value / bucket_size).round * bucket_size
|
|
226
|
-
end
|
|
227
113
|
end
|
|
228
114
|
end
|
|
229
115
|
end
|
|
@@ -36,12 +36,16 @@ module Ucode
|
|
|
36
36
|
# `mutool info` (font enumeration) and `mutool show -b -o` (raw
|
|
37
37
|
# stream extraction).
|
|
38
38
|
module EmbeddedFonts
|
|
39
|
-
autoload :
|
|
39
|
+
autoload :PdfLocation, "ucode/glyphs/embedded_fonts/pdf_location"
|
|
40
40
|
autoload :ToUnicode, "ucode/glyphs/embedded_fonts/tounicode"
|
|
41
41
|
autoload :FontEntry, "ucode/glyphs/embedded_fonts/font_entry"
|
|
42
|
+
autoload :RawFontDescriptor, "ucode/glyphs/embedded_fonts/raw_font_descriptor"
|
|
43
|
+
autoload :PdfIndexer, "ucode/glyphs/embedded_fonts/pdf_indexer"
|
|
44
|
+
autoload :CodepointMapper, "ucode/glyphs/embedded_fonts/codepoint_mapper"
|
|
42
45
|
autoload :Catalog, "ucode/glyphs/embedded_fonts/catalog"
|
|
43
46
|
autoload :ContentStreamCorrelator,
|
|
44
47
|
"ucode/glyphs/embedded_fonts/content_stream_correlator"
|
|
48
|
+
autoload :PositionalMatcher, "ucode/glyphs/embedded_fonts/positional_matcher"
|
|
45
49
|
autoload :TraceGlyph, "ucode/glyphs/embedded_fonts/trace_glyph"
|
|
46
50
|
autoload :TraceParser, "ucode/glyphs/embedded_fonts/trace_parser"
|
|
47
51
|
autoload :TraceCorrelator, "ucode/glyphs/embedded_fonts/trace_correlator"
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
module Ucode
|
|
6
|
+
module Glyphs
|
|
7
|
+
# Single injection point for the 4-tier {Resolver}.
|
|
8
|
+
#
|
|
9
|
+
# Both CanonicalBuildCommand and UniversalSet::BuildCommand need the
|
|
10
|
+
# same shape: open a Database, load the SourceConfig, run a
|
|
11
|
+
# SourceBuilder, wrap the resulting tier-1 sources in a Resolver.
|
|
12
|
+
# Extracting it here gives tests one seam to mock (or bypass) and
|
|
13
|
+
# prevents drift between the two call sites.
|
|
14
|
+
module ResolverFactory
|
|
15
|
+
DEFAULT_INSTALL = false
|
|
16
|
+
private_constant :DEFAULT_INSTALL
|
|
17
|
+
|
|
18
|
+
# @param version [String] UCD version, used to open the Database
|
|
19
|
+
# when one is not supplied.
|
|
20
|
+
# @param source_config_path [String, Pathname, nil] override path
|
|
21
|
+
# to the Tier 1 font config YAML; nil uses the default.
|
|
22
|
+
# @param install [Boolean] pass through to SourceBuilder#tier1_sources
|
|
23
|
+
# — whether to fontist-install missing fonts eagerly.
|
|
24
|
+
# @param database [Ucode::Database, nil] an already-open Database,
|
|
25
|
+
# to skip re-opening when the caller already has one.
|
|
26
|
+
# @return [Ucode::Glyphs::Resolver]
|
|
27
|
+
def self.build(version:, source_config_path: nil,
|
|
28
|
+
install: DEFAULT_INSTALL, database: nil)
|
|
29
|
+
db = database || Ucode::Database.open(version)
|
|
30
|
+
config = SourceConfig.new(path: resolve_config_path(source_config_path))
|
|
31
|
+
builder = SourceBuilder.new(config: config, database: db)
|
|
32
|
+
Resolver.new(sources: builder.tier1_sources(install: install))
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# @api private
|
|
36
|
+
def self.resolve_config_path(path)
|
|
37
|
+
return SourceConfig::DEFAULT_PATH if path.nil?
|
|
38
|
+
return path if path.is_a?(Pathname)
|
|
39
|
+
|
|
40
|
+
Pathname.new(path)
|
|
41
|
+
end
|
|
42
|
+
private_class_method :resolve_config_path
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
end
|
|
@@ -31,7 +31,7 @@ module Ucode
|
|
|
31
31
|
# @param renderer [EmbeddedFonts::Renderer] the renderer to
|
|
32
32
|
# delegate to. Callers typically construct it with the
|
|
33
33
|
# {EmbeddedFonts::Catalog} built from the resolved Code
|
|
34
|
-
# Charts {EmbeddedFonts::
|
|
34
|
+
# Charts {EmbeddedFonts::PdfLocation}. To enable pillar-2
|
|
35
35
|
# fallback, that Catalog must be constructed with
|
|
36
36
|
# +correlator_configs:+.
|
|
37
37
|
def initialize(renderer:)
|
data/lib/ucode/glyphs.rb
CHANGED
|
@@ -16,6 +16,7 @@ module Ucode
|
|
|
16
16
|
autoload :RealFonts, "ucode/glyphs/real_fonts"
|
|
17
17
|
autoload :Source, "ucode/glyphs/source"
|
|
18
18
|
autoload :Resolver, "ucode/glyphs/resolver"
|
|
19
|
+
autoload :ResolverFactory, "ucode/glyphs/resolver_factory"
|
|
19
20
|
autoload :SourceConfig, "ucode/glyphs/source_config"
|
|
20
21
|
autoload :SourceBuilder, "ucode/glyphs/source_builder"
|
|
21
22
|
autoload :Sources, "ucode/glyphs/sources"
|
data/lib/ucode/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ucode
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-07-
|
|
11
|
+
date: 2026-07-03 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: base64
|
|
@@ -328,7 +328,19 @@ files:
|
|
|
328
328
|
- lib/ucode/commands/universal_set.rb
|
|
329
329
|
- lib/ucode/config.rb
|
|
330
330
|
- lib/ucode/coordinator.rb
|
|
331
|
+
- lib/ucode/coordinator/enrichment.rb
|
|
332
|
+
- lib/ucode/coordinator/enrichment/bidi.rb
|
|
333
|
+
- lib/ucode/coordinator/enrichment/binary.rb
|
|
334
|
+
- lib/ucode/coordinator/enrichment/casing.rb
|
|
335
|
+
- lib/ucode/coordinator/enrichment/cjk.rb
|
|
336
|
+
- lib/ucode/coordinator/enrichment/display.rb
|
|
337
|
+
- lib/ucode/coordinator/enrichment/emoji.rb
|
|
338
|
+
- lib/ucode/coordinator/enrichment/identity.rb
|
|
339
|
+
- lib/ucode/coordinator/enrichment/indic.rb
|
|
340
|
+
- lib/ucode/coordinator/enrichment/names.rb
|
|
341
|
+
- lib/ucode/coordinator/enrichment/segmentation.rb
|
|
331
342
|
- lib/ucode/coordinator/indices.rb
|
|
343
|
+
- lib/ucode/coordinator/range_lookup.rb
|
|
332
344
|
- lib/ucode/database.rb
|
|
333
345
|
- lib/ucode/db_builder.rb
|
|
334
346
|
- lib/ucode/error.rb
|
|
@@ -343,10 +355,14 @@ files:
|
|
|
343
355
|
- lib/ucode/glyphs.rb
|
|
344
356
|
- lib/ucode/glyphs/embedded_fonts.rb
|
|
345
357
|
- lib/ucode/glyphs/embedded_fonts/catalog.rb
|
|
358
|
+
- lib/ucode/glyphs/embedded_fonts/codepoint_mapper.rb
|
|
346
359
|
- lib/ucode/glyphs/embedded_fonts/content_stream_correlator.rb
|
|
347
360
|
- lib/ucode/glyphs/embedded_fonts/font_entry.rb
|
|
361
|
+
- lib/ucode/glyphs/embedded_fonts/pdf_indexer.rb
|
|
362
|
+
- lib/ucode/glyphs/embedded_fonts/pdf_location.rb
|
|
363
|
+
- lib/ucode/glyphs/embedded_fonts/positional_matcher.rb
|
|
364
|
+
- lib/ucode/glyphs/embedded_fonts/raw_font_descriptor.rb
|
|
348
365
|
- lib/ucode/glyphs/embedded_fonts/renderer.rb
|
|
349
|
-
- lib/ucode/glyphs/embedded_fonts/source.rb
|
|
350
366
|
- lib/ucode/glyphs/embedded_fonts/svg.rb
|
|
351
367
|
- lib/ucode/glyphs/embedded_fonts/tounicode.rb
|
|
352
368
|
- lib/ucode/glyphs/embedded_fonts/trace_correlator.rb
|
|
@@ -372,6 +388,7 @@ files:
|
|
|
372
388
|
- lib/ucode/glyphs/real_fonts/unicode_17_blocks.rb
|
|
373
389
|
- lib/ucode/glyphs/real_fonts/writer.rb
|
|
374
390
|
- lib/ucode/glyphs/resolver.rb
|
|
391
|
+
- lib/ucode/glyphs/resolver_factory.rb
|
|
375
392
|
- lib/ucode/glyphs/source.rb
|
|
376
393
|
- lib/ucode/glyphs/source_builder.rb
|
|
377
394
|
- lib/ucode/glyphs/source_config.rb
|