ucode 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -39,7 +39,7 @@ module Ucode
39
39
  # (in `lib/ucode/cli.rb`) is responsible only for argument
40
40
  # parsing and dispatch.
41
41
  class BuildCommand
42
- # @param version_intent [nil, :default, :latest, String]
42
+ # @param version [String] resolved UCD version
43
43
  # @param output_root [String, Pathname] directory that will
44
44
  # hold `manifest.json`, `glyphs/`, `reports/`.
45
45
  # @param source_config_path [String, Pathname, nil] override
@@ -58,10 +58,9 @@ module Ucode
58
58
  # resolver and don't have a real source config on disk.
59
59
  # @return [Hash] { version:, manifest_path:, totals:,
60
60
  # by_tier:, coverage:, validation: }
61
- def call(version_intent, output_root:, source_config_path: nil,
61
+ def call(version, output_root:, source_config_path: nil,
62
62
  resolver: nil, block_filter: nil,
63
63
  parallel_workers: default_workers, skip_pre_check: false)
64
- version = VersionResolver.resolve(version_intent)
65
64
  root = Pathname.new(output_root)
66
65
 
67
66
  config_path = source_config_path_or_default(source_config_path)
@@ -144,7 +143,7 @@ module Ucode
144
143
  # checks (config loads, fonts present, coverage assertion runs)
145
144
  # without starting the 4-hour build.
146
145
  class PreCheckCommand
147
- # @param version_intent [nil, :default, :latest, String]
146
+ # @param version [String] resolved UCD version
148
147
  # @param source_config_path [String, Pathname, nil]
149
148
  # @param cmaps [#covers?] injectable; defaults to
150
149
  # RealFonts::CmapCache.
@@ -153,9 +152,8 @@ module Ucode
153
152
  # @return [Ucode::Glyphs::UniversalSet::PreBuildReport]
154
153
  # @raise [Ucode::UniversalSetPreBuildError] when missing_fonts
155
154
  # is non-empty or the config fails to load.
156
- def call(version_intent, source_config_path: nil, cmaps: nil,
155
+ def call(version, source_config_path: nil, cmaps: nil,
157
156
  font_locator: nil)
158
- version = VersionResolver.resolve(version_intent)
159
157
  database = Database.open(version)
160
158
  config_path = source_config_path || Glyphs::SourceConfig::DEFAULT_PATH
161
159
 
@@ -171,12 +169,11 @@ module Ucode
171
169
  # shape (or regenerating reports after a model change) without
172
170
  # re-running the build.
173
171
  class ReportCommand
174
- # @param version_intent [nil, :default, :latest, String]
172
+ # @param version [String] resolved UCD version
175
173
  # @param output_root [String, Pathname] directory holding
176
174
  # `manifest.json`.
177
175
  # @return [Hash] the {CoverageReport#emit} payload.
178
- def call(version_intent, output_root:)
179
- version = VersionResolver.resolve(version_intent)
176
+ def call(version, output_root:)
180
177
  root = Pathname.new(output_root)
181
178
  manifest_path = root.join("manifest.json")
182
179
  raise Ucode::Error, "manifest not found at #{manifest_path}" unless manifest_path.exist?
@@ -196,12 +193,11 @@ module Ucode
196
193
  # totals_reconcile, provenance_complete).
197
194
  class ValidateCommand
198
195
  # @param output_root [String, Pathname]
199
- # @param version_intent [nil, :default, :latest, String]
200
- # used only to stamp the report's unicode_version when the
201
- # manifest's recorded value is missing.
196
+ # @param version [String, nil] resolved UCD version, used only
197
+ # to stamp the report's unicode_version when the manifest's
198
+ # recorded value is missing.
202
199
  # @return [Hash] the {Validator#validate} payload.
203
- def call(output_root, version_intent: nil)
204
- version = version_intent && VersionResolver.resolve(version_intent)
200
+ def call(output_root, version: nil)
205
201
  Glyphs::UniversalSet::Validator
206
202
  .new(output_root, unicode_version: version).validate
207
203
  end
@@ -2,6 +2,22 @@
2
2
 
3
3
  module Ucode
4
4
  class Coordinator
5
+ # Pairs of (output-file-slug, indices-field) for every per-property
6
+ # relationship table the Repo writes. Each field holds a Hash whose
7
+ # values are Records (or Arrays of Records). The Repo iterates
8
+ # `Indices#each_relationship` instead of reaching into the Struct by
9
+ # field name (see Candidate 1 of the 2026-06-29 architecture review).
10
+ RELATIONSHIPS = [
11
+ ["special_casing", :special_casing],
12
+ ["case_folding", :case_folding],
13
+ ["bidi_mirroring", :bidi_mirroring],
14
+ ["bidi_brackets", :bidi_brackets],
15
+ ["cjk_radicals", :cjk_radicals],
16
+ ["standardized_variants", :standardized_variants],
17
+ ["name_aliases", :name_aliases],
18
+ ].freeze
19
+ private_constant :RELATIONSHIPS
20
+
5
21
  # Bag of pre-built indices consumed by the per-codepoint enrichment
6
22
  # pass. Every field is a frozen-shaped collection that is read-only
7
23
  # after `build_indices` returns: range files land in sorted Arrays
@@ -12,6 +28,10 @@ module Ucode
12
28
  # call reads as a self-documenting catalogue of every parsed file —
13
29
  # adding a new index is one keyword arg here, one builder call in
14
30
  # `Coordinator#build_indices`, and one assignment in `#enrich`.
31
+ #
32
+ # The relationship enumerator (`#each_relationship`) is the seam the
33
+ # Repo uses to write per-property relationship tables without knowing
34
+ # which Struct field carries which data.
15
35
  Indices = Struct.new(
16
36
  :blocks,
17
37
  :scripts,
@@ -40,6 +60,22 @@ module Ucode
40
60
  :emoji_properties,
41
61
  :extra_binary_properties,
42
62
  keyword_init: true,
43
- )
63
+ ) do
64
+ # Yield (slug, records) for each relationship table. The seam
65
+ # between "what the Coordinator indexed" and "what the Repo writes"
66
+ # lives here — Repo never names a Struct field directly.
67
+ #
68
+ # @yieldparam slug [String] output file slug under
69
+ # `output/relationships/`
70
+ # @yieldparam records [Hash<Integer|String, Record|Array<Record>>]
71
+ # @return [Enumerator] when no block is given
72
def each_relationship(&block)
  if block_given?
    # Walk the slug/field table in declaration order and hand the caller
    # each slug together with whatever that field currently holds.
    RELATIONSHIPS.each { |(slug, field)| yield(slug, public_send(field)) }
  else
    # No block: return an Enumerator over the same (slug, records) pairs.
    enum_for(:each_relationship)
  end
end
79
+ end
44
80
  end
45
- end
81
+ end
@@ -0,0 +1,106 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "pathname"
4
+ require "set"
5
+
6
+ require "ucode/cache"
7
+ require "ucode/glyphs/pdf_fetcher"
8
+ require "ucode/glyphs/writer"
9
+ require "ucode/parsers"
10
+
11
+ module Ucode
12
+ module Glyphs
13
+ # Assembles the per-block specs that {Glyphs::Writer#write_all} drains.
14
+ #
15
+ # Owns three pieces of orchestration that {Commands::GlyphsCommand}
16
+ # used to carry inline:
17
+ #
18
+ # - block loading from {Cache.ucd_dir}/Blocks.txt (with an optional
19
+ # block filter)
20
+ # - PDF fetcher construction (with monolith fallback)
21
+ # - the per-block page-map heuristic (per-block PDFs are page 1 =
22
+ # title, page 2 = first chart page starting at the block's first
23
+ # codepoint; true for most BMP blocks; multi-page blocks need a
24
+ # richer resolver — mismatches yield placeholder SVGs only, never
25
+ # wrong glyphs)
26
+ #
27
+ # The Command stays a thin wrapper that prints the experimental
28
+ # warning and wires the writer. See Candidate 3 of the 2026-06-29
29
+ # architecture review.
30
class Pipeline
  # Path to the monolith fallback file when no per-block PDF is on
  # disk yet. Overridable for tests.
  DEFAULT_MONOLITH_PATH = "CodeCharts.pdf"
  # Cache path for the page-map corpus. Overridable for tests.
  DEFAULT_PAGE_MAP_CACHE = "data/codecharts_page_map.json"

  # One unit of work: a block, the PDF path the fetcher returned for
  # it, and the page map produced by `#page_map_for`.
  Spec = Struct.new(:block, :pdf_path, :page_map, keyword_init: true)

  # @param version [String] resolved UCD version (callers must
  #   resolve via {VersionResolver.resolve} first)
  # @param block_filter [Array<String>, nil] block ids to limit to;
  #   nil (or empty) = every block
  # @param monolith_path [String, Pathname, nil] fallback monolith;
  #   nil disables the fallback
  # @param page_map_cache [String, Pathname] cache for the page map
  def initialize(version:, block_filter: nil,
                 monolith_path: DEFAULT_MONOLITH_PATH,
                 page_map_cache: DEFAULT_PAGE_MAP_CACHE)
    @version = version
    @block_filter = block_filter
    @monolith_path = monolith_path
    @page_map_cache = page_map_cache
  end

  # Load every block from the cached Blocks.txt (filtered by
  # `@block_filter` when set) and pair each one with a fetched PDF
  # path and a page map. Blocks for which the fetcher returns nil
  # are dropped from the result.
  #
  # @param force [Boolean] forwarded to the fetcher as `force:`
  # @return [Array<Spec>]
  def build_specs(force: false)
    blocks = load_blocks
    fetcher = build_fetcher(blocks)
    blocks.map { |block| spec_for(block, fetcher, force) }.compact
  end

  private

  # Read Blocks.txt from the version's UCD cache directory. Returns
  # an empty Array when the file is absent; otherwise every parsed
  # record, narrowed to `@block_filter` when that is non-empty.
  def load_blocks
    path = Cache.ucd_dir(@version).join("Blocks.txt")
    return [] unless path.exist?

    all = Parsers::Blocks.each_record(path).to_a
    return all unless @block_filter && !@block_filter.empty?

    filter_set = @block_filter.to_set
    all.select { |block| filter_set.include?(block.id) }
  end

  # Construct the PDF fetcher. The monolith is only handed over when
  # it is configured AND present on disk; otherwise nil is passed.
  def build_fetcher(blocks)
    PdfFetcher.new(
      @version,
      monolith_path: existing_monolith_path,
      blocks: blocks,
      page_map_cache: @page_map_cache,
    )
  end

  # The configured monolith as a Pathname, or nil when no monolith
  # path was configured or the file does not exist. The nil guard
  # matters: `monolith_path: nil` is a documented input, and probing
  # `exist?` on nil would raise NoMethodError.
  def existing_monolith_path
    return nil unless @monolith_path

    candidate = Pathname.new(@monolith_path)
    candidate.exist? ? candidate : nil
  end

  # Build the Spec for one block, or nil when the fetcher yields no
  # PDF path for it.
  def spec_for(block, fetcher, force)
    pdf_path = fetcher.fetch(block_first_cp: block.range_first, force: force)
    return nil unless pdf_path

    Spec.new(block: block, pdf_path: pdf_path, page_map: page_map_for(block))
  end

  # Per-block PDFs are page 1 = title, page 2 = first chart page
  # starting at the block's first codepoint. True for most BMP
  # blocks; multi-page blocks (CJK) need a richer resolver.
  def page_map_for(block)
    { 2 => block.range_first }
  end
end
105
+ end
106
+ end
data/lib/ucode/glyphs.rb CHANGED
@@ -20,6 +20,7 @@ module Ucode
20
20
  autoload :CellExtractor, "ucode/glyphs/cell_extractor"
21
21
  autoload :MonolithPageMap, "ucode/glyphs/monolith_page_map"
22
22
  autoload :Writer, "ucode/glyphs/writer"
23
+ autoload :Pipeline, "ucode/glyphs/pipeline"
23
24
  autoload :LastResort, "ucode/glyphs/last_resort"
24
25
  autoload :EmbeddedFonts, "ucode/glyphs/embedded_fonts"
25
26
  autoload :RealFonts, "ucode/glyphs/real_fonts"
@@ -1,12 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "pathname"
4
- require "json"
5
- require "time"
6
4
 
7
- require "ucode/models"
8
- require "ucode/repo/atomic_writes"
9
- require "ucode/repo/paths"
5
+ require "ucode/repo/writers/planes_writer"
6
+ require "ucode/repo/writers/blocks_writer"
7
+ require "ucode/repo/writers/scripts_writer"
8
+ require "ucode/repo/writers/indexes_writer"
9
+ require "ucode/repo/writers/relationships_writer"
10
+ require "ucode/repo/writers/enums_writer"
11
+ require "ucode/repo/writers/named_sequences_writer"
12
+ require "ucode/repo/writers/manifest_writer"
10
13
 
11
14
  module Ucode
12
15
  module Repo
@@ -14,67 +17,30 @@ module Ucode
14
17
  #
15
18
  # output/planes/<n>.json
16
19
  # output/blocks/<ID>.json
17
- # output/blocks/index.json (block index)
20
+ # output/blocks/index.json
18
21
  # output/scripts/<code>.json
19
- # output/index/names.json (cp_id → name)
20
- # output/index/labels.json (cp_id → {name, gc, sc, cc, bc, mir})
21
- # output/index/codepoint_to_block.json (cp_id → block_id)
22
- # output/relationships/*.json (per-property tables)
23
- # output/enums.json (property aliases + value aliases)
22
+ # output/index/names.json
23
+ # output/index/labels.json
24
+ # output/index/codepoint_to_block.json
25
+ # output/relationships/*.json
26
+ # output/enums.json
24
27
  # output/named_sequences/<slug>.json
25
28
  # output/manifest.json
26
29
  #
27
- # **Single pass**: callers feed one CodePoint at a time via `#add`;
28
- # `#flush` writes all derived files using the Coordinator's indices
29
- # for the static tables (relationships, enums, named sequences).
30
+ # **Single pass**: callers feed one CodePoint at a time via `#add`,
31
+ # which folds into the streaming accumulators. `#flush` then
32
+ # composes eight per-concern writer classes (one per output kind)
33
+ # and runs them in order. Adding a new aggregate = adding one
34
+ # writer class + one line here. See Candidate 5 of the 2026-06-29
35
+ # architecture review.
30
36
  #
31
37
  # **MECE**:
32
38
  # - paths: `Repo::Paths`
33
39
  # - atomic writes: `Repo::AtomicWrites`
34
- # - stream aggregation: this class
40
+ # - stream aggregation: this class (the `#add` half)
41
+ # - per-concern writers: `Repo::Writers::*`
35
42
  # - serialization: lutaml-model `to_yaml_hash` / `to_json`
36
43
  class AggregateWriter
37
- include AtomicWrites
38
-
39
- # Static metadata for the 17 Unicode planes. Planes 4–13 are
40
- # unassigned in Unicode 17; their entries use placeholder names.
41
- PLANE_TABLE = {
42
- 0 => ["Basic Multilingual Plane", "BMP"],
43
- 1 => ["Supplementary Multilingual Plane", "SMP"],
44
- 2 => ["Supplementary Ideographic Plane", "SIP"],
45
- 3 => ["Tertiary Ideographic Plane", "TIP"],
46
- 4 => ["Unassigned Plane 4", "—"],
47
- 5 => ["Unassigned Plane 5", "—"],
48
- 6 => ["Unassigned Plane 6", "—"],
49
- 7 => ["Unassigned Plane 7", "—"],
50
- 8 => ["Unassigned Plane 8", "—"],
51
- 9 => ["Unassigned Plane 9", "—"],
52
- 10 => ["Unassigned Plane 10", "—"],
53
- 11 => ["Unassigned Plane 11", "—"],
54
- 12 => ["Unassigned Plane 12", "—"],
55
- 13 => ["Unassigned Plane 13", "—"],
56
- 14 => ["Supplementary Special-purpose Plane", "SSP"],
57
- 15 => ["Supplementary Private Use Area-A", "SPUA-A"],
58
- 16 => ["Supplementary Private Use Area-B", "SPUA-B"],
59
- }.freeze
60
- private_constant :PLANE_TABLE
61
-
62
- # Coordinator::Indices fields paired with the file slug used
63
- # under `output/relationships/`. Each field is a Hash<Integer,
64
- # Record> or Hash<Integer, Array<Record>>.
65
- RELATIONSHIP_SOURCES = {
66
- special_casing: "special_casing",
67
- case_folding: "case_folding",
68
- bidi_mirroring: "bidi_mirroring",
69
- bidi_brackets: "bidi_brackets",
70
- cjk_radicals: "cjk_radicals",
71
- standardized_variants: "standardized_variants",
72
- name_aliases: "name_aliases",
73
- }.freeze
74
- private_constant :RELATIONSHIP_SOURCES
75
-
76
- attr_reader :codepoint_count
77
-
78
44
  # @param output_root [String, Pathname]
79
45
  def initialize(output_root)
80
46
  @output_root = Pathname.new(output_root)
@@ -87,6 +53,8 @@ module Ucode
87
53
  @codepoint_count = 0
88
54
  end
89
55
 
56
+ attr_reader :codepoint_count
57
+
90
58
  # Fold one CodePoint into the stream accumulators. No-ops if the
91
59
  # cp has no block_id (it has no home in the output tree).
92
60
  # @param cp [Ucode::Models::CodePoint]
@@ -107,10 +75,8 @@ module Ucode
107
75
  @codepoint_count += 1
108
76
  end
109
77
 
110
- # Write every aggregate file. Optional params supply data that is
111
- # not in `Coordinator::Indices` (the Coordinator only resolves the
112
- # `sc` subset of PropertyValueAliases; the full alias tables and
113
- # the named sequences are passed through from the CLI/parsers).
78
+ # Compose the eight per-concern writers, run them in order, and
79
+ # return the total number of files written.
114
80
  #
115
81
  # @param ucd_version [String]
116
82
  # @param indices [Ucode::Coordinator::Indices]
@@ -118,19 +84,40 @@ module Ucode
118
84
  # @param property_value_aliases [Array<Ucode::Models::PropertyValueAlias>]
119
85
  # @param named_sequences [Array<Ucode::Models::NamedSequence>]
120
86
  # @param glyph_count [Integer]
121
- # @return [Integer] number of files written
87
+ # @return [Integer]
122
88
  def flush(ucd_version:, indices:, property_aliases: [],
123
89
  property_value_aliases: [], named_sequences: [], glyph_count: 0)
124
- writes = 0
125
- writes += write_planes(indices.blocks)
126
- writes += write_blocks(indices.blocks)
127
- writes += write_scripts(indices.scripts)
128
- writes += write_indexes
129
- writes += write_relationships(indices)
130
- writes += write_enums(property_aliases, property_value_aliases)
131
- writes += write_named_sequences(named_sequences)
132
- writes += write_manifest(ucd_version: ucd_version, glyph_count: glyph_count)
133
- writes
90
+ writers(ucd_version, indices, property_aliases, property_value_aliases,
91
+ named_sequences, glyph_count).sum(&:write)
92
+ end
93
+
94
+ # @api private — exposed for testing.
95
def writers(ucd_version, indices, property_aliases,
            property_value_aliases, named_sequences, glyph_count)
  # Every writer receives the same output root; the rest of each
  # keyword list is the slice of state that writer is given.
  root = @output_root

  planes = Writers::PlanesWriter.new(output_root: root, blocks: indices.blocks)
  blocks = Writers::BlocksWriter.new(
    output_root: root,
    blocks: indices.blocks,
    block_codepoint_ids: @block_codepoint_ids,
    block_ages: @block_ages,
  )
  scripts = Writers::ScriptsWriter.new(
    output_root: root,
    scripts: indices.scripts,
    script_codepoint_ids: @script_codepoint_ids,
  )
  indexes = Writers::IndexesWriter.new(
    output_root: root,
    names: @names_index,
    labels: @labels_index,
    cp_to_block: @cp_to_block,
  )
  relationships = Writers::RelationshipsWriter.new(output_root: root, indices: indices)
  enums = Writers::EnumsWriter.new(
    output_root: root,
    property_aliases: property_aliases,
    property_value_aliases: property_value_aliases,
  )
  sequences = Writers::NamedSequencesWriter.new(
    output_root: root,
    named_sequences: named_sequences,
  )
  manifest = Writers::ManifestWriter.new(
    output_root: root,
    ucd_version: ucd_version,
    codepoint_count: @codepoint_count,
    glyph_count: glyph_count,
  )

  # Returned in the same order the writers were constructed.
  [planes, blocks, scripts, indexes, relationships, enums, sequences, manifest]
end
135
122
 
136
123
  private
@@ -151,9 +138,7 @@ module Ucode
151
138
 
152
139
  # Per-block `age` is the earliest DerivedAge of any codepoint in
153
140
  # the block, compared as a Gem::Version. Stored as the original
154
- # string (e.g. "1.1", "17.0.0"). nil when no codepoint in the
155
- # block has an age (rare — only happens for entirely-reserved
156
- # blocks, which the parser excludes anyway).
141
+ # string (e.g. "1.1", "17.0.0").
157
142
  def track_block_age(cp)
158
143
  return if cp.age.nil? || cp.age.empty?
159
144
 
@@ -168,229 +153,6 @@ module Ucode
168
153
  def min_age(a, b)
169
154
  Gem::Version.new(a) < Gem::Version.new(b) ? a : b
170
155
  end
171
-
172
- # ---- Plane files -------------------------------------------------
173
-
174
- def write_planes(blocks)
175
- plane_block_ids = group_block_ids_by_plane(blocks)
176
- count = 0
177
- (0..16).each do |n|
178
- path = Paths.plane_metadata_path(@output_root, n)
179
- count += 1 if write_atomic(path, plane_payload(n, plane_block_ids[n] || []))
180
- end
181
- count
182
- end
183
-
184
- def group_block_ids_by_plane(blocks)
185
- blocks.each_with_object(Hash.new { |h, k| h[k] = [] }) do |block, h|
186
- h[block.plane_number] << block.id
187
- end
188
- end
189
-
190
- def plane_payload(plane_number, block_ids)
191
- name, abbrev = PLANE_TABLE.fetch(plane_number)
192
- range_first = plane_number * 0x10000
193
- range_last = range_first + 0xFFFF
194
- to_pretty_json(
195
- "number" => plane_number,
196
- "name" => name,
197
- "abbrev" => abbrev,
198
- "range_first" => range_first,
199
- "range_last" => range_last,
200
- "block_ids" => block_ids,
201
- )
202
- end
203
-
204
- # ---- Block files -------------------------------------------------
205
-
206
- def write_blocks(blocks)
207
- count = blocks.sum do |block|
208
- block.age = @block_ages[block.id]
209
- path = Paths.block_metadata_path(@output_root, block.id)
210
- write_atomic(path, block_payload(block)) ? 1 : 0
211
- end
212
- count + write_blocks_index(blocks)
213
- end
214
-
215
- def write_blocks_index(blocks)
216
- path = Paths.blocks_index_path(@output_root)
217
- summary = blocks.map do |block|
218
- {
219
- "id" => block.id,
220
- "name" => block.name,
221
- "first_cp" => block.range_first,
222
- "last_cp" => block.range_last,
223
- "plane_number" => block.plane_number,
224
- "age" => @block_ages[block.id],
225
- }
226
- end
227
- write_atomic(path, to_pretty_json(summary)) ? 1 : 0
228
- end
229
-
230
- def block_payload(block)
231
- to_pretty_json(
232
- "id" => block.id,
233
- "name" => block.name,
234
- "range_first" => block.range_first,
235
- "range_last" => block.range_last,
236
- "plane_number" => block.plane_number,
237
- "age" => @block_ages[block.id],
238
- "codepoint_ids" => (@block_codepoint_ids[block.id] || []),
239
- )
240
- end
241
-
242
- # ---- Script files ------------------------------------------------
243
-
244
- def write_scripts(scripts)
245
- count = 0
246
- scripts.group_by(&:code).each do |code, ranges|
247
- next if code.nil? || code.empty?
248
-
249
- path = Paths.script_metadata_path(@output_root, code)
250
- count += 1 if write_atomic(path, script_payload(code, ranges))
251
- end
252
- count
253
- end
254
-
255
- def script_payload(code, ranges)
256
- to_pretty_json(
257
- "code" => code,
258
- "name" => ranges.first&.name,
259
- "range_first" => ranges.map(&:range_first).min,
260
- "range_last" => ranges.map(&:range_last).max,
261
- "codepoint_ids" => (@script_codepoint_ids[code] || []),
262
- )
263
- end
264
-
265
- # ---- Lookup indexes ---------------------------------------------
266
-
267
- def write_indexes
268
- count = 0
269
- count += 1 if write_atomic(Paths.names_index_path(@output_root), to_pretty_json(@names_index))
270
- count += 1 if write_atomic(Paths.labels_index_path(@output_root), to_pretty_json(@labels_index))
271
- count += 1 if write_atomic(codepoint_to_block_path, to_pretty_json(@cp_to_block))
272
- count
273
- end
274
-
275
- def codepoint_to_block_path
276
- Pathname(@output_root).join("index", "codepoint_to_block.json")
277
- end
278
-
279
- # ---- Relationships ----------------------------------------------
280
-
281
- def write_relationships(indices)
282
- RELATIONSHIP_SOURCES.sum do |field, slug|
283
- records = indices.public_send(field)
284
- write_relationship_file(slug, records)
285
- end
286
- end
287
-
288
- def write_relationship_file(slug, records)
289
- return 0 if records.nil? || records.empty?
290
-
291
- path = Pathname(@output_root).join("relationships", "#{slug}.json")
292
- write_atomic(path, relationship_payload(records)) ? 1 : 0
293
- end
294
-
295
- # records is Hash<Integer, Record>, Hash<Integer, Array<Record>>,
296
- # Hash<String, Record>, or Hash<String, Array<Record>>.
297
- # Output: { "U+XXXX" => record.to_yaml_hash, ... } or
298
- # { "U+XXXX" => [record.to_yaml_hash, ...], ... }
299
- def relationship_payload(records)
300
- payload = records.each_with_object({}) do |(key, value), h|
301
- h[key_to_cp_id(key)] = serialize_value(value)
302
- end
303
- to_pretty_json(payload)
304
- end
305
-
306
- # Indices that are keyed by Integer codepoint (most of them) get
307
- # formatted into "U+XXXX". Indices keyed by string ids already
308
- # (cjk_radicals by ideograph_id, standardized_variants by base_id)
309
- # are passed through verbatim.
310
- def key_to_cp_id(key)
311
- key.is_a?(Integer) ? Paths.cp_id(key) : key
312
- end
313
-
314
- def serialize_value(value)
315
- return value.map { |v| serialize_one(v) } if value.is_a?(Array)
316
-
317
- serialize_one(value)
318
- end
319
-
320
- def serialize_one(record)
321
- record.to_yaml_hash
322
- end
323
-
324
- # ---- Enums -------------------------------------------------------
325
-
326
- def write_enums(property_aliases, property_value_aliases)
327
- path = Pathname(@output_root).join("enums.json")
328
- payload = {
329
- "properties" => property_aliases.map(&:to_yaml_hash),
330
- "property_values" => property_value_aliases.map(&:to_yaml_hash),
331
- }
332
- write_atomic(path, to_pretty_json(payload)) ? 1 : 0
333
- end
334
-
335
- # ---- Named sequences --------------------------------------------
336
-
337
- def write_named_sequences(named_sequences)
338
- return 0 if named_sequences.nil? || named_sequences.empty?
339
-
340
- dir = Pathname(@output_root).join("named_sequences")
341
- named_sequences.sum do |ns|
342
- path = dir.join("#{slug_for(ns)}.json")
343
- write_atomic(path, ns.to_json(pretty: true)) ? 1 : 0
344
- end
345
- end
346
-
347
- # Slug derived from the name: downcase, non-alphanumerics → "_".
348
- def slug_for(named_sequence)
349
- named_sequence.name
350
- .downcase
351
- .gsub(/[^a-z0-9]+/, "_")
352
- .gsub(/^_+|_+$/, "")
353
- end
354
-
355
- # ---- Manifest ---------------------------------------------------
356
-
357
- # Fields that define the manifest's semantic content. When these
358
- # match the existing manifest on disk, we preserve the old
359
- # `generated_at` so that re-runs are byte-idempotent (no rewrite
360
- # unless something actually changed).
361
- MANIFEST_CONTENT_KEYS = %w[
362
- ucd_version codepoint_count glyph_count schema_version
363
- ].freeze
364
- private_constant :MANIFEST_CONTENT_KEYS
365
-
366
- def write_manifest(ucd_version:, glyph_count:)
367
- path = Paths.manifest_path(@output_root)
368
- content = {
369
- "ucd_version" => ucd_version,
370
- "codepoint_count" => @codepoint_count,
371
- "glyph_count" => glyph_count,
372
- "schema_version" => "1",
373
- }
374
- ts = preserved_or_new_timestamp(path, content)
375
- payload = content.merge("generated_at" => ts)
376
- write_atomic(path, to_pretty_json(payload)) ? 1 : 0
377
- end
378
-
379
- def preserved_or_new_timestamp(path, content)
380
- existing = read_manifest(path)
381
- return Time.now.utc.iso8601 unless existing
382
-
383
- unchanged = MANIFEST_CONTENT_KEYS.all? { |k| existing[k] == content[k] }
384
- unchanged ? existing["generated_at"] : Time.now.utc.iso8601
385
- end
386
-
387
- def read_manifest(path)
388
- return nil unless path.exist?
389
-
390
- JSON.parse(path.read)
391
- rescue JSON::ParserError
392
- nil
393
- end
394
156
  end
395
157
  end
396
- end
158
+ end