ucode 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CLAUDE.md +211 -0
- data/Gemfile +22 -0
- data/Gemfile.lock +406 -0
- data/README.md +469 -0
- data/Rakefile +18 -0
- data/TODO.new/00-README.md +66 -0
- data/TODO.new/01-pillar-terminology-alignment.md +69 -0
- data/TODO.new/02-audit-schema-design.md +255 -0
- data/TODO.new/03-directory-output-spec.md +203 -0
- data/TODO.new/04-fontist-org-contract.md +173 -0
- data/TODO.new/05-baseline-unicode17-coverage-audit.md +144 -0
- data/TODO.new/06-audit-namespace-skeleton.md +105 -0
- data/TODO.new/07-audit-models-port.md +132 -0
- data/TODO.new/08-extractors-cheap-port.md +113 -0
- data/TODO.new/09-extractors-expensive-port.md +99 -0
- data/TODO.new/10-aggregations-ucd-rewrite.md +168 -0
- data/TODO.new/11-differ-and-library-auditor-port.md +102 -0
- data/TODO.new/12-formatters-port.md +115 -0
- data/TODO.new/13-directory-emitter.md +147 -0
- data/TODO.new/14-html-face-browser.md +144 -0
- data/TODO.new/15-html-library-browser.md +102 -0
- data/TODO.new/16-cli-audit-subcommands.md +142 -0
- data/TODO.new/17-fontisan-cleanup-audit.md +147 -0
- data/TODO.new/18-fontisan-cleanup-ucd.md +156 -0
- data/TODO.new/19-fontisan-docs-update.md +155 -0
- data/TODO.new/20-canonical-resolver-4-tier.md +182 -0
- data/TODO.new/21-canonical-unicode17-build.md +148 -0
- data/TODO.new/22-implementation-order.md +176 -0
- data/UCODE_CHANGELOG.md +97 -0
- data/exe/ucode +8 -0
- data/lib/ucode/aggregator.rb +77 -0
- data/lib/ucode/audit/block_aggregator.rb +90 -0
- data/lib/ucode/audit/codepoint_range_coalescer.rb +42 -0
- data/lib/ucode/audit/context.rb +137 -0
- data/lib/ucode/audit/discrepancy_detector.rb +213 -0
- data/lib/ucode/audit/extractors/aggregations.rb +70 -0
- data/lib/ucode/audit/extractors/base.rb +21 -0
- data/lib/ucode/audit/extractors/color_capabilities.rb +143 -0
- data/lib/ucode/audit/extractors/coverage.rb +55 -0
- data/lib/ucode/audit/extractors/hinting.rb +199 -0
- data/lib/ucode/audit/extractors/identity.rb +65 -0
- data/lib/ucode/audit/extractors/licensing.rb +75 -0
- data/lib/ucode/audit/extractors/metrics.rb +108 -0
- data/lib/ucode/audit/extractors/opentype_layout.rb +71 -0
- data/lib/ucode/audit/extractors/provenance.rb +34 -0
- data/lib/ucode/audit/extractors/style.rb +88 -0
- data/lib/ucode/audit/extractors/variation_detail.rb +101 -0
- data/lib/ucode/audit/extractors.rb +31 -0
- data/lib/ucode/audit/plane_aggregator.rb +37 -0
- data/lib/ucode/audit/registry.rb +63 -0
- data/lib/ucode/audit/script_aggregator.rb +92 -0
- data/lib/ucode/audit.rb +27 -0
- data/lib/ucode/cache.rb +113 -0
- data/lib/ucode/cli.rb +272 -0
- data/lib/ucode/commands/build.rb +68 -0
- data/lib/ucode/commands/cache.rb +46 -0
- data/lib/ucode/commands/fetch.rb +62 -0
- data/lib/ucode/commands/font_coverage.rb +57 -0
- data/lib/ucode/commands/glyphs.rb +136 -0
- data/lib/ucode/commands/lookup.rb +65 -0
- data/lib/ucode/commands/parse.rb +62 -0
- data/lib/ucode/commands/site.rb +33 -0
- data/lib/ucode/commands.rb +19 -0
- data/lib/ucode/config.rb +110 -0
- data/lib/ucode/coordinator/indices.rb +34 -0
- data/lib/ucode/coordinator.rb +397 -0
- data/lib/ucode/database.rb +214 -0
- data/lib/ucode/db_builder.rb +107 -0
- data/lib/ucode/error.rb +96 -0
- data/lib/ucode/fetch/code_charts.rb +57 -0
- data/lib/ucode/fetch/http.rb +83 -0
- data/lib/ucode/fetch/ucd_zip.rb +57 -0
- data/lib/ucode/fetch/unihan_zip.rb +57 -0
- data/lib/ucode/fetch.rb +14 -0
- data/lib/ucode/glyphs/cell_extractor.rb +130 -0
- data/lib/ucode/glyphs/dvisvgm_renderer.rb +29 -0
- data/lib/ucode/glyphs/embedded_fonts/catalog.rb +372 -0
- data/lib/ucode/glyphs/embedded_fonts/content_stream_correlator.rb +228 -0
- data/lib/ucode/glyphs/embedded_fonts/font_entry.rb +126 -0
- data/lib/ucode/glyphs/embedded_fonts/renderer.rb +47 -0
- data/lib/ucode/glyphs/embedded_fonts/source.rb +94 -0
- data/lib/ucode/glyphs/embedded_fonts/svg.rb +123 -0
- data/lib/ucode/glyphs/embedded_fonts/tounicode.rb +103 -0
- data/lib/ucode/glyphs/embedded_fonts/writer.rb +76 -0
- data/lib/ucode/glyphs/embedded_fonts.rb +50 -0
- data/lib/ucode/glyphs/grid.rb +30 -0
- data/lib/ucode/glyphs/grid_detector.rb +165 -0
- data/lib/ucode/glyphs/last_resort/cmap_index.rb +96 -0
- data/lib/ucode/glyphs/last_resort/contents.rb +74 -0
- data/lib/ucode/glyphs/last_resort/glif.rb +124 -0
- data/lib/ucode/glyphs/last_resort/renderer.rb +67 -0
- data/lib/ucode/glyphs/last_resort/source.rb +125 -0
- data/lib/ucode/glyphs/last_resort/svg.rb +247 -0
- data/lib/ucode/glyphs/last_resort/writer.rb +83 -0
- data/lib/ucode/glyphs/last_resort.rb +36 -0
- data/lib/ucode/glyphs/monolith_page_map.rb +181 -0
- data/lib/ucode/glyphs/mutool_renderer.rb +28 -0
- data/lib/ucode/glyphs/page_renderer.rb +221 -0
- data/lib/ucode/glyphs/path_bbox.rb +62 -0
- data/lib/ucode/glyphs/pdf2svg_renderer.rb +26 -0
- data/lib/ucode/glyphs/pdf_fetcher.rb +102 -0
- data/lib/ucode/glyphs/pdftocairo_renderer.rb +32 -0
- data/lib/ucode/glyphs/real_fonts/block_coverage.rb +45 -0
- data/lib/ucode/glyphs/real_fonts/coverage_auditor.rb +117 -0
- data/lib/ucode/glyphs/real_fonts/font_coverage_report.rb +45 -0
- data/lib/ucode/glyphs/real_fonts/font_locator.rb +95 -0
- data/lib/ucode/glyphs/real_fonts/unicode_17_blocks.rb +104 -0
- data/lib/ucode/glyphs/real_fonts/writer.rb +50 -0
- data/lib/ucode/glyphs/real_fonts.rb +32 -0
- data/lib/ucode/glyphs/writer.rb +250 -0
- data/lib/ucode/glyphs.rb +27 -0
- data/lib/ucode/index.rb +106 -0
- data/lib/ucode/index_builder.rb +94 -0
- data/lib/ucode/models/audit/audit_axis.rb +30 -0
- data/lib/ucode/models/audit/audit_diff.rb +77 -0
- data/lib/ucode/models/audit/audit_report.rb +137 -0
- data/lib/ucode/models/audit/baseline.rb +32 -0
- data/lib/ucode/models/audit/block_summary.rb +72 -0
- data/lib/ucode/models/audit/codepoint_detail.rb +45 -0
- data/lib/ucode/models/audit/codepoint_range.rb +39 -0
- data/lib/ucode/models/audit/codepoint_set_diff.rb +34 -0
- data/lib/ucode/models/audit/color_capabilities.rb +91 -0
- data/lib/ucode/models/audit/discrepancy.rb +38 -0
- data/lib/ucode/models/audit/duplicate_group.rb +23 -0
- data/lib/ucode/models/audit/embedding_type.rb +81 -0
- data/lib/ucode/models/audit/field_change.rb +28 -0
- data/lib/ucode/models/audit/fs_selection_flags.rb +65 -0
- data/lib/ucode/models/audit/gasp_range.rb +63 -0
- data/lib/ucode/models/audit/hinting.rb +99 -0
- data/lib/ucode/models/audit/library_summary.rb +40 -0
- data/lib/ucode/models/audit/licensing.rb +48 -0
- data/lib/ucode/models/audit/metrics.rb +111 -0
- data/lib/ucode/models/audit/named_instance.rb +41 -0
- data/lib/ucode/models/audit/opentype_layout.rb +38 -0
- data/lib/ucode/models/audit/plane_summary.rb +31 -0
- data/lib/ucode/models/audit/script_coverage_row.rb +26 -0
- data/lib/ucode/models/audit/script_features.rb +28 -0
- data/lib/ucode/models/audit/script_summary.rb +54 -0
- data/lib/ucode/models/audit/variation_detail.rb +42 -0
- data/lib/ucode/models/audit.rb +50 -0
- data/lib/ucode/models/bidi_bracket_pair.rb +20 -0
- data/lib/ucode/models/bidi_mirroring.rb +19 -0
- data/lib/ucode/models/binary_property_assignment.rb +26 -0
- data/lib/ucode/models/block.rb +36 -0
- data/lib/ucode/models/case_folding_rule.rb +23 -0
- data/lib/ucode/models/cjk_radical.rb +23 -0
- data/lib/ucode/models/codepoint/bidi.rb +28 -0
- data/lib/ucode/models/codepoint/break_segmentation.rb +22 -0
- data/lib/ucode/models/codepoint/case_folding.rb +25 -0
- data/lib/ucode/models/codepoint/casing.rb +32 -0
- data/lib/ucode/models/codepoint/decomposition.rb +27 -0
- data/lib/ucode/models/codepoint/display.rb +24 -0
- data/lib/ucode/models/codepoint/emoji.rb +29 -0
- data/lib/ucode/models/codepoint/hangul.rb +20 -0
- data/lib/ucode/models/codepoint/identifier.rb +30 -0
- data/lib/ucode/models/codepoint/indic.rb +20 -0
- data/lib/ucode/models/codepoint/joining.rb +20 -0
- data/lib/ucode/models/codepoint/normalization.rb +35 -0
- data/lib/ucode/models/codepoint/numeric_value.rb +35 -0
- data/lib/ucode/models/codepoint.rb +122 -0
- data/lib/ucode/models/name_alias.rb +21 -0
- data/lib/ucode/models/named_sequence.rb +19 -0
- data/lib/ucode/models/names_list_entry.rb +38 -0
- data/lib/ucode/models/plane.rb +36 -0
- data/lib/ucode/models/property_alias.rb +24 -0
- data/lib/ucode/models/property_value_alias.rb +26 -0
- data/lib/ucode/models/relationship/compat_equiv.rb +18 -0
- data/lib/ucode/models/relationship/cross_reference.rb +17 -0
- data/lib/ucode/models/relationship/footnote.rb +24 -0
- data/lib/ucode/models/relationship/informal_alias.rb +18 -0
- data/lib/ucode/models/relationship/sample_sequence.rb +24 -0
- data/lib/ucode/models/relationship/variation_sequence.rb +19 -0
- data/lib/ucode/models/relationship.rb +57 -0
- data/lib/ucode/models/script.rb +41 -0
- data/lib/ucode/models/special_casing_rule.rb +28 -0
- data/lib/ucode/models/standardized_variant.rb +24 -0
- data/lib/ucode/models/unihan_entry.rb +23 -0
- data/lib/ucode/models.rb +47 -0
- data/lib/ucode/parsers/auxiliary.rb +26 -0
- data/lib/ucode/parsers/base.rb +137 -0
- data/lib/ucode/parsers/bidi_brackets.rb +41 -0
- data/lib/ucode/parsers/bidi_mirroring.rb +37 -0
- data/lib/ucode/parsers/blocks.rb +63 -0
- data/lib/ucode/parsers/case_folding.rb +53 -0
- data/lib/ucode/parsers/cjk_radicals.rb +102 -0
- data/lib/ucode/parsers/derived_age.rb +59 -0
- data/lib/ucode/parsers/derived_core_properties.rb +60 -0
- data/lib/ucode/parsers/extracted_properties.rb +74 -0
- data/lib/ucode/parsers/name_aliases.rb +44 -0
- data/lib/ucode/parsers/named_sequences.rb +51 -0
- data/lib/ucode/parsers/names_list.rb +250 -0
- data/lib/ucode/parsers/property_aliases.rb +41 -0
- data/lib/ucode/parsers/property_value_aliases.rb +46 -0
- data/lib/ucode/parsers/script_extensions.rb +64 -0
- data/lib/ucode/parsers/scripts.rb +60 -0
- data/lib/ucode/parsers/special_casing.rb +62 -0
- data/lib/ucode/parsers/standardized_variants.rb +56 -0
- data/lib/ucode/parsers/unicode_data/hangul_name.rb +73 -0
- data/lib/ucode/parsers/unicode_data.rb +268 -0
- data/lib/ucode/parsers/unihan.rb +125 -0
- data/lib/ucode/parsers.rb +35 -0
- data/lib/ucode/range_entry.rb +58 -0
- data/lib/ucode/repo/aggregate_writer.rb +364 -0
- data/lib/ucode/repo/atomic_writes.rb +48 -0
- data/lib/ucode/repo/codepoint_writer.rb +96 -0
- data/lib/ucode/repo/paths.rb +122 -0
- data/lib/ucode/repo.rb +22 -0
- data/lib/ucode/site/config_emitter.rb +124 -0
- data/lib/ucode/site/generator.rb +178 -0
- data/lib/ucode/site/search_index.rb +68 -0
- data/lib/ucode/site/template/.gitignore +4 -0
- data/lib/ucode/site/template/.vitepress/config.ts +8 -0
- data/lib/ucode/site/template/.vitepress/theme/index.js +20 -0
- data/lib/ucode/site/template/char/[codepoint].md +13 -0
- data/lib/ucode/site/template/components/BlockView.vue +57 -0
- data/lib/ucode/site/template/components/CharView.vue +85 -0
- data/lib/ucode/site/template/components/PlaneView.vue +56 -0
- data/lib/ucode/site/template/components/SearchView.vue +66 -0
- data/lib/ucode/site/template/index.md +25 -0
- data/lib/ucode/site/template/package.json +18 -0
- data/lib/ucode/site/template/search.md +9 -0
- data/lib/ucode/site.rb +13 -0
- data/lib/ucode/version.rb +5 -0
- data/lib/ucode/version_resolver.rb +76 -0
- data/lib/ucode.rb +74 -0
- data/ucode.gemspec +56 -0
- metadata +404 -0
|
@@ -0,0 +1,364 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
require "json"
|
|
5
|
+
require "time"
|
|
6
|
+
|
|
7
|
+
require "ucode/models"
|
|
8
|
+
require "ucode/repo/atomic_writes"
|
|
9
|
+
require "ucode/repo/paths"
|
|
10
|
+
|
|
11
|
+
module Ucode
|
|
12
|
+
module Repo
|
|
13
|
+
# Writes every aggregate JSON file under `output/`:
|
|
14
|
+
#
|
|
15
|
+
# output/planes/<n>.json
|
|
16
|
+
# output/blocks/<ID>.json
|
|
17
|
+
# output/blocks/index.json (block index)
|
|
18
|
+
# output/scripts/<code>.json
|
|
19
|
+
# output/index/names.json (cp_id → name)
|
|
20
|
+
# output/index/labels.json (cp_id → {name, gc, sc})
|
|
21
|
+
# output/index/codepoint_to_block.json (cp_id → block_id)
|
|
22
|
+
# output/relationships/*.json (per-property tables)
|
|
23
|
+
# output/enums.json (property aliases + value aliases)
|
|
24
|
+
# output/named_sequences/<slug>.json
|
|
25
|
+
# output/manifest.json
|
|
26
|
+
#
|
|
27
|
+
# **Single pass**: callers feed one CodePoint at a time via `#add`;
|
|
28
|
+
# `#flush` writes all derived files using the Coordinator's indices
|
|
29
|
+
# for the static tables (relationships, enums, named sequences).
|
|
30
|
+
#
|
|
31
|
+
# **MECE**:
|
|
32
|
+
# - paths: `Repo::Paths`
|
|
33
|
+
# - atomic writes: `Repo::AtomicWrites`
|
|
34
|
+
# - stream aggregation: this class
|
|
35
|
+
# - serialization: lutaml-model `to_yaml_hash` / `to_json`
|
|
36
|
+
class AggregateWriter
|
|
37
|
+
include AtomicWrites
|
|
38
|
+
|
|
39
|
+
# Static metadata for the 17 Unicode planes. Planes 4–13 are
|
|
40
|
+
# unassigned in Unicode 17; their entries use placeholder names.
|
|
41
|
+
PLANE_TABLE = {
|
|
42
|
+
0 => ["Basic Multilingual Plane", "BMP"],
|
|
43
|
+
1 => ["Supplementary Multilingual Plane", "SMP"],
|
|
44
|
+
2 => ["Supplementary Ideographic Plane", "SIP"],
|
|
45
|
+
3 => ["Tertiary Ideographic Plane", "TIP"],
|
|
46
|
+
4 => ["Unassigned Plane 4", "—"],
|
|
47
|
+
5 => ["Unassigned Plane 5", "—"],
|
|
48
|
+
6 => ["Unassigned Plane 6", "—"],
|
|
49
|
+
7 => ["Unassigned Plane 7", "—"],
|
|
50
|
+
8 => ["Unassigned Plane 8", "—"],
|
|
51
|
+
9 => ["Unassigned Plane 9", "—"],
|
|
52
|
+
10 => ["Unassigned Plane 10", "—"],
|
|
53
|
+
11 => ["Unassigned Plane 11", "—"],
|
|
54
|
+
12 => ["Unassigned Plane 12", "—"],
|
|
55
|
+
13 => ["Unassigned Plane 13", "—"],
|
|
56
|
+
14 => ["Supplementary Special-purpose Plane", "SSP"],
|
|
57
|
+
15 => ["Supplementary Private Use Area-A", "SPUA-A"],
|
|
58
|
+
16 => ["Supplementary Private Use Area-B", "SPUA-B"],
|
|
59
|
+
}.freeze
|
|
60
|
+
private_constant :PLANE_TABLE
|
|
61
|
+
|
|
62
|
+
# Coordinator::Indices fields paired with the file slug used
|
|
63
|
+
# under `output/relationships/`. Each field is a Hash<Integer,
|
|
64
|
+
# Record> or Hash<Integer, Array<Record>>.
|
|
65
|
+
RELATIONSHIP_SOURCES = {
|
|
66
|
+
special_casing: "special_casing",
|
|
67
|
+
case_folding: "case_folding",
|
|
68
|
+
bidi_mirroring: "bidi_mirroring",
|
|
69
|
+
bidi_brackets: "bidi_brackets",
|
|
70
|
+
cjk_radicals: "cjk_radicals",
|
|
71
|
+
standardized_variants: "standardized_variants",
|
|
72
|
+
name_aliases: "name_aliases",
|
|
73
|
+
}.freeze
|
|
74
|
+
private_constant :RELATIONSHIP_SOURCES
|
|
75
|
+
|
|
76
|
+
attr_reader :codepoint_count
|
|
77
|
+
|
|
78
|
+
# @param output_root [String, Pathname]
|
|
79
|
+
def initialize(output_root)
  @output_root = Pathname.new(output_root)

  # Membership lists: looking up an unseen key stores and returns a new
  # empty array, so callers can append without checking first.
  @block_codepoint_ids = Hash.new { |table, block_id| table[block_id] = [] }
  @script_codepoint_ids = Hash.new { |table, script_code| table[script_code] = [] }

  # Flat lookup tables keyed by codepoint id, filled by #add.
  @names_index = {}
  @labels_index = {}
  @cp_to_block = {}

  # Number of codepoints accepted by #add so far.
  @codepoint_count = 0
end
|
|
88
|
+
|
|
89
|
+
# Fold one CodePoint into the stream accumulators. No-ops if the
|
|
90
|
+
# cp has no block_id (it has no home in the output tree).
|
|
91
|
+
# @param cp [Ucode::Models::CodePoint]
|
|
92
|
+
# @return [void]
|
|
93
|
+
def add(cp)
  block_id = cp.block_id
  # A codepoint without a block is skipped entirely (and not counted).
  return if block_id.nil?

  id = cp.id
  script = cp.script_code
  name = cp.name

  @block_codepoint_ids[block_id] << id
  @script_codepoint_ids[script] << id if script
  # Only non-empty names make it into the name index.
  @names_index[id] = name if name && !name.empty?
  @labels_index[id] = build_label(cp)
  @cp_to_block[id] = block_id
  @codepoint_count += 1
end
|
|
107
|
+
|
|
108
|
+
# Write every aggregate file. Optional params supply data that is
|
|
109
|
+
# not in `Coordinator::Indices` (the Coordinator only resolves the
|
|
110
|
+
# `sc` subset of PropertyValueAliases; the full alias tables and
|
|
111
|
+
# the named sequences are passed through from the CLI/parsers).
|
|
112
|
+
#
|
|
113
|
+
# @param ucd_version [String]
|
|
114
|
+
# @param indices [Ucode::Coordinator::Indices]
|
|
115
|
+
# @param property_aliases [Array<Ucode::Models::PropertyAlias>]
|
|
116
|
+
# @param property_value_aliases [Array<Ucode::Models::PropertyValueAlias>]
|
|
117
|
+
# @param named_sequences [Array<Ucode::Models::NamedSequence>]
|
|
118
|
+
# @param glyph_count [Integer]
|
|
119
|
+
# @return [Integer] number of files written
|
|
120
|
+
def flush(ucd_version:, indices:, property_aliases: [],
          property_value_aliases: [], named_sequences: [], glyph_count: 0)
  # Each writer returns the number of files it actually wrote; the array
  # literal evaluates them top to bottom, so the manifest is written last.
  [
    write_planes(indices.blocks),
    write_blocks(indices.blocks),
    write_scripts(indices.scripts),
    write_indexes,
    write_relationships(indices),
    write_enums(property_aliases, property_value_aliases),
    write_named_sequences(named_sequences),
    write_manifest(ucd_version: ucd_version, glyph_count: glyph_count),
  ].sum
end
|
|
133
|
+
|
|
134
|
+
private
|
|
135
|
+
|
|
136
|
+
# ---- Per-codepoint accumulator helpers ---------------------------
|
|
137
|
+
|
|
138
|
+
# Build the labels-index entry for one codepoint: its name, general
# category ("gc") and script code ("sc"), leaving out any that are nil.
def build_label(cp)
  {
    "name" => cp.name,
    "gc" => cp.general_category,
    "sc" => cp.script_code,
  }.compact
end
|
|
142
|
+
|
|
143
|
+
# ---- Plane files -------------------------------------------------
|
|
144
|
+
|
|
145
|
+
# Write one metadata file for each plane number 0 through 16 and return
# how many of them were actually written (write_atomic returned truthy).
def write_planes(blocks)
  ids_by_plane = group_block_ids_by_plane(blocks)
  (0..16).count do |plane|
    target = Paths.plane_metadata_path(@output_root, plane)
    write_atomic(target, plane_payload(plane, ids_by_plane[plane] || []))
  end
end
|
|
154
|
+
|
|
155
|
+
# Map each plane number to the ids of the blocks on that plane, in the
# order the blocks are given. The returned Hash stores and returns a new
# empty array when an unseen plane is looked up.
def group_block_ids_by_plane(blocks)
  ids_by_plane = Hash.new { |table, plane| table[plane] = [] }
  blocks.each { |blk| ids_by_plane[blk.plane_number] << blk.id }
  ids_by_plane
end
|
|
160
|
+
|
|
161
|
+
# Serialise the JSON document for one plane. Name and abbreviation come
# from PLANE_TABLE (an unknown plane number raises KeyError); the range is
# the 0x10000 codepoints starting at plane_number * 0x10000.
def plane_payload(plane_number, block_ids)
  plane_name, plane_abbrev = PLANE_TABLE.fetch(plane_number)
  first_cp = plane_number * 0x10000
  document = {
    "number" => plane_number,
    "name" => plane_name,
    "abbrev" => plane_abbrev,
    "range_first" => first_cp,
    "range_last" => first_cp + 0xFFFF,
    "block_ids" => block_ids,
  }
  to_pretty_json(document)
end
|
|
174
|
+
|
|
175
|
+
# ---- Block files -------------------------------------------------
|
|
176
|
+
|
|
177
|
+
# Write one metadata file per block, then the block index. Returns the
# number of per-block files written plus the index writer's result.
def write_blocks(blocks)
  written = blocks.count do |blk|
    target = Paths.block_metadata_path(@output_root, blk.id)
    write_atomic(target, block_payload(blk))
  end
  written + write_blocks_index(blocks)
end
|
|
184
|
+
|
|
185
|
+
# Write the block index: one summary row per block (id, name, first/last
# codepoint, plane number). Returns 1 if the file was written, else 0.
def write_blocks_index(blocks)
  rows = blocks.map do |blk|
    {
      "id" => blk.id,
      "name" => blk.name,
      "first_cp" => blk.range_first,
      "last_cp" => blk.range_last,
      "plane_number" => blk.plane_number,
    }
  end
  target = Paths.blocks_index_path(@output_root)
  return 1 if write_atomic(target, to_pretty_json(rows))

  0
end
|
|
198
|
+
|
|
199
|
+
# Serialise the JSON document for one block: its identity, range, plane
# and the ids of the codepoints accumulated for it via #add.
def block_payload(block)
  member_ids = @block_codepoint_ids[block.id] || []
  document = {
    "id" => block.id,
    "name" => block.name,
    "range_first" => block.range_first,
    "range_last" => block.range_last,
    "plane_number" => block.plane_number,
    "codepoint_ids" => member_ids,
  }
  to_pretty_json(document)
end
|
|
209
|
+
|
|
210
|
+
# ---- Script files ------------------------------------------------
|
|
211
|
+
|
|
212
|
+
# Write one metadata file per distinct script code, grouping all records
# that share a code. Records whose code is nil or empty are skipped.
# Returns the number of files actually written.
def write_scripts(scripts)
  scripts.group_by(&:code).count do |code, ranges|
    next false if code.nil? || code.empty?

    target = Paths.script_metadata_path(@output_root, code)
    write_atomic(target, script_payload(code, ranges))
  end
end
|
|
222
|
+
|
|
223
|
+
# Serialise the JSON document for one script. The name is taken from the
# first record; the range runs from the smallest range_first to the
# largest range_last across all records for this code.
def script_payload(code, ranges)
  lower_bounds = ranges.map(&:range_first)
  upper_bounds = ranges.map(&:range_last)
  document = {
    "code" => code,
    "name" => ranges.first&.name,
    "range_first" => lower_bounds.min,
    "range_last" => upper_bounds.max,
    "codepoint_ids" => (@script_codepoint_ids[code] || []),
  }
  to_pretty_json(document)
end
|
|
232
|
+
|
|
233
|
+
# ---- Lookup indexes ---------------------------------------------
|
|
234
|
+
|
|
235
|
+
# Write the three lookup indexes (names, labels, codepoint-to-block) and
# return how many of them were actually written.
def write_indexes
  targets = [
    [Paths.names_index_path(@output_root), @names_index],
    [Paths.labels_index_path(@output_root), @labels_index],
    [codepoint_to_block_path, @cp_to_block],
  ]
  targets.count { |target, table| write_atomic(target, to_pretty_json(table)) }
end
|
|
242
|
+
|
|
243
|
+
# Location of the codepoint-to-block index:
# <output_root>/index/codepoint_to_block.json
def codepoint_to_block_path
  index_dir = Pathname(@output_root).join("index")
  index_dir.join("codepoint_to_block.json")
end
|
|
246
|
+
|
|
247
|
+
# ---- Relationships ----------------------------------------------
|
|
248
|
+
|
|
249
|
+
# For every (field, slug) pair in RELATIONSHIP_SOURCES, read that field
# from `indices` and hand it to write_relationship_file. Returns the sum
# of the per-file results.
def write_relationships(indices)
  written = 0
  RELATIONSHIP_SOURCES.each_pair do |field, slug|
    written += write_relationship_file(slug, indices.public_send(field))
  end
  written
end
|
|
255
|
+
|
|
256
|
+
# Write <output_root>/relationships/<slug>.json for one relationship
# table. A nil or empty table produces no file. Returns 1 if the file was
# written, else 0.
def write_relationship_file(slug, records)
  nothing_to_write = records.nil? || records.empty?
  return 0 if nothing_to_write

  target = Pathname(@output_root).join("relationships", "#{slug}.json")
  written = write_atomic(target, relationship_payload(records))
  written ? 1 : 0
end
|
|
262
|
+
|
|
263
|
+
# records is Hash<Integer, Record>, Hash<Integer, Array<Record>>,
|
|
264
|
+
# Hash<String, Record>, or Hash<String, Array<Record>>.
|
|
265
|
+
# Output: { "U+XXXX" => record.to_yaml_hash, ... } or
|
|
266
|
+
# { "U+XXXX" => [record.to_yaml_hash, ...], ... }
|
|
267
|
+
def relationship_payload(records)
  document = {}
  # Keys go through key_to_cp_id and values through serialize_value;
  # entry order follows the iteration order of `records`.
  records.each do |key, value|
    document[key_to_cp_id(key)] = serialize_value(value)
  end
  to_pretty_json(document)
end
|
|
273
|
+
|
|
274
|
+
# Indices that are keyed by Integer codepoint (most of them) get
|
|
275
|
+
# formatted into "U+XXXX". Indices keyed by string ids already
|
|
276
|
+
# (cjk_radicals by ideograph_id, standardized_variants by base_id)
|
|
277
|
+
# are passed through verbatim.
|
|
278
|
+
def key_to_cp_id(key)
  case key
  when Integer then Paths.cp_id(key) # numeric codepoint: format as an id
  else key                           # anything else is returned as given
  end
end
|
|
281
|
+
|
|
282
|
+
# Serialise a relationship table value: an Array is serialised element by
# element, anything else is treated as a single record.
def serialize_value(value)
  if value.is_a?(Array)
    value.map { |record| serialize_one(record) }
  else
    serialize_one(value)
  end
end
|
|
287
|
+
|
|
288
|
+
# Serialise a single record by asking it for its `to_yaml_hash` form.
def serialize_one(record)
  record.to_yaml_hash
end
|
|
291
|
+
|
|
292
|
+
# ---- Enums -------------------------------------------------------
|
|
293
|
+
|
|
294
|
+
# Write <output_root>/enums.json holding both alias tables, each entry
# serialised through `to_yaml_hash`. Returns 1 if the file was written,
# else 0.
def write_enums(property_aliases, property_value_aliases)
  document = {
    "properties" => property_aliases.map { |entry| entry.to_yaml_hash },
    "property_values" => property_value_aliases.map { |entry| entry.to_yaml_hash },
  }
  target = Pathname(@output_root).join("enums.json")
  return 1 if write_atomic(target, to_pretty_json(document))

  0
end
|
|
302
|
+
|
|
303
|
+
# ---- Named sequences --------------------------------------------
|
|
304
|
+
|
|
305
|
+
# Write one file per named sequence under <output_root>/named_sequences/,
# named after the sequence's slug. Returns the number of files actually
# written; nil or an empty list writes nothing.
def write_named_sequences(named_sequences)
  return 0 if named_sequences.nil? || named_sequences.empty?

  base_dir = Pathname(@output_root).join("named_sequences")
  named_sequences.count do |sequence|
    target = base_dir.join("#{slug_for(sequence)}.json")
    write_atomic(target, sequence.to_json(pretty: true))
  end
end
|
|
314
|
+
|
|
315
|
+
# Slug derived from the name: downcase, non-alphanumerics → "_".
|
|
316
|
+
# Derive a file-name slug from a named sequence's name: lower-case it,
# collapse every run of characters outside [a-z0-9] into one "_", then
# trim underscores from both ends.
#
# The trim is anchored with \A / \z (start and end of the whole string)
# rather than the per-line anchors ^ / $.
#
# @param named_sequence [#name] object whose name is a String
# @return [String]
def slug_for(named_sequence)
  named_sequence.name
                .downcase
                .gsub(/[^a-z0-9]+/, "_")
                .gsub(/\A_+|_+\z/, "")
end
|
|
322
|
+
|
|
323
|
+
# ---- Manifest ---------------------------------------------------
|
|
324
|
+
|
|
325
|
+
# Fields that define the manifest's semantic content. When these
|
|
326
|
+
# match the existing manifest on disk, we preserve the old
|
|
327
|
+
# `generated_at` so that re-runs are byte-idempotent (no rewrite
|
|
328
|
+
# unless something actually changed).
|
|
329
|
+
MANIFEST_CONTENT_KEYS = %w[
|
|
330
|
+
ucd_version codepoint_count glyph_count schema_version
|
|
331
|
+
].freeze
|
|
332
|
+
private_constant :MANIFEST_CONTENT_KEYS
|
|
333
|
+
|
|
334
|
+
# Write the manifest: UCD version, codepoint and glyph counts, schema
# version, plus a `generated_at` stamp chosen by
# preserved_or_new_timestamp. Returns 1 if the file was written, else 0.
def write_manifest(ucd_version:, glyph_count:)
  target = Paths.manifest_path(@output_root)
  fields = {
    "ucd_version" => ucd_version,
    "codepoint_count" => @codepoint_count,
    "glyph_count" => glyph_count,
    "schema_version" => "1",
  }
  stamp = preserved_or_new_timestamp(target, fields)
  document = fields.merge("generated_at" => stamp)
  return 1 if write_atomic(target, to_pretty_json(document))

  0
end
|
|
346
|
+
|
|
347
|
+
# Choose the `generated_at` value for the manifest about to be written.
#
# The timestamp already stored on disk is reused when every
# MANIFEST_CONTENT_KEYS field of the existing manifest equals the
# corresponding field of `content`, so an unchanged re-run serialises to
# the same bytes. A fresh UTC ISO-8601 timestamp is returned when:
#
# - there is no readable manifest,
# - the existing document is not a JSON object (indexing it by string
#   key would raise),
# - any content field differs, or
# - the existing manifest has no usable `generated_at` string (reusing it
#   would write `null` into the new manifest).
#
# @param path [Pathname] manifest location
# @param content [Hash{String => Object}] content fields of the new manifest
# @return [String] ISO-8601 timestamp
def preserved_or_new_timestamp(path, content)
  existing = read_manifest(path)
  return Time.now.utc.iso8601 unless existing.is_a?(Hash)

  unchanged = MANIFEST_CONTENT_KEYS.all? { |key| existing[key] == content[key] }
  previous = existing["generated_at"]
  return previous if unchanged && previous.is_a?(String) && !previous.empty?

  Time.now.utc.iso8601
end
|
|
354
|
+
|
|
355
|
+
# Load the manifest currently on disk, if there is a usable one.
#
# Returns nil when the file does not exist, when it disappears between
# the existence check and the read, when it is not valid JSON, or when it
# parses to something other than a JSON object (callers index the result
# by string key).
#
# @param path [Pathname]
# @return [Hash, nil]
def read_manifest(path)
  return nil unless path.exist?

  parsed = JSON.parse(path.read)
  parsed.is_a?(Hash) ? parsed : nil
rescue JSON::ParserError, Errno::ENOENT
  nil
end
|
|
362
|
+
end
|
|
363
|
+
end
|
|
364
|
+
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
require "json"
|
|
5
|
+
|
|
6
|
+
require "ucode/repo/paths"
|
|
7
|
+
|
|
8
|
+
module Ucode
|
|
9
|
+
module Repo
|
|
10
|
+
# Atomic, idempotent file-write helpers shared by CodepointWriter
|
|
11
|
+
# and AggregateWriter.
|
|
12
|
+
#
|
|
13
|
+
# - Atomic: write to a sibling `.tmp` file, then rename. A crash
|
|
14
|
+
# mid-write leaves either the old file or no file, never a
|
|
15
|
+
# truncated one.
|
|
16
|
+
# - Idempotent: byte-compare the existing file before writing;
|
|
17
|
+
# identical content is a no-op. Safe to re-run on the full
|
|
18
|
+
# dataset.
|
|
19
|
+
module AtomicWrites
|
|
20
|
+
# @param path [Pathname]
|
|
21
|
+
# @param payload [String] the exact bytes to write
|
|
22
|
+
# @return [Boolean] true if the file was written, false if skipped
|
|
23
|
+
def write_atomic(path, payload)
  # Idempotence: leave the file alone when it already holds this payload.
  return false if same_content?(path, payload)

  path.dirname.mkpath
  tmp = Paths.tmp_path(path)
  begin
    # Write the full payload to the temp file, then rename it over the
    # target so readers never see a partially written file.
    tmp.write(payload)
    tmp.rename(path.to_s)
  rescue StandardError
    # A failed write or rename must not leave a stale temp file behind;
    # remove it and let the original error propagate.
    tmp.delete if tmp.exist?
    raise
  end
  true
end
|
|
32
|
+
|
|
33
|
+
# @param path [Pathname]
|
|
34
|
+
# @param payload [String]
|
|
35
|
+
# @return [Boolean]
|
|
36
|
+
def same_content?(path, payload)
  # Compare raw bytes. `String#==` treats strings with different encoding
  # tags and non-ASCII content as unequal, so a plain `read == payload`
  # reports a mismatch whenever the default external encoding differs
  # from the payload's encoding. `String#b` gives binary-tagged copies,
  # which makes the comparison byte-for-byte.
  path.exist? && path.read.b == payload.b
end
|
|
39
|
+
|
|
40
|
+
# Pretty JSON for any Hash/Array value.
|
|
41
|
+
# @param value [Hash, Array]
|
|
42
|
+
# @return [String]
|
|
43
|
+
def to_pretty_json(value)
  # Multi-line, indented JSON text via the stdlib pretty generator.
  JSON.pretty_generate(value)
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
require "thread"
|
|
5
|
+
|
|
6
|
+
require "ucode/repo/atomic_writes"
|
|
7
|
+
require "ucode/repo/paths"
|
|
8
|
+
|
|
9
|
+
module Ucode
|
|
10
|
+
module Repo
|
|
11
|
+
# Writes one `index.json` per codepoint under `output/blocks/<id>/<cp>/`.
|
|
12
|
+
#
|
|
13
|
+
# Streaming + threaded + idempotent:
|
|
14
|
+
#
|
|
15
|
+
# - **Streaming**: callers pass an Enumerator; the writer pulls one
|
|
16
|
+
# codepoint at a time, never the full 160k set in memory.
|
|
17
|
+
# - **Threaded**: a fixed-size worker pool drains a shared queue.
|
|
18
|
+
# Each codepoint maps to a unique path → no per-file contention.
|
|
19
|
+
# - **Idempotent**: existing files are byte-compared to the new
|
|
20
|
+
# payload before writing; identical content is a no-op. Safe to
|
|
21
|
+
# re-run on the full dataset.
|
|
22
|
+
# - **Atomic**: writes go to `<path>.tmp`, then rename. A crash
|
|
23
|
+
# mid-write leaves either the old file or no file, never a
|
|
24
|
+
# truncated one.
|
|
25
|
+
class CodepointWriter
|
|
26
|
+
include AtomicWrites
|
|
27
|
+
|
|
28
|
+
# @param output_root [String, Pathname]
|
|
29
|
+
# @param parallel_workers [Integer] size of the worker pool. Set to
|
|
30
|
+
# 1 (or less) to run synchronously — useful in tests.
|
|
31
|
+
def initialize(output_root, parallel_workers: 8)
  # Stored as given; #write_each decides between inline and threaded
  # draining based on this value.
  @parallel_workers = parallel_workers
  @output_root = Pathname.new(output_root)
end
|
|
35
|
+
|
|
36
|
+
# Write one codepoint synchronously.
|
|
37
|
+
# @param codepoint [Ucode::Models::CodePoint]
|
|
38
|
+
# @return [Pathname, nil] the path written, or nil if skipped
|
|
39
|
+
# (missing block_id or content-identical to existing file)
|
|
40
|
+
def write(codepoint)
  block_id = codepoint.block_id
  # Without a block id there is no path to write to.
  return nil if block_id.nil?

  target = Paths.codepoint_json_path(@output_root, block_id, codepoint.id)
  # write_atomic is falsy when the file on disk already matches.
  write_atomic(target, serialize(codepoint)) ? target : nil
end
|
|
49
|
+
|
|
50
|
+
# Drain an Enumerator through the worker pool. Returns the total
|
|
51
|
+
# count of codepoints seen (whether or not each one was written).
|
|
52
|
+
# @param enum [Enumerator<Ucode::Models::CodePoint>, Enumerable]
|
|
53
|
+
# @return [Integer]
|
|
54
|
+
def write_each(enum)
  # More than one worker uses the thread pool; one or fewer runs inline
  # on the calling thread.
  @parallel_workers > 1 ? drain_threaded(enum) : drain_inline(enum)
end
|
|
59
|
+
|
|
60
|
+
private
|
|
61
|
+
|
|
62
|
+
# Write every codepoint on the calling thread, in order. Returns how many
# codepoints were seen, whether or not each one was written.
def drain_inline(enum)
  seen = 0
  enum.each do |cp|
    write(cp)
    seen += 1
  end
  seen
end
|
|
67
|
+
|
|
68
|
+
# Drain `enum` through a pool of @parallel_workers threads and return how
# many codepoints were processed.
#
# - The hand-off queue is bounded, so the producer blocks once the
#   workers fall behind instead of buffering the whole enumerator.
# - The queue is closed (rather than fed nil sentinels) to signal the end
#   of input; a closed, drained queue makes `pop` return nil.
# - Workers are always shut down and joined, even when the source
#   enumerator raises part-way through; that error then propagates.
# - If a worker's `write` raises, the first such error is recorded, the
#   queue is closed so the producer stops, and the error is re-raised on
#   the calling thread once all workers have finished.
# - nil elements are ignored: nil is what `pop` returns for end-of-input.
#
# @param enum [Enumerator, Enumerable]
# @return [Integer]
# @raise [StandardError] the first error raised by a worker's write
def drain_threaded(enum)
  queue = SizedQueue.new(@parallel_workers * 16)
  mutex = Mutex.new
  count = 0
  failure = nil

  workers = Array.new(@parallel_workers) do
    Thread.new do
      while (cp = queue.pop)
        begin
          write(cp)
          mutex.synchronize { count += 1 }
        rescue StandardError => e
          mutex.synchronize { failure ||= e }
          # Closing wakes a producer blocked in push (ClosedQueueError).
          queue.close
          break
        end
      end
    end
  end

  begin
    enum.each { |cp| queue << cp unless cp.nil? }
  rescue ClosedQueueError
    # A worker failed and closed the queue; its error is raised below.
  ensure
    queue.close
    workers.each(&:join)
  end
  raise failure if failure

  count
end
|
|
90
|
+
|
|
91
|
+
# Render a codepoint as the JSON text stored in its index.json, using the
# model's own `to_json` with the `pretty: true` option.
def serialize(codepoint)
  codepoint.to_json(pretty: true)
end
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
end
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
module Ucode
  module Repo
    # Pure functions describing the on-disk layout of the output tree.
    #
    # The only code that knows the path conventions. Site generator,
    # CLI, glyph writer, and fontisan adapter all go through here.
    #
    # All methods are pure: no I/O, no global state, no side effects.
    # Path builders return Pathname instances so callers can compose
    # further; cp_id returns a String.
    module Paths
      BLOCKS_DIR = "blocks"
      PLANES_DIR = "planes"
      SCRIPTS_DIR = "scripts"
      INDEX_DIR = "index"
      INDEX_FILENAME = "index.json"
      GLYPH_FILENAME = "glyph.svg"
      PLANE_FILENAME_PREFIX = "" # plane files are <n>.json
      private_constant :BLOCKS_DIR, :PLANES_DIR, :SCRIPTS_DIR, :INDEX_DIR,
                       :INDEX_FILENAME, :GLYPH_FILENAME,
                       :PLANE_FILENAME_PREFIX

      class << self
        # Format an integer codepoint as the canonical "U+XXXX" id used
        # everywhere (paths, JSON, cross-references). Always at least
        # 4 hex digits, uppercase, no extra padding.
        # @param cp [Integer]
        # @return [String]
        def cp_id(cp)
          format("U+%04X", cp)
        end

        # Directory holding everything for one block:
        # <output_root>/blocks/<block_id>.
        #
        # NOTE: block_id is joined verbatim. Pathname#join discards the
        # preceding components when given an absolute path, so callers
        # must pass a plain, relative id.
        # @param output_root [String, Pathname]
        # @param block_id [String] verbatim block id (e.g. "ASCII", "CJK_Ext_A")
        # @return [Pathname]
        def block_dir(output_root, block_id)
          Pathname(output_root).join(BLOCKS_DIR, block_id)
        end

        # Directory for one codepoint, nested under its block directory.
        # @param output_root [String, Pathname]
        # @param block_id [String]
        # @param cp_id [String] e.g. "U+0041"
        # @return [Pathname]
        def codepoint_dir(output_root, block_id, cp_id)
          block_dir(output_root, block_id).join(cp_id)
        end

        # index.json inside the codepoint's directory.
        # @param output_root [String, Pathname]
        # @param block_id [String]
        # @param cp_id [String]
        # @return [Pathname]
        def codepoint_json_path(output_root, block_id, cp_id)
          codepoint_dir(output_root, block_id, cp_id).join(INDEX_FILENAME)
        end

        # glyph.svg inside the codepoint's directory.
        # @param output_root [String, Pathname]
        # @param block_id [String]
        # @param cp_id [String]
        # @return [Pathname]
        def codepoint_glyph_path(output_root, block_id, cp_id)
          codepoint_dir(output_root, block_id, cp_id).join(GLYPH_FILENAME)
        end

        # index.json directly inside the block's directory.
        # @param output_root [String, Pathname]
        # @param block_id [String]
        # @return [Pathname]
        def block_metadata_path(output_root, block_id)
          block_dir(output_root, block_id).join(INDEX_FILENAME)
        end

        # index.json directly inside the top-level blocks directory.
        # @param output_root [String, Pathname]
        # @return [Pathname]
        def blocks_index_path(output_root)
          Pathname(output_root).join(BLOCKS_DIR, INDEX_FILENAME)
        end

        # <output_root>/planes/<prefix><plane_number>.json, where the
        # prefix is PLANE_FILENAME_PREFIX (currently empty).
        # @param output_root [String, Pathname]
        # @param plane_number [Integer]
        # @return [Pathname]
        def plane_metadata_path(output_root, plane_number)
          Pathname(output_root).join(PLANES_DIR, "#{PLANE_FILENAME_PREFIX}#{plane_number}.json")
        end

        # <output_root>/scripts/<script_code>.json
        # @param output_root [String, Pathname]
        # @param script_code [String]
        # @return [Pathname]
        def script_metadata_path(output_root, script_code)
          Pathname(output_root).join(SCRIPTS_DIR, "#{script_code}.json")
        end

        # <output_root>/index/names.json
        # @param output_root [String, Pathname]
        # @return [Pathname]
        def names_index_path(output_root)
          Pathname(output_root).join(INDEX_DIR, "names.json")
        end

        # <output_root>/index/labels.json
        # @param output_root [String, Pathname]
        # @return [Pathname]
        def labels_index_path(output_root)
          Pathname(output_root).join(INDEX_DIR, "labels.json")
        end

        # <output_root>/manifest.json
        # @param output_root [String, Pathname]
        # @return [Pathname]
        def manifest_path(output_root)
          Pathname(output_root).join("manifest.json")
        end

        # Temporary path for atomic writes — same directory as `path`,
        # so rename stays within one filesystem. The name is the target's
        # basename with ".tmp" appended.
        # @param path [String, Pathname] coerced like every other builder
        # @return [Pathname]
        def tmp_path(path)
          target = Pathname(path)
          target.parent.join("#{target.basename}.tmp")
        end
      end
    end
  end
end
|
data/lib/ucode/repo.rb
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ucode
  # Repo — writes the output tree under output/.
  #
  # One folder per codepoint (CJK included), no exceptions. The layout
  # below follows the paths built by Repo::Paths:
  #
  #   output/planes/<n>.json
  #   output/blocks/index.json
  #   output/blocks/<ORIGINAL_NAME>/index.json
  #   output/blocks/<ORIGINAL_NAME>/<U+XXXX>/index.json
  #   output/blocks/<ORIGINAL_NAME>/<U+XXXX>/glyph.svg
  #   output/scripts/<ScriptCode>.json
  #   output/index/names.json
  #   output/index/labels.json
  #   output/manifest.json
  module Repo
    # Each constant is registered with autoload, so its file is required
    # only when the constant is first referenced.
    autoload :Paths, "ucode/repo/paths"
    autoload :AtomicWrites, "ucode/repo/atomic_writes"
    autoload :CodepointWriter, "ucode/repo/codepoint_writer"
    autoload :AggregateWriter, "ucode/repo/aggregate_writer"
  end
end
|