ucode 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ucode/audit/reference_factory.rb +66 -0
- data/lib/ucode/audit.rb +1 -0
- data/lib/ucode/cli.rb +35 -16
- data/lib/ucode/commands/audit.rb +0 -1
- data/lib/ucode/commands/build.rb +4 -0
- data/lib/ucode/commands/canonical_build.rb +2 -3
- data/lib/ucode/commands/fetch.rb +12 -14
- data/lib/ucode/commands/glyphs.rb +25 -67
- data/lib/ucode/commands/lookup.rb +11 -11
- data/lib/ucode/commands/parse.rb +7 -5
- data/lib/ucode/commands/release.rb +0 -1
- data/lib/ucode/commands/universal_set.rb +10 -14
- data/lib/ucode/coordinator/indices.rb +38 -2
- data/lib/ucode/glyphs/pipeline.rb +106 -0
- data/lib/ucode/glyphs.rb +1 -0
- data/lib/ucode/repo/aggregate_writer.rb +60 -298
- data/lib/ucode/repo/writers/blocks_writer.rb +73 -0
- data/lib/ucode/repo/writers/enums_writer.rb +38 -0
- data/lib/ucode/repo/writers/indexes_writer.rb +53 -0
- data/lib/ucode/repo/writers/manifest_writer.rb +78 -0
- data/lib/ucode/repo/writers/named_sequences_writer.rb +47 -0
- data/lib/ucode/repo/writers/planes_writer.rb +82 -0
- data/lib/ucode/repo/writers/relationships_writer.rb +71 -0
- data/lib/ucode/repo/writers/scripts_writer.rb +54 -0
- data/lib/ucode/repo/writers.rb +20 -0
- data/lib/ucode/repo.rb +1 -0
- data/lib/ucode/version.rb +1 -1
- data/ucode.gemspec +56 -0
- metadata +18 -5
- data/Gemfile.lock +0 -406
- data/lib/ucode/commands/audit/reference_builder.rb +0 -64
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ucode/repo/atomic_writes"
|
|
4
|
+
require "ucode/repo/paths"
|
|
5
|
+
|
|
6
|
+
module Ucode
|
|
7
|
+
module Repo
|
|
8
|
+
module Writers
|
|
9
|
+
# Writes `output/blocks/<ID>.json` for every block plus
|
|
10
|
+
# `output/blocks/index.json` as a summary.
|
|
11
|
+
#
|
|
12
|
+
# One of the eight per-concern writers split out from
|
|
13
|
+
# AggregateWriter — see Candidate 5 of the 2026-06-29 review.
|
|
14
|
+
class BlocksWriter
  include AtomicWrites

  # Captures everything needed to emit the per-block files and the index.
  #
  # @param output_root [Pathname] root handed to the Paths helpers
  # @param blocks [Array<Ucode::Models::Block>]
  # @param block_codepoint_ids [Hash{String => Array<String>}]
  #   block_id → cp_id list; a block without an entry is written with []
  # @param block_ages [Hash{String => String}] block_id → age string;
  #   a block without an entry is written with a nil age
  def initialize(output_root:, blocks:, block_codepoint_ids:, block_ages:)
    @output_root = output_root
    @blocks = blocks
    @block_codepoint_ids = block_codepoint_ids
    @block_ages = block_ages
  end

  # Emits one metadata file per block, then the summary index. Each
  # block's `age` attribute is assigned from `block_ages` before its
  # file is written.
  #
  # @return [Integer] number of writes for which `write_atomic`
  #   reported success (per-block files plus the index)
  def write
    written = @blocks.count do |block|
      block.age = @block_ages[block.id]
      target = Paths.block_metadata_path(@output_root, block.id)
      write_atomic(target, block_payload(block))
    end
    written + write_blocks_index
  end

  private

  # Writes the index file listing every block.
  #
  # @return [Integer] 1 if `write_atomic` reported success, 0 otherwise
  def write_blocks_index
    target = Paths.blocks_index_path(@output_root)
    entries = @blocks.map { |block| index_entry(block) }
    return 1 if write_atomic(target, to_pretty_json(entries))

    0
  end

  # One row of the index. Note the index uses `first_cp` / `last_cp`
  # where the per-block file uses `range_first` / `range_last`.
  def index_entry(block)
    {
      "id" => block.id,
      "name" => block.name,
      "first_cp" => block.range_first,
      "last_cp" => block.range_last,
      "plane_number" => block.plane_number,
      "age" => @block_ages[block.id],
    }
  end

  # Serialized body of a single block's metadata file.
  def block_payload(block)
    block_id = block.id
    document = {
      "id" => block_id,
      "name" => block.name,
      "range_first" => block.range_first,
      "range_last" => block.range_last,
      "plane_number" => block.plane_number,
      "age" => @block_ages[block_id],
      "codepoint_ids" => @block_codepoint_ids[block_id] || [],
    }
    to_pretty_json(document)
  end
end
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
require "ucode/repo/atomic_writes"
|
|
5
|
+
|
|
6
|
+
module Ucode
|
|
7
|
+
module Repo
|
|
8
|
+
module Writers
|
|
9
|
+
# Writes `output/enums.json` carrying the full property-alias
|
|
10
|
+
# and property-value-alias tables.
|
|
11
|
+
#
|
|
12
|
+
# One of the eight per-concern writers split out from
|
|
13
|
+
# AggregateWriter — see Candidate 5 of the 2026-06-29 review.
|
|
14
|
+
class EnumsWriter
  include AtomicWrites

  # @param output_root [Pathname] directory that receives `enums.json`
  # @param property_aliases [Array<Ucode::Models::PropertyAlias>]
  # @param property_value_aliases [Array<Ucode::Models::PropertyValueAlias>]
  def initialize(output_root:, property_aliases:, property_value_aliases:)
    @output_root = output_root
    @property_aliases = property_aliases
    @property_value_aliases = property_value_aliases
  end

  # Serializes both alias tables (each record via `to_yaml_hash`) into
  # a single `enums.json` directly under the output root.
  #
  # @return [Integer] 1 if `write_atomic` reported success, 0 otherwise
  def write
    target = Pathname(@output_root).join("enums.json")
    properties = @property_aliases.map { |record| record.to_yaml_hash }
    property_values = @property_value_aliases.map { |record| record.to_yaml_hash }
    document = { "properties" => properties, "property_values" => property_values }
    return 1 if write_atomic(target, to_pretty_json(document))

    0
  end
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
require "ucode/repo/atomic_writes"
|
|
5
|
+
require "ucode/repo/paths"
|
|
6
|
+
|
|
7
|
+
module Ucode
|
|
8
|
+
module Repo
|
|
9
|
+
module Writers
|
|
10
|
+
# Writes the three lookup indexes:
|
|
11
|
+
#
|
|
12
|
+
# output/index/names.json (cp_id → name)
|
|
13
|
+
# output/index/labels.json (cp_id → {name, gc, sc, cc, bc, mir})
|
|
14
|
+
# output/index/codepoint_to_block.json (cp_id → block_id)
|
|
15
|
+
#
|
|
16
|
+
# One of the eight per-concern writers split out from
|
|
17
|
+
# AggregateWriter — see Candidate 5 of the 2026-06-29 review.
|
|
18
|
+
class IndexesWriter
  include AtomicWrites

  # @param output_root [Pathname]
  # @param names [Hash{String => String}] cp_id → name
  # @param labels [Hash{String => Hash}] cp_id → label fields
  # @param cp_to_block [Hash{String => String}] cp_id → block_id
  def initialize(output_root:, names:, labels:, cp_to_block:)
    @output_root = output_root
    @names = names
    @labels = labels
    @cp_to_block = cp_to_block
  end

  # Writes the names, labels and codepoint-to-block indexes, in that
  # order.
  #
  # @return [Integer] how many of the three writes `write_atomic`
  #   reported as successful
  def write
    write_index(Paths.names_index_path(@output_root), @names) +
      write_index(Paths.labels_index_path(@output_root), @labels) +
      write_index(codepoint_to_block_path, @cp_to_block)
  end

  private

  # Serializes one lookup table and writes it to `path`.
  #
  # @return [Integer] 1 if `write_atomic` reported success, 0 otherwise
  def write_index(path, table)
    write_atomic(path, to_pretty_json(table)) ? 1 : 0
  end

  # Location of the cp_id → block_id index under the output root.
  def codepoint_to_block_path
    index_dir = Pathname(@output_root).join("index")
    index_dir.join("codepoint_to_block.json")
  end
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "pathname"
|
|
5
|
+
require "time"
|
|
6
|
+
|
|
7
|
+
require "ucode/repo/atomic_writes"
|
|
8
|
+
require "ucode/repo/paths"
|
|
9
|
+
|
|
10
|
+
module Ucode
|
|
11
|
+
module Repo
|
|
12
|
+
module Writers
|
|
13
|
+
# Writes `output/manifest.json`. The `generated_at` timestamp is
|
|
14
|
+
# preserved across no-op re-runs (same content keys → keep old
|
|
15
|
+
# timestamp) so the byte content is byte-idempotent.
|
|
16
|
+
#
|
|
17
|
+
# One of the eight per-concern writers split out from
|
|
18
|
+
# AggregateWriter — see Candidate 5 of the 2026-06-29 review.
|
|
19
|
+
class ManifestWriter
  include AtomicWrites

  # Fields that define the manifest's semantic content. When
  # these match the existing manifest on disk, we preserve the
  # old `generated_at` so re-runs are byte-idempotent.
  CONTENT_KEYS = %w[
    ucd_version codepoint_count glyph_count schema_version
  ].freeze
  private_constant :CONTENT_KEYS

  SCHEMA_VERSION = "1"
  private_constant :SCHEMA_VERSION

  # @param output_root [Pathname]
  # @param ucd_version [String]
  # @param codepoint_count [Integer]
  # @param glyph_count [Integer]
  def initialize(output_root:, ucd_version:, codepoint_count:, glyph_count:)
    @output_root = output_root
    @ucd_version = ucd_version
    @codepoint_count = codepoint_count
    @glyph_count = glyph_count
  end

  # Builds the manifest document and writes it. `generated_at` is
  # carried over from the manifest already on disk when that manifest
  # has the same CONTENT_KEYS values; otherwise it is the current UTC
  # time in ISO 8601 form.
  #
  # @return [Integer] 1 if written, 0 otherwise
  def write
    path = Paths.manifest_path(@output_root)
    content = {
      "ucd_version" => @ucd_version,
      "codepoint_count" => @codepoint_count,
      "glyph_count" => @glyph_count,
      "schema_version" => SCHEMA_VERSION,
    }
    ts = preserved_or_new_timestamp(path, content)
    payload = content.merge("generated_at" => ts)
    write_atomic(path, to_pretty_json(payload)) ? 1 : 0
  end

  private

  # Picks the timestamp for the manifest about to be written.
  #
  # The previous timestamp is reused only when the manifest on disk is
  # a JSON object whose CONTENT_KEYS all equal the new content AND it
  # carries a non-empty string `generated_at`. Without the second
  # condition a manifest lacking the field would be rewritten with a
  # nil timestamp.
  #
  # @param path [Pathname] manifest location
  # @param content [Hash{String => Object}] new manifest fields
  # @return [String] ISO 8601 timestamp
  def preserved_or_new_timestamp(path, content)
    existing = read_manifest(path)
    previous = existing && existing["generated_at"]
    reusable = previous.is_a?(String) && !previous.empty? &&
               CONTENT_KEYS.all? { |k| existing[k] == content[k] }
    reusable ? previous : Time.now.utc.iso8601
  end

  # Loads the manifest currently on disk.
  #
  # @param path [Pathname]
  # @return [Hash, nil] the parsed manifest, or nil when the file is
  #   absent, is not valid JSON, or does not hold a JSON object
  def read_manifest(path)
    return nil unless path.exist?

    parsed = JSON.parse(path.read)
    # Valid JSON that is not an object (array, number, ...) cannot be
    # indexed by the string keys above, so treat it as "no manifest".
    parsed.is_a?(Hash) ? parsed : nil
  rescue JSON::ParserError
    nil
  end
end
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
require "ucode/repo/atomic_writes"
|
|
5
|
+
|
|
6
|
+
module Ucode
|
|
7
|
+
module Repo
|
|
8
|
+
module Writers
|
|
9
|
+
# Writes one file per named sequence under
|
|
10
|
+
# `output/named_sequences/<slug>.json`. Empty input writes nothing.
|
|
11
|
+
#
|
|
12
|
+
# One of the eight per-concern writers split out from
|
|
13
|
+
# AggregateWriter — see Candidate 5 of the 2026-06-29 review.
|
|
14
|
+
class NamedSequencesWriter
  include AtomicWrites

  # @param output_root [Pathname]
  # @param named_sequences [Array<Ucode::Models::NamedSequence>]
  #   may be nil or empty, in which case nothing is written
  def initialize(output_root:, named_sequences:)
    @output_root = output_root
    @named_sequences = named_sequences
  end

  # Writes each named sequence to `named_sequences/<slug>.json` under
  # the output root, using the record's own `to_json(pretty: true)`.
  # Names that reduce to the same slug target the same path.
  #
  # @return [Integer] number of named-sequence files written
  def write
    return 0 if @named_sequences.nil? || @named_sequences.empty?

    dir = Pathname(@output_root).join("named_sequences")
    @named_sequences.sum do |ns|
      path = dir.join("#{slug_for(ns)}.json")
      write_atomic(path, ns.to_json(pretty: true)) ? 1 : 0
    end
  end

  private

  # Slug derived from the name: downcase, each run of characters
  # outside a-z / 0-9 → a single "_", then leading and trailing
  # underscores removed.
  #
  # The trim is anchored with \A / \z (whole string) rather than
  # ^ / $ (per line) so the pattern states its intent directly.
  #
  # @param named_sequence [#name]
  # @return [String]
  def slug_for(named_sequence)
    named_sequence.name
      .downcase
      .gsub(/[^a-z0-9]+/, "_")
      .gsub(/\A_+|_+\z/, "")
  end
end
|
|
45
|
+
end
|
|
46
|
+
end
|
|
47
|
+
end
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ucode/repo/atomic_writes"
|
|
4
|
+
require "ucode/repo/paths"
|
|
5
|
+
|
|
6
|
+
module Ucode
|
|
7
|
+
module Repo
|
|
8
|
+
module Writers
|
|
9
|
+
# Writes `output/planes/<n>.json` for every plane (0..16).
|
|
10
|
+
#
|
|
11
|
+
# One of the eight per-concern writers split out from
|
|
12
|
+
# AggregateWriter — see Candidate 5 of the 2026-06-29 review.
|
|
13
|
+
class PlanesWriter
  include AtomicWrites

  # Plane number → [name, abbreviation] for the 17 planes written by
  # this class. Planes 4–13 carry placeholder names and a dash as the
  # abbreviation.
  PLANE_TABLE = {
    0 => ["Basic Multilingual Plane", "BMP"],
    1 => ["Supplementary Multilingual Plane", "SMP"],
    2 => ["Supplementary Ideographic Plane", "SIP"],
    3 => ["Tertiary Ideographic Plane", "TIP"],
    4 => ["Unassigned Plane 4", "—"],
    5 => ["Unassigned Plane 5", "—"],
    6 => ["Unassigned Plane 6", "—"],
    7 => ["Unassigned Plane 7", "—"],
    8 => ["Unassigned Plane 8", "—"],
    9 => ["Unassigned Plane 9", "—"],
    10 => ["Unassigned Plane 10", "—"],
    11 => ["Unassigned Plane 11", "—"],
    12 => ["Unassigned Plane 12", "—"],
    13 => ["Unassigned Plane 13", "—"],
    14 => ["Supplementary Special-purpose Plane", "SSP"],
    15 => ["Supplementary Private Use Area-A", "SPUA-A"],
    16 => ["Supplementary Private Use Area-B", "SPUA-B"],
  }.freeze
  private_constant :PLANE_TABLE

  # @param output_root [Pathname]
  # @param blocks [Array<Ucode::Models::Block>]
  def initialize(output_root:, blocks:)
    @output_root = output_root
    @blocks = blocks
  end

  # Writes one metadata file for each plane number 0 through 16,
  # listing the ids of the blocks whose `plane_number` matches.
  #
  # @return [Integer] number of plane files for which `write_atomic`
  #   reported success
  def write
    ids_by_plane = group_block_ids_by_plane
    (0..16).count do |number|
      target = Paths.plane_metadata_path(@output_root, number)
      write_atomic(target, plane_payload(number, ids_by_plane.fetch(number, [])))
    end
  end

  private

  # Block ids keyed by plane number, in the order the blocks were given.
  def group_block_ids_by_plane
    @blocks
      .group_by { |block| block.plane_number }
      .transform_values { |members| members.map { |block| block.id } }
  end

  # Serialized body of one plane file. Each plane spans 0x10000
  # code points starting at `plane_number * 0x10000`.
  def plane_payload(plane_number, block_ids)
    title, short = PLANE_TABLE.fetch(plane_number)
    first_cp = plane_number * 0x10000
    document = {
      "number" => plane_number,
      "name" => title,
      "abbrev" => short,
      "range_first" => first_cp,
      "range_last" => first_cp + 0xFFFF,
      "block_ids" => block_ids,
    }
    to_pretty_json(document)
  end
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
end
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
require "ucode/coordinator"
|
|
5
|
+
require "ucode/repo/atomic_writes"
|
|
6
|
+
require "ucode/repo/paths"
|
|
7
|
+
|
|
8
|
+
module Ucode
|
|
9
|
+
module Repo
|
|
10
|
+
module Writers
|
|
11
|
+
# Writes one file per relationship table under
|
|
12
|
+
# `output/relationships/`. The set of tables is enumerated by
|
|
13
|
+
# `Coordinator::Indices#each_relationship` (see Candidate 1 of the
|
|
14
|
+
# 2026-06-29 review).
|
|
15
|
+
#
|
|
16
|
+
# One of the eight per-concern writers split out from
|
|
17
|
+
# AggregateWriter — see Candidate 5 of the 2026-06-29 review.
|
|
18
|
+
class RelationshipsWriter
  include AtomicWrites

  # @param output_root [Pathname]
  # @param indices [Ucode::Coordinator::Indices] source of the
  #   (slug, records) pairs via `each_relationship`
  def initialize(output_root:, indices:)
    @output_root = output_root
    @indices = indices
  end

  # Writes `relationships/<slug>.json` for every relationship table
  # that has at least one record; nil or empty tables are skipped.
  #
  # @return [Integer] number of relationship files written
  def write
    @indices.each_relationship.sum { |slug, table| emit_table(slug, table) }
  end

  private

  # @return [Integer] 1 if the table was written, 0 if it was skipped
  #   or `write_atomic` did not report success
  def emit_table(slug, table)
    if table.nil? || table.empty?
      0
    else
      target = Pathname(@output_root).join("relationships", "#{slug}.json")
      write_atomic(target, encode_table(table)) ? 1 : 0
    end
  end

  # Serializes a table keyed by Integer codepoint or String id, whose
  # values are a single record or an Array of records.
  def encode_table(table)
    document = table.to_h { |key, value| [output_key(key), dump(value)] }
    to_pretty_json(document)
  end

  # Integer keys go through `Paths.cp_id`; any other key is used as is.
  def output_key(key)
    case key
    when Integer then Paths.cp_id(key)
    else key
    end
  end

  # Converts a record, or each record of an Array, with `to_yaml_hash`.
  def dump(value)
    value.is_a?(Array) ? value.map { |record| record.to_yaml_hash } : value.to_yaml_hash
  end
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ucode/repo/atomic_writes"
|
|
4
|
+
require "ucode/repo/paths"
|
|
5
|
+
|
|
6
|
+
module Ucode
|
|
7
|
+
module Repo
|
|
8
|
+
module Writers
|
|
9
|
+
# Writes `output/scripts/<code>.json` for every assigned script.
|
|
10
|
+
#
|
|
11
|
+
# One of the eight per-concern writers split out from
|
|
12
|
+
# AggregateWriter — see Candidate 5 of the 2026-06-29 review.
|
|
13
|
+
class ScriptsWriter
  include AtomicWrites

  # @param output_root [Pathname]
  # @param scripts [Array<Ucode::Models::Script>] script range records;
  #   several records may share one code
  # @param script_codepoint_ids [Hash{String => Array<String>}]
  #   script code → cp_id list; a code without an entry is written with []
  def initialize(output_root:, scripts:, script_codepoint_ids:)
    @output_root = output_root
    @scripts = scripts
    @script_codepoint_ids = script_codepoint_ids
  end

  # Groups the script records by code and writes one file per code.
  # Records whose code is nil or empty are ignored.
  #
  # @return [Integer] number of script files written
  def write
    @scripts.group_by { |script| script.code }.count do |code, members|
      next false if code.nil? || code.empty?

      target = Paths.script_metadata_path(@output_root, code)
      write_atomic(target, script_payload(code, members))
    end
  end

  private

  # Serialized body of one script file: the name comes from the first
  # record of the group, the range is the smallest `range_first` and
  # the largest `range_last` across the group.
  def script_payload(code, ranges)
    lowest = ranges.map { |r| r.range_first }.min
    highest = ranges.map { |r| r.range_last }.max
    document = {
      "code" => code,
      "name" => ranges.first&.name,
      "range_first" => lowest,
      "range_last" => highest,
      "codepoint_ids" => @script_codepoint_ids[code] || [],
    }
    to_pretty_json(document)
  end
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ucode
  module Repo
    # Namespace for the per-concern writer classes, one per kind of
    # output file. Every writer exposes `#write`, which returns the
    # number of files it wrote; AggregateWriter#flush composes them.
    # Adding a new aggregate means one writer class, one entry in the
    # table below, and one line in AggregateWriter.
    module Writers
      # Constant name → file name under ucode/repo/writers/. Each entry
      # is registered with `autoload`, so a writer's file is loaded the
      # first time its constant is referenced.
      {
        PlanesWriter: "planes_writer",
        BlocksWriter: "blocks_writer",
        ScriptsWriter: "scripts_writer",
        IndexesWriter: "indexes_writer",
        RelationshipsWriter: "relationships_writer",
        EnumsWriter: "enums_writer",
        NamedSequencesWriter: "named_sequences_writer",
        ManifestWriter: "manifest_writer",
      }.each do |const_name, file_name|
        autoload const_name, "ucode/repo/writers/#{file_name}"
      end
    end
  end
end
|
data/lib/ucode/repo.rb
CHANGED
|
@@ -18,6 +18,7 @@ module Ucode
|
|
|
18
18
|
autoload :AtomicWrites, "ucode/repo/atomic_writes"
|
|
19
19
|
autoload :CodepointWriter, "ucode/repo/codepoint_writer"
|
|
20
20
|
autoload :AggregateWriter, "ucode/repo/aggregate_writer"
|
|
21
|
+
autoload :Writers, "ucode/repo/writers"
|
|
21
22
|
autoload :BuildReportAccumulator, "ucode/repo/build_report_accumulator"
|
|
22
23
|
autoload :BuildReportWriter, "ucode/repo/build_report_writer"
|
|
23
24
|
autoload :BuildValidator, "ucode/repo/build_validator"
|
data/lib/ucode/version.rb
CHANGED
data/ucode.gemspec
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "lib/ucode/version"
|
|
4
|
+
|
|
5
|
+
Gem::Specification.new do |spec|
  spec.name = "ucode"
  spec.version = Ucode::VERSION
  spec.authors = ["Ribose Inc."]
  spec.email = ["open.source@ribose.com"]

  spec.summary = "Unicode Character Database toolkit — lookup, dataset, glyphs, site"
  spec.description = <<~DESC
    ucode turns the Unicode Character Database (UCD) text files and the official
    Unicode Code Charts into a structured, browsable dataset. For every assigned
    codepoint it produces a JSON document with full UCD properties, the
    human-curated relationships from NamesList.txt, Unihan readings, and
    machine-computed references; an SVG of the official glyph vector-extracted
    from the Code Charts; and a Vitepress site for browsing Plane, Block,
    and Character.
  DESC

  spec.homepage = "https://github.com/fontist/ucode"
  spec.license = "BSD-2-Clause"
  spec.required_ruby_version = ">= 3.2.0"

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/fontist/ucode"
  spec.metadata["changelog_uri"] = "https://github.com/fontist/ucode/blob/main/CHANGELOG.md"
  spec.metadata["rubygems_mfa_required"] = "true"

  # Package every git-tracked file except this gemspec, dotfiles and the
  # development-only trees listed below.
  #
  # The gemspec is matched by basename: `git ls-files` prints paths
  # relative to __dir__, whereas __FILE__ is whatever path the gemspec
  # was loaded with and may be absolute, in which case comparing the two
  # directly never matches and the gemspec is packaged anyway.
  gemspec = File.basename(__FILE__)
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject do |f|
      f == gemspec ||
        f.start_with?(".", "spec/", "benchmark/", "TODO.impl/", "docs/", "site/")
    end
  end

  spec.bindir = "exe"
  # Executables are the basenames of the packaged files under exe/.
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ["lib"]

  spec.add_dependency "base64"
  spec.add_dependency "fontisan", "~> 0.2"
  spec.add_dependency "fontist", "~> 3.0"
  spec.add_dependency "logger"
  spec.add_dependency "lutaml-model", "~> 0.8"
  spec.add_dependency "nokogiri", "~> 1.16"
  spec.add_dependency "rubyzip", "~> 2.3"
  spec.add_dependency "sqlite3", "~> 2.0"
  spec.add_dependency "thor", "~> 1.3"
end
|
metadata
CHANGED
|
@@ -1,13 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: ucode
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
|
+
autorequire:
|
|
8
9
|
bindir: exe
|
|
9
10
|
cert_chain: []
|
|
10
|
-
date:
|
|
11
|
+
date: 2026-06-29 00:00:00.000000000 Z
|
|
11
12
|
dependencies:
|
|
12
13
|
- !ruby/object:Gem::Dependency
|
|
13
14
|
name: base64
|
|
@@ -153,7 +154,6 @@ files:
|
|
|
153
154
|
- CHANGELOG.md
|
|
154
155
|
- CLAUDE.md
|
|
155
156
|
- Gemfile
|
|
156
|
-
- Gemfile.lock
|
|
157
157
|
- README.md
|
|
158
158
|
- Rakefile
|
|
159
159
|
- TODO.full/00-README.md
|
|
@@ -282,6 +282,7 @@ files:
|
|
|
282
282
|
- lib/ucode/audit/library_aggregator.rb
|
|
283
283
|
- lib/ucode/audit/library_auditor.rb
|
|
284
284
|
- lib/ucode/audit/plane_aggregator.rb
|
|
285
|
+
- lib/ucode/audit/reference_factory.rb
|
|
285
286
|
- lib/ucode/audit/registry.rb
|
|
286
287
|
- lib/ucode/audit/release.rb
|
|
287
288
|
- lib/ucode/audit/release/emitter.rb
|
|
@@ -301,7 +302,6 @@ files:
|
|
|
301
302
|
- lib/ucode/commands/audit/compare_command.rb
|
|
302
303
|
- lib/ucode/commands/audit/font_command.rb
|
|
303
304
|
- lib/ucode/commands/audit/library_command.rb
|
|
304
|
-
- lib/ucode/commands/audit/reference_builder.rb
|
|
305
305
|
- lib/ucode/commands/block_feed.rb
|
|
306
306
|
- lib/ucode/commands/build.rb
|
|
307
307
|
- lib/ucode/commands/cache.rb
|
|
@@ -357,6 +357,7 @@ files:
|
|
|
357
357
|
- lib/ucode/glyphs/pdf2svg_renderer.rb
|
|
358
358
|
- lib/ucode/glyphs/pdf_fetcher.rb
|
|
359
359
|
- lib/ucode/glyphs/pdftocairo_renderer.rb
|
|
360
|
+
- lib/ucode/glyphs/pipeline.rb
|
|
360
361
|
- lib/ucode/glyphs/real_fonts.rb
|
|
361
362
|
- lib/ucode/glyphs/real_fonts/block_coverage.rb
|
|
362
363
|
- lib/ucode/glyphs/real_fonts/cmap_cache.rb
|
|
@@ -499,6 +500,15 @@ files:
|
|
|
499
500
|
- lib/ucode/repo/build_validator.rb
|
|
500
501
|
- lib/ucode/repo/codepoint_writer.rb
|
|
501
502
|
- lib/ucode/repo/paths.rb
|
|
503
|
+
- lib/ucode/repo/writers.rb
|
|
504
|
+
- lib/ucode/repo/writers/blocks_writer.rb
|
|
505
|
+
- lib/ucode/repo/writers/enums_writer.rb
|
|
506
|
+
- lib/ucode/repo/writers/indexes_writer.rb
|
|
507
|
+
- lib/ucode/repo/writers/manifest_writer.rb
|
|
508
|
+
- lib/ucode/repo/writers/named_sequences_writer.rb
|
|
509
|
+
- lib/ucode/repo/writers/planes_writer.rb
|
|
510
|
+
- lib/ucode/repo/writers/relationships_writer.rb
|
|
511
|
+
- lib/ucode/repo/writers/scripts_writer.rb
|
|
502
512
|
- lib/ucode/site.rb
|
|
503
513
|
- lib/ucode/site/config_emitter.rb
|
|
504
514
|
- lib/ucode/site/generator.rb
|
|
@@ -517,6 +527,7 @@ files:
|
|
|
517
527
|
- lib/ucode/version.rb
|
|
518
528
|
- lib/ucode/version_resolver.rb
|
|
519
529
|
- schema/block-feed.output.schema.yml
|
|
530
|
+
- ucode.gemspec
|
|
520
531
|
homepage: https://github.com/fontist/ucode
|
|
521
532
|
licenses:
|
|
522
533
|
- BSD-2-Clause
|
|
@@ -525,6 +536,7 @@ metadata:
|
|
|
525
536
|
source_code_uri: https://github.com/fontist/ucode
|
|
526
537
|
changelog_uri: https://github.com/fontist/ucode/blob/main/CHANGELOG.md
|
|
527
538
|
rubygems_mfa_required: 'true'
|
|
539
|
+
post_install_message:
|
|
528
540
|
rdoc_options: []
|
|
529
541
|
require_paths:
|
|
530
542
|
- lib
|
|
@@ -539,7 +551,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
539
551
|
- !ruby/object:Gem::Version
|
|
540
552
|
version: '0'
|
|
541
553
|
requirements: []
|
|
542
|
-
rubygems_version: 3.
|
|
554
|
+
rubygems_version: 3.5.22
|
|
555
|
+
signing_key:
|
|
543
556
|
specification_version: 4
|
|
544
557
|
summary: Unicode Character Database toolkit — lookup, dataset, glyphs, site
|
|
545
558
|
test_files: []
|