ucode 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CLAUDE.md +211 -0
- data/Gemfile +22 -0
- data/Gemfile.lock +406 -0
- data/README.md +469 -0
- data/Rakefile +18 -0
- data/TODO.new/00-README.md +66 -0
- data/TODO.new/01-pillar-terminology-alignment.md +69 -0
- data/TODO.new/02-audit-schema-design.md +255 -0
- data/TODO.new/03-directory-output-spec.md +203 -0
- data/TODO.new/04-fontist-org-contract.md +173 -0
- data/TODO.new/05-baseline-unicode17-coverage-audit.md +144 -0
- data/TODO.new/06-audit-namespace-skeleton.md +105 -0
- data/TODO.new/07-audit-models-port.md +132 -0
- data/TODO.new/08-extractors-cheap-port.md +113 -0
- data/TODO.new/09-extractors-expensive-port.md +99 -0
- data/TODO.new/10-aggregations-ucd-rewrite.md +168 -0
- data/TODO.new/11-differ-and-library-auditor-port.md +102 -0
- data/TODO.new/12-formatters-port.md +115 -0
- data/TODO.new/13-directory-emitter.md +147 -0
- data/TODO.new/14-html-face-browser.md +144 -0
- data/TODO.new/15-html-library-browser.md +102 -0
- data/TODO.new/16-cli-audit-subcommands.md +142 -0
- data/TODO.new/17-fontisan-cleanup-audit.md +147 -0
- data/TODO.new/18-fontisan-cleanup-ucd.md +156 -0
- data/TODO.new/19-fontisan-docs-update.md +155 -0
- data/TODO.new/20-canonical-resolver-4-tier.md +182 -0
- data/TODO.new/21-canonical-unicode17-build.md +148 -0
- data/TODO.new/22-implementation-order.md +176 -0
- data/UCODE_CHANGELOG.md +97 -0
- data/exe/ucode +8 -0
- data/lib/ucode/aggregator.rb +77 -0
- data/lib/ucode/audit/block_aggregator.rb +90 -0
- data/lib/ucode/audit/codepoint_range_coalescer.rb +42 -0
- data/lib/ucode/audit/context.rb +137 -0
- data/lib/ucode/audit/discrepancy_detector.rb +213 -0
- data/lib/ucode/audit/extractors/aggregations.rb +70 -0
- data/lib/ucode/audit/extractors/base.rb +21 -0
- data/lib/ucode/audit/extractors/color_capabilities.rb +143 -0
- data/lib/ucode/audit/extractors/coverage.rb +55 -0
- data/lib/ucode/audit/extractors/hinting.rb +199 -0
- data/lib/ucode/audit/extractors/identity.rb +65 -0
- data/lib/ucode/audit/extractors/licensing.rb +75 -0
- data/lib/ucode/audit/extractors/metrics.rb +108 -0
- data/lib/ucode/audit/extractors/opentype_layout.rb +71 -0
- data/lib/ucode/audit/extractors/provenance.rb +34 -0
- data/lib/ucode/audit/extractors/style.rb +88 -0
- data/lib/ucode/audit/extractors/variation_detail.rb +101 -0
- data/lib/ucode/audit/extractors.rb +31 -0
- data/lib/ucode/audit/plane_aggregator.rb +37 -0
- data/lib/ucode/audit/registry.rb +63 -0
- data/lib/ucode/audit/script_aggregator.rb +92 -0
- data/lib/ucode/audit.rb +27 -0
- data/lib/ucode/cache.rb +113 -0
- data/lib/ucode/cli.rb +272 -0
- data/lib/ucode/commands/build.rb +68 -0
- data/lib/ucode/commands/cache.rb +46 -0
- data/lib/ucode/commands/fetch.rb +62 -0
- data/lib/ucode/commands/font_coverage.rb +57 -0
- data/lib/ucode/commands/glyphs.rb +136 -0
- data/lib/ucode/commands/lookup.rb +65 -0
- data/lib/ucode/commands/parse.rb +62 -0
- data/lib/ucode/commands/site.rb +33 -0
- data/lib/ucode/commands.rb +19 -0
- data/lib/ucode/config.rb +110 -0
- data/lib/ucode/coordinator/indices.rb +34 -0
- data/lib/ucode/coordinator.rb +397 -0
- data/lib/ucode/database.rb +214 -0
- data/lib/ucode/db_builder.rb +107 -0
- data/lib/ucode/error.rb +96 -0
- data/lib/ucode/fetch/code_charts.rb +57 -0
- data/lib/ucode/fetch/http.rb +83 -0
- data/lib/ucode/fetch/ucd_zip.rb +57 -0
- data/lib/ucode/fetch/unihan_zip.rb +57 -0
- data/lib/ucode/fetch.rb +14 -0
- data/lib/ucode/glyphs/cell_extractor.rb +130 -0
- data/lib/ucode/glyphs/dvisvgm_renderer.rb +29 -0
- data/lib/ucode/glyphs/embedded_fonts/catalog.rb +372 -0
- data/lib/ucode/glyphs/embedded_fonts/content_stream_correlator.rb +228 -0
- data/lib/ucode/glyphs/embedded_fonts/font_entry.rb +126 -0
- data/lib/ucode/glyphs/embedded_fonts/renderer.rb +47 -0
- data/lib/ucode/glyphs/embedded_fonts/source.rb +94 -0
- data/lib/ucode/glyphs/embedded_fonts/svg.rb +123 -0
- data/lib/ucode/glyphs/embedded_fonts/tounicode.rb +103 -0
- data/lib/ucode/glyphs/embedded_fonts/writer.rb +76 -0
- data/lib/ucode/glyphs/embedded_fonts.rb +50 -0
- data/lib/ucode/glyphs/grid.rb +30 -0
- data/lib/ucode/glyphs/grid_detector.rb +165 -0
- data/lib/ucode/glyphs/last_resort/cmap_index.rb +96 -0
- data/lib/ucode/glyphs/last_resort/contents.rb +74 -0
- data/lib/ucode/glyphs/last_resort/glif.rb +124 -0
- data/lib/ucode/glyphs/last_resort/renderer.rb +67 -0
- data/lib/ucode/glyphs/last_resort/source.rb +125 -0
- data/lib/ucode/glyphs/last_resort/svg.rb +247 -0
- data/lib/ucode/glyphs/last_resort/writer.rb +83 -0
- data/lib/ucode/glyphs/last_resort.rb +36 -0
- data/lib/ucode/glyphs/monolith_page_map.rb +181 -0
- data/lib/ucode/glyphs/mutool_renderer.rb +28 -0
- data/lib/ucode/glyphs/page_renderer.rb +221 -0
- data/lib/ucode/glyphs/path_bbox.rb +62 -0
- data/lib/ucode/glyphs/pdf2svg_renderer.rb +26 -0
- data/lib/ucode/glyphs/pdf_fetcher.rb +102 -0
- data/lib/ucode/glyphs/pdftocairo_renderer.rb +32 -0
- data/lib/ucode/glyphs/real_fonts/block_coverage.rb +45 -0
- data/lib/ucode/glyphs/real_fonts/coverage_auditor.rb +117 -0
- data/lib/ucode/glyphs/real_fonts/font_coverage_report.rb +45 -0
- data/lib/ucode/glyphs/real_fonts/font_locator.rb +95 -0
- data/lib/ucode/glyphs/real_fonts/unicode_17_blocks.rb +104 -0
- data/lib/ucode/glyphs/real_fonts/writer.rb +50 -0
- data/lib/ucode/glyphs/real_fonts.rb +32 -0
- data/lib/ucode/glyphs/writer.rb +250 -0
- data/lib/ucode/glyphs.rb +27 -0
- data/lib/ucode/index.rb +106 -0
- data/lib/ucode/index_builder.rb +94 -0
- data/lib/ucode/models/audit/audit_axis.rb +30 -0
- data/lib/ucode/models/audit/audit_diff.rb +77 -0
- data/lib/ucode/models/audit/audit_report.rb +137 -0
- data/lib/ucode/models/audit/baseline.rb +32 -0
- data/lib/ucode/models/audit/block_summary.rb +72 -0
- data/lib/ucode/models/audit/codepoint_detail.rb +45 -0
- data/lib/ucode/models/audit/codepoint_range.rb +39 -0
- data/lib/ucode/models/audit/codepoint_set_diff.rb +34 -0
- data/lib/ucode/models/audit/color_capabilities.rb +91 -0
- data/lib/ucode/models/audit/discrepancy.rb +38 -0
- data/lib/ucode/models/audit/duplicate_group.rb +23 -0
- data/lib/ucode/models/audit/embedding_type.rb +81 -0
- data/lib/ucode/models/audit/field_change.rb +28 -0
- data/lib/ucode/models/audit/fs_selection_flags.rb +65 -0
- data/lib/ucode/models/audit/gasp_range.rb +63 -0
- data/lib/ucode/models/audit/hinting.rb +99 -0
- data/lib/ucode/models/audit/library_summary.rb +40 -0
- data/lib/ucode/models/audit/licensing.rb +48 -0
- data/lib/ucode/models/audit/metrics.rb +111 -0
- data/lib/ucode/models/audit/named_instance.rb +41 -0
- data/lib/ucode/models/audit/opentype_layout.rb +38 -0
- data/lib/ucode/models/audit/plane_summary.rb +31 -0
- data/lib/ucode/models/audit/script_coverage_row.rb +26 -0
- data/lib/ucode/models/audit/script_features.rb +28 -0
- data/lib/ucode/models/audit/script_summary.rb +54 -0
- data/lib/ucode/models/audit/variation_detail.rb +42 -0
- data/lib/ucode/models/audit.rb +50 -0
- data/lib/ucode/models/bidi_bracket_pair.rb +20 -0
- data/lib/ucode/models/bidi_mirroring.rb +19 -0
- data/lib/ucode/models/binary_property_assignment.rb +26 -0
- data/lib/ucode/models/block.rb +36 -0
- data/lib/ucode/models/case_folding_rule.rb +23 -0
- data/lib/ucode/models/cjk_radical.rb +23 -0
- data/lib/ucode/models/codepoint/bidi.rb +28 -0
- data/lib/ucode/models/codepoint/break_segmentation.rb +22 -0
- data/lib/ucode/models/codepoint/case_folding.rb +25 -0
- data/lib/ucode/models/codepoint/casing.rb +32 -0
- data/lib/ucode/models/codepoint/decomposition.rb +27 -0
- data/lib/ucode/models/codepoint/display.rb +24 -0
- data/lib/ucode/models/codepoint/emoji.rb +29 -0
- data/lib/ucode/models/codepoint/hangul.rb +20 -0
- data/lib/ucode/models/codepoint/identifier.rb +30 -0
- data/lib/ucode/models/codepoint/indic.rb +20 -0
- data/lib/ucode/models/codepoint/joining.rb +20 -0
- data/lib/ucode/models/codepoint/normalization.rb +35 -0
- data/lib/ucode/models/codepoint/numeric_value.rb +35 -0
- data/lib/ucode/models/codepoint.rb +122 -0
- data/lib/ucode/models/name_alias.rb +21 -0
- data/lib/ucode/models/named_sequence.rb +19 -0
- data/lib/ucode/models/names_list_entry.rb +38 -0
- data/lib/ucode/models/plane.rb +36 -0
- data/lib/ucode/models/property_alias.rb +24 -0
- data/lib/ucode/models/property_value_alias.rb +26 -0
- data/lib/ucode/models/relationship/compat_equiv.rb +18 -0
- data/lib/ucode/models/relationship/cross_reference.rb +17 -0
- data/lib/ucode/models/relationship/footnote.rb +24 -0
- data/lib/ucode/models/relationship/informal_alias.rb +18 -0
- data/lib/ucode/models/relationship/sample_sequence.rb +24 -0
- data/lib/ucode/models/relationship/variation_sequence.rb +19 -0
- data/lib/ucode/models/relationship.rb +57 -0
- data/lib/ucode/models/script.rb +41 -0
- data/lib/ucode/models/special_casing_rule.rb +28 -0
- data/lib/ucode/models/standardized_variant.rb +24 -0
- data/lib/ucode/models/unihan_entry.rb +23 -0
- data/lib/ucode/models.rb +47 -0
- data/lib/ucode/parsers/auxiliary.rb +26 -0
- data/lib/ucode/parsers/base.rb +137 -0
- data/lib/ucode/parsers/bidi_brackets.rb +41 -0
- data/lib/ucode/parsers/bidi_mirroring.rb +37 -0
- data/lib/ucode/parsers/blocks.rb +63 -0
- data/lib/ucode/parsers/case_folding.rb +53 -0
- data/lib/ucode/parsers/cjk_radicals.rb +102 -0
- data/lib/ucode/parsers/derived_age.rb +59 -0
- data/lib/ucode/parsers/derived_core_properties.rb +60 -0
- data/lib/ucode/parsers/extracted_properties.rb +74 -0
- data/lib/ucode/parsers/name_aliases.rb +44 -0
- data/lib/ucode/parsers/named_sequences.rb +51 -0
- data/lib/ucode/parsers/names_list.rb +250 -0
- data/lib/ucode/parsers/property_aliases.rb +41 -0
- data/lib/ucode/parsers/property_value_aliases.rb +46 -0
- data/lib/ucode/parsers/script_extensions.rb +64 -0
- data/lib/ucode/parsers/scripts.rb +60 -0
- data/lib/ucode/parsers/special_casing.rb +62 -0
- data/lib/ucode/parsers/standardized_variants.rb +56 -0
- data/lib/ucode/parsers/unicode_data/hangul_name.rb +73 -0
- data/lib/ucode/parsers/unicode_data.rb +268 -0
- data/lib/ucode/parsers/unihan.rb +125 -0
- data/lib/ucode/parsers.rb +35 -0
- data/lib/ucode/range_entry.rb +58 -0
- data/lib/ucode/repo/aggregate_writer.rb +364 -0
- data/lib/ucode/repo/atomic_writes.rb +48 -0
- data/lib/ucode/repo/codepoint_writer.rb +96 -0
- data/lib/ucode/repo/paths.rb +122 -0
- data/lib/ucode/repo.rb +22 -0
- data/lib/ucode/site/config_emitter.rb +124 -0
- data/lib/ucode/site/generator.rb +178 -0
- data/lib/ucode/site/search_index.rb +68 -0
- data/lib/ucode/site/template/.gitignore +4 -0
- data/lib/ucode/site/template/.vitepress/config.ts +8 -0
- data/lib/ucode/site/template/.vitepress/theme/index.js +20 -0
- data/lib/ucode/site/template/char/[codepoint].md +13 -0
- data/lib/ucode/site/template/components/BlockView.vue +57 -0
- data/lib/ucode/site/template/components/CharView.vue +85 -0
- data/lib/ucode/site/template/components/PlaneView.vue +56 -0
- data/lib/ucode/site/template/components/SearchView.vue +66 -0
- data/lib/ucode/site/template/index.md +25 -0
- data/lib/ucode/site/template/package.json +18 -0
- data/lib/ucode/site/template/search.md +9 -0
- data/lib/ucode/site.rb +13 -0
- data/lib/ucode/version.rb +5 -0
- data/lib/ucode/version_resolver.rb +76 -0
- data/lib/ucode.rb +74 -0
- data/ucode.gemspec +56 -0
- metadata +404 -0
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
require "ucode/cache"
|
|
6
|
+
require "ucode/database"
|
|
7
|
+
require "ucode/repo"
|
|
8
|
+
require "ucode/version_resolver"
|
|
9
|
+
|
|
10
|
+
module Ucode
|
|
11
|
+
module Commands
|
|
12
|
+
# `ucode lookup` — read-only lookups against the SQLite cache and
# the output JSON tree. Three subactions: block, script, char.
#
# Each public method resolves the requested version, opens the
# Database for it, runs a single query, and closes the handle again
# before wrapping the answer in a small result struct.
class LookupCommand
  BlockResult = Struct.new(:codepoint, :block, keyword_init: true)
  ScriptResult = Struct.new(:codepoint, :script, keyword_init: true)
  CharResult = Struct.new(:codepoint, :block_id, :glyph_path, keyword_init: true)
  private_constant :BlockResult, :ScriptResult, :CharResult

  # Looks up the block a codepoint belongs to.
  #
  # @param version_intent [nil, :default, :latest, String]
  # @param codepoint [Integer]
  # @return [BlockResult] `block` is whatever Database#lookup_block returned
  def lookup_block(version_intent, codepoint:)
    block = with_db(version_intent) { |db| db.lookup_block(codepoint) }
    BlockResult.new(codepoint: codepoint, block: block)
  end

  # Looks up the script a codepoint belongs to.
  #
  # @param version_intent [nil, :default, :latest, String]
  # @param codepoint [Integer]
  # @return [ScriptResult] `script` is whatever Database#lookup_script returned
  def lookup_script(version_intent, codepoint:)
    script = with_db(version_intent) { |db| db.lookup_script(codepoint) }
    ScriptResult.new(codepoint: codepoint, script: script)
  end

  # Looks up a codepoint's block and, when the block is known, the glyph
  # file for it under `output_root`.
  #
  # @param version_intent [nil, :default, :latest, String]
  # @param codepoint [Integer]
  # @param output_root [String, Pathname]
  # @return [CharResult] `glyph_path` is nil when the block is unknown or
  #   the glyph file does not exist on disk
  def lookup_char(version_intent, codepoint:, output_root:)
    block_id = with_db(version_intent) { |db| db.lookup_block(codepoint) }
    glyph = block_id ? glyph_path(output_root, block_id, codepoint) : nil
    CharResult.new(codepoint: codepoint, block_id: block_id, glyph_path: glyph)
  end

  private

  # Resolves the version, opens its Database, yields the handle, and
  # always closes it — even when the block raises. `db&.close` covers the
  # case where Database.open itself failed and `db` was never assigned.
  def with_db(version_intent)
    db = Database.open(VersionResolver.resolve(version_intent))
    yield db
  ensure
    db&.close
  end

  # Returns the glyph path for the codepoint, or nil when no such file
  # exists. `output_root` is coerced to a Pathname first so a plain String
  # (allowed by the public signature) behaves the same as a Pathname, in
  # line with how the parse and site commands treat their roots.
  def glyph_path(output_root, block_id, codepoint)
    cp_id = Repo::Paths.cp_id(codepoint)
    path = Repo::Paths.codepoint_glyph_path(Pathname.new(output_root), block_id, cp_id)
    path if path.exist?
  end
end
|
|
64
|
+
end
|
|
65
|
+
end
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
require "ucode/cache"
|
|
6
|
+
require "ucode/coordinator"
|
|
7
|
+
require "ucode/parsers"
|
|
8
|
+
require "ucode/repo"
|
|
9
|
+
require "ucode/version_resolver"
|
|
10
|
+
|
|
11
|
+
module Ucode
|
|
12
|
+
module Commands
|
|
13
|
+
# `ucode parse` — drives the Coordinator's codepoint stream into the
# on-disk JSON tree under the given output root. One pass over the
# stream: each codepoint is written by the CodepointWriter and fed to
# the AggregateWriter, which is flushed once at the end.
class ParseCommand
  # @param version_intent [nil, :default, :latest, String]
  # @param output_root [String, Pathname]
  # @return [Hash] { version:, codepoint_count: }
  def call(version_intent, output_root:)
    version = VersionResolver.resolve(version_intent)
    root = Pathname.new(output_root)
    sources = { ucd_dir: Cache.ucd_dir(version), unihan_dir: Cache.unihan_dir(version) }

    coordinator = Coordinator.new
    per_codepoint = Repo::CodepointWriter.new(root, parallel_workers: workers)
    aggregate = Repo::AggregateWriter.new(root)

    # The indices object is handed to the block alongside every codepoint;
    # keep the first one seen so it can be reused for the final flush.
    seen_indices = nil
    coordinator.each_codepoint_with_indices(**sources) do |indices, cp|
      seen_indices ||= indices
      per_codepoint.write(cp)
      aggregate.add(cp)
    end

    # If the stream yielded nothing, ask the coordinator for the indices
    # explicitly instead.
    aggregate.flush(
      ucd_version: version,
      indices: seen_indices || coordinator.indices_for(**sources),
      **reference_tables(sources[:ucd_dir]),
    )

    { version: version, codepoint_count: aggregate.codepoint_count }
  end

  private

  # Worker count comes from the shared configuration object.
  def workers
    Ucode.configuration.parallel_workers
  end

  # Loads the three alias/sequence tables passed through to the
  # aggregate flush, keyed by the keyword each one is flushed under.
  def reference_tables(ucd_dir)
    {
      property_aliases: load_records(ucd_dir, "PropertyAliases.txt", Parsers::PropertyAliases),
      property_value_aliases: load_records(ucd_dir, "PropertyValueAliases.txt", Parsers::PropertyValueAliases),
      named_sequences: load_records(ucd_dir, "NamedSequences.txt", Parsers::NamedSequences),
    }
  end

  # Materialises every record the parser produces for `filename`, or an
  # empty list when that file is absent from `ucd_dir`.
  def load_records(ucd_dir, filename, parser)
    file = ucd_dir.join(filename)
    file.exist? ? parser.each_record(file).to_a : []
  end
end
|
|
61
|
+
end
|
|
62
|
+
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
|
|
5
|
+
require "ucode/site"
|
|
6
|
+
require "ucode/version_resolver"
|
|
7
|
+
|
|
8
|
+
module Ucode
|
|
9
|
+
module Commands
|
|
10
|
+
# `ucode site` — two subactions backed by Site::Generator: `init` lays
# down the Vitepress scaffold, `build` generates config/pages from the
# current `output/` tree.
class SiteCommand
  # @param site_root [String, Pathname]
  # @return [Hash] { files_copied: }
  def init(site_root:)
    # NOTE(review): "/" looks like a placeholder output root for the
    # scaffold-only path; confirm Generator#init never reads it.
    generator = Site::Generator.new(output_root: "/", site_root: Pathname.new(site_root))
    { files_copied: generator.init }
  end

  # Extra keyword arguments are accepted and ignored.
  #
  # @param output_root [String, Pathname]
  # @param site_root [String, Pathname]
  # @return [Hash] the Generator's build tally
  def build(output_root:, site_root:, **_unused)
    Site::Generator
      .new(output_root: Pathname.new(output_root), site_root: Pathname.new(site_root))
      .build
  end
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ucode
  # Commands — namespace for the per-subcommand classes behind the CLI.
  #
  # Every constant below is registered with `autoload`, so a command's
  # file is only required the first time its class is referenced.
  #
  # NOTE(review): the intent recorded for this namespace is that the Thor
  # layer stays pure dispatch + formatting while these classes do the
  # work and return structured results, keeping the work testable
  # in-process — confirm against ucode/cli.
  module Commands
    {
      FetchCommand: "ucode/commands/fetch",
      ParseCommand: "ucode/commands/parse",
      GlyphsCommand: "ucode/commands/glyphs",
      SiteCommand: "ucode/commands/site",
      LookupCommand: "ucode/commands/lookup",
      CacheCommand: "ucode/commands/cache",
      BuildCommand: "ucode/commands/build",
      FontCoverageCommand: "ucode/commands/font_coverage",
    }.each { |const_name, feature| autoload const_name, feature }
  end
end
|
data/lib/ucode/config.rb
ADDED
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "pathname"
|
|
4
|
+
require "logger"
|
|
5
|
+
|
|
6
|
+
module Ucode
|
|
7
|
+
# Single injection point for all ucode runtime configuration.
#
# By project convention this is meant to be the only place that reads
# ENV directly; other classes go through Ucode.configuration.
#
# Tests inject fresh Config instances; production reads ENV once on first
# access via Ucode.configuration.
class Config
  KNOWN_VERSIONS = %w[15.0.0 15.1.0 16.0.0 17.0.0].freeze

  DEFAULT_CACHE_ROOT = nil

  attr_accessor :cache_root, :output_dir, :default_version, :known_versions,
                :http_timeout, :http_retries, :pdf_renderer,
                :parallel_workers, :ucd_base_url, :unihan_base_url,
                :charts_base_url, :listing_url, :extracted_files,
                :auxiliary_files

  # Logger shared by every subsystem (Fetch, Coordinator, Writer, …).
  # Tests can swap to a StringIO logger to capture output.
  attr_accessor :logger

  # Builds a configuration populated with defaults. Reads XDG_CACHE_HOME,
  # UCODE_HTTP_TIMEOUT, UCODE_HTTP_RETRIES and UCODE_PARALLEL_WORKERS from
  # the environment; everything else is a fixed default that callers may
  # overwrite through the accessors.
  def initialize
    @cache_root = default_cache_root
    @output_dir = Pathname.new("./output")
    @default_version = "17.0.0"
    @known_versions = KNOWN_VERSIONS.dup
    @http_timeout = env_int("UCODE_HTTP_TIMEOUT", 30)
    @http_retries = env_int("UCODE_HTTP_RETRIES", 3)
    @pdf_renderer = :mutool
    @parallel_workers = env_int("UCODE_PARALLEL_WORKERS", 8)
    @ucd_base_url = "https://www.unicode.org/Public"
    @unihan_base_url = "https://www.unicode.org/Public"
    @charts_base_url = "https://www.unicode.org/charts/PDF"
    @listing_url = "https://www.unicode.org/Public/"
    @extracted_files = default_extracted_files
    @auxiliary_files = default_auxiliary_files
    @logger = Logger.new($stderr, level: Logger::WARN)
  end

  # @param version [String]
  # @return [Boolean] whether `version` is listed in `known_versions`
  def known?(version)
    known_versions.include?(version)
  end

  private

  # `$XDG_CACHE_HOME/ucode/unicode`, falling back to `~/.cache` when the
  # variable is unset or empty.
  def default_cache_root
    xdg = ENV["XDG_CACHE_HOME"]
    base = nil_or_empty?(xdg) ? File.join(Dir.home, ".cache") : xdg
    Pathname.new(base).join("ucode", "unicode")
  end

  def nil_or_empty?(value)
    value.nil? || value.empty?
  end

  # Reads an integer setting from the environment.
  #
  # The value is parsed strictly as base 10: without an explicit radix,
  # Kernel#Integer honours prefixes, so "010" would become 8 and "08"
  # would be rejected and silently replaced by the default.
  #
  # @param name [String] environment variable name
  # @param default [Integer] used when the variable is unset, empty, or
  #   not a decimal integer
  # @return [Integer]
  def env_int(name, default)
    value = ENV[name]
    return default if nil_or_empty?(value)

    Integer(value, 10, exception: false) || default
  end

  # File names assigned to `extracted_files` by default.
  def default_extracted_files
    %w[
      DerivedName.txt
      DerivedGeneralCategory.txt
      DerivedCombiningClass.txt
      DerivedBidiClass.txt
      DerivedDecompositionType.txt
      DerivedNumericType.txt
      DerivedNumericValues.txt
      DerivedJoiningGroup.txt
      DerivedJoiningType.txt
      DerivedLineBreak.txt
      DerivedBinaryProperties.txt
      DerivedAge.txt
      DerivedCoreProperties.txt
      DerivedNormalizationProps.txt
    ]
  end

  # Relative file paths assigned to `auxiliary_files` by default.
  def default_auxiliary_files
    %w[
      auxiliary/GraphemeBreakProperty.txt
      auxiliary/WordBreakProperty.txt
      auxiliary/SentenceBreakProperty.txt
      auxiliary/VerticalOrientation.txt
      auxiliary/IndicPositionalCategory.txt
      auxiliary/IndicSyllabicCategory.txt
      auxiliary/IdentifierStatus.txt
      auxiliary/IdentifierType.txt
      LineBreak.txt
      EastAsianWidth.txt
    ]
  end
end
|
|
110
|
+
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Ucode
|
|
4
|
+
class Coordinator
  # Container for the pre-built indices that the per-codepoint enrichment
  # pass reads from — one member per parsed source.
  #
  # NOTE(review): the design intent recorded for this struct is that each
  # member is treated as read-only once `build_indices` returns, with
  # range-based files held as sorted Arrays (bsearched by `range_first`)
  # and per-codepoint files held as flat Hashes keyed by Integer
  # codepoint or "U+XXXX" id string. Nothing in this struct enforces
  # that; confirm against `Coordinator#build_indices`.
  #
  # Keyword-only construction keeps the `Indices.new` call site readable
  # as a catalogue of every parsed file. Adding an index means one new
  # member here, one builder call in `Coordinator#build_indices`, and one
  # assignment in `#enrich`.
  Indices = Struct.new(
    *%i[
      blocks
      scripts
      property_value_aliases
      derived_age
      binary_properties
      script_extensions
      bidi_mirroring
      bidi_brackets
      special_casing
      case_folding
      name_aliases
      cjk_radicals
      standardized_variants
      names_list
      unihan
    ],
    keyword_init: true,
  )
end
|
|
34
|
+
end
|