yosina 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rubocop.yml +36 -0
- data/Gemfile +6 -0
- data/README.ja.md +229 -0
- data/README.md +229 -0
- data/Rakefile +30 -0
- data/codegen/dataset.rb +215 -0
- data/codegen/emitters/circled_or_squared_transliterator_data.rb +30 -0
- data/codegen/emitters/combined_transliterator_data.rb +28 -0
- data/codegen/emitters/hyphens_transliterator_data.rb +48 -0
- data/codegen/emitters/ivs_svs_base_transliterator_data.rb +121 -0
- data/codegen/emitters/simple_transliterator.rb +76 -0
- data/codegen/emitters/utils.rb +45 -0
- data/codegen/emitters.rb +8 -0
- data/codegen/main.rb +109 -0
- data/lib/yosina/char.rb +65 -0
- data/lib/yosina/chars.rb +152 -0
- data/lib/yosina/recipes.rb +359 -0
- data/lib/yosina/transliterator.rb +49 -0
- data/lib/yosina/transliterators/circled_or_squared.rb +67 -0
- data/lib/yosina/transliterators/circled_or_squared_data.rb +469 -0
- data/lib/yosina/transliterators/combined.rb +52 -0
- data/lib/yosina/transliterators/combined_data.rb +495 -0
- data/lib/yosina/transliterators/hira_kata.rb +106 -0
- data/lib/yosina/transliterators/hira_kata_composition.rb +103 -0
- data/lib/yosina/transliterators/hira_kata_table.rb +116 -0
- data/lib/yosina/transliterators/hyphens.rb +83 -0
- data/lib/yosina/transliterators/hyphens_data.rb +60 -0
- data/lib/yosina/transliterators/ideographic_annotations.rb +73 -0
- data/lib/yosina/transliterators/ivs_svs_base.rb +169 -0
- data/lib/yosina/transliterators/ivs_svs_base_data.rb +0 -0
- data/lib/yosina/transliterators/japanese_iteration_marks.rb +261 -0
- data/lib/yosina/transliterators/jisx0201_and_alike.rb +451 -0
- data/lib/yosina/transliterators/kanji_old_new.rb +1137 -0
- data/lib/yosina/transliterators/mathematical_alphanumerics.rb +799 -0
- data/lib/yosina/transliterators/prolonged_sound_marks.rb +206 -0
- data/lib/yosina/transliterators/radicals.rb +361 -0
- data/lib/yosina/transliterators/spaces.rb +79 -0
- data/lib/yosina/transliterators.rb +57 -0
- data/lib/yosina/version.rb +5 -0
- data/lib/yosina.rb +62 -0
- data/yosina.gemspec +41 -0
- metadata +159 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'transliterators/spaces'
|
|
4
|
+
require_relative 'transliterators/kanji_old_new'
|
|
5
|
+
require_relative 'transliterators/radicals'
|
|
6
|
+
require_relative 'transliterators/ideographic_annotations'
|
|
7
|
+
require_relative 'transliterators/mathematical_alphanumerics'
|
|
8
|
+
require_relative 'transliterators/prolonged_sound_marks'
|
|
9
|
+
require_relative 'transliterators/hyphens'
|
|
10
|
+
require_relative 'transliterators/hira_kata'
|
|
11
|
+
require_relative 'transliterators/hira_kata_composition'
|
|
12
|
+
require_relative 'transliterators/ivs_svs_base'
|
|
13
|
+
require_relative 'transliterators/jisx0201_and_alike'
|
|
14
|
+
require_relative 'transliterators/circled_or_squared'
|
|
15
|
+
require_relative 'transliterators/combined'
|
|
16
|
+
require_relative 'transliterators/japanese_iteration_marks'
|
|
17
|
+
|
|
18
|
+
module Yosina
  # Registry for transliterator factories.
  #
  # Maps the symbolic name of each bundled transliterator to the constant
  # that implements it, so a transliterator can be selected by name at
  # runtime.
  module Transliterators
    # Name => factory lookup table.
    #
    # The hash is frozen, so entries cannot be added or removed after load.
    # Its key order is the order returned by {.available_transliterators}.
    FACTORIES = {
      spaces: Transliterators::Spaces,
      kanji_old_new: Transliterators::KanjiOldNew,
      radicals: Transliterators::Radicals,
      ideographic_annotations: Transliterators::IdeographicAnnotations,
      mathematical_alphanumerics: Transliterators::MathematicalAlphanumerics,
      prolonged_sound_marks: Transliterators::ProlongedSoundMarks,
      hyphens: Transliterators::Hyphens,
      hira_kata: Transliterators::HiraKata,
      hira_kata_composition: Transliterators::HiraKataComposition,
      ivs_svs_base: Transliterators::IvsSvsBase,
      jisx0201_and_alike: Transliterators::Jisx0201AndAlike,
      combined: Transliterators::Combined,
      circled_or_squared: Transliterators::CircledOrSquared,
      japanese_iteration_marks: Transliterators::JapaneseIterationMarks
    }.freeze

    # Get a transliterator factory by name
    #
    # A String name has every '-' replaced by '_' and is then converted to a
    # Symbol, so 'kanji-old-new' and 'kanji_old_new' both resolve to
    # :kanji_old_new. Any other value is used as the lookup key unchanged.
    #
    # @param name [String, Symbol] The name of the transliterator
    # @return [Module, nil] The transliterator factory module or nil if not found
    def self.get_factory(name)
      # Convert string to symbol format (e.g. 'kanji-old-new' -> :kanji_old_new)
      name = name.tr('-', '_').to_sym if name.is_a?(String)
      FACTORIES[name]
    end

    # List all available transliterator names
    #
    # @return [Array<Symbol>] Array of available transliterator names, in
    #   registration order
    def self.available_transliterators
      FACTORIES.keys
    end
  end
end
|
data/lib/yosina.rb
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'yosina/version'
|
|
4
|
+
require_relative 'yosina/char'
|
|
5
|
+
require_relative 'yosina/transliterator'
|
|
6
|
+
require_relative 'yosina/chars'
|
|
7
|
+
require_relative 'yosina/recipes'
|
|
8
|
+
require_relative 'yosina/transliterators'
|
|
9
|
+
|
|
10
|
+
# Main module for Yosina text transliteration library
|
|
11
|
+
module Yosina
  # Root of the library's error hierarchy.
  class Error < StandardError; end

  # Raised when a config names a transliterator that is not registered.
  class TransliteratorFactoryError < Error; end

  # Error subclass declared for transliteration failures; it is not raised
  # anywhere in this file.
  class TransliterationError < Error; end

  # Frontend convenience function that builds a string-to-string
  # transliterator from a recipe or from a list of configs.
  #
  # When an Array is given, each item is normalized as follows:
  # * a String or Symbol becomes +TransliteratorConfig.new(item)+;
  # * an Array becomes +TransliteratorConfig.new(item[0], item[1])+;
  # * anything else is passed through untouched (presumably an already-built
  #   TransliteratorConfig -- it must at least respond to +name+ and
  #   +options+).
  #
  # @param configs_or_recipe [Array<TransliteratorConfig>, Array<String>, TransliterationRecipe]
  #   A recipe or a list of transliterator configs/names
  # @return [Proc] A transliterator function that takes a string and returns a string
  # @raise [ArgumentError] if the argument is neither an Array nor a TransliterationRecipe
  # @raise [TransliteratorFactoryError] if a config names an unknown transliterator
  def self.make_transliterator(configs_or_recipe)
    configs =
      case configs_or_recipe
      when TransliterationRecipe
        # The recipe yields indexable [name, options] entries.
        configs_or_recipe.build_transliterator_configs.map do |entry|
          TransliteratorConfig.new(entry[0], entry[1])
        end
      when Array
        configs_or_recipe.map do |item|
          case item
          when Array then TransliteratorConfig.new(item[0], item[1])
          when String, Symbol then TransliteratorConfig.new(item)
          else item
          end
        end
      else
        raise ArgumentError, 'configs_or_recipe must be Array or TransliterationRecipe'
      end

    # The chain is built once here and captured by the returned proc.
    transliterator = create_chained_transliterator(configs)

    proc { |input| transliterator.call(Chars.build_char_array(input)).to_s }
  end

  # Instantiates one transliterator per config and wraps them all in a
  # ChainedTransliterator.
  #
  # @param configs [Array<TransliteratorConfig>] objects responding to +name+ and +options+
  # @return [ChainedTransliterator]
  # @raise [TransliteratorFactoryError] if no factory is registered under a config's name
  def self.create_chained_transliterator(configs)
    stages = configs.map do |config|
      factory = Transliterators.get_factory(config.name) ||
                raise(TransliteratorFactoryError, "Unknown transliterator: #{config.name}")

      # Missing options are treated as an empty options hash.
      factory.call(config.options || {})
    end

    ChainedTransliterator.new(stages)
  end
  private_class_method :create_chained_transliterator
end
|
data/yosina.gemspec
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'lib/yosina/version'
|
|
4
|
+
|
|
5
|
+
Gem::Specification.new do |spec|
  # Identity
  spec.name = 'yosina'
  spec.version = Yosina::VERSION
  spec.authors = ['Moriyoshi Koizumi']
  spec.email = ['mozo@mozo.jp']

  # Description
  spec.summary = 'Japanese text transliteration library'
  spec.description = 'Yosina is a transliteration library that specifically deals with the letters and symbols used' \
                     ' in Japanese writing.'
  spec.homepage = 'https://github.com/yosina-lib/yosina'
  spec.license = 'MIT'
  spec.required_ruby_version = '>= 2.7.0'

  # Registry and project links
  spec.metadata = {
    'allowed_push_host' => 'https://rubygems.org',
    'homepage_uri' => spec.homepage,
    'source_code_uri' => 'https://github.com/yosina-lib/yosina',
    'changelog_uri' => 'https://github.com/yosina-lib/yosina/releases'
  }

  # Packaged files: everything tracked by git (`git ls-files -z` emits
  # NUL-separated paths), minus this gemspec itself and any path that starts
  # with one of the excluded prefixes below.
  excluded_prefixes = %w[bin/ test/ spec/ features/ .git .circleci appveyor]
  spec.files = Dir.chdir(__dir__) do
    tracked_paths = `git ls-files -z`.split("\x0")
    tracked_paths.reject do |path|
      File.expand_path(path) == __FILE__ || path.start_with?(*excluded_prefixes)
    end
  end

  # Executables are whatever packaged files live under exe/.
  spec.bindir = 'exe'
  spec.executables = spec.files.grep(%r{\Aexe/}) { |path| File.basename(path) }
  spec.require_paths = ['lib']

  # Development dependencies
  spec.add_development_dependency 'minitest', '~> 5.25'
  spec.add_development_dependency 'rake', '~> 13'
  spec.add_development_dependency 'rubocop', '~> 1.79'
  spec.add_development_dependency 'rubocop-minitest', '~> 0.38'
  spec.add_development_dependency 'yard', '~> 0.9'
end
|
metadata
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: yosina
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Moriyoshi Koizumi
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: exe
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2025-08-19 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: minitest
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '5.25'
|
|
20
|
+
type: :development
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '5.25'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: rake
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '13'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '13'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: rubocop
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '1.79'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '1.79'
|
|
55
|
+
- !ruby/object:Gem::Dependency
|
|
56
|
+
name: rubocop-minitest
|
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
|
58
|
+
requirements:
|
|
59
|
+
- - "~>"
|
|
60
|
+
- !ruby/object:Gem::Version
|
|
61
|
+
version: '0.38'
|
|
62
|
+
type: :development
|
|
63
|
+
prerelease: false
|
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
+
requirements:
|
|
66
|
+
- - "~>"
|
|
67
|
+
- !ruby/object:Gem::Version
|
|
68
|
+
version: '0.38'
|
|
69
|
+
- !ruby/object:Gem::Dependency
|
|
70
|
+
name: yard
|
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
|
72
|
+
requirements:
|
|
73
|
+
- - "~>"
|
|
74
|
+
- !ruby/object:Gem::Version
|
|
75
|
+
version: '0.9'
|
|
76
|
+
type: :development
|
|
77
|
+
prerelease: false
|
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - "~>"
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: '0.9'
|
|
83
|
+
description: Yosina is a transliteration library that specifically deals with the
|
|
84
|
+
letters and symbols used in Japanese writing.
|
|
85
|
+
email:
|
|
86
|
+
- mozo@mozo.jp
|
|
87
|
+
executables: []
|
|
88
|
+
extensions: []
|
|
89
|
+
extra_rdoc_files: []
|
|
90
|
+
files:
|
|
91
|
+
- ".rubocop.yml"
|
|
92
|
+
- Gemfile
|
|
93
|
+
- README.ja.md
|
|
94
|
+
- README.md
|
|
95
|
+
- Rakefile
|
|
96
|
+
- codegen/dataset.rb
|
|
97
|
+
- codegen/emitters.rb
|
|
98
|
+
- codegen/emitters/circled_or_squared_transliterator_data.rb
|
|
99
|
+
- codegen/emitters/combined_transliterator_data.rb
|
|
100
|
+
- codegen/emitters/hyphens_transliterator_data.rb
|
|
101
|
+
- codegen/emitters/ivs_svs_base_transliterator_data.rb
|
|
102
|
+
- codegen/emitters/simple_transliterator.rb
|
|
103
|
+
- codegen/emitters/utils.rb
|
|
104
|
+
- codegen/main.rb
|
|
105
|
+
- lib/yosina.rb
|
|
106
|
+
- lib/yosina/char.rb
|
|
107
|
+
- lib/yosina/chars.rb
|
|
108
|
+
- lib/yosina/recipes.rb
|
|
109
|
+
- lib/yosina/transliterator.rb
|
|
110
|
+
- lib/yosina/transliterators.rb
|
|
111
|
+
- lib/yosina/transliterators/circled_or_squared.rb
|
|
112
|
+
- lib/yosina/transliterators/circled_or_squared_data.rb
|
|
113
|
+
- lib/yosina/transliterators/combined.rb
|
|
114
|
+
- lib/yosina/transliterators/combined_data.rb
|
|
115
|
+
- lib/yosina/transliterators/hira_kata.rb
|
|
116
|
+
- lib/yosina/transliterators/hira_kata_composition.rb
|
|
117
|
+
- lib/yosina/transliterators/hira_kata_table.rb
|
|
118
|
+
- lib/yosina/transliterators/hyphens.rb
|
|
119
|
+
- lib/yosina/transliterators/hyphens_data.rb
|
|
120
|
+
- lib/yosina/transliterators/ideographic_annotations.rb
|
|
121
|
+
- lib/yosina/transliterators/ivs_svs_base.rb
|
|
122
|
+
- lib/yosina/transliterators/ivs_svs_base_data.rb
|
|
123
|
+
- lib/yosina/transliterators/japanese_iteration_marks.rb
|
|
124
|
+
- lib/yosina/transliterators/jisx0201_and_alike.rb
|
|
125
|
+
- lib/yosina/transliterators/kanji_old_new.rb
|
|
126
|
+
- lib/yosina/transliterators/mathematical_alphanumerics.rb
|
|
127
|
+
- lib/yosina/transliterators/prolonged_sound_marks.rb
|
|
128
|
+
- lib/yosina/transliterators/radicals.rb
|
|
129
|
+
- lib/yosina/transliterators/spaces.rb
|
|
130
|
+
- lib/yosina/version.rb
|
|
131
|
+
- yosina.gemspec
|
|
132
|
+
homepage: https://github.com/yosina-lib/yosina
|
|
133
|
+
licenses:
|
|
134
|
+
- MIT
|
|
135
|
+
metadata:
|
|
136
|
+
allowed_push_host: https://rubygems.org
|
|
137
|
+
homepage_uri: https://github.com/yosina-lib/yosina
|
|
138
|
+
source_code_uri: https://github.com/yosina-lib/yosina
|
|
139
|
+
changelog_uri: https://github.com/yosina-lib/yosina/releases
|
|
140
|
+
post_install_message:
|
|
141
|
+
rdoc_options: []
|
|
142
|
+
require_paths:
|
|
143
|
+
- lib
|
|
144
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
145
|
+
requirements:
|
|
146
|
+
- - ">="
|
|
147
|
+
- !ruby/object:Gem::Version
|
|
148
|
+
version: 2.7.0
|
|
149
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
150
|
+
requirements:
|
|
151
|
+
- - ">="
|
|
152
|
+
- !ruby/object:Gem::Version
|
|
153
|
+
version: '0'
|
|
154
|
+
requirements: []
|
|
155
|
+
rubygems_version: 3.5.11
|
|
156
|
+
signing_key:
|
|
157
|
+
specification_version: 4
|
|
158
|
+
summary: Japanese text transliteration library
|
|
159
|
+
test_files: []
|