yosina 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +7 -0
  2. data/.rubocop.yml +36 -0
  3. data/Gemfile +6 -0
  4. data/README.ja.md +229 -0
  5. data/README.md +229 -0
  6. data/Rakefile +30 -0
  7. data/codegen/dataset.rb +215 -0
  8. data/codegen/emitters/circled_or_squared_transliterator_data.rb +30 -0
  9. data/codegen/emitters/combined_transliterator_data.rb +28 -0
  10. data/codegen/emitters/hyphens_transliterator_data.rb +48 -0
  11. data/codegen/emitters/ivs_svs_base_transliterator_data.rb +121 -0
  12. data/codegen/emitters/simple_transliterator.rb +76 -0
  13. data/codegen/emitters/utils.rb +45 -0
  14. data/codegen/emitters.rb +8 -0
  15. data/codegen/main.rb +109 -0
  16. data/lib/yosina/char.rb +65 -0
  17. data/lib/yosina/chars.rb +152 -0
  18. data/lib/yosina/recipes.rb +359 -0
  19. data/lib/yosina/transliterator.rb +49 -0
  20. data/lib/yosina/transliterators/circled_or_squared.rb +67 -0
  21. data/lib/yosina/transliterators/circled_or_squared_data.rb +469 -0
  22. data/lib/yosina/transliterators/combined.rb +52 -0
  23. data/lib/yosina/transliterators/combined_data.rb +495 -0
  24. data/lib/yosina/transliterators/hira_kata.rb +106 -0
  25. data/lib/yosina/transliterators/hira_kata_composition.rb +103 -0
  26. data/lib/yosina/transliterators/hira_kata_table.rb +116 -0
  27. data/lib/yosina/transliterators/hyphens.rb +83 -0
  28. data/lib/yosina/transliterators/hyphens_data.rb +60 -0
  29. data/lib/yosina/transliterators/ideographic_annotations.rb +73 -0
  30. data/lib/yosina/transliterators/ivs_svs_base.rb +169 -0
  31. data/lib/yosina/transliterators/ivs_svs_base_data.rb +0 -0
  32. data/lib/yosina/transliterators/japanese_iteration_marks.rb +261 -0
  33. data/lib/yosina/transliterators/jisx0201_and_alike.rb +451 -0
  34. data/lib/yosina/transliterators/kanji_old_new.rb +1137 -0
  35. data/lib/yosina/transliterators/mathematical_alphanumerics.rb +799 -0
  36. data/lib/yosina/transliterators/prolonged_sound_marks.rb +206 -0
  37. data/lib/yosina/transliterators/radicals.rb +361 -0
  38. data/lib/yosina/transliterators/spaces.rb +79 -0
  39. data/lib/yosina/transliterators.rb +57 -0
  40. data/lib/yosina/version.rb +5 -0
  41. data/lib/yosina.rb +62 -0
  42. data/yosina.gemspec +41 -0
  43. metadata +159 -0
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'transliterators/spaces'
4
+ require_relative 'transliterators/kanji_old_new'
5
+ require_relative 'transliterators/radicals'
6
+ require_relative 'transliterators/ideographic_annotations'
7
+ require_relative 'transliterators/mathematical_alphanumerics'
8
+ require_relative 'transliterators/prolonged_sound_marks'
9
+ require_relative 'transliterators/hyphens'
10
+ require_relative 'transliterators/hira_kata'
11
+ require_relative 'transliterators/hira_kata_composition'
12
+ require_relative 'transliterators/ivs_svs_base'
13
+ require_relative 'transliterators/jisx0201_and_alike'
14
+ require_relative 'transliterators/circled_or_squared'
15
+ require_relative 'transliterators/combined'
16
+ require_relative 'transliterators/japanese_iteration_marks'
17
+
18
module Yosina
  # Registry for transliterator factories
  module Transliterators
    # Lookup table from registry key (Symbol) to the constant registered
    # under it. Frozen so the registry cannot be altered after load.
    FACTORIES = {
      spaces: Transliterators::Spaces,
      kanji_old_new: Transliterators::KanjiOldNew,
      radicals: Transliterators::Radicals,
      ideographic_annotations: Transliterators::IdeographicAnnotations,
      mathematical_alphanumerics: Transliterators::MathematicalAlphanumerics,
      prolonged_sound_marks: Transliterators::ProlongedSoundMarks,
      hyphens: Transliterators::Hyphens,
      hira_kata: Transliterators::HiraKata,
      hira_kata_composition: Transliterators::HiraKataComposition,
      ivs_svs_base: Transliterators::IvsSvsBase,
      jisx0201_and_alike: Transliterators::Jisx0201AndAlike,
      combined: Transliterators::Combined,
      circled_or_squared: Transliterators::CircledOrSquared,
      japanese_iteration_marks: Transliterators::JapaneseIterationMarks
    }.freeze

    # Get a transliterator factory by name
    #
    # Strings and symbols are both normalized before the lookup: hyphens are
    # treated as underscores, so 'kanji-old-new', :'kanji-old-new' and
    # :kanji_old_new all refer to the same entry. Any other kind of value is
    # used as the lookup key unchanged.
    #
    # @param name [String, Symbol] The name of the transliterator
    # @return [Module, nil] The transliterator factory module or nil if not found
    def self.get_factory(name)
      case name
      when String, Symbol
        # tr is sufficient for a one-character substitution; no regexp needed
        name = name.to_s.tr('-', '_').to_sym
      end
      FACTORIES[name]
    end

    # List all available transliterator names
    #
    # @return [Array<Symbol>] Array of available transliterator names
    def self.available_transliterators
      FACTORIES.keys
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Yosina
  # Version string of the yosina gem.
  VERSION = '0.1.0'
end
data/lib/yosina.rb ADDED
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'yosina/version'
4
+ require_relative 'yosina/char'
5
+ require_relative 'yosina/transliterator'
6
+ require_relative 'yosina/chars'
7
+ require_relative 'yosina/recipes'
8
+ require_relative 'yosina/transliterators'
9
+
10
# Entry point of the Yosina text transliteration library: error classes plus
# the convenience constructor for string-to-string transliterators.
module Yosina
  # Base class of all errors raised by this library
  class Error < StandardError; end

  # Raised when a config names a transliterator that is not registered
  class TransliteratorFactoryError < Error; end

  # Error class reserved for failures during transliteration
  class TransliterationError < Error; end

  # Builds a string-to-string transliterator from a recipe or from a list of
  # transliterator configs.
  #
  # List entries may be a name (String or Symbol), a two-element
  # [name, options] Array, or any other object, which is passed through
  # unchanged as a config.
  #
  # @param configs_or_recipe [Array<TransliteratorConfig>, Array<String>, TransliterationRecipe]
  #   A recipe or a list of transliterator configs/names
  # @return [Proc] A transliterator function that takes a string and returns a string
  # @raise [ArgumentError] if the argument is neither an Array nor a TransliterationRecipe
  # @raise [TransliteratorFactoryError] if a config names an unknown transliterator
  def self.make_transliterator(configs_or_recipe)
    configs =
      case configs_or_recipe
      when TransliterationRecipe
        configs_or_recipe.build_transliterator_configs.map do |pair|
          TransliteratorConfig.new(pair[0], pair[1])
        end
      when Array
        configs_or_recipe.map { |entry| coerce_config(entry) }
      else
        raise ArgumentError, 'configs_or_recipe must be Array or TransliterationRecipe'
      end

    chain = create_chained_transliterator(configs)

    # The chain works on char arrays; wrap it so callers deal in strings only.
    proc { |input| chain.call(Chars.build_char_array(input)).to_s }
  end

  # Turns one list entry into a config object; entries that are neither a
  # name nor an Array are returned as they are.
  def self.coerce_config(entry)
    case entry
    when String, Symbol
      TransliteratorConfig.new(entry)
    when Array
      name, options = entry
      TransliteratorConfig.new(name, options)
    else
      entry
    end
  end
  private_class_method :coerce_config

  # Instantiates the transliterator for every config and wraps the results
  # in a ChainedTransliterator.
  def self.create_chained_transliterator(configs)
    stages = configs.map do |config|
      factory = Transliterators.get_factory(config.name)
      next factory.call(config.options || {}) if factory

      raise TransliteratorFactoryError, "Unknown transliterator: #{config.name}"
    end

    ChainedTransliterator.new(stages)
  end
  private_class_method :create_chained_transliterator
end
data/yosina.gemspec ADDED
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'lib/yosina/version'
4
+
5
Gem::Specification.new do |spec|
  # Identity and authorship
  spec.name = 'yosina'
  spec.version = Yosina::VERSION
  spec.authors = ['Moriyoshi Koizumi']
  spec.email = ['mozo@mozo.jp']

  spec.summary = 'Japanese text transliteration library'
  spec.description = 'Yosina is a transliteration library that specifically deals with the letters and symbols used' \
                     ' in Japanese writing.'
  spec.homepage = 'https://github.com/yosina-lib/yosina'
  spec.license = 'MIT'
  spec.required_ruby_version = '>= 2.7.0'

  # Restrict pushes of this gem to rubygems.org
  spec.metadata['allowed_push_host'] = 'https://rubygems.org'

  spec.metadata['homepage_uri'] = spec.homepage
  spec.metadata['source_code_uri'] = 'https://github.com/yosina-lib/yosina'
  spec.metadata['changelog_uri'] = 'https://github.com/yosina-lib/yosina/releases'

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  #
  # The gemspec itself is excluded by comparing against its basename:
  # `git ls-files` runs inside this file's directory and reports paths
  # relative to it, whereas `File.expand_path(f) == __FILE__` only matches
  # when __FILE__ happens to be in exactly that absolute form.
  gemspec = File.basename(__FILE__)
  spec.files = Dir.chdir(__dir__) do
    `git ls-files -z`.split("\x0").reject do |f|
      (f == gemspec) || f.start_with?(*%w[bin/ test/ spec/ features/ .git .circleci appveyor])
    end
  end
  spec.bindir = 'exe'
  # Every packaged file under exe/ is exposed as an executable
  spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  # Development dependencies
  spec.add_development_dependency 'minitest', '~> 5.25'
  spec.add_development_dependency 'rake', '~> 13'
  spec.add_development_dependency 'rubocop', '~> 1.79'
  spec.add_development_dependency 'rubocop-minitest', '~> 0.38'
  spec.add_development_dependency 'yard', '~> 0.9'
end
metadata ADDED
@@ -0,0 +1,159 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: yosina
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Moriyoshi Koizumi
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2025-08-19 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: minitest
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '5.25'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '5.25'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '13'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '13'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rubocop
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.79'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.79'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rubocop-minitest
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.38'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.38'
69
+ - !ruby/object:Gem::Dependency
70
+ name: yard
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '0.9'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '0.9'
83
+ description: Yosina is a transliteration library that specifically deals with the
84
+ letters and symbols used in Japanese writing.
85
+ email:
86
+ - mozo@mozo.jp
87
+ executables: []
88
+ extensions: []
89
+ extra_rdoc_files: []
90
+ files:
91
+ - ".rubocop.yml"
92
+ - Gemfile
93
+ - README.ja.md
94
+ - README.md
95
+ - Rakefile
96
+ - codegen/dataset.rb
97
+ - codegen/emitters.rb
98
+ - codegen/emitters/circled_or_squared_transliterator_data.rb
99
+ - codegen/emitters/combined_transliterator_data.rb
100
+ - codegen/emitters/hyphens_transliterator_data.rb
101
+ - codegen/emitters/ivs_svs_base_transliterator_data.rb
102
+ - codegen/emitters/simple_transliterator.rb
103
+ - codegen/emitters/utils.rb
104
+ - codegen/main.rb
105
+ - lib/yosina.rb
106
+ - lib/yosina/char.rb
107
+ - lib/yosina/chars.rb
108
+ - lib/yosina/recipes.rb
109
+ - lib/yosina/transliterator.rb
110
+ - lib/yosina/transliterators.rb
111
+ - lib/yosina/transliterators/circled_or_squared.rb
112
+ - lib/yosina/transliterators/circled_or_squared_data.rb
113
+ - lib/yosina/transliterators/combined.rb
114
+ - lib/yosina/transliterators/combined_data.rb
115
+ - lib/yosina/transliterators/hira_kata.rb
116
+ - lib/yosina/transliterators/hira_kata_composition.rb
117
+ - lib/yosina/transliterators/hira_kata_table.rb
118
+ - lib/yosina/transliterators/hyphens.rb
119
+ - lib/yosina/transliterators/hyphens_data.rb
120
+ - lib/yosina/transliterators/ideographic_annotations.rb
121
+ - lib/yosina/transliterators/ivs_svs_base.rb
122
+ - lib/yosina/transliterators/ivs_svs_base_data.rb
123
+ - lib/yosina/transliterators/japanese_iteration_marks.rb
124
+ - lib/yosina/transliterators/jisx0201_and_alike.rb
125
+ - lib/yosina/transliterators/kanji_old_new.rb
126
+ - lib/yosina/transliterators/mathematical_alphanumerics.rb
127
+ - lib/yosina/transliterators/prolonged_sound_marks.rb
128
+ - lib/yosina/transliterators/radicals.rb
129
+ - lib/yosina/transliterators/spaces.rb
130
+ - lib/yosina/version.rb
131
+ - yosina.gemspec
132
+ homepage: https://github.com/yosina-lib/yosina
133
+ licenses:
134
+ - MIT
135
+ metadata:
136
+ allowed_push_host: https://rubygems.org
137
+ homepage_uri: https://github.com/yosina-lib/yosina
138
+ source_code_uri: https://github.com/yosina-lib/yosina
139
+ changelog_uri: https://github.com/yosina-lib/yosina/releases
140
+ post_install_message:
141
+ rdoc_options: []
142
+ require_paths:
143
+ - lib
144
+ required_ruby_version: !ruby/object:Gem::Requirement
145
+ requirements:
146
+ - - ">="
147
+ - !ruby/object:Gem::Version
148
+ version: 2.7.0
149
+ required_rubygems_version: !ruby/object:Gem::Requirement
150
+ requirements:
151
+ - - ">="
152
+ - !ruby/object:Gem::Version
153
+ version: '0'
154
+ requirements: []
155
+ rubygems_version: 3.5.11
156
+ signing_key:
157
+ specification_version: 4
158
+ summary: Japanese text transliteration library
159
+ test_files: []