ke2daira 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c05a97016dd61db84acc369c2b97805265fb8d3c5b7db226d7058ab7d5708bdf
4
- data.tar.gz: fac2bd729b6ef78fa148856e80c4c45d39d0a847696dd657429071a3d6339904
3
+ metadata.gz: c4aaa1e18e53c6f8aa6c87b8457da21af7212910cfa195993c19675e4ba7e70f
4
+ data.tar.gz: 4f547aa4bd60e8eea105883e08b29d7c19cdda509dff8cb0d7a472f3db715b1a
5
5
  SHA512:
6
- metadata.gz: d061c43d6a5a339af1ac903741a474d05c56ede78407ba083a6cbb4f266a548d4eb02fe11a5ddffc3937e1758020e6e20281ba564ceb81d0d21b5fbc20adc5db
7
- data.tar.gz: 0c8d094c9044eee27483ff9e121b646f0878f04de39c1584a312338bd57ad11900ca6b0bd6751ae8c5c2e4eaf248e8fa3f05e54089aa37cd6e5136f5c02cf3d4
6
+ metadata.gz: 0166f9cdce6444a1aa3e1bb80061d2d125eae8163a184c210cf0e77ca725b11a761bc76082f95c6b03060ef40f5cc26f883566ad023231a980914844d5ec2ee1
7
+ data.tar.gz: ddc9c67a576e5cfab2d7a330cfce56fbc13b79f3006ab3ac8906f9ee441087d5a764bddbdd88d368e342da591fb714cfda94372318fb875237940e4efa09507b
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 0.2.1
2
+
3
+ - Fixed uninitialized constants
4
+
5
+ ## 0.2.0
6
+
7
+ - Added support for Yoon (拗音)
8
+
1
9
  ## 0.1.0
2
10
 
3
11
  - Initial release
data/Gemfile.lock CHANGED
@@ -1,24 +1,27 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- ke2daira (0.1.0)
4
+ ke2daira (0.2.1)
5
5
  suika (~> 0.3.2)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
- ast (2.4.2)
10
+ ast (2.4.3)
11
11
  dartsclone (0.3.2)
12
12
  diff-lcs (1.5.0)
13
- json (2.6.3)
14
- language_server-protocol (3.17.0.3)
15
- parallel (1.22.1)
16
- parser (3.2.2.0)
13
+ json (2.12.2)
14
+ language_server-protocol (3.17.0.5)
15
+ lint_roller (1.1.0)
16
+ parallel (1.27.0)
17
+ parser (3.3.8.0)
17
18
  ast (~> 2.4.1)
19
+ racc
20
+ prism (1.4.0)
21
+ racc (1.8.1)
18
22
  rainbow (3.1.1)
19
23
  rake (13.0.6)
20
- regexp_parser (2.7.0)
21
- rexml (3.2.5)
24
+ regexp_parser (2.10.0)
22
25
  rspec (3.12.0)
23
26
  rspec-core (~> 3.12.0)
24
27
  rspec-expectations (~> 3.12.0)
@@ -32,29 +35,42 @@ GEM
32
35
  diff-lcs (>= 1.2.0, < 2.0)
33
36
  rspec-support (~> 3.12.0)
34
37
  rspec-support (3.12.0)
35
- rubocop (1.48.1)
38
+ rubocop (1.75.8)
36
39
  json (~> 2.3)
40
+ language_server-protocol (~> 3.17.0.2)
41
+ lint_roller (~> 1.1.0)
37
42
  parallel (~> 1.10)
38
- parser (>= 3.2.0.0)
43
+ parser (>= 3.3.0.2)
39
44
  rainbow (>= 2.2.2, < 4.0)
40
- regexp_parser (>= 1.8, < 3.0)
41
- rexml (>= 3.2.5, < 4.0)
42
- rubocop-ast (>= 1.26.0, < 2.0)
45
+ regexp_parser (>= 2.9.3, < 3.0)
46
+ rubocop-ast (>= 1.44.0, < 2.0)
43
47
  ruby-progressbar (~> 1.7)
44
- unicode-display_width (>= 2.4.0, < 3.0)
45
- rubocop-ast (1.28.0)
46
- parser (>= 3.2.1.0)
47
- rubocop-performance (1.16.0)
48
- rubocop (>= 1.7.0, < 2.0)
49
- rubocop-ast (>= 0.4.0)
48
+ unicode-display_width (>= 2.4.0, < 4.0)
49
+ rubocop-ast (1.45.1)
50
+ parser (>= 3.3.7.2)
51
+ prism (~> 1.4)
52
+ rubocop-performance (1.25.0)
53
+ lint_roller (~> 1.1)
54
+ rubocop (>= 1.75.0, < 2.0)
55
+ rubocop-ast (>= 1.38.0, < 2.0)
50
56
  ruby-progressbar (1.13.0)
51
- standard (1.26.0)
57
+ standard (1.50.0)
52
58
  language_server-protocol (~> 3.17.0.2)
53
- rubocop (~> 1.48.1)
54
- rubocop-performance (~> 1.16.0)
59
+ lint_roller (~> 1.0)
60
+ rubocop (~> 1.75.5)
61
+ standard-custom (~> 1.0.0)
62
+ standard-performance (~> 1.8)
63
+ standard-custom (1.0.2)
64
+ lint_roller (~> 1.0)
65
+ rubocop (~> 1.50)
66
+ standard-performance (1.8.0)
67
+ lint_roller (~> 1.1)
68
+ rubocop-performance (~> 1.25.0)
55
69
  suika (0.3.2)
56
70
  dartsclone (>= 0.2.0)
57
- unicode-display_width (2.4.2)
71
+ unicode-display_width (3.1.4)
72
+ unicode-emoji (~> 4.0, >= 4.0.4)
73
+ unicode-emoji (4.0.4)
58
74
 
59
75
  PLATFORMS
60
76
  x86_64-linux
data/ke2daira.gemspec ADDED
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/ke2daira/version"
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = "ke2daira"
7
+ spec.version = Ke2daira::VERSION
8
+ spec.authors = ["otariidae"]
9
+ spec.email = ["otariidae@users.noreply.github.com"]
10
+
11
+ spec.summary = "ke2daira"
12
+ spec.description = "A Ruby implementation of ke2daira"
13
+ spec.homepage = "https://github.com/otariidae/ke2daira.rb"
14
+ spec.license = "MIT"
15
+ spec.required_ruby_version = ">= 3.2.0"
16
+
17
+ spec.metadata["allowed_push_host"] = "https://rubygems.org"
18
+
19
+ spec.metadata["homepage_uri"] = spec.homepage
20
+ spec.metadata["source_code_uri"] = spec.homepage
21
+ spec.metadata["changelog_uri"] = "https://github.com/otariidae/ke2daira.rb/blob/main/CHANGELOG.md"
22
+
23
+ # Specify which files should be added to the gem when it is released.
24
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
25
+ spec.files = Dir.chdir(__dir__) do
26
+ `git ls-files -z`.split("\x0").reject do |f|
27
+ (f == __FILE__) || f.match(%r{\A(?:(?:bin|test|spec|features)/|\.(?:git|travis|circleci)|appveyor)})
28
+ end
29
+ end
30
+ spec.bindir = "exe"
31
+ spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) }
32
+ spec.require_paths = ["lib"]
33
+
34
+ # Uncomment to register a new dependency of your gem
35
+ spec.add_dependency "suika", "~> 0.3.2"
36
+
37
+ # For more information and examples about making a new gem, check out our
38
+ # guide at: https://bundler.io/guides/creating_gem.html
39
+ spec.metadata["rubygems_mfa_required"] = "true"
40
+ end
data/lib/ke2daira/kana2mora.rb ADDED
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Ke2daira
4
+ module Kana2Mora
5
+ SUTEKANA = Set["ァ", "ィ", "ゥ", "ェ", "ォ", "ャ", "ュ", "ョ", "ヮ"]
6
+
7
+ # converts Katakana into a list of mora.
8
+ def self.katakana2mora(katakana)
9
+ chars = katakana.chars
10
+ moras = []
11
+ chars.each do |char|
12
+ if SUTEKANA.include?(char)
13
+ previous_char = moras.pop || ""
14
+ mora = previous_char + char
15
+ moras << mora
16
+ next
17
+ end
18
+ moras << char
19
+ end
20
+ moras
21
+ end
22
+ end
23
+ end
data/lib/ke2daira/ke2dairanizer.rb ADDED
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "suika"
4
+
5
+ module Ke2daira
6
+ class Ke2dairanizer
7
+ def initialize(tagger: ::Suika::Tagger.new, separator: " ")
8
+ @tagger = tagger
9
+ @separator = separator
10
+ end
11
+
12
+ attr_reader :tagger, :separator
13
+
14
+ # Ke2dairanize the givin name
15
+ # @param fullname [String]
16
+ # @return [String] ke2dairanized name
17
+ def ke2dairanize(fullname) # rubocop:disable Metrics/AbcSize
18
+ names = fullname.strip.split(separator)
19
+ yomis = names.map { |name| to_yomi(name) }
20
+
21
+ return yomis[0] if yomis.length == 1
22
+
23
+ first_word_moras = Kana2Mora.katakana2mora(yomis[0])
24
+ first_word_head = first_word_moras[0]
25
+ first_word_tail = first_word_moras[1..]
26
+
27
+ last_word_moras = Kana2Mora.katakana2mora(yomis[-1])
28
+ last_word_head = last_word_moras[0]
29
+ last_word_tail = last_word_moras[1..]
30
+
31
+ new_first_word = last_word_head + first_word_tail.join
32
+ new_last_word = first_word_head + last_word_tail.join
33
+
34
+ yomis[0] = new_first_word
35
+ yomis[-1] = new_last_word
36
+ yomis.join(separator)
37
+ end
38
+
39
+ private
40
+
41
+ # Tokenize the givin sentence
42
+ # @param sentence [String] Japanese text to be tokenized
43
+ # @return [Array<SuikaToken>]
44
+ def tokenize(sentence)
45
+ raw_tokens = tagger.parse(sentence)
46
+ raw_tokens.map { |raw_token| raw_token2suikatoken(raw_token) }
47
+ end
48
+
49
+ # Convert Suika raw token to SuikaToken
50
+ # @param raw_token [String] Suika raw token
51
+ # @return [SuikaToken]
52
+ def raw_token2suikatoken(raw_token)
53
+ surface_form, rest_raw_token = raw_token.split("\t")
54
+ pos, pos_detail1, pos_detail2, pos_detail3, conjugated_type, conjugated_form,
55
+ basic_form, reading, pronunciation = rest_raw_token.split(",")
56
+ SuikaToken.new(surface_form, pos, pos_detail1, pos_detail2, pos_detail3, conjugated_type,
57
+ conjugated_form, basic_form, reading, pronunciation)
58
+ end
59
+
60
+ # Convert the givin word to its reading
61
+ # @param word [String] a Japanese word
62
+ # @return [String] reading of the word
63
+ def to_yomi(word)
64
+ tokens = tokenize(word)
65
+ # fallback to the surface form when the reading is missing
66
+ tokens.map do |token|
67
+ token.reading || token.surface_form
68
+ end.join
69
+ end
70
+ end
71
+ end
data/lib/ke2daira/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Ke2daira
4
- VERSION = "0.1.0"
4
+ VERSION = "0.2.1"
5
5
  end
data/lib/ke2daira.rb CHANGED
@@ -1,69 +1,15 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require_relative "ke2daira/kana2mora"
4
+ require_relative "ke2daira/ke2dairanizer"
3
5
  require_relative "ke2daira/version"
4
- require "suika"
5
6
 
6
7
  # Ke2daira is a library to ke2dairanize
7
8
  module Ke2daira
8
- SEPARATOR = " "
9
- private_constant :SEPARATOR
9
+ SuikaToken = Data.define(:surface_form, :pos, :pos_detail1, :pos_detail2, :pos_detail3, :conjugated_type, :conjugated_form, :basic_form, :reading, :pronunciation)
10
10
 
11
- TAGGER = Suika::Tagger.new
12
- private_constant :TAGGER
13
-
14
- SuikaToken = Data.define(:surface_form, :pos, :pos_detail1, :pos_detail2, :pos_detail3, :conjugated_type,
15
- :conjugated_form, :basic_form, :reading, :pronunciation)
16
- private_constant :SuikaToken
17
-
18
- # Ke2dairanize the givin name
19
- # @param fullname [String]
20
- # @return [String] ke2dairanized name
21
- def self.ke2dairanize(fullname) # rubocop:disable Metrics/AbcSize
22
- names = fullname.strip.split(SEPARATOR)
23
- yomis = names.map { |name| to_yomi(name) }
24
-
25
- return yomis[0] if yomis.length == 1
26
-
27
- first_word = yomis[0]
28
- last_word = yomis[-1]
29
-
30
- # swap first characters of first and last name
31
- yomis[0] = last_word[0] + first_word[1..]
32
- yomis[-1] = first_word[0] + last_word[1..]
33
- yomis.join(SEPARATOR)
34
- end
35
-
36
- class << self
37
- private
38
-
39
- # Tokenize the givin sentence
40
- # @param sentence [String] Japanese text to be tokenized
41
- # @return [Array<SuikaToken>]
42
- def tokenize(sentence)
43
- raw_tokens = TAGGER.parse(sentence)
44
- raw_tokens.map { |raw_token| raw_token2suikatoken(raw_token) }
45
- end
46
-
47
- # Convert Suika raw token to SuikaToken
48
- # @param raw_token [String] Suika raw token
49
- # @return [SuikaToken]
50
- def raw_token2suikatoken(raw_token)
51
- surface_form, rest_raw_token = raw_token.split("\t")
52
- pos, pos_detail1, pos_detail2, pos_detail3, conjugated_type, conjugated_form,
53
- basic_form, reading, pronunciation = rest_raw_token.split(",")
54
- SuikaToken.new(surface_form, pos, pos_detail1, pos_detail2, pos_detail3, conjugated_type,
55
- conjugated_form, basic_form, reading, pronunciation)
56
- end
57
-
58
- # Convert the givin word to its reading
59
- # @param word [String] a Japanese word
60
- # @return [String] reading of the word
61
- def to_yomi(word)
62
- tokens = tokenize(word)
63
- # fallback to the surface form when the reading is missing
64
- tokens.map do |token|
65
- token.reading || token.surface_form
66
- end.join
67
- end
11
+ def self.ke2dairanize(fullname)
12
+ @ke2dairanizer ||= Ke2dairanizer.new
13
+ @ke2dairanizer.ke2dairanize(fullname)
68
14
  end
69
15
  end
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ke2daira
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - otariidae
8
- autorequire:
9
8
  bindir: exe
10
9
  cert_chain: []
11
- date: 2023-04-16 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: suika
@@ -38,7 +37,10 @@ files:
38
37
  - LICENSE.txt
39
38
  - README.md
40
39
  - Rakefile
40
+ - ke2daira.gemspec
41
41
  - lib/ke2daira.rb
42
+ - lib/ke2daira/kana2mora.rb
43
+ - lib/ke2daira/ke2dairanizer.rb
42
44
  - lib/ke2daira/version.rb
43
45
  - sig/ke2daira.rbs
44
46
  homepage: https://github.com/otariidae/ke2daira.rb
@@ -50,7 +52,6 @@ metadata:
50
52
  source_code_uri: https://github.com/otariidae/ke2daira.rb
51
53
  changelog_uri: https://github.com/otariidae/ke2daira.rb/blob/main/CHANGELOG.md
52
54
  rubygems_mfa_required: 'true'
53
- post_install_message:
54
55
  rdoc_options: []
55
56
  require_paths:
56
57
  - lib
@@ -65,8 +66,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
65
66
  - !ruby/object:Gem::Version
66
67
  version: '0'
67
68
  requirements: []
68
- rubygems_version: 3.4.10
69
- signing_key:
69
+ rubygems_version: 3.6.7
70
70
  specification_version: 4
71
71
  summary: ke2daira
72
72
  test_files: []