jisho_sort 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: cd6b034701246bd87b1010e88382dd95ba9e72b237a286b984e941d049680a0a
4
+ data.tar.gz: d9b54a9eec95dd837973032562fcffd0a491e150e93a51eff18069f5d87bcf46
5
+ SHA512:
6
+ metadata.gz: 98ec88fb0b147653349f057150cabf0fcde117d7a9dd1852a0b930129fcff5c7ebc23885538de110eeb6022556966aa7dcf97b3f913b399ad33531ac88384771
7
+ data.tar.gz: 5556bba3f0456cacec3ada3fecb68eb79850a3163f160baf92524945f38ef63875bda3b6777a2fdb48ede47073f064d0cf6b560162bbad6d6ab1f4cd95742ec6
data/README.md ADDED
@@ -0,0 +1,53 @@
1
+ # jisho_sort
2
+
3
+ `jisho_sort` is a Ruby sorting library based on MeCab.
4
+ It sorts Japanese strings that mix kanji (Chinese characters) and kana into dictionary order, based on their pronunciation.
5
+
6
+ ## Installation
7
+
8
+ This code depends on MeCab, so you need to install MeCab.
9
+ For MeCab installation instructions, please refer to the [MeCab installation guide](https://taku910.github.io/mecab/#install).
10
+
11
+ After installing MeCab, add this line to your Gemfile:
12
+
13
+ ```ruby
14
+ gem 'jisho_sort'
15
+ ```
16
+
17
+ And then execute:
18
+
19
+ ```sh
20
+ bundle install
21
+ ```
22
+
23
+ Or install it yourself with:
24
+
25
+ ```sh
26
+ gem install jisho_sort
27
+ ```
28
+
29
+ ## Usage
30
+
31
+ Here's a basic example of how to use JishoSort:
32
+
33
+ ```ruby
34
+ require 'jisho_sort'
35
+
36
+ array = [
37
+ 'ゆく川の流れは絶えずして、しかももとの水にあらず',
38
+ 'メロスは激怒した',
39
+ '国境の長いトンネルを抜けると雪国であった'
40
+ ]
+
+ p array.jisho_sort
41
+ ```
42
+
43
+ When you run this code, you will get the following result:
44
+
45
+ ```sh
46
+ $ ruby sample.rb
47
+ ["国境の長いトンネルを抜けると雪国であった", "メロスは激怒した", "ゆく川の流れは絶えずして、しかももとの水にあらず"]
48
+ ```
49
+
50
+
51
+ ## License
52
+
53
+ The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
@@ -0,0 +1,13 @@
1
module JishoSort
  # Mixin that orders strings by their Japanese pronunciation (furigana).
  #
  # The including class must provide a +furigana+ method that returns the
  # reading used as the comparison key.
  module Comparable
    # Compares this string, which may mix Chinese characters (kanji) and
    # Japanese kana, with another string by Japanese pronunciation (furigana).
    #
    # @param other [String] The string to compare with. Subclasses of String are accepted.
    # @return [Integer] -1 if this string's furigana sorts before the other's,
    #   0 if they are equal, and 1 if it sorts after.
    # @raise [ArgumentError] If +other+ is not a String.
    def compare_by_furigana(other)
      # is_a? (rather than instance_of?) so String subclasses are not rejected.
      unless other.is_a?(String)
        raise ArgumentError, "expected a String to compare with, got #{other.class}"
      end

      furigana <=> other.furigana
    end
  end
end
@@ -0,0 +1,34 @@
1
+ require 'jisho_sort/sortable'
2
+
3
module Enumerable
  include JishoSort::Sortable

  # Sorts strings in Japanese dictionary (pronunciation) order.
  # If a block is given, it is used as the comparator instead and is passed
  # straight to +sort+; no element validation happens in that case.
  #
  # @yield [a, b] Optional block to customize the sorting logic.
  # @yieldparam a [String] The first string to compare.
  # @yieldparam b [String] The second string to compare.
  # @return [Array<String>] The sorted array.
  # @raise [ArgumentError] If no block is given and any element is not a String.
  def jisho_sort(&block)
    return sort(&block) if block

    # Materialize once so the receiver is traversed a single time.
    items = to_a
    unless items.all? { |item| item.is_a?(String) }
      raise ArgumentError, 'jisho_sort without a block requires every element to be a String'
    end

    # sort_by computes each element's furigana once; a comparator block would
    # recompute the reading of both operands on every comparison.
    items.sort_by(&:furigana)
  end

  # Sorts the elements of the enumerable by the Japanese pronunciation of the
  # string the block returns for each element.
  #
  # The block is called once per element.
  #
  # @yield [item] Gives each element of the enumerable to the block.
  # @yieldreturn [String] The string whose Japanese pronunciation is the sort key.
  # @return [Array] A new array with the elements sorted by the pronunciation of their keys.
  # @raise [ArgumentError] If the block returns something other than a String for any element.
  def jisho_sort_by
    keyed = map { |item| [yield(item), item] }

    # Validate every key before any reading is computed.
    unless keyed.all? { |key, _item| key.is_a?(String) }
      raise ArgumentError, 'jisho_sort_by requires the block to return a String for every element'
    end

    keyed.sort_by { |key, _item| key.furigana }.map(&:last)
  end
end
@@ -0,0 +1,7 @@
1
module JishoSort
  # Furigana-based comparison for objects that expose a +furigana+ reading.
  module Sortable
    # Compares the receiver's furigana with +other+'s using +<=>+.
    #
    # @param other [#furigana] The object to compare with.
    # @return [Integer, nil] The result of +furigana <=> other.furigana+.
    # @raise [ArgumentError] If +other+ does not respond to +furigana+.
    def compare_by_furigana(other)
      # Fail with a descriptive ArgumentError instead of a NoMethodError
      # raised from inside the comparison.
      unless other.respond_to?(:furigana)
        raise ArgumentError, "expected an object responding to #furigana, got #{other.class}"
      end

      furigana <=> other.furigana
    end
  end
end
@@ -0,0 +1,7 @@
1
+ require 'jisho_sort/tokenizable'
2
+ require 'jisho_sort/comparable'
3
+
4
# Extend the core String class: Tokenizable is mixed in first, then
# Comparable, the same order as two consecutive `include` statements.
String.include(JishoSort::Tokenizable)
String.include(JishoSort::Comparable)
@@ -0,0 +1,46 @@
1
+ require 'natto'
2
+
3
module JishoSort
  # Converts a string into its Japanese reading (furigana) using MeCab through
  # the natto gem.
  #
  # The including class must provide +chars+ (e.g. String).
  module Tokenizable
    # MeCab character-category id whose tokens are emitted as their surface text.
    # NOTE(review): presumably the KATAKANA category of the dictionary's char.def — confirm.
    NATTO_KATAKANA_TYPE = 7
    # Zero-based position of the reading inside MeCab's comma-separated feature string.
    # NOTE(review): presumably the IPADIC feature layout — confirm for other dictionaries.
    NATTO_FURIGANA_INDEX = 7

    # Returns the reading of this string: ASCII runs are kept verbatim, and every
    # other run is replaced by the readings MeCab reports for its tokens.
    #
    # @return [String] The furigana used as the sort key.
    def furigana
      tokenize
    end

    private

    def tokenize
      call_natto_parser
    end

    # Builds the reading segment by segment.
    #
    # The reading is picked out of the feature string here rather than through an
    # output format passed to Natto::MeCab.new, because specifying a format raised
    # `MECAB_NBEST request type is not set (Natto::MeCabError)`.
    def call_natto_parser
      nm = Natto::MeCab.new

      separate_ascii_string_from_others.map do |segment|
        # ASCII text has no reading to look up; keep it as written.
        next segment if segment.ascii_only?

        readings = []
        nm.parse(segment) do |node|
          next if node.is_eos?
          next readings << node.surface if node.char_type == NATTO_KATAKANA_TYPE

          reading = node.feature.split(',')[NATTO_FURIGANA_INDEX]
          # If the feature string has no reading (too few fields, or a `*`
          # placeholder), fall back to the surface so the token is not dropped
          # from the sort key.
          # NOTE(review): `*` is assumed to be the dictionary's empty-field marker — confirm.
          readings << (reading.nil? || reading == '*' ? node.surface : reading)
        end
        readings.join
      end.join
    end

    # Splits the string into consecutive runs that are either entirely ASCII or
    # entirely non-ASCII, preserving order.
    #
    # @return [Array<String>]
    def separate_ascii_string_from_others
      chars.chunk_while { |s1, s2| s1.ascii_only? == s2.ascii_only? }
           .map(&:join)
    end
  end
end
data/lib/jisho_sort.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'jisho_sort/string'
2
+ require 'jisho_sort/enumerable'
3
+
4
# Top-level namespace of the gem; it defines nothing of its own here.
module JishoSort; end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jisho_sort
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.2
5
+ platform: ruby
6
+ authors:
7
+ - fvknk
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 2025-02-24 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: ffi
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: 1.9.0
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: 1.9.0
26
+ - !ruby/object:Gem::Dependency
27
+ name: natto
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ description: Sort Japanese letters, including Chinese letters, in dictionary order.
41
+ executables: []
42
+ extensions: []
43
+ extra_rdoc_files: []
44
+ files:
45
+ - README.md
46
+ - lib/jisho_sort.rb
47
+ - lib/jisho_sort/comparable.rb
48
+ - lib/jisho_sort/enumerable.rb
49
+ - lib/jisho_sort/sortable.rb
50
+ - lib/jisho_sort/string.rb
51
+ - lib/jisho_sort/tokenizable.rb
52
+ homepage: https://github.com/fvknk
53
+ licenses: []
54
+ metadata:
55
+ rubygems_mfa_required: 'true'
56
+ rdoc_options: []
57
+ require_paths:
58
+ - lib
59
+ required_ruby_version: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ required_rubygems_version: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ requirements: []
70
+ rubygems_version: 3.6.2
71
+ specification_version: 4
72
+ summary: Sort Japanese letters in dictionary order.
73
+ test_files: []