jisho_sort 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +53 -0
- data/lib/jisho_sort/comparable.rb +13 -0
- data/lib/jisho_sort/enumerable.rb +34 -0
- data/lib/jisho_sort/sortable.rb +7 -0
- data/lib/jisho_sort/string.rb +7 -0
- data/lib/jisho_sort/tokenizable.rb +46 -0
- data/lib/jisho_sort.rb +5 -0
- metadata +73 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: cd6b034701246bd87b1010e88382dd95ba9e72b237a286b984e941d049680a0a
|
4
|
+
data.tar.gz: d9b54a9eec95dd837973032562fcffd0a491e150e93a51eff18069f5d87bcf46
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 98ec88fb0b147653349f057150cabf0fcde117d7a9dd1852a0b930129fcff5c7ebc23885538de110eeb6022556966aa7dcf97b3f913b399ad33531ac88384771
|
7
|
+
data.tar.gz: 5556bba3f0456cacec3ada3fecb68eb79850a3163f160baf92524945f38ef63875bda3b6777a2fdb48ede47073f064d0cf6b560162bbad6d6ab1f4cd95742ec6
|
data/README.md
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
# jisho_sort
|
2
|
+
|
3
|
+
`jisho_sort` is a Ruby sorting library based on MeCab.
|
4
|
+
It sorts Japanese strings that mix kanji (Chinese characters) and kana into dictionary order, based on their readings.
|
5
|
+
|
6
|
+
## Installation
|
7
|
+
|
8
|
+
This code depends on MeCab, so you need to install MeCab.
|
9
|
+
For MeCab installation instructions, please refer to the [MeCab installation guide](https://taku910.github.io/mecab/#install).
|
10
|
+
|
11
|
+
After installing MeCab, add this line to your Gemfile:
|
12
|
+
|
13
|
+
```ruby
|
14
|
+
gem 'jisho_sort'
|
15
|
+
```
|
16
|
+
|
17
|
+
And then execute:
|
18
|
+
|
19
|
+
```sh
|
20
|
+
bundle install
|
21
|
+
```
|
22
|
+
|
23
|
+
Or install it yourself with:
|
24
|
+
|
25
|
+
```sh
|
26
|
+
gem install jisho_sort
|
27
|
+
```
|
28
|
+
|
29
|
+
## Usage
|
30
|
+
|
31
|
+
Here's a basic example of how to use JishoSort:
|
32
|
+
|
33
|
+
```ruby
|
34
|
+
require 'jisho_sort'
|
35
|
+
|
36
|
+
array = [
|
37
|
+
'ゆく川の流れは絶えずして、しかももとの水にあらず',
|
38
|
+
'メロスは激怒した',
|
39
|
+
'国境の長いトンネルを抜けると雪国であった'
|
40
|
+
]
p array.jisho_sort
|
41
|
+
```
|
42
|
+
|
43
|
+
When you run this code, the strings are printed in dictionary order of their readings:
|
44
|
+
|
45
|
+
```sh
|
46
|
+
$ ruby sample.rb
|
47
|
+
["国境の長いトンネルを抜けると雪国であった", "メロスは激怒した", "ゆく川の流れは絶えずして、しかももとの水にあらず"]
|
48
|
+
```
|
49
|
+
|
50
|
+
|
51
|
+
## License
|
52
|
+
|
53
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module JishoSort
  # Mixin that compares strings by their Japanese reading (furigana).
  #
  # The including class must respond to +furigana+; this module calls it on
  # both the receiver and the argument.
  module Comparable
    # Compares this string, which may contain kanji and kana, with another
    # string based on their Japanese pronunciation (furigana).
    #
    # @param other [String] The string to compare with. String subclasses are accepted.
    # @return [Integer] -1 if this string's furigana sorts before the other's,
    #   0 if they are equal, and 1 if it sorts after.
    # @raise [ArgumentError] If +other+ is not a String.
    def compare_by_furigana(other)
      unless other.is_a?(String)
        raise ArgumentError, "comparison target must be a String, got #{other.class}"
      end

      furigana <=> other.furigana
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'jisho_sort/sortable'
|
2
|
+
|
3
|
+
module Enumerable
  include JishoSort::Sortable

  # Sorts a collection of strings in Japanese pronunciation (dictionary) order.
  # If a block is given, the collection is sorted with that block instead and
  # no element type check is performed.
  #
  # Without a block, the reading of each element is computed once and used as
  # the sort key, rather than being recomputed on every comparison.
  #
  # @yield [a, b] Optional block to customize the sorting logic.
  # @yieldparam a [String] The first string to compare.
  # @yieldparam b [String] The second string to compare.
  # @return [Array<String>] The sorted array.
  # @raise [ArgumentError] If no block is given and any element is not a String.
  def jisho_sort(&block)
    return sort(&block) if block

    unless all? { |item| item.is_a?(String) }
      raise ArgumentError, 'jisho_sort without a block requires every element to be a String'
    end

    sort_by(&:furigana)
  end

  # Sorts the elements of the enumerable by the Japanese pronunciation of the
  # strings returned by the given block.
  #
  # The block is called exactly once per element; its result is validated and
  # then reused as the basis of the sort key.
  #
  # @yield [item] Gives each element of the enumerable to the block.
  # @yieldreturn [String] The string whose Japanese pronunciation will be used for sorting.
  # @return [Array] A new array with the elements sorted by the Japanese pronunciation of the strings.
  # @raise [ArgumentError] If the block returns a non-String for any element.
  def jisho_sort_by
    # Validate every key before any reading is computed, so an invalid key is
    # reported without touching the tokenizer.
    keyed = map do |item|
      key = yield(item)
      unless key.is_a?(String)
        raise ArgumentError, "jisho_sort_by block must return a String, got #{key.class}"
      end

      [item, key]
    end

    keyed.sort_by { |_item, key| key.furigana }.map(&:first)
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'natto'
|
2
|
+
|
3
|
+
module JishoSort
  # Mixin that converts a string into its reading (furigana) using MeCab via
  # the natto gem. The including class must provide +chars+.
  module Tokenizable
    # Node char_type value whose surface is used as-is instead of a reading.
    # NOTE(review): presumably the katakana category index of the MeCab
    # dictionary in use - confirm against its char.def.
    NATTO_KATAKANA_TYPE = 7
    # Position of the reading within the comma-separated node feature string.
    # NOTE(review): assumes an IPADIC-style feature layout - confirm.
    NATTO_FURIGANA_INDEX = 7
    # Placeholder treated as "no value" for a feature field.
    NATTO_MISSING_FEATURE = '*'

    # Returns the reading of the receiver: ASCII runs are kept verbatim and
    # every other run is replaced by the readings of its MeCab tokens.
    #
    # @return [String] The concatenated reading used as a sort key.
    def furigana
      tokenize
    end

    private

    def tokenize
      call_natto_parser
    end

    # Builds the reading by parsing each non-ASCII run with MeCab.
    def call_natto_parser
      strings = separate_ascii_string_from_others

      # Extract the elements of furigana within this gem.
      # When specifying a format in the argument of Natto::MeCab.new, the error
      # `MECAB_NBEST request type is not set (Natto::MeCabError)` occurs.
      nm = Natto::MeCab.new

      pieces = strings.map do |chunk|
        chunk.ascii_only? ? chunk : readings_for(nm, chunk)
      end

      pieces.join
    end

    # Concatenates the readings of all non-EOS nodes MeCab yields for +text+.
    def readings_for(parser, text)
      readings = []
      parser.parse(text) do |node|
        next if node.is_eos?

        readings << reading_of(node)
      end
      readings.join
    end

    # Picks the sort text for a single node: the surface for katakana-type
    # nodes, otherwise the reading field of the feature string.
    def reading_of(node)
      return node.surface if node.char_type == NATTO_KATAKANA_TYPE

      reading = node.feature.split(',')[NATTO_FURIGANA_INDEX]
      # Some nodes carry no reading (short feature string or placeholder).
      # Fall back to the surface so the token still contributes to the key
      # instead of silently vanishing from it.
      return node.surface if reading.nil? || reading.empty? || reading == NATTO_MISSING_FEATURE

      reading
    end

    # Splits the receiver into consecutive runs of ASCII-only and non-ASCII
    # characters, preserving order.
    def separate_ascii_string_from_others
      chars.chunk_while { |s1, s2| s1.ascii_only? == s2.ascii_only? }
           .map(&:join)
    end
  end
end
|
data/lib/jisho_sort.rb
ADDED
metadata
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jisho_sort
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- fvknk
|
8
|
+
bindir: bin
|
9
|
+
cert_chain: []
|
10
|
+
date: 2025-02-24 00:00:00.000000000 Z
|
11
|
+
dependencies:
|
12
|
+
- !ruby/object:Gem::Dependency
|
13
|
+
name: ffi
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
15
|
+
requirements:
|
16
|
+
- - ">="
|
17
|
+
- !ruby/object:Gem::Version
|
18
|
+
version: 1.9.0
|
19
|
+
type: :runtime
|
20
|
+
prerelease: false
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
22
|
+
requirements:
|
23
|
+
- - ">="
|
24
|
+
- !ruby/object:Gem::Version
|
25
|
+
version: 1.9.0
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: natto
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :runtime
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '0'
|
40
|
+
description: Sort Japanese letters, including Chinese letters, in dictionary order.
|
41
|
+
executables: []
|
42
|
+
extensions: []
|
43
|
+
extra_rdoc_files: []
|
44
|
+
files:
|
45
|
+
- README.md
|
46
|
+
- lib/jisho_sort.rb
|
47
|
+
- lib/jisho_sort/comparable.rb
|
48
|
+
- lib/jisho_sort/enumerable.rb
|
49
|
+
- lib/jisho_sort/sortable.rb
|
50
|
+
- lib/jisho_sort/string.rb
|
51
|
+
- lib/jisho_sort/tokenizable.rb
|
52
|
+
homepage: https://github.com/fvknk
|
53
|
+
licenses: []
|
54
|
+
metadata:
|
55
|
+
rubygems_mfa_required: 'true'
|
56
|
+
rdoc_options: []
|
57
|
+
require_paths:
|
58
|
+
- lib
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
64
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
requirements: []
|
70
|
+
rubygems_version: 3.6.2
|
71
|
+
specification_version: 4
|
72
|
+
summary: Sort Japanese letters in dictionary order.
|
73
|
+
test_files: []
|