script_detector_2 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 76df1da987c76f07c1116e360f2686bcb177d5be42d5faf8e8898561d9975f7a
4
- data.tar.gz: 81e79124e3d9e1e9283891c12970a84ab09dd5b2f6aaa4c3e626e07f852d2696
3
+ metadata.gz: 9709f6c83f82a3bf7073bb8b9cfc95a1edea6885e0f596ef6e93f01a62c26dbb
4
+ data.tar.gz: 3ed583c8487617e9687b3c776116f0cc22140993ef9debf4f8542057e9f9f232
5
5
  SHA512:
6
- metadata.gz: 34d104036ce6fd3aa8140cfb659d745e1d786996b7139d489cd352c1a4cb835991a4e45a8634c28d6ba9cdc65fe39d3c3a3da3bddd19dd3ae799f741059eb484
7
- data.tar.gz: d7ddc49d2a0a835e1549632f7b0f7a07b6983b345798e491ed819f93a30c72ef6b90a0b1063105def6ee22f8defc3d92ebd7d19511d5b0cbc1ba29d55fc2fa29
6
+ metadata.gz: 5d17c19eee4868b540af844e08c95133415de8ccf0293dd208c11a29c14315b716c1563c45fd22081fbbf604147c837fa14bc98772a4103b2df080e4d09f89fa
7
+ data.tar.gz: 4a7894f23f49494f9debcafc967217ccb00549234014166f9fe4f1fe3d13b12b15fbe63eac426b164dfa763acbc9b4e15dfa8cd616de84720f1b2ea3249abfde
data/.rubocop.yml CHANGED
@@ -1,5 +1,9 @@
1
1
  inherit_from: .rubocop_todo.yml
2
2
 
3
+ inherit_mode:
4
+ merge:
5
+ - Exclude
6
+
3
7
  AllCops:
4
8
  TargetRubyVersion: 2.5
5
9
  SuggestExtensions: false
data/.rubocop_todo.yml CHANGED
@@ -1,6 +1,6 @@
1
1
  # This configuration was generated by
2
2
  # `rubocop --auto-gen-config`
3
- # on 2021-08-22 13:16:05 UTC using RuboCop version 1.19.1.
3
+ # on 2021-10-11 07:56:31 UTC using RuboCop version 1.21.0.
4
4
  # The point is for the user to remove these configuration records
5
5
  # one by one as the offenses are removed from the code base.
6
6
  # Note that changes in the inspected code, or installation of new
@@ -9,9 +9,9 @@
9
9
  # Offense count: 1
10
10
  # Configuration parameters: IgnoredMethods.
11
11
  Metrics/CyclomaticComplexity:
12
- Max: 8
12
+ Max: 10
13
13
 
14
- # Offense count: 1
14
+ # Offense count: 2
15
15
  # Configuration parameters: CountComments, CountAsOne, ExcludedMethods, IgnoredMethods.
16
16
  Metrics/MethodLength:
17
17
  Max: 14
@@ -19,4 +19,4 @@ Metrics/MethodLength:
19
19
  # Offense count: 1
20
20
  # Configuration parameters: IgnoredMethods.
21
21
  Metrics/PerceivedComplexity:
22
- Max: 10
22
+ Max: 11
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.7.4
data/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.3.0] - 2021-10-13
4
+
5
+ - Add `kana?` and `hangul?` methods
6
+ - Improve accuracy of `identify_script` method
7
+ - `chinese?` method now returns actual boolean instead of merely something
8
+ truthy
9
+
3
10
  ## [0.2.0] - 2021-08-23
4
11
 
5
12
  - Slight optimization of script-matching regexps
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- script_detector_2 (0.2.0)
4
+ script_detector_2 (0.3.0)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -18,9 +18,9 @@ GEM
18
18
  kramdown-parser-gfm (1.1.0)
19
19
  kramdown (~> 2.0)
20
20
  minitest (5.14.4)
21
- nokogiri (1.12.3-x86_64-darwin)
21
+ nokogiri (1.12.5-x86_64-darwin)
22
22
  racc (~> 1.4)
23
- parallel (1.20.1)
23
+ parallel (1.21.0)
24
24
  parser (3.0.2.0)
25
25
  ast (~> 2.4.1)
26
26
  racc (1.5.2)
@@ -30,20 +30,20 @@ GEM
30
30
  reverse_markdown (2.0.0)
31
31
  nokogiri
32
32
  rexml (3.2.5)
33
- rubocop (1.19.1)
33
+ rubocop (1.22.1)
34
34
  parallel (~> 1.10)
35
35
  parser (>= 3.0.0.0)
36
36
  rainbow (>= 2.2.2, < 4.0)
37
37
  regexp_parser (>= 1.8, < 3.0)
38
38
  rexml
39
- rubocop-ast (>= 1.9.1, < 2.0)
39
+ rubocop-ast (>= 1.12.0, < 2.0)
40
40
  ruby-progressbar (~> 1.7)
41
41
  unicode-display_width (>= 1.4.0, < 3.0)
42
- rubocop-ast (1.10.0)
42
+ rubocop-ast (1.12.0)
43
43
  parser (>= 3.0.1.1)
44
44
  ruby-progressbar (1.11.0)
45
45
  rubyzip (2.3.2)
46
- solargraph (0.43.0)
46
+ solargraph (0.44.0)
47
47
  backport (~> 1.2)
48
48
  benchmark
49
49
  bundler (>= 1.17.2)
@@ -60,7 +60,7 @@ GEM
60
60
  yard (~> 0.9, >= 0.9.24)
61
61
  thor (1.1.0)
62
62
  tilt (2.0.10)
63
- unicode-display_width (2.0.0)
63
+ unicode-display_width (2.1.0)
64
64
  yard (0.9.26)
65
65
 
66
66
  PLATFORMS
@@ -76,4 +76,4 @@ DEPENDENCIES
76
76
  solargraph
77
77
 
78
78
  BUNDLED WITH
79
- 2.2.26
79
+ 2.2.29
data/README.md CHANGED
@@ -12,7 +12,7 @@ Unlike the original script_detector, this gem:
12
12
  - Uses the
13
13
  [kUnihanCore2020](https://www.unicode.org/reports/tr38/#kUnihanCore2020)
14
14
  property of the Unicode Unihan database to determine which characters belong
15
- to which script (Unicode 13)
15
+ to which script (Unicode 14)
16
16
  ([details](http://www.unicode.org/L2/L2019/19388-unihan-core-2020.pdf))
17
17
  - Uses [ISO 15924 script names](https://en.wikipedia.org/wiki/ISO_15924) in
18
18
  symbol form as return values (instead of English strings)
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ScriptDetector2
4
- VERSION = '0.2.0'
4
+ VERSION = '0.3.0'
5
5
  end
@@ -10,8 +10,8 @@ module ScriptDetector2
10
10
  # @param string [String]
11
11
  # @return [Boolean]
12
12
  def japanese?(string)
13
- return true if string =~ /[\p{Hiragana}\p{Katakana}]/
14
- return false if string =~ /\p{Hangul}/
13
+ return true if kana?(string)
14
+ return false if hangul?(string)
15
15
 
16
16
  kanji = string.scan(/\p{Han}/)
17
17
  return false unless kanji.any?
@@ -19,12 +19,18 @@ module ScriptDetector2
19
19
  kanji.all?(JAPANESE_PATTERN)
20
20
  end
21
21
 
22
+ # @param string [String]
23
+ # @return [Boolean]
24
+ def kana?(string)
25
+ /[\p{Hiragana}\p{Katakana}]/.match?(string)
26
+ end
27
+
22
28
  # @param string [String]
23
29
  # @return [Boolean]
24
30
  def chinese?(string)
25
31
  return false if string =~ /[\p{Hiragana}\p{Katakana}\p{Hangul}]/
26
32
 
27
- string =~ /\p{Han}/
33
+ /\p{Han}/.match?(string)
28
34
  end
29
35
 
30
36
  # @param string [String]
@@ -52,8 +58,8 @@ module ScriptDetector2
52
58
  # @param string [String]
53
59
  # @return [Boolean]
54
60
  def korean?(string)
55
- return true if string =~ /\p{Hangul}/
56
- return false if string =~ /[\p{Hiragana}\p{Katakana}]/
61
+ return true if hangul?(string)
62
+ return false if kana?(string)
57
63
 
58
64
  hanja = string.scan(/\p{Han}/)
59
65
  return false unless hanja.any?
@@ -61,19 +67,28 @@ module ScriptDetector2
61
67
  hanja.all?(KOREAN_PATTERN)
62
68
  end
63
69
 
70
+ # @param string [String]
71
+ # @return [Boolean]
72
+ def hangul?(string)
73
+ /\p{Hangul}/.match?(string)
74
+ end
75
+
64
76
  # @param string [String]
65
77
  # @return [Symbol]
66
78
  def identify_script(string)
67
- return :Jpan if japanese?(string)
68
- return :Kore if korean?(string)
79
+ return :Jpan if kana?(string)
80
+ return :Kore if hangul?(string)
69
81
 
70
82
  is_hant = traditional_chinese?(string)
71
83
  is_hans = simplified_chinese?(string)
72
84
  if is_hant && is_hans then :Hani
73
85
  elsif is_hans then :Hans
74
86
  elsif is_hant then :Hant
87
+ elsif japanese?(string) then :Jpan
88
+ elsif korean?(string) then :Kore
75
89
  elsif chinese?(string) then :Hani # rubocop:disable Lint/DuplicateBranch
76
- else :Zyyy
90
+ else
91
+ :Zyyy
77
92
  end
78
93
  end
79
94
  end
data/tasks/gen_src.rake CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  TMP_DIR = 'tmp'
4
4
  UNIHAN_ZIP = File.join(TMP_DIR, 'Unihan.zip')
5
- UNIHAN_URL = 'https://www.unicode.org/Public/UCD/latest/ucd/Unihan.zip'
5
+ UNIHAN_URL = 'https://www.unicode.org/Public/14.0.0/ucd/Unihan.zip'
6
6
 
7
7
  directory TMP_DIR
8
8
 
data/tasks/unihan.rb CHANGED
@@ -5,15 +5,20 @@ module Unihan
5
5
  CODEPOINT_PATTERN = /U\+(?<hex>[A-F0-9]+)/.freeze
6
6
 
7
7
  class << self
8
- # @param readings_data [Hash<Integer,Hash{String => String}>]
8
+ # @param dict_data [Hash<Integer,Hash{String => String}>]
9
9
  # @param tags [Array<String>]
10
10
  # @return [Regexp]
11
11
  def gen_unihan_core_pattern(dict_data, *tags)
12
- codepoints = dict_data.select do |_, data|
12
+ gen_pattern(codepoints_for_tags(dict_data, tags))
13
+ end
14
+
15
+ # @param dict_data [Hash<Integer,Hash{String => String}>]
16
+ # @param tags [Array<String>]
17
+ # @return [Array<Integer>]
18
+ def codepoints_for_tags(dict_data, tags)
19
+ dict_data.select do |_, data|
13
20
  tags.all? { |t| data['kUnihanCore2020']&.include?(t) }
14
21
  end.keys
15
-
16
- gen_pattern(codepoints)
17
22
  end
18
23
 
19
24
  # @param codepoints [Array<Integer>]
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: script_detector_2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aaron Madlon-Kay
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-08-23 00:00:00.000000000 Z
11
+ date: 2021-10-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: byebug
@@ -76,6 +76,7 @@ files:
76
76
  - ".dir-locals.el"
77
77
  - ".rubocop.yml"
78
78
  - ".rubocop_todo.yml"
79
+ - ".ruby-version"
79
80
  - ".solargraph.yml"
80
81
  - CHANGELOG.md
81
82
  - Gemfile