thai_keyboard_corrector 0.1.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ade7acc9f2b07c72e55d0f89cfe1c3a23002f5c01ba19e5eff0b27b1c1d483ca
4
- data.tar.gz: f010ec32ab5064c6346643162e029876d5c7450e4d584bf3d7cdc4e569ed1b59
3
+ metadata.gz: adf3f3e1587f08281375e2d9971af347e33f9d136ee36c9df2b94895174bdb80
4
+ data.tar.gz: c3585823ef59f640c80d80fa906a4ade237874c237ee898c7fb0da9235b9b875
5
5
  SHA512:
6
- metadata.gz: 1005a6452fd77bd31271c0db49612ff39f8451a4be5de998524d2ccf5edbce384b753a90ec681b17ca9a8127327333e4847156ec502c02e850fdb370c7acedbb
7
- data.tar.gz: b18cffcd017adb5f5d1dd64782f0a8b87c1bdccffa96028765c5a8ca48ddd284127092fa34d12160d72aaa83088bf7c50e5a0fc0feead296409e04554fbe2cc4
6
+ metadata.gz: 3edf85c588c050020ce4a087466067cd83576ba2de3922f8121529f215243cba63defe98845f0a1997c47187ab3e4cb504cd711692d70094de37aeb1c36df8bd
7
+ data.tar.gz: a6d8ce0cbc345143393ba0bbcb841d69fdd17eb376e2cf415815d1be7c76c635fa2ac532001e94d894f56921a968796d5ed07aff444f64564054b0d12a25522d
data/README.md CHANGED
@@ -20,7 +20,7 @@
20
20
 
21
21
  ```bash
22
22
  gem install thai_keyboard_corrector
23
- ````
23
+ ```
24
24
 
25
25
  (หรือเพิ่ม `gem "thai_keyboard_corrector"` ใน Gemfile)
26
26
 
@@ -13,61 +13,27 @@ module ThaiKeyboardCorrector
13
13
  # Returns :thai_in_en, :en_in_th, :thai, :en, :mixed, :unknown
14
14
  def detect_layout(str)
15
15
  clean = str.strip
16
- return :unknown if clean.empty?
16
+ clean_no_ws = clean.gsub(/\s+/, '') # strip ALL whitespace
17
17
 
18
- thai_cnt, latin_cnt = char_stats(clean)
19
- return :unknown if thai_cnt.zero? && latin_cnt.zero?
18
+ return :unknown if clean_no_ws.empty?
20
19
 
21
- # ---------- pure-Latin ----------
22
- if thai_cnt.zero?
23
- # ▼ Treat 1-3-letter words as Thai-in-EN (they’re almost never real English)
24
- return :thai_in_en if clean.length <= 3 &&
25
- hit_ratio(clean, Mapping::ENG_TO_THAI) >= FULL_HIT
20
+ thai_cnt, latin_cnt = char_stats(clean) # char_stats already ignores ws
26
21
 
27
- return :thai_in_en if clean.match?(/[^A-Za-z]/) &&
28
- hit_ratio(clean, Mapping::ENG_TO_THAI) >= THRESHOLD
22
+ # Majority-vote rule ----------------------------------------------
23
+ return :en_in_th if thai_cnt > latin_cnt # mostly Thai glyphs
24
+ return :thai_in_en if latin_cnt > thai_cnt # mostly Latin letters
29
25
 
30
- return :en
31
- end
32
-
33
- # ---------- pure-Thai ----------
34
- if latin_cnt.zero?
35
- return :thai if clean.length < 4 # ignore tiny words like “ดี”
36
-
37
- eng = Mapping.map_thai_to_eng(clean)
38
- vowelish = eng.count(VOWELS).positive?
39
- if eng.match?(/\A[a-z]+\z/i) && vowelish &&
40
- hit_ratio(clean, Mapping::THAI_TO_ENG) >= THRESHOLD
41
- return :en_in_th
42
- end
43
-
44
- return :thai
45
- end
46
-
47
- # ---------- mixed ----------
48
- return :mixed if thai_cnt.positive? && latin_cnt.positive?
49
- return :thai_in_en if hit_ratio(clean, Mapping::ENG_TO_THAI) >= THRESHOLD
50
- return :en_in_th if hit_ratio(clean, Mapping::THAI_TO_ENG) >= THRESHOLD
51
-
52
- :mixed
26
+ # If counts are equal (or zero) we can’t be sure
27
+ thai_cnt.zero? && latin_cnt.zero? ? :unknown : :mixed
53
28
  end
54
29
 
55
30
  # helpers ----------------------------------------------------------------
56
31
  def char_stats(str)
57
- clean = str.gsub(/\s+/, '') # remove ALL Unicode whitespace
32
+ clean = str.gsub(/\s+/, '')
58
33
  thai = clean.each_char.count { |c| THAI_RANGE.include?(c.ord) }
59
- latin = clean.each_char.count { |c| c =~ /[A-Za-z]/ }
34
+ latin = clean.each_char.count { |c| c.match?(/[A-Za-z]/) }
60
35
  [thai, latin]
61
36
  end
62
37
  private_class_method :char_stats
63
-
64
- def hit_ratio(str, table)
65
- chars = str.gsub(/\s+/, '').chars # whitespace-free array
66
- return 0.0 if chars.empty?
67
-
68
- hits = chars.count { |c| table.key?(c) }
69
- hits.to_f / chars.length
70
- end
71
- private_class_method :hit_ratio
72
38
  end
73
39
  end
@@ -2,8 +2,8 @@
2
2
 
3
3
  module ThaiKeyboardCorrector
4
4
  # Mapping module provides methods to convert between English and Thai characters
5
- module Mapping
6
- # Base (lower-case) map
5
+ module Mapping
6
+ # 1. Base map – lower-case EN → Thai Kedmanee
7
7
  BASE = {
8
8
  'q' => 'ๆ', 'w' => 'ไ', 'e' => 'ำ', 'r' => 'พ', 't' => 'ะ',
9
9
  'y' => 'ั', 'u' => 'ี', 'i' => 'ร', 'o' => 'น', 'p' => 'ย',
@@ -14,8 +14,32 @@ module ThaiKeyboardCorrector
14
14
  'n' => 'ื', 'm' => 'ท', ',' => 'ม', '.' => 'ใ', '/' => 'ฝ'
15
15
  }.freeze
16
16
 
17
+ # 2. Shift-layer Thai glyphs → underlying EN key
18
+ SHIFT = {
19
+ # ── Number row (Shift+1 … Shift+0) – Thai digits
20
+ '๑' => '1', '๒' => '2', '๓' => '3', '๔' => '4', '๕' => '5',
21
+ '๖' => '6', '๗' => '7', '๘' => '8', '๙' => '9', '๐' => '0',
22
+
23
+ # ── Top-letter row (Q–P)
24
+ 'ฃ' => 'w', 'ฅ' => 'e', 'ฆ' => 'r', 'ฑ' => 't', 'ํ' => 'y',
25
+ 'ฐ' => 'u', 'ณ' => 'i', 'ญ' => 'o', 'ธ' => 't', # 'ธ' maps to the same key ('t') as 'ฑ' above
26
+
27
+ # ── Home row (A–L)
28
+ 'ฤ' => 'a', 'ฦ' => 's', 'ฌ' => 'h', 'ศ' => 'l', 'ษ' => ';', 'ฮ' => "'",
29
+
30
+ # ── Bottom row (Z–/)
31
+ 'ฒ' => 'z', 'ฬ' => 'x',
32
+ 'ฯ' => 'm', # Thai paiyannoi
33
+ '฿' => '.', # Baht sign (Shift+.)
34
+ '๏' => '/', # Thai “head mark”
35
+
36
+ # Already mapped earlier (duplicates kept for clarity—harmless):
37
+ '๛' => ',' # end-paragraph mark
38
+ }.freeze
39
+
40
+ # 3. Final maps (frozen once, never mutated)
17
41
  ENG_TO_THAI = BASE.merge(BASE.transform_keys(&:upcase)).freeze
18
- THAI_TO_ENG = BASE.invert.freeze # ⇒ always lower-case
42
+ THAI_TO_ENG = BASE.invert.merge(SHIFT).freeze
19
43
 
20
44
  module_function
21
45
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module ThaiKeyboardCorrector
4
- VERSION = '0.1.1'
4
+ VERSION = '0.2.1'
5
5
  end
@@ -13,11 +13,11 @@ module ThaiKeyboardCorrector
13
13
  # @return [String] corrected or original string
14
14
  def correct(str)
15
15
  case detect_layout(str)
16
- when :thai_in_en
16
+ when :thai_in_en # mostly Latin → convert EN→TH
17
17
  Mapping.map_eng_to_thai(str)
18
- when :en_in_th
18
+ when :en_in_th # mostly Thai → convert TH→EN
19
19
  Mapping.map_thai_to_eng(str)
20
- else
20
+ else # :mixed, :unknown → return the input unchanged
21
21
  str
22
22
  end
23
23
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: thai_keyboard_corrector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chayut Orapinpatipat
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-06-20 00:00:00.000000000 Z
11
+ date: 2025-06-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec