gsm_encoder 0.1.6 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +5 -5
  2. data/README.md +4 -0
  3. data/lib/gsm_encoder.rb +77 -64
  4. metadata +3 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: cc5e53ca18eacc9115c7395a5f6c833f22571d19
4
- data.tar.gz: eab20ccf00096deef3577367472e87ae88dfa6c0
2
+ SHA256:
3
+ metadata.gz: be392aec01587b907e1e0454bec610cc068359a88d614cb31850d5208503cf9d
4
+ data.tar.gz: 13b2b3a007cae53be426a8da7413676e020554cccf62ef838b87e4266f71d368
5
5
  SHA512:
6
- metadata.gz: afd5459dc5be3e784c6b691d1ab6e6600cc0f333965cd1ad87b068a28787b98c92abc75534078f60e79e37506c0d8d2cbe5afe940032115949733593e71ba814
7
- data.tar.gz: 088b1bbe74116558fd69645a61f8a4bc8df9f7f501f893071f733165002c6774ef6f6a30502ed728b4b9e9a1b3604623577c4abec30080b70bf331b3fc443c9f
6
+ metadata.gz: fc9ace5dc2f040ad1fc4ed994dc43c74c3db7b513cdf9d6a1cf3491eb794635dc64c88e6c5ad2c3fe73cc1b9d4d65b9202d388cfc9ee32f46b0bf4c6eac2301b
7
+ data.tar.gz: 4edfb38d92c90f16963bea9525e47f082af4a320badd64b79b1e382f56abc8c291a3cd3fa0c8187fcfdff9cc8cb5ce1ad5724928c214c7e1769f5cca12244b10
data/README.md CHANGED
@@ -51,3 +51,7 @@ Code cleanup and speedup
51
51
  ### 0.1.5
52
52
 
53
53
  Even faster. About 4 times. Many thanks to @dlarrabee
54
+
55
+ ### 0.1.7
56
+
57
+ Even faster! Many thanks to @ThomasSevestre
data/lib/gsm_encoder.rb CHANGED
@@ -1,4 +1,4 @@
1
- #encoding: utf-8
1
+ # frozen_string_literal: true
2
2
  # Stealing from Twitter's Java implementation
3
3
  # https://github.com/twitter/cloudhopper-commons-charset/blob/master/src/main/java/com/cloudhopper/commons/charset/GSMCharset.java
4
4
 
@@ -7,18 +7,18 @@
7
7
  # alphabet. It also supports the default extension table. The default alphabet
8
8
  # and its extension table are defined in GSM 03.38.
9
9
  module GSMEncoder
10
- DEFAULT_REPLACE_CHAR = "?"
10
+ # GSM to UTF8 tables
11
+ nl = 10.chr
12
+ cr = 13.chr
13
+ bs = 92.chr
11
14
 
12
- EXTENDED_ESCAPE = 0x1b
13
- NL = 10.chr
14
- CR = 13.chr
15
- BS = 92.chr
15
+ GSM_ESCAPE = 0x1b
16
16
 
17
- CHAR_TABLE = [
17
+ GSM_TABLE = [
18
18
  '@', '£', '$', '¥', 'è', 'é', 'ù', 'ì',
19
- 'ò', 'Ç', NL, 'Ø', 'ø', CR , 'Å', 'å',
19
+ 'ò', 'Ç', nl, 'Ø', 'ø', cr , 'Å', 'å',
20
20
  'Δ', '_', 'Φ', 'Γ', 'Λ', 'Ω', 'Π', 'Ψ',
21
- 'Σ', 'Θ', 'Ξ', " ", 'Æ', 'æ', 'ß', 'É', # 0x1B is actually an escape which we'll encode to a space char
21
+ 'Σ', 'Θ', 'Ξ', nil, 'Æ', 'æ', 'ß', 'É', # 0x1b is the escape
22
22
  " ", '!', '"', '#', '¤', '%', '&', "'",
23
23
  '(', ')', '*', '+', ',', '-', '.', '/',
24
24
  '0', '1', '2', '3', '4', '5', '6', '7',
@@ -31,73 +31,86 @@ module GSMEncoder
31
31
  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
32
32
  'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
33
33
  'x', 'y', 'z', 'ä', 'ö', 'ñ', 'ü', 'à',
34
- ].join # make it string to speedup lookup
34
+ ].freeze
35
35
 
36
- # Extended character table. Characters in this table are accessed by the
37
- # 'escape' character in the base table. It is important that none of the
38
- # 'inactive' characters ever be matchable with a valid base-table
39
- # character as this breaks the encoding loop.
40
- EXT_CHAR_TABLE = [
41
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 'ç', 0, 0, 0, 0, 0, 0,
42
- 0, 0, 0, 0, '^', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
43
- 0, 0, 0, 0, 0, 0, 0, 0, '{', '}', 0, 0, 0, 0, 0, BS,
44
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, '[', '~', ']', 0,
45
- '|', 'Á', 0, 0, 0, 0, 0, 0, 0, 'Í', 0, 0, 0, 0, 0, 'Ó',
46
- 0, 0, 0, 0, 0, 'Ú', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
47
- 0, 'á', 0, 0, 0, '€', 0, 0, 0, 'í', 0, 0, 0, 0, 0, 'ó',
48
- 0, 0, 0, 0, 0, 'ú', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
49
- ]
36
+ GSM_EXT_TABLE = [
37
+ nil, nil, nil, nil, nil, nil, nil, nil, nil, 'ç', nil, nil, nil, nil, nil, nil,
38
+ nil, nil, nil, nil, '^', nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
39
+ nil, nil, nil, nil, nil, nil, nil, nil, '{', '}', nil, nil, nil, nil, nil, bs,
40
+ nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, '[', '~', ']', nil,
41
+ '|', 'Á', nil, nil, nil, nil, nil, nil, nil, 'Í', nil, nil, nil, nil, nil, 'Ó',
42
+ nil, nil, nil, nil, nil, 'Ú', nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
43
+ nil, 'á', nil, nil, nil, '€', nil, nil, nil, 'í', nil, nil, nil, nil, nil, 'ó',
44
+ nil, nil, nil, nil, nil, 'ú', nil, nil, nil, nil, nil, nil, nil, nil, nil, nil,
45
+ ].freeze
50
46
 
51
- REGEX = /\A[ -_a-~#{Regexp.escape(CHAR_TABLE + EXT_CHAR_TABLE.select {|c| c != 0}.join)}]*\Z/
52
-
53
- # Verifies that this charset can represent every character in the Ruby
54
- # String.
55
- # @param str The String to verfiy
56
- # @return True if the charset can represent every character in the Ruby
57
- # String, otherwise false.
58
- def can_encode?(str)
59
- !str || !!(REGEX =~ str)
47
+ # build UTF8 to GSM tables
48
+ UTF8_ARRAY_SIZE = 128
49
+ UTF8_ARRAY = []
50
+ UTF8_HASH = {}
51
+ [
52
+ [ GSM_TABLE, nil ],
53
+ [ GSM_EXT_TABLE, GSM_ESCAPE ]
54
+ ].each do |table, prefix|
55
+ table.each.with_index do |char, index|
56
+ next if char.nil?
57
+ # build GSM value
58
+ if prefix
59
+ value = +''.encode('binary')
60
+ value << prefix
61
+ value << index
62
+ else
63
+ value = index
64
+ end
65
+ # store GSM value
66
+ key = char.codepoints.first
67
+ if key < UTF8_ARRAY_SIZE
68
+ UTF8_ARRAY[key] = value
69
+ else
70
+ UTF8_HASH[key] = value
71
+ end
72
+ end
60
73
  end
74
+ UTF8_ARRAY.freeze
75
+ UTF8_ARRAY.each(&:freeze)
76
+ UTF8_HASH.freeze
77
+ UTF8_HASH.each_value(&:freeze)
61
78
 
62
- def encode(str, replace_char=nil)
63
- return nil if !str
79
+ REGEXP = /\A[#{Regexp.escape((GSM_TABLE + GSM_EXT_TABLE).compact.join)}]*\Z/
64
80
 
65
- replace_char = DEFAULT_REPLACE_CHAR if !replace_char || !can_encode?(replace_char)
66
-
67
- buffer = ''.encode('binary')
81
+ # Verifies that the given string can be encoded in GSM 03.38
82
+ def can_encode?(str)
83
+ !str || !!(REGEXP =~ str)
84
+ end
68
85
 
69
- begin
70
- str.each_char do |c|
71
- if index = CHAR_TABLE.rindex(c)
72
- buffer << index
73
- elsif index = EXT_CHAR_TABLE.index(c)
74
- buffer << EXTENDED_ESCAPE
75
- buffer << index
76
- else
77
- buffer << replace_char
78
- end
86
+ # Encode given UTF-8 string to GSM 03.38
87
+ def encode(string, replacement = "?")
88
+ return nil if string.nil?
89
+ replacement = replacement == "?" ? 63 : encode(replacement)
90
+ buffer = String.new(encoding: "binary")
91
+ string.each_codepoint do |codepoint|
92
+ if codepoint < UTF8_ARRAY_SIZE
93
+ buffer << (UTF8_ARRAY[codepoint] || replacement)
94
+ else
95
+ buffer << (UTF8_HASH[codepoint] || replacement)
79
96
  end
80
- rescue
81
- # TODO: ?
82
97
  end
83
98
  buffer
84
99
  end
85
100
 
86
- def decode bstring
87
- return nil if !bstring
88
-
89
- buffer = ''.encode('utf-8')
90
-
91
- table = CHAR_TABLE
92
- bstring.bytes.each do |c|
93
- code = c & 0x000000ff
94
- if code == EXTENDED_ESCAPE
95
- # take next char from extension table
96
- table = EXT_CHAR_TABLE
101
+ # Decode given GSM 03.38 string to UTF-8
102
+ def decode(string, replacement = "?")
103
+ return nil if string.nil?
104
+ buffer = +""
105
+ escaped = false
106
+ string.each_byte do |c|
107
+ if c == GSM_ESCAPE
108
+ escaped = true
109
+ elsif escaped
110
+ buffer << GSM_EXT_TABLE[c] || replacement
111
+ escaped = false
97
112
  else
98
- buffer << (code >= table.length ? '?' : table[code])
99
- # go back to the default table
100
- table = CHAR_TABLE
113
+ buffer << GSM_TABLE[c] || replacement
101
114
  end
102
115
  end
103
116
  buffer
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: gsm_encoder
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Yury Korolev
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-03-18 00:00:00.000000000 Z
12
+ date: 2024-09-13 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: GSMEncoder encodes and decodes Ruby Strings to and from the SMS default
15
15
  alphabet. It also supports the default extension table. The default alphabet and
@@ -41,8 +41,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
41
41
  - !ruby/object:Gem::Version
42
42
  version: 1.3.6
43
43
  requirements: []
44
- rubyforge_project:
45
- rubygems_version: 2.2.0
44
+ rubygems_version: 3.0.3.1
46
45
  signing_key:
47
46
  specification_version: 4
48
47
  summary: ruby GSM 03.38 encoder/decoder