gsm_encoder 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +25 -0
- data/lib/gsm_encoder.rb +145 -0
- metadata +49 -0
data/README.md
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
GSMEncoder encodes and decodes Ruby Strings to and from the SMS default
|
2
|
+
alphabet. It also supports the default extension table. The default alphabet
|
3
|
+
and its extension table are defined in GSM 03.38
|
4
|
+
|
5
|
+
This is a port of Twitter's Java [implementation](https://github.com/twitter/cloudhopper-commons-charset/blob/master/src/main/java/com/cloudhopper/commons/charset/GSMCharset.java)
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
gem install gsm_encoder
|
10
|
+
|
11
|
+
## Usage
|
12
|
+
|
13
|
+
require 'gsm_encoder'
|
14
|
+
|
15
|
+
# encoding
|
16
|
+
GSMEncoder.encode 'hello @ world' # => binary string
|
17
|
+
|
18
|
+
# decoding
|
19
|
+
GSMEncoder.decode(GSMEncoder.encode('hi')) # => 'hi'
|
20
|
+
|
21
|
+
# can encode?
|
22
|
+
GSMEncoder.can_encode?('`') # => false
|
23
|
+
|
24
|
+
# replaces unsupported chars with '?'
|
25
|
+
GSMEncoder.encode('`') # => '?'
|
data/lib/gsm_encoder.rb
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
# Stealing from Twitter's Java implementation
|
2
|
+
# https://github.com/twitter/cloudhopper-commons-charset/blob/master/src/main/java/com/cloudhopper/commons/charset/GSMCharset.java
|
3
|
+
|
4
|
+
#
|
5
|
+
# This class encodes and decodes Ruby Strings to and from the SMS default
|
6
|
+
# alphabet. It also supports the default extension table. The default alphabet
|
7
|
+
# and its extension table are defined in GSM 03.38.
|
8
|
+
module GSMEncoder

  # Code point in the default alphabet that announces that the next septet
  # must be looked up in the extension table instead of the base table.
  EXTENDED_ESCAPE = 0x1b

  # Base (default alphabet) table: the array index is the GSM code, the
  # element is the matching character. Index 0x1b is the escape code and
  # only holds a " " placeholder; it is never produced by #encode.
  #
  # Line feed (0x0a) and carriage return (0x0d) have to be double-quoted:
  # in single quotes '\n' is a two-character string (backslash + "n") that
  # can never equal a single character of the input.
  CHAR_TABLE = [
    '@', "\u00a3", '$', "\u00a5", "\u00e8", "\u00e9", "\u00f9", "\u00ec",
    "\u00f2", "\u00c7", "\n", "\u00d8", "\u00f8", "\r", "\u00c5", "\u00e5",
    "\u0394", '_', "\u03a6", "\u0393", "\u039b", "\u03a9", "\u03a0", "\u03a8",
    "\u03a3", "\u0398", "\u039e", " ", "\u00c6", "\u00e6", "\u00df", "\u00c9",
    " ", '!', '"', '#', "\u00a4", '%', '&', "'",
    '(', ')', '*', '+', ',', '-', '.', '/',
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', ':', ';', '<', '=', '>', '?',
    "\u00a1", 'A', 'B', 'C', 'D', 'E', 'F', 'G',
    'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
    'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
    'X', 'Y', 'Z', "\u00c4", "\u00d6", "\u00d1", "\u00dc", "\u00a7",
    "\u00bf", 'a', 'b', 'c', 'd', 'e', 'f', 'g',
    'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
    'x', 'y', 'z', "\u00e4", "\u00f6", "\u00f1", "\u00fc", "\u00e0",
  ]

  # Extended character table. Characters in this table are reached through
  # the escape code of the base table. Inactive slots hold the Integer 0,
  # which never compares equal to a character, so they cannot be matched
  # while encoding.
  EXT_CHAR_TABLE = [
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, "^", 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    '{', '}', 0, 0, 0, 0, 0, "\\",
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, '[', '~', ']', 0,
    '|', 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, "\u20ac", 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
  ]

  # Internal reverse index (character => code) for the base table, built
  # once when the file is loaded. The escape slot is left out so that " "
  # resolves to 0x20; the lowest code wins if a character appears twice.
  BASE_CODES = {}
  CHAR_TABLE.each_with_index do |char, code|
    next if code == EXTENDED_ESCAPE
    BASE_CODES[char] ||= code
  end
  BASE_CODES.freeze

  # Internal reverse index (character => code) for the extension table.
  # Only String entries are indexed; the Integer placeholders are skipped.
  EXT_CODES = {}
  EXT_CHAR_TABLE.each_with_index do |char, code|
    EXT_CODES[char] ||= code if char.is_a?(String)
  end
  EXT_CODES.freeze

  # Verifies that this charset can represent every character in the Ruby
  # String.
  # @param str The String to verify
  # @return True if the charset can represent every character in the Ruby
  #   String (or if str is nil/false), otherwise false.
  def can_encode?(str)
    return true unless str

    str.each_char.all? { |c| BASE_CODES.key?(c) || EXT_CODES.key?(c) }
  end

  # Encodes a Ruby String into a binary string of GSM codes, one byte per
  # code. Characters from the extension table are written as the escape
  # code followed by their extension code; characters found in neither
  # table are replaced with '?'.
  # @param str The String to encode
  # @return nil if str is nil/false, otherwise the encoded binary String.
  #   An argument that cannot be iterated character by character yields an
  #   empty binary String.
  def encode(str)
    return nil unless str

    buffer = ''.encode('binary')
    # Explicit guard instead of a blanket rescue: same result for
    # non-string input, but genuine errors are no longer swallowed.
    return buffer unless str.respond_to?(:each_char)

    str.each_char do |c|
      if (code = BASE_CODES[c])
        buffer << code
      elsif (code = EXT_CODES[c])
        buffer << EXTENDED_ESCAPE << code
      else
        buffer << '?'
      end
    end
    buffer
  end

  # Decodes a binary string of GSM codes (one code per byte) into a UTF-8
  # Ruby String. Codes outside both tables decode to '?'.
  # @param bstring The binary String to decode
  # @return nil if bstring is nil/false, otherwise the decoded String.
  def decode(bstring)
    return nil unless bstring

    buffer = ''.encode('utf-8')
    escaped = false
    bstring.each_byte do |code|
      if code == EXTENDED_ESCAPE
        # take next char from extension table
        escaped = true
        next
      end

      char = EXT_CHAR_TABLE[code] if escaped
      # An escape followed by a code with no extension entry falls back to
      # the base-table character instead of appending the Integer
      # placeholder (which would insert a NUL character).
      char = CHAR_TABLE[code] unless char.is_a?(String)
      buffer << (char || '?')
      # go back to the default table
      escaped = false
    end
    buffer
  end

  module_function :can_encode?
  module_function :encode
  module_function :decode

end
|
metadata
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: gsm_encoder
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Yury Korolev
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-08-18 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: ! "GSMEncoder encodes and decodes Ruby Strings to and from the SMS default\n
|
15
|
+
\ alphabet. It also supports the default extension table. The default alphabet\n
|
16
|
+
\ and it's extension table is defined in GSM 03.38"
|
17
|
+
email:
|
18
|
+
- yury.korolev@gmail.com
|
19
|
+
executables: []
|
20
|
+
extensions: []
|
21
|
+
extra_rdoc_files: []
|
22
|
+
files:
|
23
|
+
- lib/gsm_encoder.rb
|
24
|
+
- README.md
|
25
|
+
homepage: http://github.com/yury/gsm_encoder
|
26
|
+
licenses: []
|
27
|
+
post_install_message:
|
28
|
+
rdoc_options: []
|
29
|
+
require_paths:
|
30
|
+
- lib
|
31
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
32
|
+
none: false
|
33
|
+
requirements:
|
34
|
+
- - ! '>='
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '0'
|
37
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
38
|
+
none: false
|
39
|
+
requirements:
|
40
|
+
- - ! '>='
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 1.3.6
|
43
|
+
requirements: []
|
44
|
+
rubyforge_project:
|
45
|
+
rubygems_version: 1.8.8
|
46
|
+
signing_key:
|
47
|
+
specification_version: 3
|
48
|
+
summary: GSM 03.38 encoder/decoder
|
49
|
+
test_files: []
|