gsm_encoder 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +25 -0
- data/lib/gsm_encoder.rb +145 -0
- metadata +49 -0
data/README.md
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
GSMEncoder encodes and decodes Ruby Strings to and from the SMS default
|
2
|
+
alphabet. It also supports the default extension table. The default alphabet
|
3
|
+
and its extension table are defined in GSM 03.38.
|
4
|
+
|
5
|
+
This is a port of Twitter's Java [implementation](https://github.com/twitter/cloudhopper-commons-charset/blob/master/src/main/java/com/cloudhopper/commons/charset/GSMCharset.java)
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
gem install gsm_encoder
|
10
|
+
|
11
|
+
## Usage
|
12
|
+
|
13
|
+
require 'gsm_encoder'
|
14
|
+
|
15
|
+
# encoding
|
16
|
+
GSMEncoder.encode 'hello @ world' # => binary string
|
17
|
+
|
18
|
+
# decoding
|
19
|
+
GSMEncoder.decode(GSMEncoder.encode('hi')) # => 'hi'
|
20
|
+
|
21
|
+
# can encode?
|
22
|
+
GSMEncoder.can_encode?('`') # => false
|
23
|
+
|
24
|
+
# replaces unsupported chars with '?'
|
25
|
+
GSMEncoder.encode('`') # => '?'
|
data/lib/gsm_encoder.rb
ADDED
@@ -0,0 +1,145 @@
|
|
1
|
+
# Stealing from Twitter's Java implementation
|
2
|
+
# https://github.com/twitter/cloudhopper-commons-charset/blob/master/src/main/java/com/cloudhopper/commons/charset/GSMCharset.java
|
3
|
+
|
4
|
+
#
|
5
|
+
# This class encodes and decodes Ruby Strings to and from the SMS default
|
6
|
+
# alphabet. It also supports the default extension table. The default alphabet
|
7
|
+
# and its extension table are defined in GSM 03.38.
|
8
|
+
module GSMEncoder

  # Code that switches the next code to the extension table.
  EXTENDED_ESCAPE = 0x1b

  # Base table of the default alphabet: the array index is the GSM code and
  # the element is the matching character. Index EXTENDED_ESCAPE holds a
  # space as a placeholder; it is never used for encoding or decoding.
  #
  # "\n" and "\r" must be double-quoted: single-quoted '\n' is a
  # two-character string (backslash + n) that can never match a newline.
  CHAR_TABLE = [
    '@', "\u00a3", '$', "\u00a5", "\u00e8", "\u00e9", "\u00f9", "\u00ec",
    "\u00f2", "\u00c7", "\n", "\u00d8", "\u00f8", "\r", "\u00c5", "\u00e5",
    "\u0394", '_', "\u03a6", "\u0393", "\u039b", "\u03a9", "\u03a0", "\u03a8",
    "\u03a3", "\u0398", "\u039e", " ", "\u00c6", "\u00e6", "\u00df", "\u00c9",
    " ", '!', '"', '#', "\u00a4", '%', '&', "'",
    '(', ')', '*', '+', ',', '-', '.', '/',
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', ':', ';', '<', '=', '>', '?',
    "\u00a1", 'A', 'B', 'C', 'D', 'E', 'F', 'G',
    'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
    'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
    'X', 'Y', 'Z', "\u00c4", "\u00d6", "\u00d1", "\u00dc", "\u00a7",
    "\u00bf", 'a', 'b', 'c', 'd', 'e', 'f', 'g',
    'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
    'x', 'y', 'z', "\u00e4", "\u00f6", "\u00f1", "\u00fc", "\u00e0",
  ].freeze

  # Extended character table. Characters in this table are reached through
  # the escape code of the base table. Inactive slots hold the Integer 0 so
  # that they can never compare equal to a character.
  EXT_CHAR_TABLE = [
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, "^", 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    '{', '}', 0, 0, 0, 0, 0, "\\",
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, '[', '~', ']', 0,
    '|', 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, "\u20ac", 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
  ].freeze

  # Character => array of codes to emit, built once from both tables so that
  # encoding does not have to scan the tables for every character.
  ENCODE_LOOKUP = begin
    lookup = {}
    CHAR_TABLE.each_with_index do |char, code|
      # The escape slot is only a placeholder; space is encoded as 0x20.
      next if code == EXTENDED_ESCAPE
      lookup[char] ||= [code]
    end
    EXT_CHAR_TABLE.each_with_index do |char, code|
      # Skip the Integer placeholders of inactive slots.
      next unless char.is_a?(String)
      lookup[char] ||= [EXTENDED_ESCAPE, code]
    end
    lookup.freeze
  end

  # Verifies that this charset can represent every character in the Ruby
  # String.
  # @param str The String to verify
  # @return True if the charset can represent every character in the Ruby
  #   String (or if str is nil/false), otherwise false.
  def can_encode?(str)
    return true unless str

    str.each_char.all? { |char| ENCODE_LOOKUP.key?(char) }
  end

  # Encodes a Ruby String into the default alphabet, one byte per code.
  # Characters from the extension table are written as the escape code
  # followed by their extension code; characters found in neither table are
  # replaced with '?'.
  # @param str The String to encode
  # @return A binary String with the encoded bytes, or nil if str is
  #   nil/false.
  def encode(str)
    return nil unless str

    buffer = ''.encode('binary')
    str.each_char do |char|
      codes = ENCODE_LOOKUP[char]
      if codes
        codes.each { |code| buffer << code }
      else
        buffer << '?'
      end
    end
    buffer
  end

  # Decodes a binary String of default-alphabet codes into a UTF-8 String.
  # A code that follows the escape code is looked up in the extension table;
  # when that slot is inactive the base-table character for the same code is
  # used instead of emitting the slot's placeholder. Codes outside both
  # tables become '?'. A trailing escape code produces no output.
  # @param bstring The binary String to decode
  # @return The decoded String, or nil if bstring is nil/false.
  def decode(bstring)
    return nil unless bstring

    buffer = ''.encode('utf-8')
    escaped = false
    bstring.each_byte do |code|
      if code == EXTENDED_ESCAPE
        # take next char from extension table
        escaped = true
        next
      end

      char = escaped ? EXT_CHAR_TABLE[code] : nil
      # Inactive extension slot (or no escape pending): use the base table.
      char = CHAR_TABLE[code] unless char.is_a?(String)
      buffer << (char || '?')
      # go back to the default table
      escaped = false
    end
    buffer
  end

  module_function :can_encode?, :encode, :decode

end
|
metadata
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: gsm_encoder
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Yury Korolev
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2011-08-18 00:00:00.000000000 Z
|
13
|
+
dependencies: []
|
14
|
+
description: ! "GSMEncoder encodes and decodes Ruby Strings to and from the SMS default\n
|
15
|
+
\ alphabet. It also supports the default extension table. The default alphabet\n
|
16
|
+
\ and its extension table are defined in GSM 03.38"
|
17
|
+
email:
|
18
|
+
- yury.korolev@gmail.com
|
19
|
+
executables: []
|
20
|
+
extensions: []
|
21
|
+
extra_rdoc_files: []
|
22
|
+
files:
|
23
|
+
- lib/gsm_encoder.rb
|
24
|
+
- README.md
|
25
|
+
homepage: http://github.com/yury/gsm_encoder
|
26
|
+
licenses: []
|
27
|
+
post_install_message:
|
28
|
+
rdoc_options: []
|
29
|
+
require_paths:
|
30
|
+
- lib
|
31
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
32
|
+
none: false
|
33
|
+
requirements:
|
34
|
+
- - ! '>='
|
35
|
+
- !ruby/object:Gem::Version
|
36
|
+
version: '0'
|
37
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
38
|
+
none: false
|
39
|
+
requirements:
|
40
|
+
- - ! '>='
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 1.3.6
|
43
|
+
requirements: []
|
44
|
+
rubyforge_project:
|
45
|
+
rubygems_version: 1.8.8
|
46
|
+
signing_key:
|
47
|
+
specification_version: 3
|
48
|
+
summary: GSM 03.38 encoder/decoder
|
49
|
+
test_files: []
|