gsm_encoder 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. data/README.md +25 -0
  2. data/lib/gsm_encoder.rb +145 -0
  3. metadata +49 -0
data/README.md ADDED
@@ -0,0 +1,25 @@
1
+ GSMEncoder encodes and decodes Ruby Strings to and from the SMS default
2
+ alphabet. It also supports the default extension table. The default alphabet
3
+ and its extension table are defined in GSM 03.38.
4
+
5
+ This is a port of Twitter's Java [implementation](https://github.com/twitter/cloudhopper-commons-charset/blob/master/src/main/java/com/cloudhopper/commons/charset/GSMCharset.java)
6
+
7
+ ## Installation
8
+
9
+ gem install gsm_encoder
10
+
11
+ ## Usage
12
+
13
+ require 'gsm_encoder'
14
+
15
+ # encoding
16
+ GSMEncoder.encode 'hello @ world' # => binary string
17
+
18
+ # decoding
19
+ GSMEncoder.decode(GSMEncoder.encode('hi')) # => 'hi'
20
+
21
+ # can encode?
22
+ GSMEncoder.can_encode?('`') # => false
23
+
24
+ # replaces unsupported chars with '?'
25
+ GSMEncoder.encode('`') # => '?'
@@ -0,0 +1,145 @@
1
+ # Stealing from Twitter's Java implementation
2
+ # https://github.com/twitter/cloudhopper-commons-charset/blob/master/src/main/java/com/cloudhopper/commons/charset/GSMCharset.java
3
+
4
+ #
5
+ # This class encodes and decodes Ruby Strings to and from the SMS default
6
+ # alphabet. It also supports the default extension table. The default alphabet
7
+ # and its extension table are defined in GSM 03.38.
8
module GSMEncoder

  # Byte that announces that the NEXT byte must be looked up in the
  # extension table instead of the base table.
  EXTENDED_ESCAPE = 0x1b

  # Base table: the array index is the 7-bit code, the element is the
  # character it stands for. Index 0x1b (the escape code) only holds a
  # placeholder; it is never used for encoding and never read when decoding.
  #
  # Note: line feed and carriage return MUST be written with double quotes.
  # In single quotes '\n' is a two-character string (backslash + "n") that can
  # never equal a single input character.
  CHAR_TABLE = [
    '@', "\u00a3", '$', "\u00a5", "\u00e8", "\u00e9", "\u00f9", "\u00ec",
    "\u00f2", "\u00c7", "\n", "\u00d8", "\u00f8", "\r", "\u00c5", "\u00e5",
    "\u0394", '_', "\u03a6", "\u0393", "\u039b", "\u03a9", "\u03a0", "\u03a8",
    "\u03a3", "\u0398", "\u039e", " ", "\u00c6", "\u00e6", "\u00df", "\u00c9",
    " ", '!', '"', '#', "\u00a4", '%', '&', "'",
    '(', ')', '*', '+', ',', '-', '.', '/',
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', ':', ';', '<', '=', '>', '?',
    "\u00a1", 'A', 'B', 'C', 'D', 'E', 'F', 'G',
    'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
    'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
    'X', 'Y', 'Z', "\u00c4", "\u00d6", "\u00d1", "\u00dc", "\u00a7",
    "\u00bf", 'a', 'b', 'c', 'd', 'e', 'f', 'g',
    'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
    'x', 'y', 'z', "\u00e4", "\u00f6", "\u00f1", "\u00fc", "\u00e0",
  ].freeze

  # Extended character table. Characters in this table are reached through
  # the escape code of the base table. Inactive slots hold the Integer 0, which
  # can never compare equal to a one-character String.
  EXT_CHAR_TABLE = [
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, "^", 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    '{', '}', 0, 0, 0, 0, 0, "\\",
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, '[', '~', ']', 0,
    '|', 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, "\u20ac", 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
  ].freeze

  # Character => code lookup derived from CHAR_TABLE. The escape slot is
  # skipped, and the lowest code wins if a character appears more than once.
  BASE_CODES = CHAR_TABLE.each_with_index.each_with_object({}) do |(char, code), codes|
    next if code == EXTENDED_ESCAPE
    codes[char] = code unless codes.key?(char)
  end.freeze

  # Character => code lookup derived from the active (String) slots of
  # EXT_CHAR_TABLE.
  EXT_CODES = EXT_CHAR_TABLE.each_with_index.each_with_object({}) do |(char, code), codes|
    codes[char] = code if char.is_a?(String)
  end.freeze

  # Verifies that this charset can represent every character in the Ruby
  # String.
  # @param str The String to verify
  # @return True if the charset can represent every character in the Ruby
  #   String (or if +str+ is nil/false), otherwise false.
  def can_encode?(str)
    return true unless str

    str.each_char.all? { |char| BASE_CODES.key?(char) || EXT_CODES.key?(char) }
  end

  # Encodes a Ruby String into GSM 03.38 codes, one byte per code.
  # Characters from the extension table are emitted as the escape byte
  # followed by their code; characters found in neither table become '?'.
  # @param str The String to encode
  # @return A binary String holding the codes, or nil if +str+ is nil/false
  # @raise [NoMethodError] if +str+ does not respond to +each_char+; such
  #   errors are no longer swallowed into an empty result.
  def encode(str)
    return nil unless str

    buffer = String.new.force_encoding(Encoding::BINARY)
    str.each_char do |char|
      if (code = BASE_CODES[char])
        # String#<< with an Integer appends that single byte.
        buffer << code
      elsif (code = EXT_CODES[char])
        buffer << EXTENDED_ESCAPE << code
      else
        buffer << '?'
      end
    end
    buffer
  end

  # Decodes a String of GSM 03.38 codes (one code per byte) into a UTF-8
  # Ruby String.
  #
  # An escape byte selects the extension table for the byte that follows it.
  # If that byte has no entry in the extension table, the base-table character
  # for the same code is used instead of inserting a NUL character. Codes
  # outside both tables decode to '?'.
  # @param bstring The String of codes to decode
  # @return The decoded UTF-8 String, or nil if +bstring+ is nil/false
  def decode(bstring)
    return nil unless bstring

    buffer = String.new.force_encoding(Encoding::UTF_8)
    escaped = false
    bstring.each_byte do |code|
      if code == EXTENDED_ESCAPE
        # take next char from extension table
        escaped = true
        next
      end

      char = escaped ? EXT_CHAR_TABLE[code] : nil
      # Inactive extension slots are Integer 0 and out-of-range codes are nil:
      # in both cases try the base table for this code.
      char = CHAR_TABLE[code] unless char.is_a?(String)
      buffer << (char || '?')

      # go back to the default table
      escaped = false
    end
    buffer
  end

  module_function :can_encode?, :encode, :decode

end
metadata ADDED
@@ -0,0 +1,49 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gsm_encoder
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Yury Korolev
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2011-08-18 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: ! "GSMEncoder encodes and decodes Ruby Strings to and from the SMS default\n
15
+ \ alphabet. It also supports the default extension table. The default alphabet\n
16
+ \ and it's extension table is defined in GSM 03.38"
17
+ email:
18
+ - yury.korolev@gmail.com
19
+ executables: []
20
+ extensions: []
21
+ extra_rdoc_files: []
22
+ files:
23
+ - lib/gsm_encoder.rb
24
+ - README.md
25
+ homepage: http://github.com/yury/gsm_encoder
26
+ licenses: []
27
+ post_install_message:
28
+ rdoc_options: []
29
+ require_paths:
30
+ - lib
31
+ required_ruby_version: !ruby/object:Gem::Requirement
32
+ none: false
33
+ requirements:
34
+ - - ! '>='
35
+ - !ruby/object:Gem::Version
36
+ version: '0'
37
+ required_rubygems_version: !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ! '>='
41
+ - !ruby/object:Gem::Version
42
+ version: 1.3.6
43
+ requirements: []
44
+ rubyforge_project:
45
+ rubygems_version: 1.8.8
46
+ signing_key:
47
+ specification_version: 3
48
+ summary: GSM 03.38 encoder/decoder
49
+ test_files: []