RubyGems - gsm_encoder - Versions diffs - 0.1.0 - Mend

gsm_encoder 0.1.0

Files changed (3) hide show

data/README.md ADDED Viewed

@@ -0,0 +1,25 @@
+GSMEncoder encodes and decodes Ruby Strings to and from the SMS default
+alphabet. It also supports the default extension table. The default alphabet
+and its extension table are defined in GSM 03.38.
+This is a port of Twitter's Java [implementation](https://github.com/twitter/cloudhopper-commons-charset/blob/master/src/main/java/com/cloudhopper/commons/charset/GSMCharset.java)
+## Installation
+    gem install gsm_encoder
+## Usage
+    require 'gsm_encoder'
+    # encoding
+    GSMEncoder.encode 'hello @ world' # => binary string
+    # decoding
+    GSMEncoder.decode(GSMEncoder.encode('hi')) # => 'hi'
+    # can encode?
+    GSMEncoder.can_encode?('`') # => false
+    # replaces unsupported chars with '?'
+    GSMEncoder.encode('`') # => '?'

data/lib/gsm_encoder.rb ADDED Viewed

@@ -0,0 +1,145 @@
# Ported from Twitter's Java implementation:
# https://github.com/twitter/cloudhopper-commons-charset/blob/master/src/main/java/com/cloudhopper/commons/charset/GSMCharset.java
#
# Encodes and decodes Ruby Strings to and from the SMS default alphabet,
# including the default extension table. Both tables are defined in
# GSM 03.38.
#
# All methods are module functions, e.g. GSMEncoder.encode('hi').
module GSMEncoder
  # Septet announcing that the next septet is looked up in EXT_CHAR_TABLE.
  EXTENDED_ESCAPE = 0x1b

  # Byte emitted by #encode for characters that cannot be represented.
  REPLACEMENT_CHAR = '?'

  # Base table: the array index is the septet code, the value the character.
  # Slot 0x1b is the escape code; the string stored there is only a
  # placeholder and is never used for encoding or decoding.
  CHAR_TABLE = [
    '@', "\u00a3", '$', "\u00a5", "\u00e8", "\u00e9", "\u00f9", "\u00ec",
    "\u00f2", "\u00c7", "\n", "\u00d8", "\u00f8", "\r", "\u00c5", "\u00e5",
    "\u0394", '_', "\u03a6", "\u0393", "\u039b", "\u03a9", "\u03a0", "\u03a8",
    "\u03a3", "\u0398", "\u039e", " ", "\u00c6", "\u00e6", "\u00df", "\u00c9",
    " ", '!', '"', '#', "\u00a4", '%', '&', "'",
    '(', ')', '*', '+', ',', '-', '.', '/',
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', ':', ';', '<', '=', '>', '?',
    "\u00a1", 'A', 'B', 'C', 'D', 'E', 'F', 'G',
    'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
    'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
    'X', 'Y', 'Z', "\u00c4", "\u00d6", "\u00d1", "\u00dc", "\u00a7",
    "\u00bf", 'a', 'b', 'c', 'd', 'e', 'f', 'g',
    'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
    'x', 'y', 'z', "\u00e4", "\u00f6", "\u00f1", "\u00fc", "\u00e0",
  ].freeze

  # Extension table, reached through EXTENDED_ESCAPE. The array index is the
  # septet that follows the escape. Slots holding the Integer 0 have no
  # extension character; being Integers they can never compare equal to a
  # character of the input String.
  EXT_CHAR_TABLE = [
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, "^", 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    '{', '}', 0, 0, 0, 0, 0, "\\",
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, '[', '~', ']', 0,
    '|', 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, "\u20ac", 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0,
  ].freeze

  # Reverse lookup (character => septet) for the base table. The escape slot
  # is skipped so 0x1b is never emitted as an ordinary character, and the
  # first occurrence wins for the one duplicated entry (the space).
  BASE_CODES = {}
  CHAR_TABLE.each_with_index do |char, code|
    next if code == EXTENDED_ESCAPE

    BASE_CODES[char] ||= code
  end
  BASE_CODES.freeze

  # Reverse lookup (character => septet) for the extension table; the
  # Integer placeholders are left out.
  EXT_CODES = {}
  EXT_CHAR_TABLE.each_with_index do |char, code|
    EXT_CODES[char] ||= code if char.is_a?(String)
  end
  EXT_CODES.freeze

  module_function

  # Verifies that this charset can represent every character of the String.
  #
  # @param str [String, nil] the String to verify
  # @return [Boolean] true if every character is found in the base or the
  #   extension table (also true for nil and for the empty String),
  #   otherwise false
  def can_encode?(str)
    return true unless str

    str.each_char.all? { |c| BASE_CODES.key?(c) || EXT_CODES.key?(c) }
  end

  # Encodes a String into GSM 03.38 septets, one septet per byte (unpacked).
  # Characters from the extension table are emitted as EXTENDED_ESCAPE
  # followed by their septet; characters found in neither table are replaced
  # with REPLACEMENT_CHAR.
  #
  # @param str [String, nil] the text to encode
  # @return [String, nil] a binary String, or nil when str is nil
  def encode(str)
    return nil unless str

    buffer = ''.encode('binary')
    begin
      str.each_char do |c|
        if (code = BASE_CODES[c])
          buffer << code
        elsif (code = EXT_CODES[c])
          buffer << EXTENDED_ESCAPE << code
        else
          buffer << REPLACEMENT_CHAR
        end
      end
    rescue StandardError
      # Best effort, kept for backward compatibility: if iterating the input
      # fails (e.g. it is not a String) the bytes encoded so far are returned
      # instead of raising.
    end
    buffer
  end

  # Decodes a binary String of unpacked GSM 03.38 septets into UTF-8 text.
  #
  # A septet following EXTENDED_ESCAPE is looked up in the extension table;
  # when that table has no character for it, the base-table character is
  # used instead, rather than appending a NUL. Bytes outside both tables
  # decode to '?'. An escape that is the last byte of the input is dropped.
  #
  # @param bstring [String, nil] the encoded bytes
  # @return [String, nil] the decoded UTF-8 String, or nil when bstring is nil
  def decode(bstring)
    return nil unless bstring

    buffer = ''.encode('utf-8')
    escaped = false
    bstring.each_byte do |code|
      if code == EXTENDED_ESCAPE
        # The next septet is taken from the extension table.
        escaped = true
        next
      end

      char = EXT_CHAR_TABLE[code] if escaped
      char = CHAR_TABLE[code] unless char.is_a?(String)
      buffer << (char || '?')
      # Go back to the base table.
      escaped = false
    end
    buffer
  end
end

metadata ADDED Viewed

@@ -0,0 +1,49 @@
+--- !ruby/object:Gem::Specification
+name: gsm_encoder
+version: !ruby/object:Gem::Version
+  version: 0.1.0
+  prerelease:
+platform: ruby
+authors:
+- Yury Korolev
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2011-08-18 00:00:00.000000000 Z
+dependencies: []
+description: ! "GSMEncoder encodes and decodes Ruby Strings to and from the SMS default\n
+  \ alphabet. It also supports the default extension table. The default alphabet\n
+  \ and it's extension table is defined in GSM 03.38"
+email:
+- yury.korolev@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/gsm_encoder.rb
+- README.md
+homepage: http://github.com/yury/gsm_encoder
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  none: false
+  requirements:
+  - - ! '>='
+    - !ruby/object:Gem::Version
+      version: 1.3.6
+requirements: []
+rubyforge_project:
+rubygems_version: 1.8.8
+signing_key:
+specification_version: 3
+summary: GSM 03.38 encoder/decoder
+test_files: []