charlock_holmes 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/.rspec +3 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +30 -0
- data/MIT-LICENSE +20 -0
- data/README.md +68 -0
- data/Rakefile +29 -0
- data/benchmark/detection.rb +39 -0
- data/benchmark/test.txt +693 -0
- data/charlock_holmes.gemspec +25 -0
- data/ext/charlock_holmes/charlock_holmes.c +119 -0
- data/ext/charlock_holmes/extconf.rb +10 -0
- data/lib/charlock_holmes.rb +6 -0
- data/lib/charlock_holmes/encoding_detector.rb +12 -0
- data/lib/charlock_holmes/string.rb +28 -0
- data/lib/charlock_holmes/version.rb +3 -0
- data/spec/encoding_detector_spec.rb +54 -0
- data/spec/fixtures/AnsiGraph.psm1 +0 -0
- data/spec/fixtures/TwigExtensionsDate.es.yml +8 -0
- data/spec/fixtures/cl-messagepack.lisp +264 -0
- data/spec/fixtures/core.rkt +254 -0
- data/spec/fixtures/laholator.py +131 -0
- data/spec/fixtures/repl2.cljs +109 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/string_method_spec.rb +22 -0
- metadata +117 -0
@@ -0,0 +1,25 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
# Load the version constant relative to this file (not the current working
# directory) so the gemspec can be evaluated from anywhere.
require File.expand_path('../lib/charlock_holmes/version', __FILE__) unless defined? CharlockHolmes::VERSION

Gem::Specification.new do |s|
  s.name = %q{charlock_holmes}
  s.version = CharlockHolmes::VERSION
  s.authors = ["Brian Lopez", "Vicent Martí"]
  s.date = Time.now.utc.strftime("%Y-%m-%d")
  s.email = %q{seniorlopez@gmail.com}
  # The native extension is built from this extconf at install time.
  s.extensions = ["ext/charlock_holmes/extconf.rb"]
  # File lists come from git, so packaging must run inside a git checkout.
  s.files = `git ls-files`.split("\n")
  s.homepage = %q{http://github.com/brianmario/charlock_holmes}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib", "ext"]
  s.rubygems_version = %q{1.4.2}
  s.summary = %q{Character encoding detection, brought to you by ICU}
  s.test_files = `git ls-files spec`.split("\n")

  # tests
  s.add_development_dependency 'rake-compiler', ">= 0.7.5"
  s.add_development_dependency 'rspec', ">= 2.0.0"
  # benchmarks
  s.add_development_dependency 'chardet'
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
#include "unicode/ucsdet.h"
|
2
|
+
|
3
|
+
#include <ruby.h>
|
4
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
5
|
+
#include <ruby/encoding.h>
|
6
|
+
#endif
|
7
|
+
|
8
|
+
static VALUE rb_mCharlockHolmes;
|
9
|
+
static VALUE rb_cEncodingDetector;
|
10
|
+
|
11
|
+
/*
 * Build a Ruby String from a NUL-terminated C string.
 *
 * str - NUL-terminated bytes; must not be NULL (strlen is applied to it).
 *
 * When <ruby/encoding.h> is available the string is created through
 * rb_external_str_new_with_enc with the UTF-8 encoding; otherwise a plain
 * rb_str_new2 copy is returned.
 */
static VALUE charlock_new_str2(const char *str)
{
#ifndef HAVE_RUBY_ENCODING_H
	return rb_str_new2(str);
#else
	const long byte_len = (long)strlen(str);

	return rb_external_str_new_with_enc(str, byte_len, rb_utf8_encoding());
#endif
}
|
19
|
+
|
20
|
+
static VALUE rb_encdec_buildmatch(const UCharsetMatch *match)
|
21
|
+
{
|
22
|
+
UErrorCode status = U_ZERO_ERROR;
|
23
|
+
const char *mname;
|
24
|
+
const char *mlang;
|
25
|
+
int mconfidence;
|
26
|
+
VALUE rb_match;
|
27
|
+
|
28
|
+
if (!match)
|
29
|
+
return Qnil;
|
30
|
+
|
31
|
+
mname = ucsdet_getName(match, &status);
|
32
|
+
mlang = ucsdet_getLanguage(match, &status);
|
33
|
+
mconfidence = ucsdet_getConfidence(match, &status);
|
34
|
+
|
35
|
+
rb_match = rb_hash_new();
|
36
|
+
|
37
|
+
rb_hash_aset(rb_match, ID2SYM(rb_intern("encoding")), charlock_new_str2(mname));
|
38
|
+
rb_hash_aset(rb_match, ID2SYM(rb_intern("confidence")), INT2NUM(mconfidence));
|
39
|
+
|
40
|
+
if (mlang && mlang[0])
|
41
|
+
rb_hash_aset(rb_match, ID2SYM(rb_intern("language")), charlock_new_str2(mlang));
|
42
|
+
|
43
|
+
return rb_match;
|
44
|
+
}
|
45
|
+
|
46
|
+
/*
|
47
|
+
* call-seq: detection_hash = EncodingDetector.detect "some string"
|
48
|
+
*
|
49
|
+
* Attempt to detect the encoding of this string
|
50
|
+
*
|
51
|
+
* Returns: a Hash with :encoding, :language and :confidence
|
52
|
+
*/
|
53
|
+
static VALUE rb_encdec_detect(VALUE self, VALUE rb_str)
|
54
|
+
{
|
55
|
+
UErrorCode status = U_ZERO_ERROR;
|
56
|
+
UCharsetDetector *csd;
|
57
|
+
|
58
|
+
Check_Type(rb_str, T_STRING);
|
59
|
+
Data_Get_Struct(self, UCharsetDetector, csd);
|
60
|
+
|
61
|
+
ucsdet_setText(csd, RSTRING_PTR(rb_str), (int32_t)RSTRING_LEN(rb_str), &status);
|
62
|
+
return rb_encdec_buildmatch(ucsdet_detect(csd, &status));
|
63
|
+
}
|
64
|
+
|
65
|
+
|
66
|
+
/*
|
67
|
+
* call-seq: detection_hash_array = EncodingDetector.detect_all "some string"
|
68
|
+
*
|
69
|
+
* Attempt to detect the encoding of this string, and return
|
70
|
+
* a list with all the possible encodings that match it.
|
71
|
+
*
|
72
|
+
* Returns: a List with zero or more Hashes,
|
73
|
+
* each one of them with with :encoding, :language and :confidence
|
74
|
+
*/
|
75
|
+
static VALUE rb_encdec_detect_all(VALUE self, VALUE rb_str)
|
76
|
+
{
|
77
|
+
UErrorCode status = U_ZERO_ERROR;
|
78
|
+
UCharsetDetector *csd;
|
79
|
+
const UCharsetMatch **csm;
|
80
|
+
VALUE rb_ret;
|
81
|
+
int i, match_count;
|
82
|
+
|
83
|
+
Check_Type(rb_str, T_STRING);
|
84
|
+
Data_Get_Struct(self, UCharsetDetector, csd);
|
85
|
+
|
86
|
+
rb_ret = rb_ary_new();
|
87
|
+
|
88
|
+
ucsdet_setText(csd, RSTRING_PTR(rb_str), (int32_t)RSTRING_LEN(rb_str), &status);
|
89
|
+
csm = ucsdet_detectAll(csd, &match_count, &status);
|
90
|
+
|
91
|
+
for (i = 0; i < match_count; ++i) {
|
92
|
+
rb_ary_push(rb_ret, rb_encdec_buildmatch(csm[i]));
|
93
|
+
}
|
94
|
+
|
95
|
+
return rb_ret;
|
96
|
+
}
|
97
|
+
|
98
|
+
|
99
|
+
static void rb_encdec__free(void *csd)
|
100
|
+
{
|
101
|
+
ucsdet_close((UCharsetDetector *)csd);
|
102
|
+
}
|
103
|
+
|
104
|
+
/*
 * Allocator for EncodingDetector: opens an ICU charset detector and wraps
 * it in a Ruby object whose free callback is rb_encdec__free.
 *
 * Raises RuntimeError when ICU cannot open a detector, instead of wrapping
 * a NULL handle that later calls would operate on.
 */
static VALUE rb_encdec__alloc(VALUE klass)
{
	UErrorCode status = U_ZERO_ERROR;
	UCharsetDetector *csd = ucsdet_open(&status);

	if (U_FAILURE(status) || !csd)
		rb_raise(rb_eRuntimeError, "unable to open ICU charset detector: %s",
			u_errorName(status));

	return Data_Wrap_Struct(klass, NULL, rb_encdec__free, (void *)csd);
}
|
110
|
+
|
111
|
+
void Init_charlock_holmes()
|
112
|
+
{
|
113
|
+
rb_mCharlockHolmes = rb_define_module("CharlockHolmes");
|
114
|
+
|
115
|
+
rb_cEncodingDetector = rb_define_class_under(rb_mCharlockHolmes, "EncodingDetector", rb_cObject);
|
116
|
+
rb_define_alloc_func(rb_cEncodingDetector, rb_encdec__alloc);
|
117
|
+
rb_define_method(rb_cEncodingDetector, "detect", rb_encdec_detect, 1);
|
118
|
+
rb_define_method(rb_cEncodingDetector, "detect_all", rb_encdec_detect_all, 1);
|
119
|
+
}
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module CharlockHolmes
  class EncodingDetector
    class << self
      # Class-level convenience wrapper around the instance method #detect.
      #
      # NOTE: A fresh CharlockHolmes::EncodingDetector is instantiated on
      # every call.
      #
      # str - the String to inspect
      #
      # Returns: whatever the instance-level #detect returns for str
      # (a Hash with :encoding, :language and :confidence)
      def detect(str)
        detector = new
        detector.detect(str)
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'charlock_holmes' unless defined? CharlockHolmes
|
2
|
+
|
3
|
+
class String
  # Attempt to detect the encoding of this string
  #
  # Returns: a Hash with :encoding, :language and :confidence
  def detect_encoding
    encoding_detector.detect(self)
  end

  # Feature-test for force_encoding instead of matching RUBY_VERSION, so the
  # method is defined on every Ruby whose String supports encodings.
  if method_defined?(:force_encoding)
    # Attempt to detect the encoding of this string
    # then set the encoding to what was detected ala `force_encoding`
    #
    # Returns: a Hash with :encoding, :language and :confidence,
    # or nil when nothing was detected
    def detect_encoding!
      if detected = self.detect_encoding
        self.force_encoding detected[:encoding]
        detected
      end
    end
  end

  protected
  # Detector used by #detect_encoding, memoized on the string itself.
  # A frozen string cannot take a new instance variable, so it gets a
  # throwaway detector instead.
  def encoding_detector
    return CharlockHolmes::EncodingDetector.new if frozen?
    @encoding_detector ||= CharlockHolmes::EncodingDetector.new
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# NOTE(review): `test`, `assert` and `assert_equal` are not defined in this
# file; presumably spec_helper provides them - confirm.
describe CharlockHolmes::EncodingDetector do
  before :all do
    @detector = CharlockHolmes::EncodingDetector.new
  end

  test 'has a detect class-level method' do
    # Assert the respond_to? result; a bare call checks nothing.
    assert CharlockHolmes::EncodingDetector.respond_to?(:detect)
    detected = CharlockHolmes::EncodingDetector.detect 'test'
    assert_equal 'ISO-8859-1', detected[:encoding]
  end

  test 'has a detect method' do
    assert @detector.respond_to?(:detect)
    detected = @detector.detect 'test'
    assert_equal 'ISO-8859-1', detected[:encoding]
  end

  test 'has a detect_all method' do
    assert @detector.respond_to?(:detect_all)
    detected_list = @detector.detect_all 'test'
    encoding_list = detected_list.map {|d| d[:encoding]}.sort
    assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
  end

  context 'encoding detection' do
    # Fixture file name paired with the encoding the detector should report.
    MAPPING = [
      ['repl2.cljs', 'ISO-8859-1'],
      ['core.rkt', 'UTF-8'],
      ['cl-messagepack.lisp', 'ISO-8859-1'],
      ['TwigExtensionsDate.es.yml', 'UTF-8'],
      ['AnsiGraph.psm1', 'UTF-16LE'],
      ['laholator.py', 'UTF-8']
    ]

    MAPPING.each do |mapping|
      file, encoding = mapping

      test "#{file} should be detected as #{encoding}" do
        path = File.expand_path "../fixtures/#{file}", __FILE__
        content = File.read path
        guessed = @detector.detect content

        assert_equal encoding, guessed[:encoding]

        # Only encoding-aware Rubies can verify the bytes against the guess.
        if content.respond_to? :force_encoding
          content.force_encoding guessed[:encoding]
          assert content.valid_encoding?
        end
      end
    end
  end
end
|
Binary file
|
@@ -0,0 +1,8 @@
|
|
1
|
+
date.year: '%year% año|%year% años'
|
2
|
+
date.month: '%month% mes|%month% meses'
|
3
|
+
date.day: '%day% día|%day% días'
|
4
|
+
date.hour: '%hour% hora|%hour% horas'
|
5
|
+
date.minute: '%minute% minuto|%minute% minutos'
|
6
|
+
date.second: '%second% segundo|%second% segundos'
|
7
|
+
date.new: 'menos de un minuto'
|
8
|
+
date.and: ' y '
|
@@ -0,0 +1,264 @@
|
|
1
|
+
;;;; cl-messagepack.lisp
|
2
|
+
|
3
|
+
(in-package #:messagepack)
|
4
|
+
|
5
|
+
(declaim (optimize (debug 3)))
|
6
|
+
|
7
|
+
(eval-when (:compile-toplevel :load-toplevel :execute)
  ;; Defined at compile time as well, because the SIGNED-UNSIGNED-CONVERTORS
  ;; macro calls MKSYMB while it expands.
  (defun mkstr (&rest args)
    "Return one string made of the PRINC representations of ARGS, in order."
    (with-output-to-string (out)
      (dolist (arg args)
        (princ arg out))))
  (defun mksymb (&rest args)
    "Intern, in the current package, the symbol whose name is (mkstr ARGS...)."
    (let ((name (apply #'mkstr args)))
      (intern name))))
|
12
|
+
|
13
|
+
(defmacro signed-unsigned-convertors (size)
  "Define SB<SIZE>->UB<SIZE> and UB<SIZE>->SB<SIZE>, which convert between the
signed and unsigned readings of a SIZE-bit two's-complement integer.
SIZE must be a literal integer; it is used at macroexpansion time."
  (let ((speed (if (< size 32) 3 0))
        (to-unsigned (mksymb 'sb size '-> 'ub size))
        (to-signed (mksymb 'ub size '-> 'sb size))
        (modulus (expt 2 size))
        (half (expt 2 (1- size))))
    `(progn
       (defun ,to-unsigned (sb)
         (declare (optimize (debug 0) (safety 0) (speed ,speed))
                  (type (integer ,(- half) ,(1- half)) sb))
         ;; Negative values are reduced to their low SIZE bits.
         (if (minusp sb)
             (ldb (byte ,size 0) sb)
             sb))
       (defun ,to-signed (sb)
         (declare (optimize (debug 0) (safety 0) (speed ,speed))
                  (type (mod ,modulus) sb))
         ;; A set top bit marks a negative value: subtract 2^SIZE.
         (if (logbitp ,(1- size) sb)
             (- sb ,modulus)
             sb)))))
|
28
|
+
|
29
|
+
(signed-unsigned-convertors 8)
|
30
|
+
(signed-unsigned-convertors 16)
|
31
|
+
(signed-unsigned-convertors 32)
|
32
|
+
(signed-unsigned-convertors 64)
|
33
|
+
|
34
|
+
(defun write-hex (data)
  "Print the elements of the sequence DATA to standard output as two-digit
hexadecimal numbers, each followed by a space, sixteen per line."
  (let ((row '())
        (filled 0))
    (map nil
         (lambda (octet)
           (push octet row)
           ;; A full row of sixteen is printed and a new one started.
           (when (= (incf filled) 16)
             (format t "~{~2,'0x ~}~%" (nreverse row))
             (setf row '()
                   filled 0)))
         data)
    ;; Print whatever is left over in a final, shorter row.
    (when row
      (format t "~{~2,'0x ~}~%" (nreverse row)))))
|
45
|
+
|
46
|
+
(defun encode (data)
  "Run ENCODE-STREAM on DATA against a flexi-streams in-memory output stream
and return the sequence that stream collected."
  (flexi-streams:with-output-to-sequence (out)
    (encode-stream data out)))
|
49
|
+
|
50
|
+
(defun make-hash (data)
  "Build a hash table from DATA, a list of entries. Each entry is either a
dotted pair (KEY . VALUE) or a list (KEY VALUE ...) whose second element is
the value. Returns the new table."
  (loop with table = (make-hash-table)
        for entry in data
        do (setf (gethash (car entry) table)
                 (if (consp (cdr entry))
                     (cadr entry)
                     (cdr entry)))
        finally (return table)))
|
58
|
+
|
59
|
+
(defun is-byte-array (data-type)
  "True when DATA-TYPE is a vector whose element type is (UNSIGNED-BYTE 8)."
  (when (vectorp data-type)
    (equal (array-element-type data-type) '(unsigned-byte 8))))
|
62
|
+
|
63
|
+
(defun encode-stream (data stream)
  "Write DATA to STREAM, dispatching on its type to the matching encoder.
Signals an error for data of any other type."
  ;; Clause order matters: NIL and T are caught before SYMBOL, and strings
  ;; and byte vectors before the generic vector case.
  (typecase data
    (float (encode-float data stream))
    (number (encode-integer data stream))
    (null (write-byte #xc0 stream))
    ((eql t) (write-byte #xc3 stream))
    (string (encode-string data stream))
    ((satisfies is-byte-array) (encode-raw-bytes data stream))
    ((or cons vector) (encode-array data stream))
    (hash-table (encode-hash data stream))
    (symbol (encode-string (symbol-name data) stream))
    (otherwise (error "Cannot encode data."))))
|
79
|
+
|
80
|
+
(defun encode-string (data stream)
  "Convert the string DATA to octets with babel and write them to STREAM
through ENCODE-RAW-BYTES."
  (let ((octets (babel:string-to-octets data)))
    (encode-raw-bytes octets stream)))
|
82
|
+
|
83
|
+
#+sbcl (defun sbcl-encode-float (data stream)
  "Write the float DATA to STREAM: #xca plus 4 big-endian bytes for a
single-float, #xcb plus 8 big-endian bytes for a double-float. Floats of any
other type write nothing. Always returns T."
  ;; The single-float bits and the high word of a double come back as signed
  ;; integers, and STORE-BIG-ENDIAN only emits bytes for positive numbers, so
  ;; reduce them to their unsigned 32-bit form first. Without this every
  ;; negative float was written as zeros.
  (flet ((u32 (bits) (ldb (byte 32 0) bits)))
    (typecase data
      (single-float
       (write-byte #xca stream)
       (store-big-endian (u32 (sb-kernel:single-float-bits data)) stream 4))
      (double-float
       (write-byte #xcb stream)
       (store-big-endian (u32 (sb-kernel:double-float-high-bits data)) stream 4)
       (store-big-endian (sb-kernel:double-float-low-bits data) stream 4))))
  t)
|
92
|
+
|
93
|
+
(defun encode-float (data stream)
  "Write the float DATA to STREAM. Only SBCL is supported; on any other
implementation an error is signalled."
  (declare (ignorable data stream))
  #+sbcl (sbcl-encode-float data stream)
  #-sbcl (error "No floating point support yet."))
|
96
|
+
|
97
|
+
(defun encode-each (data stream &optional (encoder #'encode-stream))
  "Call ENCODER with each element of DATA and STREAM. For a hash table the
key is passed first, then its value, for every entry; for a vector or list
the elements are passed in order. Signals an error for any other DATA."
  (flet ((emit (item)
           (funcall encoder item stream)))
    (cond ((hash-table-p data)
           (maphash (lambda (key value)
                      (emit key)
                      (emit value))
                    data))
          ((or (vectorp data) (consp data))
           (mapc #'emit (coerce data 'list)))
          (t
           (error "Not sequence or hash table.")))))
|
108
|
+
|
109
|
+
(defun encode-sequence (data stream
                        short-prefix short-length
                        typecode-16 typecode-32
                        &optional (encoder #'encode-stream))
  "Write a length header for DATA (a sequence or hash table) to STREAM, then
its elements via ENCODE-EACH using ENCODER.
Lengths up to SHORT-LENGTH are folded into one byte (SHORT-PREFIX + length);
lengths up to 65535 use TYPECODE-16 and a 2-byte length; lengths below 2^32
use TYPECODE-32 and a 4-byte length. Larger inputs write nothing and
return NIL."
  (let ((len (if (hash-table-p data)
                 (hash-table-count data)
                 (length data))))
    ;; Header first; the element payload is the same in every case.
    (cond ((<= 0 len short-length)
           (write-byte (+ short-prefix len) stream))
          ((<= 0 len 65535)
           (write-byte typecode-16 stream)
           (store-big-endian len stream 2))
          ((<= 0 len (1- (expt 2 32)))
           (write-byte typecode-32 stream)
           (store-big-endian len stream 4))
          (t
           (return-from encode-sequence nil)))
    (encode-each data stream encoder)))
|
127
|
+
|
128
|
+
(defun encode-hash (data stream)
  "Write the hash table DATA to STREAM as a map: a one-byte #x80-based
header for up to 15 entries, otherwise typecode #xde with a 16-bit count or
#xdf with a 32-bit count, followed by each key and value."
  ;; #xde / #xdf are the map typecodes that DECODE-STREAM reads back; the
  ;; array codes #xdc / #xdd made large maps decode as arrays.
  (encode-sequence data stream #x80 15 #xde #xdf))
|
130
|
+
|
131
|
+
(defun encode-array (data stream)
  "Write the list or vector DATA to STREAM as an array: a one-byte
#x90-based header for up to 15 elements, otherwise typecode #xdc (16-bit
length) or #xdd (32-bit length), followed by the encoded elements."
  (let ((short-prefix #x90)
        (short-max 15)
        (array-16 #xdc)
        (array-32 #xdd))
    (encode-sequence data stream short-prefix short-max array-16 array-32)))
|
133
|
+
|
134
|
+
(defun encode-raw-bytes (data stream)
  "Write the byte sequence DATA to STREAM as raw bytes: a one-byte
#xa0-based header for up to 31 bytes, otherwise typecode #xda (16-bit
length) or #xdb (32-bit length), followed by the bytes themselves."
  (let ((short-prefix #xa0)
        (short-max 31)
        (raw-16 #xda)
        (raw-32 #xdb))
    ;; Elements are already octets, so they go out with WRITE-BYTE directly.
    (encode-sequence data stream short-prefix short-max raw-16 raw-32 #'write-byte)))
|
136
|
+
|
137
|
+
(defun encode-integer (data stream)
  "Write the integer DATA to STREAM using the first encoding that fits:
a single byte for 0..127 and -32..-1, then #xcc/#xcd/#xce/#xcf for unsigned
8/16/32/64-bit values, then #xd0/#xd1/#xd2/#xd3 for signed 8/16/32/64-bit
values. Signals an error when DATA fits none of these."
  (cond ((<= 0 data 127) (write-byte data stream))
        ((<= -32 data -1) (write-byte (sb8->ub8 data) stream))
        ((<= 0 data 255)
         (write-byte #xcc stream)
         (write-byte data stream))
        ((<= 0 data 65535)
         (write-byte #xcd stream)
         (store-big-endian data stream 2))
        ((<= 0 data (1- (expt 2 32)))
         (write-byte #xce stream)
         (store-big-endian data stream 4))
        ((<= 0 data (1- (expt 2 64)))
         (write-byte #xcf stream)
         (store-big-endian data stream 8))
        ;; Only negative values reach the signed clauses below.
        ((<= -128 data 127)
         (write-byte #xd0 stream)
         (write-byte (sb8->ub8 data) stream))
        ;; The multi-byte signed forms need STORE-BIG-ENDIAN: a single
        ;; WRITE-BYTE cannot emit a 16/32/64-bit payload.
        ((<= -32768 data 32767)
         (write-byte #xd1 stream)
         (store-big-endian (sb16->ub16 data) stream 2))
        ((<= (- (expt 2 31)) data (1- (expt 2 31)))
         (write-byte #xd2 stream)
         (store-big-endian (sb32->ub32 data) stream 4))
        ((<= (- (expt 2 63)) data (1- (expt 2 63)))
         (write-byte #xd3 stream)
         (store-big-endian (sb64->ub64 data) stream 8))
        (t (error "Integer too large or too small."))))
|
165
|
+
|
166
|
+
(defun store-big-endian (number stream byte-count)
  "Write NUMBER to STREAM as exactly BYTE-COUNT bytes, most significant byte
first, padding with leading zero bytes. Signals an error when NUMBER needs
more than BYTE-COUNT bytes. Numbers that are not positive contribute no
bytes of their own, so only the zero padding is written."
  (let ((bytes '()))
    ;; Peel off the low byte repeatedly; pushing leaves the most
    ;; significant byte at the front of the list.
    (do ((rest number (ash rest -8)))
        ((not (> rest 0)))
      (push (rem rest 256) bytes))
    (let ((missing (- byte-count (length bytes))))
      (when (minusp missing)
        (error "Number too large."))
      (write-sequence (nconc (make-list missing :initial-element 0) bytes)
                      stream))))
|
180
|
+
|
181
|
+
(defun decode (byte-array)
  "Decode one value from BYTE-ARRAY by reading it through a flexi-streams
in-memory input stream with DECODE-STREAM."
  (flexi-streams:with-input-from-sequence (in byte-array)
    (decode-stream in)))
|
184
|
+
|
185
|
+
(defun decode-stream (stream)
  "Read one encoded value from STREAM and return it. The first byte selects
the type: integers, NIL (#xc0 and #xc2), T (#xc3), floats (SBCL only), raw
byte strings, arrays and maps. Signals an error for an unrecognized type
byte instead of silently returning NIL."
  (let ((byte (read-byte stream)))
    (cond ((= 0 (ldb (byte 1 7) byte))
           ;; Top bit clear: the byte is the value itself (0..127).
           byte)
          ((= 7 (ldb (byte 3 5) byte))
           ;; Top three bits set: small negative integer stored in one byte.
           (ub8->sb8 byte))
          ((= #xcc byte)
           (read-byte stream))
          ((= #xcd byte)
           (load-big-endian stream 2))
          ((= #xce byte)
           (load-big-endian stream 4))
          ((= #xcf byte)
           (load-big-endian stream 8))
          ((= #xd0 byte)
           (ub8->sb8 (read-byte stream)))
          ((= #xd1 byte)
           (ub16->sb16 (load-big-endian stream 2)))
          ((= #xd2 byte)
           (ub32->sb32 (load-big-endian stream 4)))
          ((= #xd3 byte)
           (ub64->sb64 (load-big-endian stream 8)))
          ((= #xc0 byte)
           nil)
          ((= #xc3 byte)
           t)
          ((= #xc2 byte)
           nil)
          ;; The float constructors take the sign-carrying word as a signed
          ;; 32-bit integer, so convert what LOAD-BIG-ENDIAN read as unsigned.
          ((= #xca byte)
           (or #+sbcl (sb-kernel:make-single-float
                       (ub32->sb32 (load-big-endian stream 4)))
               #-(or sbcl) (error "No floating point support yet.")))
          ((= #xcb byte)
           (or #+sbcl (sb-kernel:make-double-float
                       (ub32->sb32 (load-big-endian stream 4))
                       (load-big-endian stream 4))
               #-(or sbcl) (error "No floating point support yet.")))
          ((= 5 (ldb (byte 3 5) byte))
           ;; Short raw bytes: the length is in the low five bits.
           (decode-raw-sequence (ldb (byte 5 0) byte) stream))
          ((= #xda byte)
           (decode-raw-sequence (load-big-endian stream 2) stream))
          ((= #xdb byte)
           (decode-raw-sequence (load-big-endian stream 4) stream))
          ((= 9 (ldb (byte 4 4) byte))
           ;; Short array: the length is in the low four bits.
           (decode-array (- byte #x90) stream))
          ((= #xdc byte)
           (decode-array (load-big-endian stream 2) stream))
          ((= #xdd byte)
           (decode-array (load-big-endian stream 4) stream))
          ((= 8 (ldb (byte 4 4) byte))
           ;; Short map: the entry count is in the low four bits.
           (decode-map (- byte #x80) stream))
          ((= #xde byte)
           (decode-map (load-big-endian stream 2) stream))
          ((= #xdf byte)
           (decode-map (load-big-endian stream 4) stream))
          (t
           (error "Unsupported type byte: #x~2,'0x" byte)))))
|
238
|
+
|
239
|
+
(defun decode-map (length stream)
  "Read LENGTH key/value pairs from STREAM with DECODE-STREAM and return
them in a new EQUAL hash table."
  (let ((table (make-hash-table :test #'equal)))
    (dotimes (n length table)
      (declare (ignorable n))
      ;; LET* makes the read order explicit: key first, then value.
      (let* ((key (decode-stream stream))
             (value (decode-stream stream)))
        (setf (gethash key table) value)))))
|
246
|
+
|
247
|
+
(defun decode-array (length stream)
  "Read LENGTH values from STREAM with DECODE-STREAM and return them, in
order, in a new vector."
  (loop with items = (make-array length)
        for index from 0 below length
        do (setf (aref items index) (decode-stream stream))
        finally (return items)))
|
252
|
+
|
253
|
+
(defun decode-raw-sequence (length stream)
  "Read into a LENGTH-element octet buffer from STREAM and return the
buffer converted to a string by babel."
  (let ((octets (make-array length :element-type '(mod 256))))
    (read-sequence octets stream)
    (babel:octets-to-string octets)))
|
257
|
+
|
258
|
+
(defun load-big-endian (stream byte-count)
  "Read BYTE-COUNT bytes from STREAM and return them combined into one
non-negative integer, first byte most significant."
  (let ((value 0))
    (dotimes (n byte-count value)
      (declare (ignorable n))
      ;; Shift what was read so far up by one byte and append the next one.
      (setf value (+ (* value 256) (read-byte stream))))))
|