charlock_holmes 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/.rspec +3 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +30 -0
- data/MIT-LICENSE +20 -0
- data/README.md +68 -0
- data/Rakefile +29 -0
- data/benchmark/detection.rb +39 -0
- data/benchmark/test.txt +693 -0
- data/charlock_holmes.gemspec +25 -0
- data/ext/charlock_holmes/charlock_holmes.c +119 -0
- data/ext/charlock_holmes/extconf.rb +10 -0
- data/lib/charlock_holmes.rb +6 -0
- data/lib/charlock_holmes/encoding_detector.rb +12 -0
- data/lib/charlock_holmes/string.rb +28 -0
- data/lib/charlock_holmes/version.rb +3 -0
- data/spec/encoding_detector_spec.rb +54 -0
- data/spec/fixtures/AnsiGraph.psm1 +0 -0
- data/spec/fixtures/TwigExtensionsDate.es.yml +8 -0
- data/spec/fixtures/cl-messagepack.lisp +264 -0
- data/spec/fixtures/core.rkt +254 -0
- data/spec/fixtures/laholator.py +131 -0
- data/spec/fixtures/repl2.cljs +109 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/string_method_spec.rb +22 -0
- metadata +117 -0
@@ -0,0 +1,25 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require './lib/charlock_holmes/version' unless defined? CharlockHolmes::VERSION
|
4
|
+
|
5
|
+
# Packaging metadata for the charlock_holmes gem.
# The file lists are taken from `git ls-files`, so the gem must be built
# from inside a git checkout.
Gem::Specification.new do |spec|
  # -- identity ------------------------------------------------------------
  spec.name             = 'charlock_holmes'
  spec.version          = CharlockHolmes::VERSION
  spec.authors          = ['Brian Lopez', 'Vicent Martí']
  spec.date             = Time.now.utc.strftime('%Y-%m-%d')
  spec.email            = 'seniorlopez@gmail.com'

  # -- contents ------------------------------------------------------------
  spec.extensions       = ['ext/charlock_holmes/extconf.rb']
  spec.files            = `git ls-files`.split("\n")
  spec.homepage         = 'http://github.com/brianmario/charlock_holmes'
  spec.rdoc_options     = ['--charset=UTF-8']
  spec.require_paths    = ['lib', 'ext']
  spec.rubygems_version = '1.4.2'
  spec.summary          = 'Character encoding detection, brought to you by ICU'
  spec.test_files       = `git ls-files spec`.split("\n")

  # -- development dependencies: tests -------------------------------------
  spec.add_development_dependency 'rake-compiler', '>= 0.7.5'
  spec.add_development_dependency 'rspec', '>= 2.0.0'

  # -- development dependencies: benchmarks --------------------------------
  spec.add_development_dependency 'chardet'
end
|
@@ -0,0 +1,119 @@
|
|
1
|
+
#include "unicode/ucsdet.h"
|
2
|
+
|
3
|
+
#include <ruby.h>
|
4
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
5
|
+
#include <ruby/encoding.h>
|
6
|
+
#endif
|
7
|
+
|
8
|
+
static VALUE rb_mCharlockHolmes;
|
9
|
+
static VALUE rb_cEncodingDetector;
|
10
|
+
|
11
|
+
/*
 * Build a Ruby String from the NUL-terminated C string `str`.
 *
 * When Ruby's encoding header is available the bytes are passed to
 * rb_external_str_new_with_enc() together with the UTF-8 encoding;
 * otherwise the string is created with rb_str_new2().
 */
static VALUE charlock_new_str2(const char *str)
{
#ifndef HAVE_RUBY_ENCODING_H
	return rb_str_new2(str);
#else
	const long byte_len = (long)strlen(str);

	return rb_external_str_new_with_enc(str, byte_len, rb_utf8_encoding());
#endif
}
|
19
|
+
|
20
|
+
static VALUE rb_encdec_buildmatch(const UCharsetMatch *match)
|
21
|
+
{
|
22
|
+
UErrorCode status = U_ZERO_ERROR;
|
23
|
+
const char *mname;
|
24
|
+
const char *mlang;
|
25
|
+
int mconfidence;
|
26
|
+
VALUE rb_match;
|
27
|
+
|
28
|
+
if (!match)
|
29
|
+
return Qnil;
|
30
|
+
|
31
|
+
mname = ucsdet_getName(match, &status);
|
32
|
+
mlang = ucsdet_getLanguage(match, &status);
|
33
|
+
mconfidence = ucsdet_getConfidence(match, &status);
|
34
|
+
|
35
|
+
rb_match = rb_hash_new();
|
36
|
+
|
37
|
+
rb_hash_aset(rb_match, ID2SYM(rb_intern("encoding")), charlock_new_str2(mname));
|
38
|
+
rb_hash_aset(rb_match, ID2SYM(rb_intern("confidence")), INT2NUM(mconfidence));
|
39
|
+
|
40
|
+
if (mlang && mlang[0])
|
41
|
+
rb_hash_aset(rb_match, ID2SYM(rb_intern("language")), charlock_new_str2(mlang));
|
42
|
+
|
43
|
+
return rb_match;
|
44
|
+
}
|
45
|
+
|
46
|
+
/*
|
47
|
+
* call-seq: detection_hash = EncodingDetector.detect "some string"
|
48
|
+
*
|
49
|
+
* Attempt to detect the encoding of this string
|
50
|
+
*
|
51
|
+
* Returns: a Hash with :encoding, :language and :confidence
|
52
|
+
*/
|
53
|
+
static VALUE rb_encdec_detect(VALUE self, VALUE rb_str)
|
54
|
+
{
|
55
|
+
UErrorCode status = U_ZERO_ERROR;
|
56
|
+
UCharsetDetector *csd;
|
57
|
+
|
58
|
+
Check_Type(rb_str, T_STRING);
|
59
|
+
Data_Get_Struct(self, UCharsetDetector, csd);
|
60
|
+
|
61
|
+
ucsdet_setText(csd, RSTRING_PTR(rb_str), (int32_t)RSTRING_LEN(rb_str), &status);
|
62
|
+
return rb_encdec_buildmatch(ucsdet_detect(csd, &status));
|
63
|
+
}
|
64
|
+
|
65
|
+
|
66
|
+
/*
|
67
|
+
* call-seq: detection_hash_array = EncodingDetector.detect_all "some string"
|
68
|
+
*
|
69
|
+
* Attempt to detect the encoding of this string, and return
|
70
|
+
* a list with all the possible encodings that match it.
|
71
|
+
*
|
72
|
+
* Returns: a List with zero or more Hashes,
|
73
|
+
* each one of them with with :encoding, :language and :confidence
|
74
|
+
*/
|
75
|
+
static VALUE rb_encdec_detect_all(VALUE self, VALUE rb_str)
|
76
|
+
{
|
77
|
+
UErrorCode status = U_ZERO_ERROR;
|
78
|
+
UCharsetDetector *csd;
|
79
|
+
const UCharsetMatch **csm;
|
80
|
+
VALUE rb_ret;
|
81
|
+
int i, match_count;
|
82
|
+
|
83
|
+
Check_Type(rb_str, T_STRING);
|
84
|
+
Data_Get_Struct(self, UCharsetDetector, csd);
|
85
|
+
|
86
|
+
rb_ret = rb_ary_new();
|
87
|
+
|
88
|
+
ucsdet_setText(csd, RSTRING_PTR(rb_str), (int32_t)RSTRING_LEN(rb_str), &status);
|
89
|
+
csm = ucsdet_detectAll(csd, &match_count, &status);
|
90
|
+
|
91
|
+
for (i = 0; i < match_count; ++i) {
|
92
|
+
rb_ary_push(rb_ret, rb_encdec_buildmatch(csm[i]));
|
93
|
+
}
|
94
|
+
|
95
|
+
return rb_ret;
|
96
|
+
}
|
97
|
+
|
98
|
+
|
99
|
+
static void rb_encdec__free(void *csd)
|
100
|
+
{
|
101
|
+
ucsdet_close((UCharsetDetector *)csd);
|
102
|
+
}
|
103
|
+
|
104
|
+
/*
 * Allocator for CharlockHolmes::EncodingDetector.
 *
 * Opens a new ICU charset detector and wraps it in a Ruby object whose
 * free callback is rb_encdec__free().
 *
 * Raises: RuntimeError (with the ICU error name) when the detector cannot
 * be opened, instead of wrapping a NULL detector.
 */
static VALUE rb_encdec__alloc(VALUE klass)
{
	UErrorCode status = U_ZERO_ERROR;
	UCharsetDetector *csd = ucsdet_open(&status);

	if (U_FAILURE(status) || !csd) {
		if (csd)
			ucsdet_close(csd);
		rb_raise(rb_eRuntimeError, "could not open ICU charset detector: %s",
		         u_errorName(status));
	}

	return Data_Wrap_Struct(klass, NULL, rb_encdec__free, (void *)csd);
}
|
110
|
+
|
111
|
+
void Init_charlock_holmes()
|
112
|
+
{
|
113
|
+
rb_mCharlockHolmes = rb_define_module("CharlockHolmes");
|
114
|
+
|
115
|
+
rb_cEncodingDetector = rb_define_class_under(rb_mCharlockHolmes, "EncodingDetector", rb_cObject);
|
116
|
+
rb_define_alloc_func(rb_cEncodingDetector, rb_encdec__alloc);
|
117
|
+
rb_define_method(rb_cEncodingDetector, "detect", rb_encdec_detect, 1);
|
118
|
+
rb_define_method(rb_cEncodingDetector, "detect_all", rb_encdec_detect_all, 1);
|
119
|
+
}
|
@@ -0,0 +1,12 @@
|
|
1
|
+
module CharlockHolmes
  class EncodingDetector
    class << self
      # One-shot encoding detection for +str+.
      #
      # NOTE: every call builds a fresh CharlockHolmes::EncodingDetector
      # and delegates to its instance-level #detect.
      #
      # Returns: a Hash with :encoding, :language and :confidence
      def detect(str)
        detector = new
        detector.detect(str)
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'charlock_holmes' unless defined? CharlockHolmes
|
2
|
+
|
3
|
+
class String
  # Attempt to detect the encoding of this string
  #
  # Returns: a Hash with :encoding, :language and :confidence
  def detect_encoding
    encoding_detector.detect(self)
  end

  # Feature-detect String#force_encoding instead of pattern-matching
  # RUBY_VERSION: the old /1.9/ check left the method undefined on later
  # Rubies and also matched unrelated versions such as "3.1.9".
  if method_defined?(:force_encoding)
    # Attempt to detect the encoding of this string
    # then set the encoding to what was detected ala `force_encoding`
    #
    # Returns: a Hash with :encoding, :language and :confidence,
    # or nil when nothing was detected
    def detect_encoding!
      detected = detect_encoding
      return nil unless detected

      force_encoding detected[:encoding]
      detected
    end
  end

  protected

  # Detector used by #detect_encoding. It is memoized on the string itself,
  # except for frozen strings, where setting an instance variable would
  # raise; those get a throwaway detector.
  def encoding_detector
    return CharlockHolmes::EncodingDetector.new if frozen?

    @encoding_detector ||= CharlockHolmes::EncodingDetector.new
  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for CharlockHolmes::EncodingDetector: API presence, and detection of
# the fixture files under spec/fixtures.
describe CharlockHolmes::EncodingDetector do
  before :all do
    @detector = CharlockHolmes::EncodingDetector.new
  end

  test 'has a detect class-level method' do
    # the respond_to? result used to be discarded; assert it
    assert CharlockHolmes::EncodingDetector.respond_to?(:detect)
    detected = CharlockHolmes::EncodingDetector.detect 'test'
    assert_equal 'ISO-8859-1', detected[:encoding]
  end

  test 'has a detect method' do
    assert @detector.respond_to?(:detect)
    detected = @detector.detect 'test'
    assert_equal 'ISO-8859-1', detected[:encoding]
  end

  test 'has a detect_all method' do
    assert @detector.respond_to?(:detect_all)
    detected_list = @detector.detect_all 'test'
    encoding_list = detected_list.map {|d| d[:encoding]}.sort
    assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
  end

  context 'encoding detection' do
    # fixture file name => encoding the detector is expected to report
    MAPPING = [
      ['repl2.cljs',                'ISO-8859-1'],
      ['core.rkt',                  'UTF-8'],
      ['cl-messagepack.lisp',       'ISO-8859-1'],
      ['TwigExtensionsDate.es.yml', 'UTF-8'],
      ['AnsiGraph.psm1',            'UTF-16LE'],
      ['laholator.py',              'UTF-8']
    ]

    MAPPING.each do |mapping|
      file, encoding = mapping

      test "#{file} should be detected as #{encoding}" do
        path = File.expand_path "../fixtures/#{file}", __FILE__
        content = File.read path
        guessed = @detector.detect content

        assert_equal encoding, guessed[:encoding]

        # only Rubies with encoding-aware strings can validate the guess
        if content.respond_to? :force_encoding
          content.force_encoding guessed[:encoding]
          assert content.valid_encoding?
        end
      end
    end
  end
end
|
Binary file
|
@@ -0,0 +1,8 @@
|
|
1
|
+
date.year: '%year% año|%year% años'
|
2
|
+
date.month: '%month% mes|%month% meses'
|
3
|
+
date.day: '%day% día|%day% días'
|
4
|
+
date.hour: '%hour% hora|%hour% horas'
|
5
|
+
date.minute: '%minute% minuto|%minute% minutos'
|
6
|
+
date.second: '%second% segundo|%second% segundos'
|
7
|
+
date.new: 'menos de un minuto'
|
8
|
+
date.and: ' y '
|
@@ -0,0 +1,264 @@
|
|
1
|
+
;;;; cl-messagepack.lisp
|
2
|
+
|
3
|
+
(in-package #:messagepack)
|
4
|
+
|
5
|
+
(declaim (optimize (debug 3)))
|
6
|
+
|
7
|
+
;; Both helpers are needed at macro-expansion time, hence the EVAL-WHEN.
(eval-when (:compile-toplevel :load-toplevel :execute)
  (defun mkstr (&rest args)
    "Return one string made of the PRINC representation of every element of ARGS."
    (with-output-to-string (out)
      (dolist (arg args)
        (princ arg out))))
  (defun mksymb (&rest args)
    "Intern, in the current package, the symbol whose name is (MKSTR ARGS...)."
    (intern (apply #'mkstr args))))
|
12
|
+
|
13
|
+
(defmacro signed-unsigned-convertors (size)
  "Define the pair of functions SB<size>->UB<size> and UB<size>->SB<size>.
The first maps a signed SIZE-bit integer to its unsigned two's-complement
value; the second performs the inverse mapping. Both are compiled with
SAFETY 0, and with SPEED 3 only when SIZE is below 32."
  (let* ((speed (if (< size 32) 3 0))
         (modulus (expt 2 size))
         (half (expt 2 (1- size)))
         (to-unsigned (mksymb 'sb size '-> 'ub size))
         (to-signed (mksymb 'ub size '-> 'sb size)))
    `(progn
       (defun ,to-unsigned (sb)
         (declare (optimize (debug 0) (safety 0) (speed ,speed))
                  (type (integer ,(- half) ,(1- half)) sb))
         ;; negative inputs keep only their low SIZE bits
         (if (minusp sb)
             (ldb (byte ,size 0) sb)
             sb))
       (defun ,to-signed (sb)
         (declare (optimize (debug 0) (safety 0) (speed ,speed))
                  (type (mod ,modulus) sb))
         ;; a set top bit means the value stands for SB - 2^SIZE
         (if (logbitp ,(1- size) sb)
             (- sb ,modulus)
             sb)))))
|
28
|
+
|
29
|
+
(signed-unsigned-convertors 8)
|
30
|
+
(signed-unsigned-convertors 16)
|
31
|
+
(signed-unsigned-convertors 32)
|
32
|
+
(signed-unsigned-convertors 64)
|
33
|
+
|
34
|
+
(defun write-hex (data)
  "Print the elements of the sequence DATA to standard output as two-digit,
zero-padded hexadecimal numbers, sixteen per line. Returns NIL."
  (let ((row '())
        (row-length 0))
    (flet ((flush-row ()
             (format t "~{~2,'0x ~}~%" (nreverse row))
             (setf row '()
                   row-length 0)
             nil))
      (dotimes (index (length data))
        (push (elt data index) row)
        (when (= (incf row-length) 16)
          (flush-row)))
      ;; print the trailing, partially filled row
      (when row
        (flush-row)))))
|
45
|
+
|
46
|
+
(defun encode (data)
  "Serialize DATA with ENCODE-STREAM into an in-memory flexi-streams output
sequence and return that sequence."
  (flexi-streams:with-output-to-sequence (out)
    (encode-stream data out)))
|
49
|
+
|
50
|
+
(defun make-hash (data)
  "Build a hash table (default test) from DATA, a list whose entries are
either (key value) lists or (key . value) conses."
  (loop with table = (make-hash-table)
        for entry in data
        for key = (car entry)
        ;; a cons in the CDR means the entry is a proper (key value) list
        for value = (if (consp (cdr entry))
                        (second entry)
                        (cdr entry))
        do (setf (gethash key table) value)
        finally (return table)))
|
58
|
+
|
59
|
+
(defun is-byte-array (data-type)
  "True when DATA-TYPE is a vector whose array element type is (UNSIGNED-BYTE 8)."
  (when (vectorp data-type)
    (equal (array-element-type data-type) '(unsigned-byte 8))))
|
62
|
+
|
63
|
+
(defun encode-stream (data stream)
  "Write the MessagePack encoding of DATA to the binary STREAM.
Dispatch order: floats, integers, NIL (#xc0), T (#xc3), strings,
(unsigned-byte 8) vectors, other conses/vectors, hash tables, and finally
symbols, which are encoded by name. Anything else signals an error."
  (cond ((floatp data) (encode-float data stream))
        ;; INTEGERP rather than NUMBERP: ratios and complex numbers have no
        ;; integer encoding and must reach the error clause below
        ((integerp data) (encode-integer data stream))
        ((null data) (write-byte #xc0 stream))
        ((eq data t) (write-byte #xc3 stream))
        ((stringp data)
         (encode-string data stream))
        ((is-byte-array data)
         (encode-raw-bytes data stream))
        ((or (consp data) (vectorp data))
         (encode-array data stream))
        ((hash-table-p data)
         (encode-hash data stream))
        ((symbolp data)
         (encode-string (symbol-name data) stream))
        (t (error "Cannot encode data."))))
|
79
|
+
|
80
|
+
(defun encode-string (data stream)
  "Convert the string DATA to octets with BABEL:STRING-TO-OCTETS and write
them to STREAM through ENCODE-RAW-BYTES."
  (let ((octets (babel:string-to-octets data)))
    (encode-raw-bytes octets stream)))
|
82
|
+
|
83
|
+
#+sbcl (defun sbcl-encode-float (data stream)
  "SBCL-only float encoder. Writes #xca plus 4 big-endian bytes for a
SINGLE-FLOAT, or #xcb plus 8 big-endian bytes for a DOUBLE-FLOAT, using the
SB-KERNEL bit accessors. Other float types write nothing. Always returns T."
  ;; SINGLE-FLOAT-BITS and DOUBLE-FLOAT-HIGH-BITS return *signed* 32-bit
  ;; integers. STORE-BIG-ENDIAN only emits bytes for positive numbers, so
  ;; mask to the unsigned bit pattern first; otherwise every negative float
  ;; would be written as zero bytes.
  (typecase data
    (single-float
     (write-byte #xca stream)
     (store-big-endian (ldb (byte 32 0) (sb-kernel:single-float-bits data))
                       stream 4))
    (double-float
     (write-byte #xcb stream)
     (store-big-endian (ldb (byte 32 0) (sb-kernel:double-float-high-bits data))
                       stream 4)
     (store-big-endian (sb-kernel:double-float-low-bits data) stream 4)))
  t)
|
92
|
+
|
93
|
+
(defun encode-float (data stream)
  "Encode the float DATA to STREAM. Only SBCL is supported: there the work is
delegated to SBCL-ENCODE-FLOAT; on every other implementation an error is
signalled."
  #+sbcl (sbcl-encode-float data stream)
  #-sbcl (error "No floating point support yet."))
|
96
|
+
|
97
|
+
(defun encode-each (data stream &optional (encoder #'encode-stream))
  "Call ENCODER with (item STREAM) for every item of DATA.
For a hash table each key is encoded, immediately followed by its value; for
a vector or cons the elements are encoded in order. Any other DATA signals
an error."
  (flet ((emit (item)
           (funcall encoder item stream)))
    (typecase data
      (hash-table
       (maphash (lambda (key value)
                  (emit key)
                  (emit value))
                data))
      ((or vector cons)
       (mapc #'emit (coerce data 'list)))
      (t (error "Not sequence or hash table.")))))
|
108
|
+
|
109
|
+
(defun encode-sequence (data stream
                        short-prefix short-length
                        typecode-16 typecode-32
                        &optional (encoder #'encode-stream))
  "Write a length header for DATA to STREAM, then every element via ENCODE-EACH.
DATA is a hash table (length = entry count) or a sequence. Header forms:
  - length <= SHORT-LENGTH: one byte, SHORT-PREFIX + length;
  - length <= 65535: TYPECODE-16 followed by a 2-byte big-endian length;
  - length <  2^32:  TYPECODE-32 followed by a 4-byte big-endian length.
Longer data signals an error instead of silently writing nothing."
  (let ((len (if (hash-table-p data)
                 (hash-table-count data)
                 (length data))))
    (cond ((<= 0 len short-length)
           (write-byte (+ short-prefix len) stream))
          ((<= 0 len 65535)
           (write-byte typecode-16 stream)
           (store-big-endian len stream 2))
          ((<= 0 len (1- (expt 2 32)))
           (write-byte typecode-32 stream)
           (store-big-endian len stream 4))
          (t (error "Sequence too long to encode.")))
    ;; the payload is identical for all three header forms
    (encode-each data stream encoder)))
|
127
|
+
|
128
|
+
(defun encode-hash (data stream)
  "Encode the hash table DATA to STREAM as a MessagePack map: prefix #x80 for
up to 15 entries, otherwise type code #xde (16-bit count) or #xdf (32-bit
count). These are the codes DECODE-STREAM dispatches to DECODE-MAP; the array
codes #xdc / #xdd used previously made large maps decode as arrays."
  (encode-sequence data stream #x80 15 #xde #xdf))
|
130
|
+
|
131
|
+
(defun encode-array (data stream)
  "Encode the sequence DATA to STREAM via ENCODE-SEQUENCE with prefix #x90
(lengths up to 15) and type codes #xdc (16-bit length) / #xdd (32-bit length)."
  (let ((short-prefix #x90)
        (short-limit 15)
        (code-16 #xdc)
        (code-32 #xdd))
    (encode-sequence data stream short-prefix short-limit code-16 code-32)))
|
133
|
+
|
134
|
+
(defun encode-raw-bytes (data stream)
  "Write the byte sequence DATA to STREAM via ENCODE-SEQUENCE with prefix #xa0
(lengths up to 31), type codes #xda / #xdb, and WRITE-BYTE as the per-element
encoder, so the bytes are copied verbatim."
  (let ((element-writer #'write-byte))
    (encode-sequence data stream #xa0 31 #xda #xdb element-writer)))
|
136
|
+
|
137
|
+
(defun encode-integer (data stream)
  "Write the integer DATA to STREAM using the smallest form the clauses allow.
Values 0..127 and -32..-1 are written as a single byte. Larger non-negative
values use #xcc/#xcd/#xce/#xcf followed by 1/2/4/8 big-endian bytes; other
negative values use #xd0/#xd1/#xd2/#xd3 followed by the 1/2/4/8-byte
two's-complement value. Integers outside the 64-bit range signal an error."
  (cond ((<= 0 data 127) (write-byte data stream))
        ((<= -32 data -1) (write-byte (sb8->ub8 data) stream))
        ((<= 0 data 255)
         (write-byte #xcc stream)
         (write-byte data stream))
        ((<= 0 data 65535)
         (write-byte #xcd stream)
         (store-big-endian data stream 2))
        ((<= 0 data (1- (expt 2 32)))
         (write-byte #xce stream)
         (store-big-endian data stream 4))
        ((<= 0 data (1- (expt 2 64)))
         (write-byte #xcf stream)
         (store-big-endian data stream 8))
        ((<= -128 data 127)
         (write-byte #xd0 stream)
         (write-byte (sb8->ub8 data) stream))
        ;; Multi-byte payloads go through STORE-BIG-ENDIAN: WRITE-BYTE cannot
        ;; emit a 16/32/64-bit value on an octet stream.
        ((<= -32768 data 32767)
         (write-byte #xd1 stream)
         (store-big-endian (sb16->ub16 data) stream 2))
        ((<= (- (expt 2 31)) data (1- (expt 2 31)))
         (write-byte #xd2 stream)
         (store-big-endian (sb32->ub32 data) stream 4))
        ((<= (- (expt 2 63)) data (1- (expt 2 63)))
         (write-byte #xd3 stream)
         (store-big-endian (sb64->ub64 data) stream 8))
        (t (error "Integer too large or too small."))))
|
165
|
+
|
166
|
+
(defun store-big-endian (number stream byte-count)
  "Write NUMBER to STREAM as exactly BYTE-COUNT bytes, most significant first.
Bytes are only extracted while the remaining value is positive, so zero and
negative inputs are written as all-zero bytes. Signals an error when NUMBER
needs more than BYTE-COUNT bytes."
  (let ((octets '()))
    ;; peel off the low byte each round; pushing leaves the high byte in front
    (do ((remaining number (ash remaining -8)))
        ((<= remaining 0))
      (push (rem remaining 256) octets))
    (let ((missing (- byte-count (length octets))))
      (when (minusp missing)
        (error "Number too large."))
      ;; left-pad with zero bytes up to the requested width
      (write-sequence (nconc (make-list missing :initial-element 0) octets)
                      stream))))
|
180
|
+
|
181
|
+
(defun decode (byte-array)
  "Decode one value from BYTE-ARRAY by reading it through a flexi-streams
in-memory input stream with DECODE-STREAM."
  (flexi-streams:with-input-from-sequence (in byte-array)
    (decode-stream in)))
|
184
|
+
|
185
|
+
(defun decode-stream (stream)
  "Read one MessagePack value from the binary STREAM and return it.
The first byte selects the form: single-byte integers, short raw/array/map
prefixes (length in the low bits), or an explicit type code followed by a
big-endian payload. #xc0 and #xc2 decode to NIL, #xc3 to T. Floats are only
supported on SBCL. An unrecognised type byte signals an error."
  (let ((byte (read-byte stream)))
    (cond
      ;; prefix-encoded forms; the bit tests cover disjoint byte ranges
      ((= 0 (ldb (byte 1 7) byte))
       byte)
      ((= 7 (ldb (byte 3 5) byte))
       (ub8->sb8 byte))
      ((= 5 (ldb (byte 3 5) byte))
       (decode-raw-sequence (ldb (byte 5 0) byte) stream))
      ((= 9 (ldb (byte 4 4) byte))
       (decode-array (- byte #x90) stream))
      ((= 8 (ldb (byte 4 4) byte))
       (decode-map (- byte #x80) stream))
      (t
       (case byte
         ;; unsigned integers
         (#xcc (read-byte stream))
         (#xcd (load-big-endian stream 2))
         (#xce (load-big-endian stream 4))
         (#xcf (load-big-endian stream 8))
         ;; signed integers
         (#xd0 (ub8->sb8 (read-byte stream)))
         (#xd1 (ub16->sb16 (load-big-endian stream 2)))
         (#xd2 (ub32->sb32 (load-big-endian stream 4)))
         (#xd3 (ub64->sb64 (load-big-endian stream 8)))
         ;; nil / true / false
         (#xc0 nil)
         (#xc3 t)
         (#xc2 nil)
         ;; Floats. The SBCL constructors take the (high) 32 bits as a
         ;; *signed* integer, so the unsigned value read from the stream is
         ;; converted first; otherwise negative floats fail to decode.
         (#xca
          (or #+sbcl (sb-kernel:make-single-float
                      (ub32->sb32 (load-big-endian stream 4)))
              #-(or sbcl) (error "No floating point support yet.")))
         (#xcb
          (or #+sbcl (sb-kernel:make-double-float
                      (ub32->sb32 (load-big-endian stream 4))
                      (load-big-endian stream 4))
              #-(or sbcl) (error "No floating point support yet.")))
         ;; raw bytes / arrays / maps with explicit 16- or 32-bit lengths
         (#xda (decode-raw-sequence (load-big-endian stream 2) stream))
         (#xdb (decode-raw-sequence (load-big-endian stream 4) stream))
         (#xdc (decode-array (load-big-endian stream 2) stream))
         (#xdd (decode-array (load-big-endian stream 4) stream))
         (#xde (decode-map (load-big-endian stream 2) stream))
         (#xdf (decode-map (load-big-endian stream 4) stream))
         ;; previously fell through and returned NIL, which cannot be told
         ;; apart from a real encoded NIL
         (otherwise
          (error "Cannot decode type byte #x~2,'0x." byte)))))))
|
238
|
+
|
239
|
+
(defun decode-map (length stream)
  "Read LENGTH key/value pairs from STREAM with DECODE-STREAM (key first,
then value) and return them in a new EQUAL hash table."
  (let ((table (make-hash-table :test #'equal)))
    (dotimes (pair-index length table)
      (declare (ignorable pair-index))
      (let* ((key (decode-stream stream))
             (value (decode-stream stream)))
        (setf (gethash key table) value)))))
|
246
|
+
|
247
|
+
(defun decode-array (length stream)
  "Read LENGTH values from STREAM with DECODE-STREAM and return them, in
order, in a freshly allocated vector."
  (let ((elements (make-array length)))
    (loop for index from 0 below length
          do (setf (aref elements index) (decode-stream stream)))
    elements))
|
252
|
+
|
253
|
+
(defun decode-raw-sequence (length stream)
  "Read LENGTH octets from STREAM into a fresh octet vector and return them
converted to a string with BABEL:OCTETS-TO-STRING."
  (let ((octets (make-array length :element-type '(unsigned-byte 8))))
    (read-sequence octets stream)
    (babel:octets-to-string octets)))
|
257
|
+
|
258
|
+
(defun load-big-endian (stream byte-count)
  "Read BYTE-COUNT bytes from STREAM and combine them, most significant byte
first, into a non-negative integer. Returns 0 when BYTE-COUNT is 0."
  (let ((value 0))
    (dotimes (step byte-count value)
      (declare (ignorable step))
      ;; shift what has been read so far up one byte, then add the next byte
      (setf value (+ (ash value 8)
                     (read-byte stream))))))
|