charlock_holmes_heroku 0.6.13
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +3 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +30 -0
- data/MIT-LICENSE +20 -0
- data/README.md +111 -0
- data/Rakefile +29 -0
- data/benchmark/detection.rb +39 -0
- data/benchmark/test.txt +693 -0
- data/charlock_holmes.gemspec +25 -0
- data/ext/charlock_holmes/common.h +41 -0
- data/ext/charlock_holmes/converter.c +53 -0
- data/ext/charlock_holmes/encoding_detector.c +295 -0
- data/ext/charlock_holmes/ext.c +13 -0
- data/ext/charlock_holmes/extconf.rb +86 -0
- data/ext/charlock_holmes/src/icu4c-49_1_2-src.tgz +0 -0
- data/ext/charlock_holmes/src/icu4c-52_1-src.tgz +0 -0
- data/lib/charlock_holmes.rb +6 -0
- data/lib/charlock_holmes/encoding_detector.rb +33 -0
- data/lib/charlock_holmes/string.rb +34 -0
- data/lib/charlock_holmes/version.rb +3 -0
- data/spec/converter_spec.rb +29 -0
- data/spec/encoding_detector_spec.rb +122 -0
- data/spec/fixtures/AnsiGraph.psm1 +0 -0
- data/spec/fixtures/TwigExtensionsDate.es.yml +8 -0
- data/spec/fixtures/cl-messagepack.lisp +264 -0
- data/spec/fixtures/core.rkt +254 -0
- data/spec/fixtures/hello_world +0 -0
- data/spec/fixtures/laholator.py +131 -0
- data/spec/fixtures/repl2.cljs +109 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/string_method_spec.rb +52 -0
- metadata +133 -0
Binary file
|
Binary file
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module CharlockHolmes
  class EncodingDetector
    # Predicate-style spelling of the strip_tags reader. The reader itself is
    # not defined in this file and must already exist when this line runs.
    alias_method :strip_tags?, :strip_tags

    # Detect the encoding of a string using a throwaway detector.
    #
    # NOTE: A new CharlockHolmes::EncodingDetector instance is created on
    # every call.
    #
    # str      - the String whose encoding should be detected
    # hint_enc - an optional encoding name (like "UTF-8") handed to the
    #            charset detector as an additional hint
    #
    # Returns: a Hash with :encoding, :language, :type and :confidence
    def self.detect(str, hint_enc = nil)
      detector = new
      detector.detect(str, hint_enc)
    end

    # Detect every encoding that could match a string, using a throwaway
    # detector.
    #
    # NOTE: A new CharlockHolmes::EncodingDetector instance is created on
    # every call.
    #
    # str      - the String whose encoding should be detected
    # hint_enc - an optional encoding name (like "UTF-8") handed to the
    #            charset detector as an additional hint
    #
    # Returns: an Array with zero or more Hashes, each one of them with
    #          :encoding, :language, :type and :confidence
    def self.detect_all(str, hint_enc = nil)
      detector = new
      detector.detect_all(str, hint_enc)
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'charlock_holmes' unless defined? CharlockHolmes
|
2
|
+
|
3
|
+
class String
  # Attempt to detect the encoding of this string.
  #
  # hint_enc - an optional encoding name (like "UTF-8") passed to the
  #            detector as an additional hint
  #
  # Returns: a Hash with :encoding, :language, :type and :confidence
  def detect_encoding(hint_enc=nil)
    CharlockHolmes::EncodingDetector.new.detect(self, hint_enc)
  end

  # Attempt to detect the encoding of this string, and return
  # a list with all the possible encodings that match it.
  #
  # hint_enc - an optional encoding name (like "UTF-8") passed to the
  #            detector as an additional hint
  #
  # Returns: an Array with zero or more Hashes,
  #          each one of them with :encoding, :language, :type and :confidence
  def detect_encodings(hint_enc=nil)
    CharlockHolmes::EncodingDetector.new.detect_all(self, hint_enc)
  end

  # Define the bang variant whenever String supports force_encoding.
  # Checking the capability (instead of matching RUBY_VERSION against an
  # unanchored /1.9/ pattern) keeps the method available on every Ruby that
  # has encoding-aware strings, not only on versions whose number happens to
  # contain "1<any char>9".
  if method_defined?(:force_encoding)
    # Attempt to detect the encoding of this string
    # then set the encoding to what was detected ala `force_encoding`.
    # The string is left untouched when nothing was detected or when the
    # detection result carries no :encoding.
    #
    # hint_enc - an optional encoding name (like "UTF-8") passed to the
    #            detector as an additional hint
    #
    # Returns: self
    def detect_encoding!(hint_enc=nil)
      detected = detect_encoding(hint_enc)
      force_encoding(detected[:encoding]) if detected && detected[:encoding]
      self
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe CharlockHolmes::Converter do
  # Plain ASCII: widening to UTF-16 must grow the byte count and change the
  # bytes; converting back must restore the original exactly.
  test 'is able to convert regular ascii content from ISO-8859-1 to UTF-16, and back again' do
    original = 'test'

    widened = CharlockHolmes::Converter.convert(original, 'ISO-8859-1', 'UTF-16')
    assert original.bytesize < widened.bytesize
    assert original != widened

    restored = CharlockHolmes::Converter.convert(widened, 'UTF-16', 'ISO-8859-1')
    assert original.bytesize == restored.bytesize
    assert original == restored
  end

  # Multi-byte UTF-8 content: same round trip through UTF-16.
  test 'is able to convert UTF-8 content from UTF-8 to UTF-16, and back again' do
    original = 'λ, λ, λ'

    widened = CharlockHolmes::Converter.convert(original, 'UTF-8', 'UTF-16')
    assert original.bytesize < widened.bytesize
    assert original != widened

    restored = CharlockHolmes::Converter.convert(widened, 'UTF-16', 'UTF-8')
    assert original.bytesize == restored.bytesize
    assert original == restored
  end
end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe CharlockHolmes::EncodingDetector do
  # One detector instance is shared by the instance-level examples below.
  before :all do
    @detector = CharlockHolmes::EncodingDetector.new
  end

  test 'has a class-level detect method' do
    # The respond_to? result must be asserted, otherwise the check is a no-op.
    assert CharlockHolmes::EncodingDetector.respond_to?(:detect)
    detected = CharlockHolmes::EncodingDetector.detect 'test'
    assert_equal 'ISO-8859-1', detected[:encoding]
  end

  test 'has a class-level detect method that accepts an encoding hint' do
    assert CharlockHolmes::EncodingDetector.respond_to?(:detect)
    detected = CharlockHolmes::EncodingDetector.detect 'test', 'UTF-8'
    assert_equal 'ISO-8859-1', detected[:encoding]
  end

  test 'has a class-level detect_all method' do
    assert CharlockHolmes::EncodingDetector.respond_to?(:detect_all)
    detected_list = CharlockHolmes::EncodingDetector.detect_all 'test'
    assert detected_list.is_a? Array

    encoding_list = detected_list.map {|d| d[:encoding]}.sort
    assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
  end

  test 'has a class-level detect_all method that accepts an encoding hint' do
    assert CharlockHolmes::EncodingDetector.respond_to?(:detect_all)
    detected_list = CharlockHolmes::EncodingDetector.detect_all 'test', 'UTF-8'
    assert detected_list.is_a? Array

    encoding_list = detected_list.map {|d| d[:encoding]}.sort
    assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
  end

  test 'has a detect method' do
    assert @detector.respond_to?(:detect)
    detected = @detector.detect 'test'
    assert_equal 'ISO-8859-1', detected[:encoding]
  end

  test 'has a detect method that accepts an encoding hint' do
    assert @detector.respond_to?(:detect)
    detected = @detector.detect 'test', 'UTF-8'
    assert_equal 'ISO-8859-1', detected[:encoding]
  end

  test 'has a detect_all method' do
    assert @detector.respond_to?(:detect_all)
    detected_list = @detector.detect_all 'test'
    assert detected_list.is_a? Array

    encoding_list = detected_list.map {|d| d[:encoding]}.sort
    assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
  end

  test 'has a detect_all method that accepts an encoding hint' do
    assert @detector.respond_to?(:detect_all)
    detected_list = @detector.detect_all 'test', 'UTF-8'
    assert detected_list.is_a? Array

    encoding_list = detected_list.map {|d| d[:encoding]}.sort
    assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
  end

  # Uses its own detector so toggling the flag cannot leak into the shared one.
  test 'has a strip_tags flag' do
    detector = CharlockHolmes::EncodingDetector.new
    detector.strip_tags = true
    assert detector.strip_tags

    detection = detector.detect "<div ascii_attribute='some more ascii'>λ, λ, λ</div>"
    assert_equal 'UTF-8', detection[:encoding]

    detector.strip_tags = false
    assert !detector.strip_tags

    detection = detector.detect "<div ascii_attribute='some more ascii'>λ, λ, λ</div>"
    assert_equal 'UTF-8', detection[:encoding]
  end

  test 'has a list of supported encodings' do
    assert CharlockHolmes::EncodingDetector.respond_to?(:supported_encodings)
    supported_encodings = CharlockHolmes::EncodingDetector.supported_encodings

    assert supported_encodings.is_a?(Array)
    assert supported_encodings.include? 'UTF-8'
  end

  context 'encoding detection' do
    # Each entry: fixture file name, expected :encoding, expected :type.
    MAPPING = [
      ['repl2.cljs', 'ISO-8859-1', :text],
      ['core.rkt', 'UTF-8', :text],
      ['cl-messagepack.lisp', 'ISO-8859-1', :text],
      ['TwigExtensionsDate.es.yml', 'UTF-8', :text],
      ['AnsiGraph.psm1', 'UTF-16LE', :text],
      ['laholator.py', 'UTF-8', :text],
      ['hello_world', nil, :binary]
    ]

    MAPPING.each do |mapping|
      file, encoding, type = mapping

      test "#{file} should be detected as #{encoding || 'binary'}" do
        path = File.expand_path "../fixtures/#{file}", __FILE__
        # Read raw bytes: the fixtures include UTF-16 and binary files, and a
        # text-mode read may translate or truncate them on some platforms.
        content = File.open(path, 'rb') { |io| io.read }
        guessed = @detector.detect content

        assert_equal encoding, guessed[:encoding]
        assert_equal type, guessed[:type]

        # Only encoding-aware Rubies can verify that the detected encoding
        # actually fits the bytes.
        if content.respond_to?(:force_encoding) && guessed[:type] == :text
          content.force_encoding guessed[:encoding]
          assert content.valid_encoding?
        end
      end
    end
  end
end
|
Binary file
|
@@ -0,0 +1,8 @@
|
|
1
|
+
date.year: '%year% año|%year% años'
|
2
|
+
date.month: '%month% mes|%month% meses'
|
3
|
+
date.day: '%day% día|%day% días'
|
4
|
+
date.hour: '%hour% hora|%hour% horas'
|
5
|
+
date.minute: '%minute% minuto|%minute% minutos'
|
6
|
+
date.second: '%second% segundo|%second% segundos'
|
7
|
+
date.new: 'menos de un minuto'
|
8
|
+
date.and: ' y '
|
@@ -0,0 +1,264 @@
|
|
1
|
+
;;;; cl-messagepack.lisp
|
2
|
+
|
3
|
+
(in-package #:messagepack)
|
4
|
+
|
5
|
+
(declaim (optimize (debug 3)))
|
6
|
+
|
7
|
+
(eval-when (:compile-toplevel :load-toplevel :execute)
|
8
|
+
(defun mkstr (&rest args)
|
9
|
+
(format nil "~{~a~}" args))
|
10
|
+
(defun mksymb (&rest args)
|
11
|
+
(intern (apply #'mkstr args))))
|
12
|
+
|
13
|
+
(defmacro signed-unsigned-convertors (size)
|
14
|
+
(let ((speed (if (< size 32) 3 0)))
|
15
|
+
`(progn
|
16
|
+
(defun ,(mksymb 'sb size '-> 'ub size) (sb)
|
17
|
+
(declare (optimize (debug 0) (safety 0) (speed ,speed))
|
18
|
+
(type (integer ,(- (expt 2 (1- size))) ,(1- (expt 2 (1- size)))) sb))
|
19
|
+
(if (< sb 0)
|
20
|
+
(ldb (byte ,size 0) sb)
|
21
|
+
sb))
|
22
|
+
(defun ,(mksymb 'ub size '-> 'sb size) (sb)
|
23
|
+
(declare (optimize (debug 0) (safety 0) (speed ,speed))
|
24
|
+
(type (mod ,(expt 2 size)) sb))
|
25
|
+
(if (logbitp (1- ,size) sb)
|
26
|
+
(- (1+ (logxor (1- (expt 2 ,size)) sb)))
|
27
|
+
sb)))))
|
28
|
+
|
29
|
+
(signed-unsigned-convertors 8)
|
30
|
+
(signed-unsigned-convertors 16)
|
31
|
+
(signed-unsigned-convertors 32)
|
32
|
+
(signed-unsigned-convertors 64)
|
33
|
+
|
34
|
+
(defun write-hex (data)
|
35
|
+
(let (line)
|
36
|
+
(loop
|
37
|
+
for i from 0 to (1- (length data))
|
38
|
+
do (push (elt data i) line)
|
39
|
+
when (= (length line) 16)
|
40
|
+
do
|
41
|
+
(format t "~{~2,'0x ~}~%" (nreverse line))
|
42
|
+
(setf line nil))
|
43
|
+
(when line
|
44
|
+
(format t "~{~2,'0x ~}~%" (nreverse line)))))
|
45
|
+
|
46
|
+
(defun encode (data)
|
47
|
+
(flexi-streams:with-output-to-sequence (stream)
|
48
|
+
(encode-stream data stream)))
|
49
|
+
|
50
|
+
(defun make-hash (data)
|
51
|
+
(let ((result (make-hash-table)))
|
52
|
+
(dolist (kv data)
|
53
|
+
(cond ((consp (cdr kv))
|
54
|
+
(setf (gethash (first kv) result) (second kv)))
|
55
|
+
(t
|
56
|
+
(setf (gethash (car kv) result) (cdr kv)))))
|
57
|
+
result))
|
58
|
+
|
59
|
+
(defun is-byte-array (data-type)
|
60
|
+
(and (vectorp data-type)
|
61
|
+
(equal '(unsigned-byte 8) (array-element-type data-type))))
|
62
|
+
|
63
|
+
(defun encode-stream (data stream)
|
64
|
+
(cond ((floatp data) (encode-float data stream))
|
65
|
+
((numberp data) (encode-integer data stream))
|
66
|
+
((null data) (write-byte #xc0 stream))
|
67
|
+
((eq data t) (write-byte #xc3 stream))
|
68
|
+
((stringp data)
|
69
|
+
(encode-string data stream))
|
70
|
+
((is-byte-array data)
|
71
|
+
(encode-raw-bytes data stream))
|
72
|
+
((or (consp data) (vectorp data))
|
73
|
+
(encode-array data stream))
|
74
|
+
((hash-table-p data)
|
75
|
+
(encode-hash data stream))
|
76
|
+
((symbolp data)
|
77
|
+
(encode-string (symbol-name data) stream))
|
78
|
+
(t (error "Cannot encode data."))))
|
79
|
+
|
80
|
+
(defun encode-string (data stream)
|
81
|
+
(encode-raw-bytes (babel:string-to-octets data) stream))
|
82
|
+
|
83
|
+
#+sbcl (defun sbcl-encode-float (data stream)
|
84
|
+
(cond ((equal (type-of data) 'single-float)
|
85
|
+
(write-byte #xca stream)
|
86
|
+
(store-big-endian (sb-kernel:single-float-bits data) stream 4))
|
87
|
+
((equal (type-of data) 'double-float)
|
88
|
+
(write-byte #xcb stream)
|
89
|
+
(store-big-endian (sb-kernel:double-float-high-bits data) stream 4)
|
90
|
+
(store-big-endian (sb-kernel:double-float-low-bits data) stream 4)))
|
91
|
+
t)
|
92
|
+
|
93
|
+
(defun encode-float (data stream)
|
94
|
+
(or #+sbcl (sbcl-encode-float data stream)
|
95
|
+
#-(or sbcl) (error "No floating point support yet.")))
|
96
|
+
|
97
|
+
(defun encode-each (data stream &optional (encoder #'encode-stream))
|
98
|
+
(cond ((hash-table-p data)
|
99
|
+
(maphash (lambda (key value)
|
100
|
+
(funcall encoder key stream)
|
101
|
+
(funcall encoder value stream))
|
102
|
+
data))
|
103
|
+
((or (vectorp data) (consp data))
|
104
|
+
(mapc (lambda (subdata)
|
105
|
+
(funcall encoder subdata stream))
|
106
|
+
(coerce data 'list)))
|
107
|
+
(t (error "Not sequence or hash table."))))
|
108
|
+
|
109
|
+
(defun encode-sequence (data stream
|
110
|
+
short-prefix short-length
|
111
|
+
typecode-16 typecode-32
|
112
|
+
&optional (encoder #'encode-stream))
|
113
|
+
(let ((len (if (hash-table-p data)
|
114
|
+
(hash-table-count data)
|
115
|
+
(length data))))
|
116
|
+
(cond ((<= 0 len short-length)
|
117
|
+
(write-byte (+ short-prefix len) stream)
|
118
|
+
(encode-each data stream encoder))
|
119
|
+
((<= 0 len 65535)
|
120
|
+
(write-byte typecode-16 stream)
|
121
|
+
(store-big-endian len stream 2)
|
122
|
+
(encode-each data stream encoder))
|
123
|
+
((<= 0 len (1- (expt 2 32)))
|
124
|
+
(write-byte typecode-32 stream)
|
125
|
+
(store-big-endian len stream 4)
|
126
|
+
(encode-each data stream encoder)))))
|
127
|
+
|
128
|
+
(defun encode-hash (data stream)
|
129
|
+
(encode-sequence data stream #x80 15 #xdc #xdd))
|
130
|
+
|
131
|
+
(defun encode-array (data stream)
|
132
|
+
(encode-sequence data stream #x90 15 #xdc #xdd))
|
133
|
+
|
134
|
+
(defun encode-raw-bytes (data stream)
|
135
|
+
(encode-sequence data stream #xa0 31 #xda #xdb #'write-byte))
|
136
|
+
|
137
|
+
(defun encode-integer (data stream)
|
138
|
+
(cond ((<= 0 data 127) (write-byte data stream))
|
139
|
+
((<= -32 data -1) (write-byte (sb8->ub8 data) stream))
|
140
|
+
((<= 0 data 255)
|
141
|
+
(write-byte #xcc stream)
|
142
|
+
(write-byte data stream))
|
143
|
+
((<= 0 data 65535)
|
144
|
+
(write-byte #xcd stream)
|
145
|
+
(store-big-endian data stream 2))
|
146
|
+
((<= 0 data (1- (expt 2 32)))
|
147
|
+
(write-byte #xce stream)
|
148
|
+
(store-big-endian data stream 4))
|
149
|
+
((<= 0 data (1- (expt 2 64)))
|
150
|
+
(write-byte #xcf stream)
|
151
|
+
(store-big-endian data stream 8))
|
152
|
+
((<= -128 data 127)
|
153
|
+
(write-byte #xd0 stream)
|
154
|
+
(write-byte (sb8->ub8 data) stream))
|
155
|
+
((<= -32768 data 32767)
|
156
|
+
(write-byte #xd1 stream)
|
157
|
+
(write-byte (sb16->ub16 data) stream))
|
158
|
+
((<= (- (expt 2 31)) data (1- (expt 2 31)))
|
159
|
+
(write-byte #xd2 stream)
|
160
|
+
(write-byte (sb32->ub32 data) stream))
|
161
|
+
((<= (- (expt 2 63)) data (1- (expt 2 63)))
|
162
|
+
(write-byte #xd3 stream)
|
163
|
+
(write-byte (sb64->ub64 data) stream))
|
164
|
+
(t (error "Integer too large or too small."))))
|
165
|
+
|
166
|
+
(defun store-big-endian (number stream byte-count)
|
167
|
+
(let (byte-list)
|
168
|
+
(loop
|
169
|
+
while (> number 0)
|
170
|
+
do
|
171
|
+
(push (rem number 256)
|
172
|
+
byte-list)
|
173
|
+
(setf number (ash number -8)))
|
174
|
+
(loop
|
175
|
+
while (< (length byte-list) byte-count)
|
176
|
+
do (push 0 byte-list))
|
177
|
+
(when (> (length byte-list) byte-count)
|
178
|
+
(error "Number too large."))
|
179
|
+
(write-sequence byte-list stream)))
|
180
|
+
|
181
|
+
(defun decode (byte-array)
|
182
|
+
(flexi-streams:with-input-from-sequence (stream byte-array)
|
183
|
+
(decode-stream stream)))
|
184
|
+
|
185
|
+
(defun decode-stream (stream)
|
186
|
+
(let ((byte (read-byte stream)))
|
187
|
+
(cond ((= 0 (ldb (byte 1 7) byte))
|
188
|
+
byte)
|
189
|
+
((= 7 (ldb (byte 3 5) byte))
|
190
|
+
(ub8->sb8 byte))
|
191
|
+
((= #xcc byte)
|
192
|
+
(read-byte stream))
|
193
|
+
((= #xcd byte)
|
194
|
+
(load-big-endian stream 2))
|
195
|
+
((= #xce byte)
|
196
|
+
(load-big-endian stream 4))
|
197
|
+
((= #xcf byte)
|
198
|
+
(load-big-endian stream 8))
|
199
|
+
((= #xd0 byte)
|
200
|
+
(ub8->sb8 (read-byte stream)))
|
201
|
+
((= #xd1 byte)
|
202
|
+
(ub16->sb16 (load-big-endian stream 2)))
|
203
|
+
((= #xd2 byte)
|
204
|
+
(ub32->sb32 (load-big-endian stream 4)))
|
205
|
+
((= #xd3 byte)
|
206
|
+
(ub64->sb64 (load-big-endian stream 8)))
|
207
|
+
((= #xc0 byte)
|
208
|
+
nil)
|
209
|
+
((= #xc3 byte)
|
210
|
+
t)
|
211
|
+
((= #xc2 byte)
|
212
|
+
nil)
|
213
|
+
((= #xca byte)
|
214
|
+
(or #+sbcl (sb-kernel:make-single-float (load-big-endian stream 4))
|
215
|
+
#-(or sbcl) (error "No floating point support yet.")))
|
216
|
+
((= #xcb byte)
|
217
|
+
(or #+sbcl (sb-kernel:make-double-float (load-big-endian stream 4)
|
218
|
+
(load-big-endian stream 4))
|
219
|
+
#-(or sbcl) (error "No floating point support yet.")))
|
220
|
+
((= 5 (ldb (byte 3 5) byte))
|
221
|
+
(decode-raw-sequence (ldb (byte 5 0) byte) stream))
|
222
|
+
((= #xda byte)
|
223
|
+
(decode-raw-sequence (load-big-endian stream 2) stream))
|
224
|
+
((= #xdb byte)
|
225
|
+
(decode-raw-sequence (load-big-endian stream 4) stream))
|
226
|
+
((= 9 (ldb (byte 4 4) byte))
|
227
|
+
(decode-array (- byte #x90) stream))
|
228
|
+
((= #xdc byte)
|
229
|
+
(decode-array (load-big-endian stream 2) stream))
|
230
|
+
((= #xdd byte)
|
231
|
+
(decode-array (load-big-endian stream 4) stream))
|
232
|
+
((= 8 (ldb (byte 4 4) byte))
|
233
|
+
(decode-map (- byte #x80) stream))
|
234
|
+
((= #xde byte)
|
235
|
+
(decode-map (load-big-endian stream 2) stream))
|
236
|
+
((= #xdf byte)
|
237
|
+
(decode-map (load-big-endian stream 4) stream)))))
|
238
|
+
|
239
|
+
(defun decode-map (length stream)
|
240
|
+
(let ((hash-table (make-hash-table :test #'equal)))
|
241
|
+
(loop repeat length
|
242
|
+
do (let ((key (decode-stream stream))
|
243
|
+
(value (decode-stream stream)))
|
244
|
+
(setf (gethash key hash-table) value)))
|
245
|
+
hash-table))
|
246
|
+
|
247
|
+
(defun decode-array (length stream)
|
248
|
+
(let ((array (make-array length)))
|
249
|
+
(dotimes (i length)
|
250
|
+
(setf (aref array i) (decode-stream stream)))
|
251
|
+
array))
|
252
|
+
|
253
|
+
(defun decode-raw-sequence (length stream)
|
254
|
+
(let ((seq (make-array length :element-type '(mod 256))))
|
255
|
+
(read-sequence seq stream)
|
256
|
+
(babel:octets-to-string seq)))
|
257
|
+
|
258
|
+
(defun load-big-endian (stream byte-count)
|
259
|
+
(let ((result 0))
|
260
|
+
(loop
|
261
|
+
repeat byte-count
|
262
|
+
do (setf result (+ (ash result 8)
|
263
|
+
(read-byte stream))))
|
264
|
+
result))
|