charlock_holmes_heroku 0.6.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +3 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +30 -0
- data/MIT-LICENSE +20 -0
- data/README.md +111 -0
- data/Rakefile +29 -0
- data/benchmark/detection.rb +39 -0
- data/benchmark/test.txt +693 -0
- data/charlock_holmes.gemspec +25 -0
- data/ext/charlock_holmes/common.h +41 -0
- data/ext/charlock_holmes/converter.c +53 -0
- data/ext/charlock_holmes/encoding_detector.c +295 -0
- data/ext/charlock_holmes/ext.c +13 -0
- data/ext/charlock_holmes/extconf.rb +86 -0
- data/ext/charlock_holmes/src/icu4c-49_1_2-src.tgz +0 -0
- data/ext/charlock_holmes/src/icu4c-52_1-src.tgz +0 -0
- data/lib/charlock_holmes.rb +6 -0
- data/lib/charlock_holmes/encoding_detector.rb +33 -0
- data/lib/charlock_holmes/string.rb +34 -0
- data/lib/charlock_holmes/version.rb +3 -0
- data/spec/converter_spec.rb +29 -0
- data/spec/encoding_detector_spec.rb +122 -0
- data/spec/fixtures/AnsiGraph.psm1 +0 -0
- data/spec/fixtures/TwigExtensionsDate.es.yml +8 -0
- data/spec/fixtures/cl-messagepack.lisp +264 -0
- data/spec/fixtures/core.rkt +254 -0
- data/spec/fixtures/hello_world +0 -0
- data/spec/fixtures/laholator.py +131 -0
- data/spec/fixtures/repl2.cljs +109 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/string_method_spec.rb +52 -0
- metadata +133 -0
Binary file
|
Binary file
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module CharlockHolmes
  # Ruby-level conveniences for the encoding detector. The instance methods
  # used here (detect, detect_all, strip_tags) are not defined in this file.
  class EncodingDetector
    # Predicate-style spelling of the strip_tags reader.
    alias :strip_tags? :strip_tags

    class << self
      # Detect the most likely encoding of a string.
      #
      # NOTE: A fresh CharlockHolmes::EncodingDetector is built on every call.
      #
      # str      - the String whose encoding should be detected
      # hint_enc - an optional String (like "UTF-8") naming an encoding that is
      #            passed to the charset detector as an additional hint
      #
      # Returns: a Hash with :encoding, :language, :type and :confidence
      def detect(str, hint_enc=nil)
        new.detect(str, hint_enc)
      end

      # Detect every encoding that could plausibly match a string.
      #
      # NOTE: A fresh CharlockHolmes::EncodingDetector is built on every call.
      #
      # str      - the String whose encoding should be detected
      # hint_enc - an optional String (like "UTF-8") naming an encoding that is
      #            passed to the charset detector as an additional hint
      #
      # Returns: an Array with zero or more Hashes,
      # each one of them with :encoding, :language, :type and :confidence
      def detect_all(str, hint_enc=nil)
        new.detect_all(str, hint_enc)
      end
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'charlock_holmes' unless defined? CharlockHolmes
|
2
|
+
|
3
|
+
class String
  # Attempt to detect the encoding of this string
  #
  # hint_enc - an optional String (like "UTF-8"), passed through to the
  #            detector as an encoding hint
  #
  # Returns: a Hash with :encoding, :language, :type and :confidence
  def detect_encoding(hint_enc=nil)
    detector = CharlockHolmes::EncodingDetector.new
    detector.detect(self, hint_enc)
  end

  # Attempt to detect the encoding of this string, and return
  # a list with all the possible encodings that match it.
  #
  # hint_enc - an optional String (like "UTF-8"), passed through to the
  #            detector as an encoding hint
  #
  # Returns: an Array with zero or more Hashes,
  # each one of them with :encoding, :language, :type and :confidence
  def detect_encodings(hint_enc=nil)
    detector = CharlockHolmes::EncodingDetector.new
    detector.detect_all(self, hint_enc)
  end

  # Feature-detect String#force_encoding instead of pattern-matching
  # RUBY_VERSION: the old /1.9/ check left detect_encoding! undefined on every
  # later Ruby release (and its unescaped dot could match unrelated versions).
  if method_defined?(:force_encoding)
    # Attempt to detect the encoding of this string
    # then set the encoding to what was detected ala `force_encoding`
    #
    # The string is left untouched when nothing was detected or when the
    # detection result carries no :encoding.
    #
    # Returns: self
    def detect_encoding!(hint_enc=nil)
      detected = detect_encoding(hint_enc)
      force_encoding(detected[:encoding]) if detected && detected[:encoding]
      self
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe CharlockHolmes::Converter do
  # Round-trips plain ASCII through UTF-16: the converted form must be larger
  # and different, and converting back must restore the original exactly.
  test 'is able to convert regular ascii content from ISO-8859-1 to UTF-16, and back again' do
    input = 'test'

    utf16 = CharlockHolmes::Converter.convert(input, 'ISO-8859-1', 'UTF-16')
    assert input.bytesize < utf16.bytesize
    assert input != utf16

    restored = CharlockHolmes::Converter.convert(utf16, 'UTF-16', 'ISO-8859-1')
    assert_equal input.bytesize, restored.bytesize
    assert_equal input, restored
  end

  # Same round trip, this time with multi-byte UTF-8 input.
  test 'is able to convert UTF-8 content from UTF-8 to UTF-16, and back again' do
    input = 'λ, λ, λ'

    utf16 = CharlockHolmes::Converter.convert(input, 'UTF-8', 'UTF-16')
    assert input.bytesize < utf16.bytesize
    assert input != utf16

    restored = CharlockHolmes::Converter.convert(utf16, 'UTF-16', 'UTF-8')
    assert_equal input.bytesize, restored.bytesize
    assert_equal input, restored
  end
end
|
@@ -0,0 +1,122 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'spec_helper'
|
4
|
+
|
5
|
+
describe CharlockHolmes::EncodingDetector do
  before :all do
    @detector = CharlockHolmes::EncodingDetector.new
  end

  # NOTE: the respond_to? checks below are wrapped in `assert`; previously
  # their results were discarded, so they could never fail.

  test 'has a class-level detect method' do
    assert CharlockHolmes::EncodingDetector.respond_to?(:detect)
    detected = CharlockHolmes::EncodingDetector.detect 'test'
    assert_equal 'ISO-8859-1', detected[:encoding]
  end

  test 'has a class-level detect method that accepts an encoding hint' do
    assert CharlockHolmes::EncodingDetector.respond_to?(:detect)
    detected = CharlockHolmes::EncodingDetector.detect 'test', 'UTF-8'
    assert_equal 'ISO-8859-1', detected[:encoding]
  end

  test 'has a class-level detect_all method' do
    assert CharlockHolmes::EncodingDetector.respond_to?(:detect_all)
    detected_list = CharlockHolmes::EncodingDetector.detect_all 'test'
    assert detected_list.is_a?(Array)

    encoding_list = detected_list.map {|d| d[:encoding]}.sort
    assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
  end

  test 'has a class-level detect_all method that accepts an encoding hint' do
    assert CharlockHolmes::EncodingDetector.respond_to?(:detect_all)
    detected_list = CharlockHolmes::EncodingDetector.detect_all 'test', 'UTF-8'
    assert detected_list.is_a?(Array)

    encoding_list = detected_list.map {|d| d[:encoding]}.sort
    assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
  end

  test 'has a detect method' do
    assert @detector.respond_to?(:detect)
    detected = @detector.detect 'test'
    assert_equal 'ISO-8859-1', detected[:encoding]
  end

  test 'has a detect method that accepts an encoding hint' do
    assert @detector.respond_to?(:detect)
    detected = @detector.detect 'test', 'UTF-8'
    assert_equal 'ISO-8859-1', detected[:encoding]
  end

  test 'has a detect_all method' do
    assert @detector.respond_to?(:detect_all)
    detected_list = @detector.detect_all 'test'
    assert detected_list.is_a?(Array)

    encoding_list = detected_list.map {|d| d[:encoding]}.sort
    assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
  end

  test 'has a detect_all method that accepts an encoding hint' do
    assert @detector.respond_to?(:detect_all)
    detected_list = @detector.detect_all 'test', 'UTF-8'
    assert detected_list.is_a?(Array)

    encoding_list = detected_list.map {|d| d[:encoding]}.sort
    assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
  end

  test 'has a strip_tags flag' do
    detector = CharlockHolmes::EncodingDetector.new
    detector.strip_tags = true
    assert detector.strip_tags

    detection = detector.detect "<div ascii_attribute='some more ascii'>λ, λ, λ</div>"
    assert_equal 'UTF-8', detection[:encoding]

    detector.strip_tags = false
    assert !detector.strip_tags

    detection = detector.detect "<div ascii_attribute='some more ascii'>λ, λ, λ</div>"
    assert_equal 'UTF-8', detection[:encoding]
  end

  test 'has a list of supported encodings' do
    assert CharlockHolmes::EncodingDetector.respond_to?(:supported_encodings)
    supported_encodings = CharlockHolmes::EncodingDetector.supported_encodings

    assert supported_encodings.is_a?(Array)
    assert supported_encodings.include?('UTF-8')
  end

  context 'encoding detection' do
    # Fixture file name, expected encoding (nil for binary), expected type.
    # Kept in a local rather than a constant: a constant assigned inside a
    # block would be defined at the top level and leak into other specs.
    fixtures = [
      ['repl2.cljs',                'ISO-8859-1', :text],
      ['core.rkt',                  'UTF-8',      :text],
      ['cl-messagepack.lisp',       'ISO-8859-1', :text],
      ['TwigExtensionsDate.es.yml', 'UTF-8',      :text],
      ['AnsiGraph.psm1',            'UTF-16LE',   :text],
      ['laholator.py',              'UTF-8',      :text],
      ['hello_world',               nil,          :binary]
    ]

    fixtures.each do |file, encoding, type|
      test "#{file} should be detected as #{encoding || 'binary'}" do
        path = File.expand_path "../fixtures/#{file}", __FILE__
        content = File.read path
        guessed = @detector.detect content

        assert_equal encoding, guessed[:encoding]
        assert_equal type, guessed[:type]

        # Only Rubies with encoding-aware strings can validate the guess.
        if content.respond_to?(:force_encoding) && guessed[:type] == :text
          content.force_encoding guessed[:encoding]
          assert content.valid_encoding?
        end
      end
    end
  end
end
|
Binary file
|
@@ -0,0 +1,8 @@
|
|
1
|
+
date.year: '%year% año|%year% años'
|
2
|
+
date.month: '%month% mes|%month% meses'
|
3
|
+
date.day: '%day% día|%day% días'
|
4
|
+
date.hour: '%hour% hora|%hour% horas'
|
5
|
+
date.minute: '%minute% minuto|%minute% minutos'
|
6
|
+
date.second: '%second% segundo|%second% segundos'
|
7
|
+
date.new: 'menos de un minuto'
|
8
|
+
date.and: ' y '
|
@@ -0,0 +1,264 @@
|
|
1
|
+
;;;; cl-messagepack.lisp
|
2
|
+
|
3
|
+
(in-package #:messagepack)
|
4
|
+
|
5
|
+
(declaim (optimize (debug 3)))
|
6
|
+
|
7
|
+
;; These helpers are called by SIGNED-UNSIGNED-CONVERTORS while it expands,
;; so they are made available at compile time as well as load time.
(eval-when (:compile-toplevel :load-toplevel :execute)
  (defun mkstr (&rest args)
    "Concatenate the aesthetic (PRINC-style) printed form of every ARG into one string."
    (with-output-to-string (out)
      (dolist (arg args)
        (princ arg out))))

  (defun mksymb (&rest args)
    "Intern, in the current package, the symbol named by MKSTR applied to ARGS."
    (intern (apply #'mkstr args))))
|
12
|
+
|
13
|
+
(defmacro signed-unsigned-convertors (size)
  "Define SB<size>->UB<size> and UB<size>->SB<size>, which reinterpret a
SIZE-bit integer between its signed and unsigned two's-complement readings."
  (let ((speed (if (< size 32) 3 0))
        (to-unsigned (mksymb 'sb size '-> 'ub size))
        (to-signed (mksymb 'ub size '-> 'sb size))
        (modulus (expt 2 size))
        (half (expt 2 (1- size))))
    `(progn
       (defun ,to-unsigned (sb)
         (declare (optimize (debug 0) (safety 0) (speed ,speed))
                  (type (integer ,(- half) ,(1- half)) sb))
         ;; Negative values map onto the upper half of the unsigned range.
         (if (minusp sb)
             (ldb (byte ,size 0) sb)
             sb))
       (defun ,to-signed (sb)
         (declare (optimize (debug 0) (safety 0) (speed ,speed))
                  (type (mod ,modulus) sb))
         ;; A set top bit means the value is negative: subtract 2^SIZE.
         (if (logbitp ,(1- size) sb)
             (- sb ,modulus)
             sb)))))
|
28
|
+
|
29
|
+
(signed-unsigned-convertors 8)
|
30
|
+
(signed-unsigned-convertors 16)
|
31
|
+
(signed-unsigned-convertors 32)
|
32
|
+
(signed-unsigned-convertors 64)
|
33
|
+
|
34
|
+
(defun write-hex (data)
  "Print the elements of the sequence DATA to standard output as two-digit
hexadecimal numbers, sixteen per line."
  (let ((row '()))
    (flet ((flush-row ()
             (format t "~{~2,'0x ~}~%" (reverse row))
             (setf row '())))
      (dotimes (index (length data))
        (push (elt data index) row)
        (when (= (length row) 16)
          (flush-row)))
      ;; Emit whatever is left over from an incomplete final row.
      (when row
        (flush-row)))))
|
45
|
+
|
46
|
+
(defun encode (data)
  "Encode DATA with ENCODE-STREAM and return the octets collected by
FLEXI-STREAMS:WITH-OUTPUT-TO-SEQUENCE."
  (flexi-streams:with-output-to-sequence (out)
    (encode-stream data out)))
|
49
|
+
|
50
|
+
(defun make-hash (data)
  "Build a hash table from DATA, a list whose entries are either dotted pairs
(KEY . VALUE) or lists (KEY VALUE ...); for a list entry the second element is
stored as the value."
  (let ((table (make-hash-table)))
    (dolist (entry data table)
      (let ((key (car entry))
            (tail (cdr entry)))
        (setf (gethash key table)
              (if (consp tail)
                  (car tail)
                  tail))))))
|
58
|
+
|
59
|
+
(defun is-byte-array (data-type)
  "Return true when DATA-TYPE is a vector whose element type is (UNSIGNED-BYTE 8)."
  (when (vectorp data-type)
    (equal (array-element-type data-type) '(unsigned-byte 8))))
|
62
|
+
|
63
|
+
(defun encode-stream (data stream)
  "Write the encoding of DATA to STREAM, dispatching on its type.
Clause order matters: strings and byte arrays are vectors too, and NIL and T
are symbols, so the more specific cases come first.
Signals an error for any object that matches none of the clauses."
  (typecase data
    (float (encode-float data stream))
    (number (encode-integer data stream))
    (null (write-byte #xc0 stream))
    ((eql t) (write-byte #xc3 stream))
    (string (encode-string data stream))
    ((satisfies is-byte-array) (encode-raw-bytes data stream))
    ((or cons vector) (encode-array data stream))
    (hash-table (encode-hash data stream))
    (symbol (encode-string (symbol-name data) stream))
    (t (error "Cannot encode data."))))
|
79
|
+
|
80
|
+
(defun encode-string (data stream)
  "Convert the string DATA to octets with BABEL:STRING-TO-OCTETS and write
them to STREAM via ENCODE-RAW-BYTES."
  (let ((octets (babel:string-to-octets data)))
    (encode-raw-bytes octets stream)))
|
82
|
+
|
83
|
+
#+sbcl
(defun sbcl-encode-float (data stream)
  "Write the float DATA to STREAM: type byte #xca plus 4 bytes for a
SINGLE-FLOAT, or #xcb plus 8 bytes for a DOUBLE-FLOAT. Always returns T."
  ;; SB-KERNEL:SINGLE-FLOAT-BITS and SB-KERNEL:DOUBLE-FLOAT-HIGH-BITS yield
  ;; *signed* 32-bit integers, so negative floats produce negative words.
  ;; STORE-BIG-ENDIAN only consumes positive magnitudes, hence the LDB to
  ;; obtain the unsigned 32-bit pattern first.
  (typecase data
    (single-float
     (write-byte #xca stream)
     (store-big-endian (ldb (byte 32 0) (sb-kernel:single-float-bits data))
                       stream 4))
    (double-float
     (write-byte #xcb stream)
     (store-big-endian (ldb (byte 32 0) (sb-kernel:double-float-high-bits data))
                       stream 4)
     (store-big-endian (sb-kernel:double-float-low-bits data) stream 4)))
  t)
|
92
|
+
|
93
|
+
(defun encode-float (data stream)
  "Write the float DATA to STREAM. Only SBCL is supported; every other
implementation signals an error."
  (declare (ignorable data stream))
  #+sbcl (sbcl-encode-float data stream)
  #-sbcl (error "No floating point support yet."))
|
96
|
+
|
97
|
+
(defun encode-each (data stream &optional (encoder #'encode-stream))
  "Call ENCODER on every element of DATA together with STREAM. For a hash
table each key is encoded immediately before its value. Signals an error when
DATA is neither a hash table, a vector nor a cons."
  (flet ((emit (item)
           (funcall encoder item stream)))
    (cond ((hash-table-p data)
           (maphash (lambda (key value)
                      (emit key)
                      (emit value))
                    data))
          ((or (vectorp data) (consp data))
           (mapc #'emit (coerce data 'list)))
          (t
           (error "Not sequence or hash table.")))))
|
108
|
+
|
109
|
+
(defun encode-sequence (data stream
                        short-prefix short-length
                        typecode-16 typecode-32
                        &optional (encoder #'encode-stream))
  "Write a length header for DATA to STREAM, then its elements via ENCODE-EACH.
Lengths up to SHORT-LENGTH are folded into a single byte (SHORT-PREFIX + length);
lengths up to 65535 use TYPECODE-16 and a 2-byte length; lengths below 2^32 use
TYPECODE-32 and a 4-byte length. Larger lengths write nothing."
  (let ((count (if (hash-table-p data)
                   (hash-table-count data)
                   (length data))))
    (flet ((write-elements ()
             (encode-each data stream encoder)))
      (cond ((<= 0 count short-length)
             (write-byte (+ short-prefix count) stream)
             (write-elements))
            ((<= 0 count 65535)
             (write-byte typecode-16 stream)
             (store-big-endian count stream 2)
             (write-elements))
            ((<= 0 count (1- (expt 2 32)))
             (write-byte typecode-32 stream)
             (store-big-endian count stream 4)
             (write-elements))))))
|
127
|
+
|
128
|
+
(defun encode-hash (data stream)
  "Write the hash table DATA to STREAM as a map: a single-byte header for up
to 15 entries, otherwise a map16 (#xde) or map32 (#xdf) header."
  ;; #xde/#xdf are the type bytes DECODE-STREAM dispatches to DECODE-MAP.
  ;; The previous #xdc/#xdd are the array16/array32 codes, so any table with
  ;; more than 15 entries was written out as an array.
  (encode-sequence data stream #x80 15 #xde #xdf))
|
130
|
+
|
131
|
+
(defun encode-array (data stream)
  "Write the list or vector DATA to STREAM as an array: a single-byte header
(#x90 + length) for up to 15 elements, otherwise type byte #xdc or #xdd."
  (let ((short-prefix #x90)
        (short-length 15))
    (encode-sequence data stream short-prefix short-length #xdc #xdd)))
|
133
|
+
|
134
|
+
(defun encode-raw-bytes (data stream)
  "Write the octets in DATA to STREAM as a raw sequence: a single-byte header
(#xa0 + length) for up to 31 octets, otherwise type byte #xda or #xdb. Each
octet is emitted with WRITE-BYTE rather than being encoded recursively."
  (let ((short-prefix #xa0)
        (short-length 31))
    (encode-sequence data stream short-prefix short-length #xda #xdb #'write-byte)))
|
136
|
+
|
137
|
+
(defun encode-integer (data stream)
  "Write the integer DATA to STREAM using the smallest form the clauses below
allow. Non-negative values use the unsigned forms; negative values use the
signed forms. Signals an error when DATA fits in none of them."
  (cond ((<= 0 data 127)
         (write-byte data stream))
        ((<= -32 data -1)
         (write-byte (sb8->ub8 data) stream))
        ((<= 0 data 255)
         (write-byte #xcc stream)
         (write-byte data stream))
        ((<= 0 data 65535)
         (write-byte #xcd stream)
         (store-big-endian data stream 2))
        ((<= 0 data (1- (expt 2 32)))
         (write-byte #xce stream)
         (store-big-endian data stream 4))
        ((<= 0 data (1- (expt 2 64)))
         (write-byte #xcf stream)
         (store-big-endian data stream 8))
        ((<= -128 data 127)
         (write-byte #xd0 stream)
         (write-byte (sb8->ub8 data) stream))
        ;; The multi-byte signed forms must go through STORE-BIG-ENDIAN: the
        ;; unsigned reinterpretation of a 16/32/64-bit value does not fit in
        ;; the single octet that WRITE-BYTE emits.
        ((<= -32768 data 32767)
         (write-byte #xd1 stream)
         (store-big-endian (sb16->ub16 data) stream 2))
        ((<= (- (expt 2 31)) data (1- (expt 2 31)))
         (write-byte #xd2 stream)
         (store-big-endian (sb32->ub32 data) stream 4))
        ((<= (- (expt 2 63)) data (1- (expt 2 63)))
         (write-byte #xd3 stream)
         (store-big-endian (sb64->ub64 data) stream 8))
        (t (error "Integer too large or too small."))))
|
165
|
+
|
166
|
+
(defun store-big-endian (number stream byte-count)
  "Write NUMBER to STREAM as exactly BYTE-COUNT octets, most significant first.
Non-negative numbers are written as-is; negative numbers are written in
two's complement (previously they were silently written as all zeros).
Signals an error, before writing anything, when NUMBER does not fit."
  (let ((bits (* 8 byte-count)))
    (unless (<= (- (expt 2 (1- bits))) number (1- (expt 2 bits)))
      (error "Number too large."))
    ;; LDB on a negative integer reads its two's-complement representation.
    (loop for shift from (- bits 8) downto 0 by 8
          do (write-byte (ldb (byte 8 shift) number) stream))))
|
180
|
+
|
181
|
+
(defun decode (byte-array)
  "Decode one object from BYTE-ARRAY by reading it through an in-memory
flexi-streams input stream with DECODE-STREAM."
  (flexi-streams:with-input-from-sequence (in byte-array)
    (decode-stream in)))
|
184
|
+
|
185
|
+
(defun decode-stream (stream)
  "Read one encoded object from STREAM and return it.
The first byte selects the type: ranges identified by their high bits are
tested first, then the exact type bytes. Both #xc0 and #xc2 decode to NIL.
Signals an error for a type byte that no clause recognises (previously such
bytes silently decoded to NIL)."
  (let ((byte (read-byte stream)))
    (cond ((= 0 (ldb (byte 1 7) byte))          ; #x00-#x7f: the value itself
           byte)
          ((= 7 (ldb (byte 3 5) byte))          ; #xe0-#xff: small negative
           (ub8->sb8 byte))
          ((= 5 (ldb (byte 3 5) byte))          ; #xa0-#xbf: short raw bytes
           (decode-raw-sequence (ldb (byte 5 0) byte) stream))
          ((= 9 (ldb (byte 4 4) byte))          ; #x90-#x9f: short array
           (decode-array (- byte #x90) stream))
          ((= 8 (ldb (byte 4 4) byte))          ; #x80-#x8f: short map
           (decode-map (- byte #x80) stream))
          (t
           (case byte
             (#xc0 nil)
             (#xc2 nil)
             (#xc3 t)
             (#xcc (read-byte stream))
             (#xcd (load-big-endian stream 2))
             (#xce (load-big-endian stream 4))
             (#xcf (load-big-endian stream 8))
             (#xd0 (ub8->sb8 (read-byte stream)))
             (#xd1 (ub16->sb16 (load-big-endian stream 2)))
             (#xd2 (ub32->sb32 (load-big-endian stream 4)))
             (#xd3 (ub64->sb64 (load-big-endian stream 8)))
             ;; SB-KERNEL:MAKE-SINGLE-FLOAT and the high word of
             ;; SB-KERNEL:MAKE-DOUBLE-FLOAT take *signed* 32-bit integers,
             ;; so the unsigned word read from the stream is reinterpreted
             ;; first; otherwise negative floats cannot be decoded.
             (#xca
              #+sbcl (sb-kernel:make-single-float
                      (ub32->sb32 (load-big-endian stream 4)))
              #-sbcl (error "No floating point support yet."))
             (#xcb
              #+sbcl (let* ((high (ub32->sb32 (load-big-endian stream 4)))
                            (low (load-big-endian stream 4)))
                       (sb-kernel:make-double-float high low))
              #-sbcl (error "No floating point support yet."))
             (#xda (decode-raw-sequence (load-big-endian stream 2) stream))
             (#xdb (decode-raw-sequence (load-big-endian stream 4) stream))
             (#xdc (decode-array (load-big-endian stream 2) stream))
             (#xdd (decode-array (load-big-endian stream 4) stream))
             (#xde (decode-map (load-big-endian stream 2) stream))
             (#xdf (decode-map (load-big-endian stream 4) stream))
             (otherwise
              (error "Cannot decode type byte #x~2,'0x." byte)))))))
|
238
|
+
|
239
|
+
(defun decode-map (length stream)
  "Read LENGTH key/value pairs from STREAM with DECODE-STREAM and return them
in a fresh EQUAL hash table. Each key is read before its value."
  (let ((table (make-hash-table :test #'equal)))
    (loop repeat length
          do (let* ((key (decode-stream stream))
                    (value (decode-stream stream)))
               (setf (gethash key table) value)))
    table))
|
246
|
+
|
247
|
+
(defun decode-array (length stream)
  "Read LENGTH objects from STREAM with DECODE-STREAM and return them, in
order, as a fresh vector."
  (let ((items (make-array length)))
    (loop for index from 0 below length
          do (setf (aref items index) (decode-stream stream)))
    items))
|
252
|
+
|
253
|
+
(defun decode-raw-sequence (length stream)
  "Read LENGTH octets from STREAM and return them converted to a string with
BABEL:OCTETS-TO-STRING."
  (let ((octets (make-array length :element-type '(mod 256))))
    (read-sequence octets stream)
    (babel:octets-to-string octets)))
|
257
|
+
|
258
|
+
(defun load-big-endian (stream byte-count)
  "Read BYTE-COUNT bytes from STREAM and combine them, most significant
first, into a non-negative integer."
  (let ((value 0))
    (dotimes (index byte-count value)
      (declare (ignorable index))
      ;; Shift what has been read so far up one byte, then add the next byte.
      (setf value (+ (ash value 8)
                     (read-byte stream))))))
|