charlock_holmes 0.7.3 → 0.7.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/charlock_holmes/common.h +4 -4
- data/ext/charlock_holmes/encoding_detector.c +18 -0
- data/ext/charlock_holmes/extconf.rb +1 -0
- data/lib/charlock_holmes/version.rb +1 -1
- metadata +13 -46
- data/.gitignore +0 -9
- data/Gemfile +0 -3
- data/MIT-LICENSE +0 -20
- data/README.md +0 -111
- data/Rakefile +0 -16
- data/benchmark/detection.rb +0 -39
- data/benchmark/test.txt +0 -693
- data/charlock_holmes.gemspec +0 -27
- data/test/converter_test.rb +0 -48
- data/test/encoding_detector_test.rb +0 -145
- data/test/fixtures/AnsiGraph.psm1 +0 -0
- data/test/fixtures/ISO-2022-KR.txt +0 -43
- data/test/fixtures/TwigExtensionsDate.es.yml +0 -8
- data/test/fixtures/cl-messagepack.lisp +0 -264
- data/test/fixtures/core.rkt +0 -254
- data/test/fixtures/foo.pdf +0 -0
- data/test/fixtures/hello_world +0 -0
- data/test/fixtures/laholator.py +0 -131
- data/test/fixtures/octocat.ai +8 -4441
- data/test/fixtures/octocat.gif +0 -0
- data/test/fixtures/octocat.jpg +0 -0
- data/test/fixtures/octocat.png +0 -0
- data/test/fixtures/octocat.psd +0 -0
- data/test/fixtures/repl2.cljs +0 -109
- data/test/fixtures/sierpinski.ps +0 -41
- data/test/fixtures/utf16be.html +0 -0
- data/test/fixtures/utf32be.html +0 -0
- data/test/fixtures/utf32le.html +0 -0
- data/test/fixtures/utf8.html +0 -240
- data/test/fixtures/vimrc +0 -596
- data/test/helper.rb +0 -26
- data/test/string_methods_test.rb +0 -73
- data/test/transliterator_test.rb +0 -123
data/charlock_holmes.gemspec
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
require './lib/charlock_holmes/version' unless defined? CharlockHolmes::VERSION
|
4
|
-
|
5
|
-
Gem::Specification.new do |s|
|
6
|
-
s.name = %q{charlock_holmes}
|
7
|
-
s.license = "MIT"
|
8
|
-
s.version = CharlockHolmes::VERSION
|
9
|
-
s.authors = ["Brian Lopez", "Vicent Martí"]
|
10
|
-
s.date = Time.now.utc.strftime("%Y-%m-%d")
|
11
|
-
s.email = %q{seniorlopez@gmail.com}
|
12
|
-
s.extensions = ["ext/charlock_holmes/extconf.rb"]
|
13
|
-
s.files = `git ls-files`.split("\n")
|
14
|
-
s.homepage = %q{https://github.com/brianmario/charlock_holmes}
|
15
|
-
s.rdoc_options = ["--charset=UTF-8"]
|
16
|
-
s.require_paths = ["lib"]
|
17
|
-
s.rubygems_version = %q{1.4.2}
|
18
|
-
s.summary = %q{Character encoding detection, brought to you by ICU}
|
19
|
-
s.description = "charlock_holmes provides binary and text detection as well as text transcoding using libicu"
|
20
|
-
s.test_files = `git ls-files spec`.split("\n")
|
21
|
-
|
22
|
-
# tests
|
23
|
-
s.add_development_dependency 'rake-compiler', ">= 0.7.5"
|
24
|
-
s.add_development_dependency 'minitest'
|
25
|
-
# benchmarks
|
26
|
-
s.add_development_dependency 'chardet'
|
27
|
-
end
|
data/test/converter_test.rb
DELETED
@@ -1,48 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
require File.expand_path("../helper", __FILE__)
|
3
|
-
|
4
|
-
class ConverterTest < MiniTest::Test
|
5
|
-
def test_convert_ascii_from_iso859_1_to_utf16_and_back
|
6
|
-
input = 'test'
|
7
|
-
|
8
|
-
output = CharlockHolmes::Converter.convert input, 'ISO-8859-1', 'UTF-16'
|
9
|
-
assert input.bytesize < output.bytesize
|
10
|
-
assert input != output
|
11
|
-
|
12
|
-
output = CharlockHolmes::Converter.convert output, 'UTF-16', 'ISO-8859-1'
|
13
|
-
assert input.bytesize == output.bytesize
|
14
|
-
assert input == output
|
15
|
-
end
|
16
|
-
|
17
|
-
def test_convert_utf8_to_utf16_and_back
|
18
|
-
input = 'λ, λ, λ'
|
19
|
-
|
20
|
-
output = CharlockHolmes::Converter.convert input, 'UTF-8', 'UTF-16'
|
21
|
-
assert input.bytesize < output.bytesize
|
22
|
-
assert input != output
|
23
|
-
|
24
|
-
output = CharlockHolmes::Converter.convert output, 'UTF-16', 'UTF-8'
|
25
|
-
assert input.bytesize == output.bytesize
|
26
|
-
assert input == output
|
27
|
-
end
|
28
|
-
|
29
|
-
def test_params_must_be_strings
|
30
|
-
assert_raises TypeError do
|
31
|
-
CharlockHolmes::Converter.convert nil, 'UTF-8', 'UTF-16'
|
32
|
-
end
|
33
|
-
|
34
|
-
assert_raises TypeError do
|
35
|
-
CharlockHolmes::Converter.convert 'lol', nil, 'UTF-16'
|
36
|
-
end
|
37
|
-
|
38
|
-
assert_raises TypeError do
|
39
|
-
CharlockHolmes::Converter.convert 'lol', 'UTF-8', nil
|
40
|
-
end
|
41
|
-
|
42
|
-
begin
|
43
|
-
CharlockHolmes::Converter.convert 'lol', 'UTF-8', 'UTF-16'
|
44
|
-
rescue Exception => e
|
45
|
-
assert_nil e, "#{e.class.name} raised, expected nothing"
|
46
|
-
end
|
47
|
-
end
|
48
|
-
end
|
data/test/encoding_detector_test.rb
DELETED
@@ -1,145 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
require File.expand_path("../helper", __FILE__)
|
3
|
-
|
4
|
-
class EncodingDetectorTest < MiniTest::Test
|
5
|
-
def setup
|
6
|
-
@detector = CharlockHolmes::EncodingDetector.new
|
7
|
-
end
|
8
|
-
|
9
|
-
def test_has_class_level_detect_method
|
10
|
-
CharlockHolmes::EncodingDetector.respond_to? :detect
|
11
|
-
detected = CharlockHolmes::EncodingDetector.detect 'test'
|
12
|
-
assert_equal 'ISO-8859-1', detected[:encoding]
|
13
|
-
end
|
14
|
-
|
15
|
-
def test_class_level_detect_accepts_encoding_hint
|
16
|
-
CharlockHolmes::EncodingDetector.respond_to? :detect
|
17
|
-
detected = CharlockHolmes::EncodingDetector.detect 'test', 'UTF-8'
|
18
|
-
assert_equal 'ISO-8859-1', detected[:encoding]
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_has_class_level_detect_all_method
|
22
|
-
CharlockHolmes::EncodingDetector.respond_to? :detect_all
|
23
|
-
detected_list = CharlockHolmes::EncodingDetector.detect_all 'test'
|
24
|
-
assert detected_list.is_a? Array
|
25
|
-
|
26
|
-
encoding_list = detected_list.map {|d| d[:encoding]}.sort
|
27
|
-
assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
|
28
|
-
end
|
29
|
-
|
30
|
-
def test_class_level_detect_all_method_accepts_encoding_hint
|
31
|
-
CharlockHolmes::EncodingDetector.respond_to? :detect_all
|
32
|
-
detected_list = CharlockHolmes::EncodingDetector.detect_all 'test', 'UTF-8'
|
33
|
-
assert detected_list.is_a? Array
|
34
|
-
|
35
|
-
encoding_list = detected_list.map {|d| d[:encoding]}.sort
|
36
|
-
assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
|
37
|
-
end
|
38
|
-
|
39
|
-
def test_has_detect_method
|
40
|
-
@detector.respond_to? :detect
|
41
|
-
detected = @detector.detect 'test'
|
42
|
-
assert_equal 'ISO-8859-1', detected[:encoding]
|
43
|
-
end
|
44
|
-
|
45
|
-
def test_detect_accepts_encoding_hint
|
46
|
-
@detector.respond_to? :detect
|
47
|
-
detected = @detector.detect 'test', 'UTF-8'
|
48
|
-
assert_equal 'ISO-8859-1', detected[:encoding]
|
49
|
-
end
|
50
|
-
|
51
|
-
def test_has_detect_all_method
|
52
|
-
@detector.respond_to? :detect_all
|
53
|
-
detected_list = @detector.detect_all 'test'
|
54
|
-
assert detected_list.is_a? Array
|
55
|
-
|
56
|
-
encoding_list = detected_list.map {|d| d[:encoding]}.sort
|
57
|
-
assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
|
58
|
-
end
|
59
|
-
|
60
|
-
def test_detect_all_accepts_encoding_hint
|
61
|
-
@detector.respond_to? :detect_all
|
62
|
-
detected_list = @detector.detect_all 'test', 'UTF-8'
|
63
|
-
assert detected_list.is_a? Array
|
64
|
-
|
65
|
-
encoding_list = detected_list.map {|d| d[:encoding]}.sort
|
66
|
-
assert_equal ['ISO-8859-1', 'ISO-8859-2', 'UTF-8'], encoding_list
|
67
|
-
end
|
68
|
-
|
69
|
-
def test_strip_tags_flag
|
70
|
-
detector = CharlockHolmes::EncodingDetector.new
|
71
|
-
detector.strip_tags = true
|
72
|
-
assert detector.strip_tags
|
73
|
-
|
74
|
-
detection = detector.detect "<div ascii_attribute='some more ascii'>λ, λ, λ</div>"
|
75
|
-
assert_equal 'UTF-8', detection[:encoding]
|
76
|
-
|
77
|
-
detector.strip_tags = false
|
78
|
-
assert !detector.strip_tags
|
79
|
-
|
80
|
-
detection = detector.detect "<div ascii_attribute='some more ascii'>λ, λ, λ</div>"
|
81
|
-
assert_equal 'UTF-8', detection[:encoding]
|
82
|
-
end
|
83
|
-
|
84
|
-
def test_has_list_of_supported_encodings
|
85
|
-
CharlockHolmes::EncodingDetector.respond_to? :supported_encodings
|
86
|
-
supported_encodings = CharlockHolmes::EncodingDetector.supported_encodings
|
87
|
-
|
88
|
-
assert supported_encodings.is_a?(Array)
|
89
|
-
assert supported_encodings.include? 'UTF-8'
|
90
|
-
assert supported_encodings.include? 'windows-1250'
|
91
|
-
assert supported_encodings.include? 'windows-1252'
|
92
|
-
assert supported_encodings.include? 'windows-1253'
|
93
|
-
assert supported_encodings.include? 'windows-1254'
|
94
|
-
assert supported_encodings.include? 'windows-1255'
|
95
|
-
end
|
96
|
-
|
97
|
-
def test_returns_a_ruby_compatible_encoding_name
|
98
|
-
detected = @detector.detect 'test'
|
99
|
-
assert_equal 'ISO-8859-1', detected[:encoding]
|
100
|
-
assert_equal 'ISO-8859-1', detected[:ruby_encoding]
|
101
|
-
|
102
|
-
not_compat_txt = fixture("ISO-2022-KR.txt").read
|
103
|
-
detected = @detector.detect not_compat_txt
|
104
|
-
assert_equal 'ISO-2022-KR', detected[:encoding]
|
105
|
-
assert_equal 'binary', detected[:ruby_encoding]
|
106
|
-
end
|
107
|
-
|
108
|
-
MAPPING = [
|
109
|
-
['repl2.cljs', 'ISO-8859-1', :text],
|
110
|
-
['cl-messagepack.lisp', 'ISO-8859-1', :text],
|
111
|
-
['sierpinski.ps', 'ISO-8859-1', :text],
|
112
|
-
['core.rkt', 'UTF-8', :text],
|
113
|
-
['TwigExtensionsDate.es.yml', 'UTF-8', :text],
|
114
|
-
['laholator.py', 'UTF-8', :text],
|
115
|
-
['vimrc', 'UTF-8', :text],
|
116
|
-
['AnsiGraph.psm1', 'UTF-16LE', :text],
|
117
|
-
['utf16be.html', 'UTF-16BE', :text],
|
118
|
-
['utf32le.html', 'UTF-32LE', :text],
|
119
|
-
['utf32be.html', 'UTF-32BE', :text],
|
120
|
-
['hello_world', nil, :binary],
|
121
|
-
['octocat.png', nil, :binary],
|
122
|
-
['octocat.jpg', nil, :binary],
|
123
|
-
['octocat.psd', nil, :binary],
|
124
|
-
['octocat.gif', nil, :binary],
|
125
|
-
['octocat.ai', nil, :binary],
|
126
|
-
['foo.pdf', nil, :binary],
|
127
|
-
]
|
128
|
-
|
129
|
-
def test_detection_works_as_expected
|
130
|
-
MAPPING.each do |mapping|
|
131
|
-
file, encoding, type = mapping
|
132
|
-
|
133
|
-
content = fixture(file).read
|
134
|
-
guessed = @detector.detect content
|
135
|
-
|
136
|
-
assert_equal encoding, guessed[:encoding]
|
137
|
-
assert_equal type, guessed[:type]
|
138
|
-
|
139
|
-
if content.respond_to?(:force_encoding) && guessed[:type] == :text
|
140
|
-
content.force_encoding guessed[:encoding]
|
141
|
-
assert content.valid_encoding?
|
142
|
-
end
|
143
|
-
end
|
144
|
-
end
|
145
|
-
end
|
data/test/fixtures/AnsiGraph.psm1
DELETED
Binary file
|
data/test/fixtures/ISO-2022-KR.txt
DELETED
@@ -1,43 +0,0 @@
|
|
1
|
-
$)C#
|
2
|
-
# Out-AnsiGraph.psm1
|
3
|
-
# Author: xcud
|
4
|
-
# History:
|
5
|
-
# v0.1 September 21, 2009 initial version
|
6
|
-
#
|
7
|
-
# PS Example> ps | select -first 5 | sort -property VM |
|
8
|
-
# Out-AnsiGraph ProcessName, VM
|
9
|
-
# AEADISRV 14508032
|
10
|
-
# audiodg 50757632
|
11
|
-
# conhost 73740288
|
12
|
-
# AppleMobileDeviceService 92061696
|
13
|
-
# btdna 126443520
|
14
|
-
#
|
15
|
-
function Out-AnsiGraph($Parameter1=$null) {
|
16
|
-
BEGIN {
|
17
|
-
$q = new-object Collections.queue
|
18
|
-
$max = 0; $namewidth = 0;
|
19
|
-
}
|
20
|
-
|
21
|
-
PROCESS {
|
22
|
-
if($_) {
|
23
|
-
$name = $_.($Parameter1[0]);
|
24
|
-
$val = $_.($Parameter1[1])
|
25
|
-
if($max -lt $val) { $max = $val}
|
26
|
-
if($namewidth -lt $name.length) {
|
27
|
-
$namewidth = $name.length }
|
28
|
-
$q.enqueue(@($name, $val))
|
29
|
-
}
|
30
|
-
}
|
31
|
-
|
32
|
-
END {
|
33
|
-
$q | %{
|
34
|
-
$graph = ""; 0..($_[1]/$max*20) |
|
35
|
-
%{ $graph += "" }
|
36
|
-
$name = "{0,$namewidth}" -f $_[0]
|
37
|
-
"$name $graph " + $_[1]
|
38
|
-
}
|
39
|
-
|
40
|
-
}
|
41
|
-
}
|
42
|
-
|
43
|
-
Export-ModuleMember Out-AnsiGraph
|
data/test/fixtures/TwigExtensionsDate.es.yml
DELETED
@@ -1,8 +0,0 @@
|
|
1
|
-
date.year: '%year% año|%year% años'
|
2
|
-
date.month: '%month% mes|%month% meses'
|
3
|
-
date.day: '%day% día|%day% días'
|
4
|
-
date.hour: '%hour% hora|%hour% horas'
|
5
|
-
date.minute: '%minute% minuto|%minute% minutos'
|
6
|
-
date.second: '%second% segundo|%second% segundos'
|
7
|
-
date.new: 'menos de un minuto'
|
8
|
-
date.and: ' y '
|
data/test/fixtures/cl-messagepack.lisp
DELETED
@@ -1,264 +0,0 @@
|
|
1
|
-
;;;; cl-messagepack.lisp
|
2
|
-
|
3
|
-
(in-package #:messagepack)
|
4
|
-
|
5
|
-
(declaim (optimize (debug 3)))
|
6
|
-
|
7
|
-
(eval-when (:compile-toplevel :load-toplevel :execute)
|
8
|
-
(defun mkstr (&rest args)
|
9
|
-
(format nil "~{~a~}" args))
|
10
|
-
(defun mksymb (&rest args)
|
11
|
-
(intern (apply #'mkstr args))))
|
12
|
-
|
13
|
-
(defmacro signed-unsigned-convertors (size)
|
14
|
-
(let ((speed (if (< size 32) 3 0)))
|
15
|
-
`(progn
|
16
|
-
(defun ,(mksymb 'sb size '-> 'ub size) (sb)
|
17
|
-
(declare (optimize (debug 0) (safety 0) (speed ,speed))
|
18
|
-
(type (integer ,(- (expt 2 (1- size))) ,(1- (expt 2 (1- size)))) sb))
|
19
|
-
(if (< sb 0)
|
20
|
-
(ldb (byte ,size 0) sb)
|
21
|
-
sb))
|
22
|
-
(defun ,(mksymb 'ub size '-> 'sb size) (sb)
|
23
|
-
(declare (optimize (debug 0) (safety 0) (speed ,speed))
|
24
|
-
(type (mod ,(expt 2 size)) sb))
|
25
|
-
(if (logbitp (1- ,size) sb)
|
26
|
-
(- (1+ (logxor (1- (expt 2 ,size)) sb)))
|
27
|
-
sb)))))
|
28
|
-
|
29
|
-
(signed-unsigned-convertors 8)
|
30
|
-
(signed-unsigned-convertors 16)
|
31
|
-
(signed-unsigned-convertors 32)
|
32
|
-
(signed-unsigned-convertors 64)
|
33
|
-
|
34
|
-
(defun write-hex (data)
|
35
|
-
(let (line)
|
36
|
-
(loop
|
37
|
-
for i from 0 to (1- (length data))
|
38
|
-
do (push (elt data i) line)
|
39
|
-
when (= (length line) 16)
|
40
|
-
do
|
41
|
-
(format t "~{~2,'0x ~}~%" (nreverse line))
|
42
|
-
(setf line nil))
|
43
|
-
(when line
|
44
|
-
(format t "~{~2,'0x ~}~%" (nreverse line)))))
|
45
|
-
|
46
|
-
(defun encode (data)
|
47
|
-
(flexi-streams:with-output-to-sequence (stream)
|
48
|
-
(encode-stream data stream)))
|
49
|
-
|
50
|
-
(defun make-hash (data)
|
51
|
-
(let ((result (make-hash-table)))
|
52
|
-
(dolist (kv data)
|
53
|
-
(cond ((consp (cdr kv))
|
54
|
-
(setf (gethash (first kv) result) (second kv)))
|
55
|
-
(t
|
56
|
-
(setf (gethash (car kv) result) (cdr kv)))))
|
57
|
-
result))
|
58
|
-
|
59
|
-
(defun is-byte-array (data-type)
|
60
|
-
(and (vectorp data-type)
|
61
|
-
(equal '(unsigned-byte 8) (array-element-type data-type))))
|
62
|
-
|
63
|
-
(defun encode-stream (data stream)
|
64
|
-
(cond ((floatp data) (encode-float data stream))
|
65
|
-
((numberp data) (encode-integer data stream))
|
66
|
-
((null data) (write-byte #xc0 stream))
|
67
|
-
((eq data t) (write-byte #xc3 stream))
|
68
|
-
((stringp data)
|
69
|
-
(encode-string data stream))
|
70
|
-
((is-byte-array data)
|
71
|
-
(encode-raw-bytes data stream))
|
72
|
-
((or (consp data) (vectorp data))
|
73
|
-
(encode-array data stream))
|
74
|
-
((hash-table-p data)
|
75
|
-
(encode-hash data stream))
|
76
|
-
((symbolp data)
|
77
|
-
(encode-string (symbol-name data) stream))
|
78
|
-
(t (error "Cannot encode data."))))
|
79
|
-
|
80
|
-
(defun encode-string (data stream)
|
81
|
-
(encode-raw-bytes (babel:string-to-octets data) stream))
|
82
|
-
|
83
|
-
#+sbcl (defun sbcl-encode-float (data stream)
|
84
|
-
(cond ((equal (type-of data) 'single-float)
|
85
|
-
(write-byte #xca stream)
|
86
|
-
(store-big-endian (sb-kernel:single-float-bits data) stream 4))
|
87
|
-
((equal (type-of data) 'double-float)
|
88
|
-
(write-byte #xcb stream)
|
89
|
-
(store-big-endian (sb-kernel:double-float-high-bits data) stream 4)
|
90
|
-
(store-big-endian (sb-kernel:double-float-low-bits data) stream 4)))
|
91
|
-
t)
|
92
|
-
|
93
|
-
(defun encode-float (data stream)
|
94
|
-
(or #+sbcl (sbcl-encode-float data stream)
|
95
|
-
#-(or sbcl) (error "No floating point support yet.")))
|
96
|
-
|
97
|
-
(defun encode-each (data stream &optional (encoder #'encode-stream))
|
98
|
-
(cond ((hash-table-p data)
|
99
|
-
(maphash (lambda (key value)
|
100
|
-
(funcall encoder key stream)
|
101
|
-
(funcall encoder value stream))
|
102
|
-
data))
|
103
|
-
((or (vectorp data) (consp data))
|
104
|
-
(mapc (lambda (subdata)
|
105
|
-
(funcall encoder subdata stream))
|
106
|
-
(coerce data 'list)))
|
107
|
-
(t (error "Not sequence or hash table."))))
|
108
|
-
|
109
|
-
(defun encode-sequence (data stream
|
110
|
-
short-prefix short-length
|
111
|
-
typecode-16 typecode-32
|
112
|
-
&optional (encoder #'encode-stream))
|
113
|
-
(let ((len (if (hash-table-p data)
|
114
|
-
(hash-table-count data)
|
115
|
-
(length data))))
|
116
|
-
(cond ((<= 0 len short-length)
|
117
|
-
(write-byte (+ short-prefix len) stream)
|
118
|
-
(encode-each data stream encoder))
|
119
|
-
((<= 0 len 65535)
|
120
|
-
(write-byte typecode-16 stream)
|
121
|
-
(store-big-endian len stream 2)
|
122
|
-
(encode-each data stream encoder))
|
123
|
-
((<= 0 len (1- (expt 2 32)))
|
124
|
-
(write-byte typecode-32 stream)
|
125
|
-
(store-big-endian len stream 4)
|
126
|
-
(encode-each data stream encoder)))))
|
127
|
-
|
128
|
-
(defun encode-hash (data stream)
|
129
|
-
(encode-sequence data stream #x80 15 #xdc #xdd))
|
130
|
-
|
131
|
-
(defun encode-array (data stream)
|
132
|
-
(encode-sequence data stream #x90 15 #xdc #xdd))
|
133
|
-
|
134
|
-
(defun encode-raw-bytes (data stream)
|
135
|
-
(encode-sequence data stream #xa0 31 #xda #xdb #'write-byte))
|
136
|
-
|
137
|
-
(defun encode-integer (data stream)
|
138
|
-
(cond ((<= 0 data 127) (write-byte data stream))
|
139
|
-
((<= -32 data -1) (write-byte (sb8->ub8 data) stream))
|
140
|
-
((<= 0 data 255)
|
141
|
-
(write-byte #xcc stream)
|
142
|
-
(write-byte data stream))
|
143
|
-
((<= 0 data 65535)
|
144
|
-
(write-byte #xcd stream)
|
145
|
-
(store-big-endian data stream 2))
|
146
|
-
((<= 0 data (1- (expt 2 32)))
|
147
|
-
(write-byte #xce stream)
|
148
|
-
(store-big-endian data stream 4))
|
149
|
-
((<= 0 data (1- (expt 2 64)))
|
150
|
-
(write-byte #xcf stream)
|
151
|
-
(store-big-endian data stream 8))
|
152
|
-
((<= -128 data 127)
|
153
|
-
(write-byte #xd0 stream)
|
154
|
-
(write-byte (sb8->ub8 data) stream))
|
155
|
-
((<= -32768 data 32767)
|
156
|
-
(write-byte #xd1 stream)
|
157
|
-
(write-byte (sb16->ub16 data) stream))
|
158
|
-
((<= (- (expt 2 31)) data (1- (expt 2 31)))
|
159
|
-
(write-byte #xd2 stream)
|
160
|
-
(write-byte (sb32->ub32 data) stream))
|
161
|
-
((<= (- (expt 2 63)) data (1- (expt 2 63)))
|
162
|
-
(write-byte #xd3 stream)
|
163
|
-
(write-byte (sb64->ub64 data) stream))
|
164
|
-
(t (error "Integer too large or too small."))))
|
165
|
-
|
166
|
-
(defun store-big-endian (number stream byte-count)
|
167
|
-
(let (byte-list)
|
168
|
-
(loop
|
169
|
-
while (> number 0)
|
170
|
-
do
|
171
|
-
(push (rem number 256)
|
172
|
-
byte-list)
|
173
|
-
(setf number (ash number -8)))
|
174
|
-
(loop
|
175
|
-
while (< (length byte-list) byte-count)
|
176
|
-
do (push 0 byte-list))
|
177
|
-
(when (> (length byte-list) byte-count)
|
178
|
-
(error "Number too large."))
|
179
|
-
(write-sequence byte-list stream)))
|
180
|
-
|
181
|
-
(defun decode (byte-array)
|
182
|
-
(flexi-streams:with-input-from-sequence (stream byte-array)
|
183
|
-
(decode-stream stream)))
|
184
|
-
|
185
|
-
(defun decode-stream (stream)
|
186
|
-
(let ((byte (read-byte stream)))
|
187
|
-
(cond ((= 0 (ldb (byte 1 7) byte))
|
188
|
-
byte)
|
189
|
-
((= 7 (ldb (byte 3 5) byte))
|
190
|
-
(ub8->sb8 byte))
|
191
|
-
((= #xcc byte)
|
192
|
-
(read-byte stream))
|
193
|
-
((= #xcd byte)
|
194
|
-
(load-big-endian stream 2))
|
195
|
-
((= #xce byte)
|
196
|
-
(load-big-endian stream 4))
|
197
|
-
((= #xcf byte)
|
198
|
-
(load-big-endian stream 8))
|
199
|
-
((= #xd0 byte)
|
200
|
-
(ub8->sb8 (read-byte stream)))
|
201
|
-
((= #xd1 byte)
|
202
|
-
(ub16->sb16 (load-big-endian stream 2)))
|
203
|
-
((= #xd2 byte)
|
204
|
-
(ub32->sb32 (load-big-endian stream 4)))
|
205
|
-
((= #xd3 byte)
|
206
|
-
(ub64->sb64 (load-big-endian stream 8)))
|
207
|
-
((= #xc0 byte)
|
208
|
-
nil)
|
209
|
-
((= #xc3 byte)
|
210
|
-
t)
|
211
|
-
((= #xc2 byte)
|
212
|
-
nil)
|
213
|
-
((= #xca byte)
|
214
|
-
(or #+sbcl (sb-kernel:make-single-float (load-big-endian stream 4))
|
215
|
-
#-(or sbcl) (error "No floating point support yet.")))
|
216
|
-
((= #xcb byte)
|
217
|
-
(or #+sbcl (sb-kernel:make-double-float (load-big-endian stream 4)
|
218
|
-
(load-big-endian stream 4))
|
219
|
-
#-(or sbcl) (error "No floating point support yet.")))
|
220
|
-
((= 5 (ldb (byte 3 5) byte))
|
221
|
-
(decode-raw-sequence (ldb (byte 5 0) byte) stream))
|
222
|
-
((= #xda byte)
|
223
|
-
(decode-raw-sequence (load-big-endian stream 2) stream))
|
224
|
-
((= #xdb byte)
|
225
|
-
(decode-raw-sequence (load-big-endian stream 4) stream))
|
226
|
-
((= 9 (ldb (byte 4 4) byte))
|
227
|
-
(decode-array (- byte #x90) stream))
|
228
|
-
((= #xdc byte)
|
229
|
-
(decode-array (load-big-endian stream 2) stream))
|
230
|
-
((= #xdd byte)
|
231
|
-
(decode-array (load-big-endian stream 4) stream))
|
232
|
-
((= 8 (ldb (byte 4 4) byte))
|
233
|
-
(decode-map (- byte #x80) stream))
|
234
|
-
((= #xde byte)
|
235
|
-
(decode-map (load-big-endian stream 2) stream))
|
236
|
-
((= #xdf byte)
|
237
|
-
(decode-map (load-big-endian stream 4) stream)))))
|
238
|
-
|
239
|
-
(defun decode-map (length stream)
|
240
|
-
(let ((hash-table (make-hash-table :test #'equal)))
|
241
|
-
(loop repeat length
|
242
|
-
do (let ((key (decode-stream stream))
|
243
|
-
(value (decode-stream stream)))
|
244
|
-
(setf (gethash key hash-table) value)))
|
245
|
-
hash-table))
|
246
|
-
|
247
|
-
(defun decode-array (length stream)
|
248
|
-
(let ((array (make-array length)))
|
249
|
-
(dotimes (i length)
|
250
|
-
(setf (aref array i) (decode-stream stream)))
|
251
|
-
array))
|
252
|
-
|
253
|
-
(defun decode-raw-sequence (length stream)
|
254
|
-
(let ((seq (make-array length :element-type '(mod 256))))
|
255
|
-
(read-sequence seq stream)
|
256
|
-
(babel:octets-to-string seq)))
|
257
|
-
|
258
|
-
(defun load-big-endian (stream byte-count)
|
259
|
-
(let ((result 0))
|
260
|
-
(loop
|
261
|
-
repeat byte-count
|
262
|
-
do (setf result (+ (ash result 8)
|
263
|
-
(read-byte stream))))
|
264
|
-
result))
|