zaml 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +14 -0
- data/README +47 -0
- data/lib/zaml.rb +318 -0
- data/test/zaml_benchmarks.rb +171 -0
- data/test/zaml_test.rb +420 -0
- metadata +66 -0
data/LICENSE
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
Copyright (c) 2008 ZAML contributers
|
2
|
+
|
3
|
+
This program is free software: you can redistribute it and/or modify
|
4
|
+
it under the terms of the GNU General Public License as published by
|
5
|
+
the Free Software Foundation, either version 3 of the License, or (at
|
6
|
+
your option) any later version.
|
7
|
+
|
8
|
+
This program is distributed in the hope that it will be useful, but
|
9
|
+
WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
11
|
+
General Public License for more details.
|
12
|
+
|
13
|
+
You should have received a copy of the GNU General Public License
|
14
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
data/README
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
ZAML is faster YAML serialization for Ruby
|
2
|
+
|
3
|
+
The Ruby YAML library is a full stack standard compliant
|
4
|
+
implementation of the YAML protocol, while ZAML is an attempt to
|
5
|
+
produce (effectively) the same output as YAML would have - but much
|
6
|
+
faster and with cleaner code.
|
7
|
+
|
8
|
+
ZAML has been benchmarked at up to 1600% faster than YAML:
|
9
|
+
http://gnomecoder.wordpress.com/2008/09/27/yaml-dump-1600-percent-faster/
|
10
|
+
http://groups.google.com/group/rubyonrails-core/browse_thread/thread/3e33e31f57e23911/d2eedb7531a12e47?lnk=gst&q=zaml#d2eedb7531a12e47
|
11
|
+
|
12
|
+
|
13
|
+
## Installation
|
14
|
+
|
15
|
+
Add gemcutter to your sources if you haven't done so already:
|
16
|
+
|
17
|
+
sudo gem install gemcutter
|
18
|
+
gem tumble
|
19
|
+
|
20
|
+
Install the gem:
|
21
|
+
|
22
|
+
sudo gem install zaml
|
23
|
+
|
24
|
+
|
25
|
+
## Usage
|
26
|
+
|
27
|
+
require 'zaml'
|
28
|
+
foo = { :some => { :complicated => [:data, :structure] } }
|
29
|
+
File.open('output.yml', 'w') { |f| ZAML.dump(foo,f) }
|
30
|
+
|
31
|
+
|
32
|
+
## License
|
33
|
+
|
34
|
+
Copyright (c) 2008-2009 ZAML contributers
|
35
|
+
|
36
|
+
This program is free software: you can redistribute it and/or modify
|
37
|
+
it under the terms of the GNU General Public License as published by
|
38
|
+
the Free Software Foundation, either version 3 of the License, or (at
|
39
|
+
your option) any later version.
|
40
|
+
|
41
|
+
This program is distributed in the hope that it will be useful, but
|
42
|
+
WITHOUT ANY WARRANTY; without even the implied warranty of
|
43
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
44
|
+
General Public License for more details.
|
45
|
+
|
46
|
+
You should have received a copy of the GNU General Public License
|
47
|
+
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
data/lib/zaml.rb
ADDED
@@ -0,0 +1,318 @@
|
|
1
|
+
#
|
2
|
+
# ZAML -- A partial replacement for YAML, written with speed and code clarity
|
3
|
+
# in mind. ZAML fixes one YAML bug (loading Exceptions) and provides
|
4
|
+
# a replacement for YAML.dump() unimaginatively called ZAML.dump(),
|
5
|
+
# which is faster on all known cases and an order of magnitude faster
|
6
|
+
# with complex structures.
|
7
|
+
#
|
8
|
+
# http://github.com/hallettj/zaml
|
9
|
+
#
|
10
|
+
# Authors: Markus Roberts, Jesse Hallett, Ian McIntosh, Igal Koshevoy, Simon Chiang
|
11
|
+
#
|
12
|
+
|
13
|
+
require 'yaml'
|
14
|
+
|
15
|
+
# Emitter used by the +to_zaml+ methods to build a YAML document.
#
# A ZAML instance accumulates output fragments, tracks the current
# indentation and hands out labels so that objects seen more than once are
# written as YAML anchors/aliases.
class ZAML
  VERSION = "0.1.2"

  #
  # Class Methods
  #

  # Serializes +stuff+ and appends the resulting YAML text to +where+.
  #
  # stuff:: any object responding to +to_zaml+.
  # where:: any object responding to <tt><<</tt>; defaults to a new String.
  #
  # Returns the value of <tt>where << text</tt>.
  def self.dump(stuff, where='')
    z = new
    stuff.to_zaml(z)
    where << z.to_s
  end

  #
  # Instance Methods
  #

  # Starts a fresh document: clears the label table and emits the
  # document header.
  def initialize
    @result = []
    @indent = nil
    @structured_key_prefix = nil
    Label.counter_reset
    emit('--- ')
  end

  # Runs the block with the indentation extended by +tail+, then restores
  # the previous indentation.
  def nested(tail=' ')
    old_indent = @indent
    @indent = "#{@indent || "\n"}#{tail}"
    yield
    @indent = old_indent
  end

  class Label
    #
    # YAML only wants objects in the datastream once; if the same object
    # occurs more than once, we need to emit a label ("&idxxx") on the
    # first occurrence and then emit a back reference ("*idxxx") on any
    # subsequent occurrence(s).
    #
    # To accomplish this we keep a hash (by object id) of the labels of
    # the things we serialize as we begin to serialize them. The labels
    # initially serialize as an empty string (since most objects are only
    # going to be encountered once), but can be changed to a valid label
    # (by assigning it a number) the first time it is subsequently used,
    # if it ever is. Note that we need to do the label setup BEFORE we
    # start to serialize the object so that circular structures (in
    # which we will encounter a reference to the object as we serialize
    # it) can be handled.
    #

    # Forgets every label and restarts label numbering.
    def self.counter_reset
      @@previously_emitted_object = {}
      @@next_free_label_number = 0
    end

    # Registers a label for +obj+; +indent+ is the text written after the
    # anchor if the label is ever referenced.
    def initialize(obj,indent)
      @indent = indent
      @this_label_number = nil
      # Hold on to the object: the table is keyed by object_id, and an id
      # may be handed to a different object once the original has been
      # garbage collected. Temporaries created while dumping would otherwise
      # make a later, unrelated object look like a repeat.
      @obj = obj
      @@previously_emitted_object[obj.object_id] = self
    end

    # The anchor text, or '' while nothing has referred back to the object.
    def to_s
      @this_label_number ? ('&id%03d%s' % [@this_label_number, @indent]) : ''
    end

    # The alias text; assigns the label its number on first use.
    def reference
      @this_label_number ||= (@@next_free_label_number += 1)
      @reference ||= '*id%03d' % @this_label_number
    end

    # The label registered for +obj+, or nil if there is none.
    def self.for(obj)
      @@previously_emitted_object[obj.object_id]
    end
  end

  # Creates the label for +obj+. Hashes and Arrays put their content on a
  # new, deeper line after the anchor; everything else follows on the same line.
  def new_label_for(obj)
    Label.new(obj,(Hash === obj || Array === obj) ? "#{@indent || "\n"} " : ' ')
  end

  # Yields so the caller can serialize +obj+ the first time it is seen;
  # on later sightings emits a back reference instead and does not yield.
  def first_time_only(obj)
    if label = Label.for(obj)
      emit(label.reference)
    else
      # A pending structured-key prefix is written only for non-String keys.
      if @structured_key_prefix and not obj.is_a? String
        emit(@structured_key_prefix)
        @structured_key_prefix = nil
      end
      emit(new_label_for(obj))
      yield
    end
  end

  # Appends +s+ to the output. Labels are stored as objects and rendered
  # only in #to_s, so that a later reference can still turn them into anchors.
  def emit(s)
    @result << s
    @recent_nl = false unless s.kind_of?(Label)
  end

  # Starts a new line at the current indentation (unless one was just
  # started) and then emits +s+.
  def nl(s='')
    emit(@indent || "\n") unless @recent_nl
    emit(s)
    @recent_nl = true
  end

  # The document text accumulated so far.
  def to_s
    @result.join
  end

  # Runs the block with +x+ pending as the prefix for a structured
  # (non-String) key; if the prefix was consumed, a new line is started
  # after the key.
  def prefix_structured_keys(x)
    @structured_key_prefix = x
    yield
    nl unless @structured_key_prefix
    @structured_key_prefix = nil
  end
end
|
112
|
+
|
113
|
+
################################################################
|
114
|
+
#
|
115
|
+
# Behavior for custom classes
|
116
|
+
#
|
117
|
+
################################################################
|
118
|
+
|
119
|
+
class Object
  # Names of the instance variables to serialize, in sorted order
  # (the default YAML behavior). Classes may override this to limit
  # what is dumped.
  def to_yaml_properties
    instance_variables.sort
  end

  # The YAML tag for this object relative to +root+: "!ruby/<root>" for
  # direct instances of +root+, with ":<ClassName>" appended otherwise.
  def zamlized_class_name(root)
    subclass_suffix = self.class == root ? '' : ":#{self.class.name}"
    "!ruby/#{root.name.downcase}#{subclass_suffix}"
  end

  # Writes this object to the emitter +z+ as a tagged mapping of its
  # serializable instance variables (or " {}" when there are none).
  def to_zaml(z)
    z.first_time_only(self) do
      z.emit(zamlized_class_name(Object))
      z.nested do
        properties = to_yaml_properties
        if properties.empty?
          z.emit(" {}")
        else
          properties.each do |ivar|
            z.nl
            ivar[1..-1].to_zaml(z) # Remove leading '@'
            z.emit(': ')
            instance_variable_get(ivar).to_zaml(z)
          end
        end
      end
    end
  end
end
|
145
|
+
|
146
|
+
################################################################
|
147
|
+
#
|
148
|
+
# Behavior for built-in classes
|
149
|
+
#
|
150
|
+
################################################################
|
151
|
+
|
152
|
+
class NilClass
  # Writes nil to the emitter +z+ as an empty scalar.
  def to_zaml(z)
    # NOTE: blank turns into nil in YAML.load
    z.emit('')
  end
end
|
157
|
+
|
158
|
+
class Symbol
  # Writes the symbol to the emitter +z+ in its literal form
  # (leading colon included).
  def to_zaml(z)
    z.emit(inspect)
  end
end
|
163
|
+
|
164
|
+
class TrueClass
  # Writes the literal text "true" to the emitter +z+.
  def to_zaml(z)
    z.emit('true')
  end
end
|
169
|
+
|
170
|
+
class FalseClass
  # Writes the literal text "false" to the emitter +z+.
  def to_zaml(z)
    z.emit('false')
  end
end
|
175
|
+
|
176
|
+
class Numeric
  # Writes the number to the emitter +z+.
  #
  # Finite numbers are emitted as themselves and rendered with +to_s+ when
  # the document is assembled. Non-finite floats are written with the YAML
  # spellings (.Inf, -.Inf, .NaN); their Ruby +to_s+ forms ("Infinity",
  # "NaN") would be read back as plain strings.
  def to_zaml(z)
    if is_a?(Float) && !finite?
      z.emit(nan? ? '.NaN' : (self > 0 ? '.Inf' : '-.Inf'))
    else
      z.emit(self)
    end
  end
end
|
181
|
+
|
182
|
+
class Regexp
  # Writes the regexp to the emitter +z+ as its tag followed by its
  # literal (+inspect+) form; a repeated regexp becomes a back reference.
  def to_zaml(z)
    z.first_time_only(self) do
      z.emit("#{zamlized_class_name(Regexp)} #{inspect}")
    end
  end
end
|
187
|
+
|
188
|
+
class Exception
  # Writes the exception to the emitter +z+ as a tagged mapping holding
  # only its message.
  def to_zaml(z)
    z.emit(zamlized_class_name(Exception))
    z.nested do
      z.nl("message: ")
      message.to_zaml(z)
    end
  end

  #
  # Monkey patch for buggy Exception restore in YAML
  #
  # This makes it work for now but is not very future-proof; if things
  # change we'll most likely want to remove this. To mitigate the risks
  # as much as possible, we test for the bug before applying the patch.
  #
  if respond_to?(:yaml_new) && yaml_new(self, :tag, "message" => "blurp").message != "blurp"
    # Rebuilds an exception of class +klass+ from the loaded mapping +val+:
    # the 'message' entry becomes the exception message and every other
    # entry is restored as an instance variable.
    def self.yaml_new( klass, tag, val )
      o = YAML.object_maker( klass, {} ).exception(val.delete( 'message'))
      val.each_pair { |k,v| o.instance_variable_set("@#{k}", v) }
      o
    end
  end
end
|
213
|
+
|
214
|
+
class String
  # YAML escape sequences for the control characters 0x00-0x1F, indexed by
  # byte value. %w does not interpret backslashes, so each entry is the
  # literal escape text (e.g. the two characters \ and n).
  ZAML_ESCAPES = %w{\x00 \x01 \x02 \x03 \x04 \x05 \x06 \a \x08 \t \n \v \f \r \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \e \x1c \x1d \x1e \x1f }

  # Returns the content of this string escaped for use inside a
  # double-quoted YAML scalar: backslashes and double quotes are escaped,
  # control characters use ZAML_ESCAPES and every byte >= 0x80 becomes \xNN.
  #
  # Escaping is done byte by byte on a binary-tagged copy, so a multi-byte
  # character is written as one \xNN escape per byte.
  def escaped_for_zaml
    zaml_binary_copy.
      gsub( /\x5C/, "\\\\\\" ).  # Demi-kludge for Maglev/rubinius; the regexp should be /\\/ but parsetree chokes on that.
      gsub( /"/, "\\\"" ).
      gsub( /([\x00-\x1F])/ ) { |x| ZAML_ESCAPES[ x.unpack("C")[0] ] }.
      gsub( /([\x80-\xFF])/n ) { |x| "\\x#{x.unpack("C")[0].to_s(16)}" }
  end

  # Writes the string to the emitter +z+, choosing between an empty quoted
  # scalar, a double-quoted escaped scalar, a block literal and a plain scalar.
  def to_zaml(z)
    z.first_time_only(self) {
      # All pattern checks run against the binary-tagged copy: the byte
      # range \x80-\xFF cannot be written in a regexp without the /n flag
      # on Ruby >= 1.9, and an /n regexp only matches binary strings.
      raw = zaml_binary_copy
      num = '[-+]?(0x)?\d+\.?\d*'
      case
      when self == ''
        z.emit('""')
      # when self =~ /[\x00-\x08\x0B\x0C\x0E-\x1F\x80-\xFF]/
      #   z.emit("!binary |\n")
      #   z.emit([self].pack("m*"))
      when (
        (raw =~ /\A(true|false|yes|no|on|null|off|#{num}(:#{num})*|!|=|~)$/i) or
        (raw =~ /\A\n* /) or
        (raw =~ /\s$/) or
        (raw =~ /^[>|][-+\d]*\s/i) or
        (raw[-1..-1] =~ /\s/) or
        (raw =~ /[\x00-\x08\x0B\x0C\x0E-\x1F\x80-\xFF]/n) or
        (raw =~ /[,\[\]\{\}\r\t]|:\s|\s#/) or
        (raw =~ /\A([-:?!#&*'"]|<<|%.+:.)/)
      )
        # Anything a YAML loader could misread goes out double-quoted.
        z.emit("\"#{escaped_for_zaml}\"")
      when raw =~ /\n/
        z.emit(self[-1..-1] == "\n" ? '|+' : '|-')
        z.nested { split("\n",-1).each { |line| z.nl; z.emit(line.chomp("\n")) } }
        z.nl
      else
        z.emit(self)
      end
    }
  end

  private

  # A copy of this string tagged as binary so it can be examined byte by
  # byte; on Rubies without String#force_encoding the string itself is returned.
  def zaml_binary_copy
    respond_to?(:force_encoding) ? dup.force_encoding('BINARY') : self
  end
end
|
252
|
+
|
253
|
+
class Hash
  # Writes the hash to the emitter +z+: '{}' when empty, otherwise one
  # "key: value" entry per line. Keys are serialized with the '? ' prefix
  # pending, for use by structured keys.
  def to_zaml(z)
    z.first_time_only(self) do
      z.nested do
        if empty?
          z.emit('{}')
        else
          each_pair do |key, value|
            z.nl
            z.prefix_structured_keys('? ') { key.to_zaml(z) }
            z.emit(': ')
            value.to_zaml(z)
          end
        end
      end
    end
  end
end
|
271
|
+
|
272
|
+
class Array
  # Writes the array to the emitter +z+: '[]' when empty, otherwise one
  # "- item" entry per line.
  def to_zaml(z)
    z.first_time_only(self) do
      z.nested do
        if empty?
          z.emit('[]')
        else
          each do |item|
            z.nl('- ')
            item.to_zaml(z)
          end
        end
      end
    end
  end
end
|
285
|
+
|
286
|
+
class Time
  # Writes the time to the emitter +z+ as a timestamp with microseconds and
  # a numeric UTC offset, e.g.
  #
  #   2008-12-06 10:06:51.373758 -07:00
  def to_zaml(z)
    micros = '%06d' % usec
    # Split the magnitude of the offset and add the sign separately.
    # Floor division on a negative offset would turn -03:30 into -04:30.
    hours, minutes = (utc_offset.abs / 60).divmod(60)
    offset = '%s%02d:%02d' % [utc_offset < 0 ? '-' : '+', hours, minutes]
    z.emit(strftime("%Y-%m-%d %H:%M:%S.#{micros} #{offset}"))
  end
end
|
294
|
+
|
295
|
+
class Date
  # Writes the date to the emitter +z+ in YYYY-MM-DD form.
  def to_zaml(z)
    iso_day = strftime('%Y-%m-%d')
    z.emit(iso_day)
  end
end
|
300
|
+
|
301
|
+
class Range
  # Writes the range to the emitter +z+ as a tagged mapping with the keys
  # begin, end and excl.
  #
  # The endpoints and the exclusive flag are serialized through their own
  # +to_zaml+, so symbols keep their leading colon and strings are quoted
  # or escaped when they need to be.
  def to_zaml(z)
    z.first_time_only(self) {
      z.emit(zamlized_class_name(Range))
      z.nested {
        z.nl
        z.emit('begin: ')
        self.begin.to_zaml(z)
        z.nl
        z.emit('end: ')
        self.end.to_zaml(z)
        z.nl
        z.emit('excl: ')
        exclude_end?.to_zaml(z)
      }
    }
  end
end
|
@@ -0,0 +1,171 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'benchmark'
|
3
|
+
require 'yaml'
|
4
|
+
require 'tempfile'
|
5
|
+
|
6
|
+
require 'zaml'
|
7
|
+
|
8
|
+
# Fixture with a self-reference and one instance variable that must be
# left out of the dump.
class My_class
  def initialize
    @string = 'string...'
    @self = self
    @do_not_store_me = '*************** SHOULD NOT SHOW UP IN OUTPUT ***************'
  end

  # Only these instance variables are serialized.
  def to_yaml_properties
    %w[@string @self]
  end
end
|
18
|
+
|
19
|
+
# Timing comparisons between YAML.dump and ZAML.dump over a range of data
# shapes. Each test prints a Benchmark report; none makes assertions.
class ZamlBenchmarks < Test::Unit::TestCase
  #
  # dump various data tests
  #
  my_range = 7..13
  my_obj = My_class.new
  my_dull_object = Object.new
  my_bob = 'bob'
  my_exception = Exception.new("Error message")
  my_runtime_error = RuntimeError.new("This is a runtime error exception")
  wright_joke = %q{

I was in the grocery store. I saw a sign that said "pet supplies".

So I did.

Then I went outside and saw a sign that said "compact cars".

-- Steven Wright
}
  a_box_of_cheese = [:cheese]

  DATA = [1, my_range, my_obj, my_bob, my_dull_object, 2, 'test', " funky\n test\n", true, false,
    {my_obj => 'obj is the key!'},
    {:bob => 6.8, :sam => 9.7, :subhash => {:sh1 => 'one', :sh2 => 'two'}},
    6, my_bob, my_obj, my_range, 'bob', 1..10, 0...8]

  MORE_DATA = [{
    :a_regexp => /a.*(b+)/im,
    :an_exception => my_exception,
    :a_runtime_error => my_runtime_error,
    :a_long_string => wright_joke}]

  NESTED_ARRAYS = [
    [:one, 'One'],
    [:two, 'Two'],
    a_box_of_cheese,
    [:three, 'Three'],
    [:four, 'Four'],
    a_box_of_cheese,
    [:five, 'Five'],
    [:six, 'Six']]

  COMPLEX_DATA = {
    :data => DATA,
    :more_data => MORE_DATA,
    :nested_arrays => NESTED_ARRAYS
  }

  HASH = {
    'str' => 'value',
    :sym => :value,
    :true => true,
    :false => false,
    :int => 100,
    :float => 1.1
  }

  # Small flat hash, 100 dumps per library.
  def test_dump_time
    puts
    puts "dump:"
    Benchmark.bm do |x|
      n = 100
      GC.start; x.report('yaml') { n.times { YAML.dump(HASH, "") } }
      GC.start; x.report('zaml') { n.times { ZAML.dump(HASH, "") } }
    end
  end

  # Mixed structure with repeated objects, 100 dumps per library.
  def test_dump_time_for_complex_data
    puts
    puts "dump time for complex data:"
    Benchmark.bm do |x|
      n = 100
      GC.start; x.report('yaml') { n.times { YAML.dump(COMPLEX_DATA, "") } }
      GC.start; x.report('zaml') { n.times { ZAML.dump(COMPLEX_DATA, "") } }
    end
  end

  # Lists of 10, 100 and 1000 entries; the repeat count drops tenfold at
  # each size (100, 10, 1).
  def test_dump_time_for_big_data
    puts
    puts "dump time for big data:"
    Benchmark.bm do |x|
      n = 100
      [10,100,1000].each { |s|
        big_data = (1..s).collect { |i| [My_class.new,COMPLEX_DATA] }
        print "s = #{s}\n"
        GC.start; x.report('yaml') { n.times { YAML.dump(big_data, "") } }
        GC.start; x.report('zaml') { n.times { ZAML.dump(big_data, "") } }
        n = n/10
      }
    end
  end

  # Many small branches holding integers and one-element arrays.
  def test_deeply_nested_arrays
    branch = []
    root = [branch]
    (1..10000).each { |i|
      case
      when i % 3 == 0 then branch << i
      when i % 5 == 0 then branch << [i] # was `1 % 5`, which is never 0
      else branch = []; root << branch
      end
    }
    puts
    puts "dump time for deeply nested arrays:"
    Benchmark.bm do |x|
      n = 10
      GC.start; x.report('yaml') { n.times { YAML.dump(root, "") } }
      GC.start; x.report('zaml') { n.times { ZAML.dump(root, "") } }
    end
  end

  # A large structure in which the same few objects (and earlier branches)
  # appear over and over, to exercise anchors and aliases.
  def test_lots_of_back_refs
    leaves = ['a string',[:an,:array,:of,:symbols],{:this=>'is a hash'},0..9,'a'*10000]
    branch = []
    root = [branch]
    (1..500000).each { |i|
      case
      when i % 3 == 0 then branch << leaves[i % leaves.length]
      when i % 5 == 0 then branch << leaves                 # was `1 % 5`, which is never 0
      when i % 2 == 0 then branch << root[i % root.length]  # was `1 % 2`, which is never 0
      else branch = []; root << branch
      end
    }
    puts
    puts "dump time for lots of back references:"
    Benchmark.bm do |x|
      GC.start; x.report('yaml') { YAML.dump(root, "") }
      GC.start; x.report('zaml') { ZAML.dump(root, "") }
    end
  end

  # Node used by test_nest_of_objects.
  class A_node
    attr_accessor :value,:factors,:gcd_pairs
    def initialize(v)
      @value = v
      @factors = []
      @gcd_pairs = []
    end
  end

  # Greatest common divisor (Euclid's algorithm).
  def gcd(a,b)
    (b == 0) ? a : gcd(b,a % b)
  end

  # 1001 nodes cross-linked through their factor lists, plus the list of
  # pairs sharing each gcd.
  def test_nest_of_objects
    my_mess = []
    n = 1000
    (0..n).each { |i| my_mess << A_node.new(i) }
    (1..n).each { |i|
      (2..(n/i)).each { |j| my_mess[j].factors << my_mess[i] }
      (i..n).each { |j| my_mess[gcd(i,j)].gcd_pairs << [i,j] }
    }
    puts
    puts "dump time for big, tangled nest of objects:"
    Benchmark.bm do |x|
      GC.start; x.report('yaml') { YAML.dump(my_mess, "") }
      GC.start; x.report('zaml') { ZAML.dump(my_mess, "") }
    end
  end
end
|
data/test/zaml_test.rb
ADDED
@@ -0,0 +1,420 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'yaml'
|
3
|
+
require 'zaml'
|
4
|
+
|
5
|
+
# Fixture with a self-reference and one instance variable that must be
# left out of the dump.
class My_class
  def initialize
    @string = 'string...'
    @self = self
    @do_not_store_me = '*************** SHOULD NOT SHOW UP IN OUTPUT ***************'
  end

  # Only these instance variables are serialized.
  def to_yaml_properties
    %w[@string @self]
  end
end
|
15
|
+
|
16
|
+
# Round-trip tests: whatever ZAML.dump writes must load back through YAML
# as something equivalent to the original.
class ZamlDumpTest < Test::Unit::TestCase
  #
  # This class and the test helper which follow embody what we mean by YAML compatibility.
  # When we do a round-trip dump->load we expect
  #   1) that the data from ZAML.dump will come back correctly
  #     1a) all values will be correct
  #     1b) only data that should be dumped is
  #     1c) object identity is preserved
  #   2) if YAML.dump also works by these standards the dumped data should
  #      generally look like what yaml.rb produces, minus unneeded whitespace
  #      (trailing blanks, etc.)
  #
  # The Hash superclass is used as the record of objects already paired up:
  # object_id => sequence number.
  class Equivalency < Hash
    attr_reader :result,:message

    def self.test(a,b)
      new.test(a,b)
    end

    # Compares +a+ (what was loaded) with +b+ (what was dumped); the outcome
    # is available as #result and any explanation as #message.
    def test(a,b)
      @result = equivalent(a,b)
      self
    end

    # Records +msg+ and returns false so it can end an `or` chain.
    def note_failure(msg)
      (@message ||= '') << msg
      false
    end

    def same_class(a,b)
      (a.class == b.class) or note_failure("Saw a #{a.class} but expected a #{b.class}\n")
    end

    # True if either object was already paired; otherwise pairs them now
    # (numerics are never recorded).
    def seen_either_before(a,b)
      result = (has_key?(a.object_id) or has_key?(b.object_id))
      (self[a.object_id] = self[b.object_id] = size) unless result or a.is_a? Numeric
      result
    end

    def matched_before(a,b)
      (self[a.object_id] == self[b.object_id]) or note_failure("#{a.inspect} and #{b.inspect} should refer to the same object.\n")
    end

    def same_object(a,b)
      a.object_id == b.object_id
    end

    # Pairs up the keys of two hashes: keys that compare equal are matched
    # directly, and at most one leftover key per side is matched by elimination.
    def guess_maping(a,b)
      result = {}
      a.delete_if { |xa| result[xa] = b.delete(xa) }
      raise "Too many odd keys in a test hash to tell if the results are correct." if a.length > 1
      a.each { |xa| result[xa] = b.pop }
      result
    end

    def same_properties(a,b)
      # Pairs currently being compared; meeting one again means we are
      # inside a cycle and the outer comparison will decide.
      @what_we_are_looking_at ||= []
      return true if @what_we_are_looking_at.include? [a.object_id,b.object_id]
      @what_we_are_looking_at.push [a.object_id,b.object_id]
      result = case a
        when Array
          (a.length == b.length) and a.zip(b).all? { |ia,ib| equivalent(ia,ib) }
        when Hash
          key_map = guess_maping(a.keys,b.keys)
          a.keys.length == b.keys.length and a.keys.all? {|a_k|
            b_k = key_map[a_k]
            equivalent(a_k,b_k) and equivalent(a[a_k],b[b_k])
          }
        when Exception
          equivalent(a.message,b.message)
        when Time,Date,Numeric,nil,true,false,Range,Symbol,String,Regexp
          a == b
        else
          a.to_yaml_properties.all? { |p| equivalent(a.instance_variable_get(p),b.instance_variable_get(p)) }
        end or note_failure("Expected:\n #{b.inspect}\n but got:\n #{a.inspect}\n")
      @what_we_are_looking_at.pop
      result
    end

    def equivalent(a,b)
      seen_either_before(a,b) ? matched_before(a,b) : (same_object(a,b) or (same_class(a,b) and same_properties(a,b)))
    end
  end

  def stripped(x)
    x.gsub(/ +$/,'').chomp.chomp.gsub(/\n+/,"\n")
  end

  # Loads +text+ with full object support. Where YAML.unsafe_load exists,
  # plain YAML.load may refuse symbols and arbitrary classes, which these
  # tests dump on purpose.
  def load_yaml(text)
    YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(text) : YAML.load(text)
  end

  # Dumps +obj+ with ZAML, reloads it with YAML and asserts that the result
  # is equivalent to +obj+. A failure to load is turned into a string so it
  # shows up as a discrepancy in the assertion message.
  def dump_test(obj)
    z_load = load_yaml(z_dump = ZAML.dump(obj)) rescue "ZAML produced something YAML can't load."
    y_load = load_yaml(y_dump = YAML.dump(obj)) rescue "YAML failed to eat it's own dogfood"
    test = Equivalency.test(z_load,obj)
    assert(test.result, "Reload discrepancy:\n#{test.message}\nZAML:\"\n#{z_dump}\"\nYAML:\"\n#{y_dump}\"\n\n")
    # if Equivalency.test(y_load,obj).result and not obj.is_a? String
    #   assert_equal stripped(y_dump),stripped(z_dump), "Dump discrepancy"
    # end
  end

  #
  # dump tests
  #

  def test_dump_object
    dump_test(Object.new)
    dump_test(My_class.new)
  end

  def test_dump_nil
    dump_test(nil)
  end

  def test_dump_symbol
    dump_test(:sym)
  end

  def test_dump_true
    dump_test(true)
  end

  def test_dump_false
    dump_test(false)
  end

  def test_dump_numeric
    dump_test(1)
    dump_test(1.1)
  end

  def test_dump_exception
    dump_test(Exception.new('error message'))
    dump_test(ArgumentError.new('error message'))
  end

  def test_dump_regexp
    dump_test(/abc/)
    dump_test(/a.*(b+)/im)
  end


  def test_dump_short_strings
    #
    every_character = (0..255).collect { |n| n.chr }
    letters = 'a'..'z'
    some_characters = (0..128).collect { |n| n.chr } - ('A'..'Z').to_a - ('b'..'z').to_a - ('1'..'9').to_a
    fewer_characters = some_characters - every_character[1..31] + ["\n","\r","\t","\e"] - [127.chr,128.chr]
    #
    every_character.each { |c1| dump_test c1 }
    every_character.each { |c1| dump_test "> "+c1 }
    every_character.each { |c1| every_character.each { |c2| dump_test c1+c2 }}
    letters.each { |c1|
      letters.each { |c2|
        print c1,c2,' ',8.chr*3
        STDOUT.flush
        letters.each { |c3|
          dump_test c1+c2+c3
          letters.each { |c4| dump_test c1+c2+c3+c4 }
        }
      }
      GC.start
    } if false #slow
    some_characters.each { |c1|
      some_characters.each { |c2|
        print((c1+c2).inspect,' ',8.chr*((c1+c2).inspect.length+8))
        STDOUT.flush
        some_characters.each { |c3|
          some_characters.each { |c4| dump_test c1+c2+c3+c4 }
        }
        GC.start
      }
    } if false #slower
    fewer_characters.each { |c1|
      fewer_characters.each { |c2|
        print((c1+c2).inspect,' ',8.chr*((c1+c2).inspect.length+8))
        STDOUT.flush
        fewer_characters.each { |c3|
          fewer_characters.each { |c4|
            fewer_characters.each { |c5| dump_test c1+c2+c3+c4+c5 }
          }
          GC.start
        }
      }
    } if false #very slow
  end

  # Runs only where the system word list is present.
  def test_system_dict_words
    system_dict = '/usr/share/dict/words'
    File.readlines(system_dict).each { |w| dump_test w.chomp } if File.exist?(system_dict)
  end

  def test_dump_tricky_strings
    dump_test("")
    dump_test("#")
    dump_test("!")
    dump_test("~")
    dump_test("=")
    dump_test("\n")
    dump_test("\n0")
    dump_test("\n!")
    dump_test("!\n")
    dump_test("##")
    dump_test("###")
    dump_test("2:7")
    dump_test("1:1 x")
    dump_test(">")
    dump_test(">>")
    dump_test("> >")
    dump_test("> !")
    dump_test(">++ !")
    dump_test(">0+ !")
    dump_test("| |")
    dump_test("0:0")
    dump_test("1:2:3")
    dump_test("+1:2:3")
    dump_test("1:-2:+3")
    dump_test("%.:.")
    dump_test("%.:/")
  end

  def test_dump_string
    dump_test('str')
    dump_test(" leading and trailing whitespace ")

    dump_test("a string \n with newline")
    dump_test("a string with 'quotes'")
    dump_test("a string with \"double quotes\"")
    dump_test("a string with \\ escape")

    dump_test("a really long string" * 10)
    dump_test("a really long string \n with newline" * 10)
    dump_test("a really long string with 'quotes'" * 10)
    dump_test("a really long string with \"double quotes\"" * 10)
    dump_test("a really long string with \\ escape" * 10)

    dump_test("string with binary data \x00 \x01 \x02")
    dump_test(" funky\n test\n")
    dump_test('"')
    dump_test("'")
    dump_test('\\')
    dump_test("k: v")
    dump_test(":goo")
    dump_test("? foo")
    dump_test("{khkjh}")
    dump_test("[ha]")
    dump_test("- - (text) - -")
    dump_test("\n\n \n \n x\n y\n z\n!\n")
  end

  def test_dump_strings_that_resemble_literals
    dump_test("true")
    dump_test("false")
    dump_test("null")
    dump_test("yes")
    dump_test("no")
    dump_test("on")
    dump_test("off")
    dump_test("nil")
    dump_test("3")
    dump_test("3.14")
    dump_test("1e-6")
    dump_test("0x345")
    dump_test("-0x345")
    dump_test("1e5")
  end

  def test_dump_time
    dump_test(Time.now)
  end

  def test_dump_date
    dump_test(Date.strptime('2008-08-08'))
  end

  def test_dump_range
    dump_test(1..10)
    dump_test('a'...'b')
  end

  #
  # hash
  #

  def test_dump_simple_hash
    dump_test({:key => 'value'})
  end

  HASH = {
    :nil => nil,
    :sym => :value,
    :true => true,
    :false => false,
    :int => 100,
    :float => 1.1,
    :regexp => /abc/,
    'str' => 'value',
    :range => 1..10
  }

  ARRAY = [nil, :sym, true, false, 100, 1.1, /abc/, 'str', 1..10]

  def test_dump_hash
    dump_test(HASH)
  end

  def test_dump_simple_nested_hash
    dump_test({:hash => {:key => 'value'}, :array => [1,2,3]})
  end

  def test_dump_nested_hash
    dump_test(HASH.merge(:hash => {:hash => {:key => 'value'}}, :array => [[1,2,3]]))
  end

  def test_dump_self_referential_hash
    array = ARRAY + [ARRAY]
    dump_test(HASH.merge(:hash => HASH, :array => array))
  end

  def test_dump_singlular_self_referential_hash
    hash = {}
    hash[hash] = hash
    dump_test(hash)
  end

  #
  # array
  #

  def test_dump_simple_array
    dump_test([1,2,3])
  end

  def test_dump_array
    dump_test(ARRAY)
  end

  def test_dump_simple_nested_array
    dump_test([{:key => 'value'}, [1,2,3]])
  end

  def test_dump_nested_array
    # Build a new array: concat would grow the shared ARRAY constant and
    # change what the other tests see, depending on execution order.
    dump_test(ARRAY + [{:array => [1,2,3]}, [[1,2,3]]])
  end

  def test_dump_self_referential_array
    array = ARRAY + [ARRAY, HASH.merge(:hash => HASH)]
    dump_test(array)
  end

  def test_dump_singlular_self_referential_array
    array = []
    array << array
    dump_test(array)
  end

  #
  # dump various data tests
  #

  my_range = 7..13
  my_obj = My_class.new
  my_dull_object = Object.new
  my_bob = 'bob'
  my_exception = Exception.new("Error message")
  my_runtime_error = RuntimeError.new("This is a runtime error exception")
  wright_joke = %q{

I was in the grocery store. I saw a sign that said "pet supplies".

So I did.

Then I went outside and saw a sign that said "compact cars".

-- Steven Wright
}
  a_box_of_cheese = [:cheese]
  DATA = [1, my_range, my_obj, my_bob, my_dull_object, 2, 'test', " funky\n test\n", true, false,
    {my_obj => 'obj is the key!'},
    {:bob => 6.8, :sam => 9.7, :subhash => {:sh1 => 'one', :sh2 => 'two'}},
    6, my_bob, my_obj, my_range, 'bob', 1..10, 0...8]
  MORE_DATA = [{
    :a_regexp => /a.*(b+)/im,
    :an_exception => my_exception,
    :a_runtime_error => my_runtime_error,
    :a_long_string => wright_joke}
  ]
  NESTED_ARRAYS = [
    [:one, 'One'],
    [:two, 'Two'],
    a_box_of_cheese,
    [:three, 'Three'],
    [:four, 'Four'],
    a_box_of_cheese,
    [:five, 'Five'],
    [:six, 'Six']
  ]
  COMPLEX_DATA = {
    :data => DATA,
    :more_data => MORE_DATA,
    :nested_arrays => NESTED_ARRAYS
  }

  def test_dump_DATA
    dump_test(DATA)
  end

  def test_dump_MORE_DATA
    dump_test(MORE_DATA)
  end

  def test_dump_NESTED_ARRAYS
    dump_test(NESTED_ARRAYS)
  end

  def test_dump_COMPLEX_DATA
    dump_test(COMPLEX_DATA)
  end

  def test_indentation_array_edge_cases
    dump_test({[]=>[]})
    dump_test([[]])
    dump_test([[[],[]]])
  end

  def test_string_identity
    a = 'str'
    dump_test([a,a])
  end
end
|
metadata
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: zaml
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Markus Roberts
|
8
|
+
- Jesse Hallett
|
9
|
+
- Ian McIntosh
|
10
|
+
- Igal Koshevoy
|
11
|
+
- Simon Chiang
|
12
|
+
autorequire:
|
13
|
+
bindir: bin
|
14
|
+
cert_chain: []
|
15
|
+
|
16
|
+
date: 2009-10-03 00:00:00 -07:00
|
17
|
+
default_executable:
|
18
|
+
dependencies: []
|
19
|
+
|
20
|
+
description: A partial replacement for YAML, written with speed and code clarity in mind. ZAML fixes one YAML bug (loading Exceptions) and provides a replacement for YAML.dump() unimaginatively called ZAML.dump(), which is faster on all known cases and an order of magnitude faster with complex structures.
|
21
|
+
email: zaml@googlegroups.com
|
22
|
+
executables: []
|
23
|
+
|
24
|
+
extensions: []
|
25
|
+
|
26
|
+
extra_rdoc_files:
|
27
|
+
- README
|
28
|
+
- LICENSE
|
29
|
+
files:
|
30
|
+
- README
|
31
|
+
- LICENSE
|
32
|
+
- lib/zaml.rb
|
33
|
+
- test/zaml_benchmarks.rb
|
34
|
+
- test/zaml_test.rb
|
35
|
+
has_rdoc: true
|
36
|
+
homepage: http://github.com/hallettj/zaml
|
37
|
+
licenses: []
|
38
|
+
|
39
|
+
post_install_message:
|
40
|
+
rdoc_options:
|
41
|
+
- --main
|
42
|
+
- README
|
43
|
+
require_paths:
|
44
|
+
- lib
|
45
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: "0"
|
50
|
+
version:
|
51
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
52
|
+
requirements:
|
53
|
+
- - ">="
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: "0"
|
56
|
+
version:
|
57
|
+
requirements: []
|
58
|
+
|
59
|
+
rubyforge_project:
|
60
|
+
rubygems_version: 1.3.4
|
61
|
+
signing_key:
|
62
|
+
specification_version: 3
|
63
|
+
summary: A partial replacement for YAML, written with speed and code clarity in mind.
|
64
|
+
test_files:
|
65
|
+
- test/zaml_benchmarks.rb
|
66
|
+
- test/zaml_test.rb
|