ya2yaml 0.29.2 → 0.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4):
  1. data/{README → README.rdoc} +0 -0
  2. data/lib/ya2yaml.rb +359 -359
  3. data/test/test.rb +443 -403
  4. metadata +6 -7
File without changes
@@ -1,373 +1,373 @@
1
1
  # encoding: UTF-8
2
2
 
3
- # $Id: ya2yaml.rb,v 0.29 2009/02/09 09:01:30 funai Exp funai $
4
- #
5
3
  # Author:: Akira FUNAI
6
- # Copyright:: Copyright (c) 2006 Akira FUNAI
4
+ # Copyright:: Copyright (c) 2006-2010 Akira FUNAI
7
5
  # License:: MIT License
8
6
 
9
7
  class Ya2YAML
10
8
 
11
- def initialize(opts = {})
12
- options = opts.dup
13
- options[:indent_size] = 2 if options[:indent_size].to_i <= 0
14
- options[:minimum_block_length] = 0 if options[:minimum_block_length].to_i <= 0
15
- options.update(
16
- {
17
- :printable_with_syck => true,
18
- :escape_b_specific => true,
19
- :escape_as_utf8 => true,
20
- }
21
- ) if options[:syck_compatible]
22
-
23
- @options = options
24
- end
25
-
26
- def _ya2yaml(obj)
27
- throw 'set $KCODE to "UTF8".' if (RUBY_VERSION < '1.9.0') && ($KCODE != 'UTF8')
28
- '--- ' + emit(obj,1) + "\n"
29
- end
30
-
31
- private
32
-
33
- def emit(obj,level)
34
- case obj
35
- when Array
36
- if (obj.length == 0)
37
- '[]'
38
- else
39
- indent = "\n" + s_indent(level - 1)
40
- obj.collect {|o|
41
- indent + '- ' + emit(o,level + 1)
42
- }.join('')
43
- end
44
- when Hash
45
- if (obj.length == 0)
46
- '{}'
47
- else
48
- indent = "\n" + s_indent(level - 1)
49
- hash_order = @options[:hash_order]
50
- if (hash_order && level == 1)
51
- hash_keys = obj.keys.sort {|x,y|
52
- x_order = hash_order.index(x) ? hash_order.index(x) : Float::MAX
53
- y_order = hash_order.index(y) ? hash_order.index(y) : Float::MAX
54
- o = (x_order <=> y_order)
55
- (o != 0) ? o : (x.to_s <=> y.to_s)
56
- }
57
- else
58
- hash_keys = obj.keys.sort {|x,y| x.to_s <=> y.to_s }
59
- end
60
- hash_keys.collect {|k|
61
- key = emit(k,level + 1)
62
- if (
63
- is_one_plain_line?(key) ||
64
- key =~ /\A(#{REX_BOOL}|#{REX_FLOAT}|#{REX_INT}|#{REX_NULL})\z/x
65
- )
66
- indent + key + ': ' + emit(obj[k],level + 1)
67
- else
68
- indent + '? ' + key +
69
- indent + ': ' + emit(obj[k],level + 1)
70
- end
71
- }.join('')
72
- end
73
- when NilClass
74
- '~'
75
- when String
76
- emit_string(obj,level)
77
- when TrueClass,FalseClass
78
- obj.to_s
79
- when Fixnum,Bignum,Float
80
- obj.to_s
81
- when Date
82
- obj.to_s
83
- when Time
84
- offset = obj.gmtoff
85
- off_hm = sprintf(
86
- '%+.2d:%.2d',
87
- (offset / 3600.0).to_i,
88
- (offset % 3600.0) / 60
89
- )
90
- u_sec = (obj.usec != 0) ? sprintf(".%.6d",obj.usec) : ''
91
- obj.strftime("%Y-%m-%d %H:%M:%S#{u_sec} #{off_hm}")
92
- when Symbol
93
- '!ruby/symbol ' + emit_string(obj.to_s,level)
94
- when Range
95
- '!ruby/range ' + obj.to_s
96
- when Regexp
97
- '!ruby/regexp ' + obj.inspect
98
- else
99
- case
100
- when obj.is_a?(Struct)
101
- struct_members = {}
102
- obj.each_pair{|k,v| struct_members[k.to_s] = v }
103
- '!ruby/struct:' + obj.class.to_s.sub(/^(Struct::(.+)|.*)$/,'\2') + ' ' +
104
- emit(struct_members,level + 1)
105
- else
106
- # serialized as a generic object
107
- object_members = {}
108
- obj.instance_variables.each{|k,v|
109
- object_members[k.to_s.sub(/^@/,'')] = obj.instance_variable_get(k)
110
- }
111
- '!ruby/object:' + obj.class.to_s + ' ' +
112
- emit(object_members,level + 1)
113
- end
114
- end
115
- end
116
-
117
- def emit_string(str,level)
118
- (is_string,is_printable,is_one_line,is_one_plain_line) = string_type(str)
119
- if is_string
120
- if is_printable
121
- if is_one_plain_line
122
- emit_simple_string(str,level)
123
- else
124
- (is_one_line || str.length < @options[:minimum_block_length]) ?
125
- emit_quoted_string(str,level) :
126
- emit_block_string(str,level)
127
- end
128
- else
129
- emit_quoted_string(str,level)
130
- end
131
- else
132
- emit_base64_binary(str,level)
133
- end
134
- end
135
-
136
- def emit_simple_string(str,level)
137
- str
138
- end
139
-
140
- def emit_block_string(str,level)
141
- str = normalize_line_break(str)
142
-
143
- indent = s_indent(level)
144
- indentation_indicator = (str =~ /\A /) ? indent.size.to_s : ''
145
- str =~ /(#{REX_NORMAL_LB}*)\z/
146
- chomping_indicator = case $1.length
147
- when 0
148
- '-'
149
- when 1
150
- ''
151
- else
152
- '+'
153
- end
154
-
155
- str.chomp!
156
- str.gsub!(/#{REX_NORMAL_LB}/) {
157
- $1 + indent
158
- }
159
- '|' + indentation_indicator + chomping_indicator + "\n" + indent + str
160
- end
161
-
162
- def emit_quoted_string(str,level)
163
- str = yaml_escape(normalize_line_break(str))
164
- if (str.length < @options[:minimum_block_length])
165
- str.gsub!(/#{REX_NORMAL_LB}/) { ESCAPE_SEQ_LB[$1] }
166
- else
167
- str.gsub!(/#{REX_NORMAL_LB}$/) { ESCAPE_SEQ_LB[$1] }
168
- str.gsub!(/(#{REX_NORMAL_LB}+)(.)/) {
169
- trail_c = $3
170
- $1 + trail_c.sub(/([\t ])/) { ESCAPE_SEQ_WS[$1] }
171
- }
172
- indent = s_indent(level)
173
- str.gsub!(/#{REX_NORMAL_LB}/) {
174
- ESCAPE_SEQ_LB[$1] + "\\\n" + indent
175
- }
176
- end
177
- '"' + str + '"'
178
- end
179
-
180
- def emit_base64_binary(str,level)
181
- indent = "\n" + s_indent(level)
182
- base64 = [str].pack('m')
183
- '!binary |' + indent + base64.gsub(/\n(?!\z)/,indent)
184
- end
185
-
186
- def string_type(str)
187
- if str.respond_to?(:valid_encoding?) && !str.valid_encoding?
188
- return false,false,false,false
189
- end
190
- (ucs_codes = str.unpack('U*')) rescue (
191
- # ArgumentError -> binary data
192
- return false,false,false,false
193
- )
194
- if (
195
- @options[:printable_with_syck] &&
196
- str =~ /\A#{REX_ANY_LB}* | #{REX_ANY_LB}*\z|#{REX_ANY_LB}{2}\z/
197
- )
198
- # detour Syck bug
199
- return true,false,nil,false
200
- end
201
- ucs_codes.each {|ucs_code|
202
- return true,false,nil,false unless is_printable?(ucs_code)
203
- }
204
- return true,true,is_one_line?(str),is_one_plain_line?(str)
205
- end
206
-
207
- def is_printable?(ucs_code)
208
- # YAML 1.1 / 4.1.1.
209
- (
210
- [0x09,0x0a,0x0d,0x85].include?(ucs_code) ||
211
- (ucs_code <= 0x7e && ucs_code >= 0x20) ||
212
- (ucs_code <= 0xd7ff && ucs_code >= 0xa0) ||
213
- (ucs_code <= 0xfffd && ucs_code >= 0xe000) ||
214
- (ucs_code <= 0x10ffff && ucs_code >= 0x10000)
215
- ) &&
216
- !(
217
- # treat LS/PS as non-printable characters
218
- @options[:escape_b_specific] &&
219
- (ucs_code == 0x2028 || ucs_code == 0x2029)
220
- )
221
- end
222
-
223
- def is_one_line?(str)
224
- str !~ /#{REX_ANY_LB}(?!\z)/
225
- end
226
-
227
- def is_one_plain_line?(str)
228
- # YAML 1.1 / 4.6.11.
229
- str !~ /^([\-\?:,\[\]\{\}\#&\*!\|>'"%@`\s]|---|\.\.\.)/ &&
230
- str !~ /[:\#\s\[\]\{\},]/ &&
231
- str !~ /#{REX_ANY_LB}/ &&
232
- str !~ /^(#{REX_BOOL}|#{REX_FLOAT}|#{REX_INT}|#{REX_MERGE}
233
- |#{REX_NULL}|#{REX_TIMESTAMP}|#{REX_VALUE})$/x
234
- end
235
-
236
- def s_indent(level)
237
- # YAML 1.1 / 4.2.2.
238
- ' ' * (level * @options[:indent_size])
239
- end
240
-
241
- def normalize_line_break(str)
242
- # YAML 1.1 / 4.1.4.
243
- str.gsub(/(#{REX_CRLF}|#{REX_CR}|#{REX_NEL})/,"\n")
244
- end
245
-
246
- def yaml_escape(str)
247
- # YAML 1.1 / 4.1.6.
248
- str.gsub(/[^a-zA-Z0-9]/u) {|c|
249
- ucs_code, = (c.unpack('U') rescue [??])
250
- case
251
- when ESCAPE_SEQ[c]
252
- ESCAPE_SEQ[c]
253
- when is_printable?(ucs_code)
254
- c
255
- when @options[:escape_as_utf8]
256
- c.respond_to?(:bytes) ?
257
- c.bytes.collect {|b| '\\x%.2x' % b }.join :
258
- '\\x' + c.unpack('H2' * c.size).join('\\x')
259
- when ucs_code == 0x2028 || ucs_code == 0x2029
260
- ESCAPE_SEQ_LB[c]
261
- when ucs_code <= 0x7f
262
- sprintf('\\x%.2x',ucs_code)
263
- when ucs_code <= 0xffff
264
- sprintf('\\u%.4x',ucs_code)
265
- else
266
- sprintf('\\U%.8x',ucs_code)
267
- end
268
- }
269
- end
270
-
271
- module Constants
272
- UCS_0X85 = [0x85].pack('U') # c285@UTF8 Unicode next line
273
- UCS_0XA0 = [0xa0].pack('U') # c2a0@UTF8 Unicode non-breaking space
274
- UCS_0X2028 = [0x2028].pack('U') # e280a8@UTF8 Unicode line separator
275
- UCS_0X2029 = [0x2029].pack('U') # e280a9@UTF8 Unicode paragraph separator
276
-
277
- # non-break characters
278
- ESCAPE_SEQ = {
279
- "\x00" => '\\0',
280
- "\x07" => '\\a',
281
- "\x08" => '\\b',
282
- "\x0b" => '\\v',
283
- "\x0c" => '\\f',
284
- "\x1b" => '\\e',
285
- "\"" => '\\"',
286
- "\\" => '\\\\',
287
- }
288
-
289
- # non-breaking space
290
- ESCAPE_SEQ_NS = {
291
- UCS_0XA0 => '\\_',
292
- }
293
-
294
- # white spaces
295
- ESCAPE_SEQ_WS = {
296
- "\x09" => '\\t',
297
- " " => '\\x20',
298
- }
299
-
300
- # line breaks
301
- ESCAPE_SEQ_LB ={
302
- "\x0a" => '\\n',
303
- "\x0d" => '\\r',
304
- UCS_0X85 => '\\N',
305
- UCS_0X2028 => '\\L',
306
- UCS_0X2029 => '\\P',
307
- }
308
-
309
- # regexps for line breaks
310
- REX_LF = Regexp.escape("\x0a")
311
- REX_CR = Regexp.escape("\x0d")
312
- REX_CRLF = Regexp.escape("\x0d\x0a")
313
- REX_NEL = Regexp.escape(UCS_0X85)
314
- REX_LS = Regexp.escape(UCS_0X2028)
315
- REX_PS = Regexp.escape(UCS_0X2029)
316
-
317
- REX_ANY_LB = /(#{REX_LF}|#{REX_CR}|#{REX_NEL}|#{REX_LS}|#{REX_PS})/
318
- REX_NORMAL_LB = /(#{REX_LF}|#{REX_LS}|#{REX_PS})/
319
-
320
- # regexps for language-Independent types for YAML1.1
321
- REX_BOOL = /
322
- y|Y|yes|Yes|YES|n|N|no|No|NO
323
- |true|True|TRUE|false|False|FALSE
324
- |on|On|ON|off|Off|OFF
325
- /x
326
- REX_FLOAT = /
327
- [-+]?([0-9][0-9_]*)?\.[0-9.]*([eE][-+][0-9]+)? # (base 10)
328
- |[-+]?[0-9][0-9_]*(:[0-5]?[0-9])+\.[0-9_]* # (base 60)
329
- |[-+]?\.(inf|Inf|INF) # (infinity)
330
- |\.(nan|NaN|NAN) # (not a number)
331
- /x
332
- REX_INT = /
333
- [-+]?0b[0-1_]+ # (base 2)
334
- |[-+]?0[0-7_]+ # (base 8)
335
- |[-+]?(0|[1-9][0-9_]*) # (base 10)
336
- |[-+]?0x[0-9a-fA-F_]+ # (base 16)
337
- |[-+]?[1-9][0-9_]*(:[0-5]?[0-9])+ # (base 60)
338
- /x
339
- REX_MERGE = /
340
- <<
341
- /x
342
- REX_NULL = /
343
- ~ # (canonical)
344
- |null|Null|NULL # (English)
345
- | # (Empty)
346
- /x
347
- REX_TIMESTAMP = /
348
- [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] # (ymd)
349
- |[0-9][0-9][0-9][0-9] # (year)
350
- -[0-9][0-9]? # (month)
351
- -[0-9][0-9]? # (day)
352
- ([Tt]|[ \t]+)[0-9][0-9]? # (hour)
353
- :[0-9][0-9] # (minute)
354
- :[0-9][0-9] # (second)
355
- (\.[0-9]*)? # (fraction)
356
- (([ \t]*)Z|[-+][0-9][0-9]?(:[0-9][0-9])?)? # (time zone)
357
- /x
358
- REX_VALUE = /
359
- =
360
- /x
361
- end
362
-
363
- include Constants
9
+ def initialize(opts = {})
10
+ options = opts.dup
11
+ options[:indent_size] = 2 if options[:indent_size].to_i <= 0
12
+ options[:minimum_block_length] = 0 if options[:minimum_block_length].to_i <= 0
13
+ options.update(
14
+ {
15
+ :printable_with_syck => true,
16
+ :escape_b_specific => true,
17
+ :escape_as_utf8 => true,
18
+ }
19
+ ) if options[:syck_compatible]
20
+
21
+ @options = options
22
+ end
23
+
24
+ def _ya2yaml(obj)
25
+ raise 'set $KCODE to "UTF8".' if (RUBY_VERSION < '1.9.0') && ($KCODE != 'UTF8')
26
+ '--- ' + emit(obj, 1) + "\n"
27
+ rescue SystemStackError
28
+ raise ArgumentError, "ya2yaml can't handle circular references"
29
+ end
30
+
31
+ private
32
+
33
+ def emit(obj, level)
34
+ case obj
35
+ when Array
36
+ if (obj.length == 0)
37
+ '[]'
38
+ else
39
+ indent = "\n" + s_indent(level - 1)
40
+ obj.collect {|o|
41
+ indent + '- ' + emit(o, level + 1)
42
+ }.join('')
43
+ end
44
+ when Hash
45
+ if (obj.length == 0)
46
+ '{}'
47
+ else
48
+ indent = "\n" + s_indent(level - 1)
49
+ hash_order = @options[:hash_order]
50
+ if (hash_order && level == 1)
51
+ hash_keys = obj.keys.sort {|x, y|
52
+ x_order = hash_order.index(x) ? hash_order.index(x) : Float::MAX
53
+ y_order = hash_order.index(y) ? hash_order.index(y) : Float::MAX
54
+ o = (x_order <=> y_order)
55
+ (o != 0) ? o : (x.to_s <=> y.to_s)
56
+ }
57
+ else
58
+ hash_keys = obj.keys.sort {|x, y| x.to_s <=> y.to_s }
59
+ end
60
+ hash_keys.collect {|k|
61
+ key = emit(k, level + 1)
62
+ if (
63
+ is_one_plain_line?(key) ||
64
+ key =~ /\A(#{REX_BOOL}|#{REX_FLOAT}|#{REX_INT}|#{REX_NULL})\z/x
65
+ )
66
+ indent + key + ': ' + emit(obj[k], level + 1)
67
+ else
68
+ indent + '? ' + key +
69
+ indent + ': ' + emit(obj[k], level + 1)
70
+ end
71
+ }.join('')
72
+ end
73
+ when NilClass
74
+ '~'
75
+ when String
76
+ emit_string(obj, level)
77
+ when TrueClass, FalseClass
78
+ obj.to_s
79
+ when Fixnum, Bignum, Float
80
+ obj.to_s
81
+ when Date
82
+ obj.to_s
83
+ when Time
84
+ offset = obj.gmtoff
85
+ off_hm = sprintf(
86
+ '%+.2d:%.2d',
87
+ (offset / 3600.0).to_i,
88
+ (offset % 3600.0) / 60
89
+ )
90
+ u_sec = (obj.usec != 0) ? sprintf(".%.6d", obj.usec) : ''
91
+ obj.strftime("%Y-%m-%d %H:%M:%S#{u_sec} #{off_hm}")
92
+ when Symbol
93
+ '!ruby/symbol ' + emit_string(obj.to_s, level)
94
+ when Range
95
+ '!ruby/range ' + obj.to_s
96
+ when Regexp
97
+ '!ruby/regexp ' + obj.inspect
98
+ else
99
+ case
100
+ when obj.is_a?(Struct)
101
+ struct_members = {}
102
+ obj.each_pair{|k, v| struct_members[k.to_s] = v }
103
+ '!ruby/struct:' + obj.class.to_s.sub(/^(Struct::(.+)|.*)$/, '\2') + ' ' +
104
+ emit(struct_members, level + 1)
105
+ else
106
+ # serialized as a generic object
107
+ object_members = {}
108
+ obj.instance_variables.each{|k, v|
109
+ object_members[k.to_s.sub(/^@/, '')] = obj.instance_variable_get(k)
110
+ }
111
+ '!ruby/object:' + obj.class.to_s + ' ' +
112
+ emit(object_members, level + 1)
113
+ end
114
+ end
115
+ end
116
+
117
+ def emit_string(str, level)
118
+ (is_string, is_printable, is_one_line, is_one_plain_line) = string_type(str)
119
+ if is_string
120
+ if is_printable
121
+ if is_one_plain_line
122
+ emit_simple_string(str, level)
123
+ else
124
+ (is_one_line || str.length < @options[:minimum_block_length]) ?
125
+ emit_quoted_string(str, level) :
126
+ emit_block_string(str, level)
127
+ end
128
+ else
129
+ emit_quoted_string(str, level)
130
+ end
131
+ else
132
+ emit_base64_binary(str, level)
133
+ end
134
+ end
135
+
136
+ def emit_simple_string(str, level)
137
+ str
138
+ end
139
+
140
+ def emit_block_string(str, level)
141
+ str = normalize_line_break(str)
142
+
143
+ indent = s_indent(level)
144
+ indentation_indicator = (str =~ /\A /) ? indent.size.to_s : ''
145
+ str =~ /(#{REX_NORMAL_LB}*)\z/
146
+ chomping_indicator = case $1.length
147
+ when 0
148
+ '-'
149
+ when 1
150
+ ''
151
+ else
152
+ '+'
153
+ end
154
+
155
+ str.chomp!
156
+ str.gsub!(/#{REX_NORMAL_LB}/) {
157
+ $1 + indent
158
+ }
159
+ '|' + indentation_indicator + chomping_indicator + "\n" + indent + str
160
+ end
161
+
162
+ def emit_quoted_string(str, level)
163
+ str = yaml_escape(normalize_line_break(str))
164
+ if (str.length < @options[:minimum_block_length])
165
+ str.gsub!(/#{REX_NORMAL_LB}/) { ESCAPE_SEQ_LB[$1] }
166
+ else
167
+ str.gsub!(/#{REX_NORMAL_LB}$/) { ESCAPE_SEQ_LB[$1] }
168
+ str.gsub!(/(#{REX_NORMAL_LB}+)(.)/) {
169
+ trail_c = $3
170
+ $1 + trail_c.sub(/([\t ])/) { ESCAPE_SEQ_WS[$1] }
171
+ }
172
+ indent = s_indent(level)
173
+ str.gsub!(/#{REX_NORMAL_LB}/) {
174
+ ESCAPE_SEQ_LB[$1] + "\\\n" + indent
175
+ }
176
+ end
177
+ '"' + str + '"'
178
+ end
179
+
180
+ def emit_base64_binary(str, level)
181
+ indent = "\n" + s_indent(level)
182
+ base64 = [str].pack('m')
183
+ '!binary |' + indent + base64.gsub(/\n(?!\z)/, indent)
184
+ end
185
+
186
+ def string_type(str)
187
+ if str.respond_to?(:encoding) && (!str.valid_encoding? || str.encoding == Encoding::ASCII_8BIT)
188
+ return false, false, false, false
189
+ end
190
+ (ucs_codes = str.unpack('U*')) rescue (
191
+ # ArgumentError -> binary data
192
+ return false, false, false, false
193
+ )
194
+ if (
195
+ @options[:printable_with_syck] &&
196
+ str =~ /\A#{REX_ANY_LB}* | #{REX_ANY_LB}*\z|#{REX_ANY_LB}{2}\z/
197
+ )
198
+ # detour Syck bug
199
+ return true, false, nil, false
200
+ end
201
+ ucs_codes.each {|ucs_code|
202
+ return true, false, nil, false unless is_printable?(ucs_code)
203
+ }
204
+ return true, true, is_one_line?(str), is_one_plain_line?(str)
205
+ end
206
+
207
+ def is_printable?(ucs_code)
208
+ # YAML 1.1 / 4.1.1.
209
+ (
210
+ [0x09, 0x0a, 0x0d, 0x85].include?(ucs_code) ||
211
+ (ucs_code <= 0x7e && ucs_code >= 0x20) ||
212
+ (ucs_code <= 0xd7ff && ucs_code >= 0xa0) ||
213
+ (ucs_code <= 0xfffd && ucs_code >= 0xe000) ||
214
+ (ucs_code <= 0x10ffff && ucs_code >= 0x10000)
215
+ ) &&
216
+ !(
217
+ # treat LS/PS as non-printable characters
218
+ @options[:escape_b_specific] &&
219
+ (ucs_code == 0x2028 || ucs_code == 0x2029)
220
+ )
221
+ end
222
+
223
+ def is_one_line?(str)
224
+ str !~ /#{REX_ANY_LB}(?!\z)/
225
+ end
226
+
227
+ def is_one_plain_line?(str)
228
+ # YAML 1.1 / 4.6.11.
229
+ str !~ /^([\-\?:,\[\]\{\}\#&\*!\|>'"%@`\s]|---|\.\.\.)/ &&
230
+ str !~ /[:\#\s\[\]\{\},]/ &&
231
+ str !~ /#{REX_ANY_LB}/ &&
232
+ str !~ /^(#{REX_BOOL}|#{REX_FLOAT}|#{REX_INT}|#{REX_MERGE}
233
+ |#{REX_NULL}|#{REX_TIMESTAMP}|#{REX_VALUE})$/x
234
+ end
235
+
236
+ def s_indent(level)
237
+ # YAML 1.1 / 4.2.2.
238
+ ' ' * (level * @options[:indent_size])
239
+ end
240
+
241
+ def normalize_line_break(str)
242
+ # YAML 1.1 / 4.1.4.
243
+ str.gsub(/(#{REX_CRLF}|#{REX_CR}|#{REX_NEL})/, "\n")
244
+ end
245
+
246
+ def yaml_escape(str)
247
+ # YAML 1.1 / 4.1.6.
248
+ str.gsub(/[^a-zA-Z0-9]/u) {|c|
249
+ ucs_code, = (c.unpack('U') rescue [??])
250
+ case
251
+ when ESCAPE_SEQ[c]
252
+ ESCAPE_SEQ[c]
253
+ when is_printable?(ucs_code)
254
+ c
255
+ when @options[:escape_as_utf8]
256
+ c.respond_to?(:bytes) ?
257
+ c.bytes.collect {|b| '\\x%.2x' % b }.join :
258
+ '\\x' + c.unpack('H2' * c.size).join('\\x')
259
+ when ucs_code == 0x2028 || ucs_code == 0x2029
260
+ ESCAPE_SEQ_LB[c]
261
+ when ucs_code <= 0x7f
262
+ sprintf('\\x%.2x', ucs_code)
263
+ when ucs_code <= 0xffff
264
+ sprintf('\\u%.4x', ucs_code)
265
+ else
266
+ sprintf('\\U%.8x', ucs_code)
267
+ end
268
+ }
269
+ end
270
+
271
+ module Constants
272
+ UCS_0X85 = [0x85].pack('U') # c285@UTF8 Unicode next line
273
+ UCS_0XA0 = [0xa0].pack('U') # c2a0@UTF8 Unicode non-breaking space
274
+ UCS_0X2028 = [0x2028].pack('U') # e280a8@UTF8 Unicode line separator
275
+ UCS_0X2029 = [0x2029].pack('U') # e280a9@UTF8 Unicode paragraph separator
276
+
277
+ # non-break characters
278
+ ESCAPE_SEQ = {
279
+ "\x00" => '\\0',
280
+ "\x07" => '\\a',
281
+ "\x08" => '\\b',
282
+ "\x0b" => '\\v',
283
+ "\x0c" => '\\f',
284
+ "\x1b" => '\\e',
285
+ "\"" => '\\"',
286
+ "\\" => '\\\\',
287
+ }
288
+
289
+ # non-breaking space
290
+ ESCAPE_SEQ_NS = {
291
+ UCS_0XA0 => '\\_',
292
+ }
293
+
294
+ # white spaces
295
+ ESCAPE_SEQ_WS = {
296
+ "\x09" => '\\t',
297
+ " " => '\\x20',
298
+ }
299
+
300
+ # line breaks
301
+ ESCAPE_SEQ_LB ={
302
+ "\x0a" => '\\n',
303
+ "\x0d" => '\\r',
304
+ UCS_0X85 => '\\N',
305
+ UCS_0X2028 => '\\L',
306
+ UCS_0X2029 => '\\P',
307
+ }
308
+
309
+ # regexps for line breaks
310
+ REX_LF = Regexp.escape("\x0a")
311
+ REX_CR = Regexp.escape("\x0d")
312
+ REX_CRLF = Regexp.escape("\x0d\x0a")
313
+ REX_NEL = Regexp.escape(UCS_0X85)
314
+ REX_LS = Regexp.escape(UCS_0X2028)
315
+ REX_PS = Regexp.escape(UCS_0X2029)
316
+
317
+ REX_ANY_LB = /(#{REX_LF}|#{REX_CR}|#{REX_NEL}|#{REX_LS}|#{REX_PS})/
318
+ REX_NORMAL_LB = /(#{REX_LF}|#{REX_LS}|#{REX_PS})/
319
+
320
+ # regexps for language-Independent types for YAML1.1
321
+ REX_BOOL = /
322
+ y|Y|yes|Yes|YES|n|N|no|No|NO
323
+ |true|True|TRUE|false|False|FALSE
324
+ |on|On|ON|off|Off|OFF
325
+ /x
326
+ REX_FLOAT = /
327
+ [-+]?([0-9][0-9_]*)?\.[0-9.]*([eE][-+][0-9]+)? # (base 10)
328
+ |[-+]?[0-9][0-9_]*(:[0-5]?[0-9])+\.[0-9_]* # (base 60)
329
+ |[-+]?\.(inf|Inf|INF) # (infinity)
330
+ |\.(nan|NaN|NAN) # (not a number)
331
+ /x
332
+ REX_INT = /
333
+ [-+]?0b[0-1_]+ # (base 2)
334
+ |[-+]?0[0-7_]+ # (base 8)
335
+ |[-+]?(0|[1-9][0-9_]*) # (base 10)
336
+ |[-+]?0x[0-9a-fA-F_]+ # (base 16)
337
+ |[-+]?[1-9][0-9_]*(:[0-5]?[0-9])+ # (base 60)
338
+ /x
339
+ REX_MERGE = /
340
+ <<
341
+ /x
342
+ REX_NULL = /
343
+ ~ # (canonical)
344
+ |null|Null|NULL # (English)
345
+ | # (Empty)
346
+ /x
347
+ REX_TIMESTAMP = /
348
+ [0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9] # (ymd)
349
+ |[0-9][0-9][0-9][0-9] # (year)
350
+ -[0-9][0-9]? # (month)
351
+ -[0-9][0-9]? # (day)
352
+ ([Tt]|[ \t]+)[0-9][0-9]? # (hour)
353
+ :[0-9][0-9] # (minute)
354
+ :[0-9][0-9] # (second)
355
+ (\.[0-9]*)? # (fraction)
356
+ (([ \t]*)Z|[-+][0-9][0-9]?(:[0-9][0-9])?)? # (time zone)
357
+ /x
358
+ REX_VALUE = /
359
+ =
360
+ /x
361
+ end
362
+
363
+ include Constants
364
364
 
365
365
  end
366
366
 
367
367
  class Object
368
- def ya2yaml(options = {})
369
- Ya2YAML.new(options)._ya2yaml(self)
370
- end
368
+ def ya2yaml(options = {})
369
+ Ya2YAML.new(options)._ya2yaml(self)
370
+ end
371
371
  end
372
372
 
373
373
  __END__