html5 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (59) hide show
  1. data/History.txt +3 -0
  2. data/Manifest.txt +58 -0
  3. data/README +9 -0
  4. data/Rakefile.rb +17 -0
  5. data/lib/html5/constants.rb +818 -0
  6. data/lib/html5/filters/base.rb +10 -0
  7. data/lib/html5/filters/inject_meta_charset.rb +82 -0
  8. data/lib/html5/filters/optionaltags.rb +198 -0
  9. data/lib/html5/filters/sanitizer.rb +15 -0
  10. data/lib/html5/filters/whitespace.rb +36 -0
  11. data/lib/html5/html5parser/after_body_phase.rb +46 -0
  12. data/lib/html5/html5parser/after_frameset_phase.rb +34 -0
  13. data/lib/html5/html5parser/after_head_phase.rb +50 -0
  14. data/lib/html5/html5parser/before_head_phase.rb +41 -0
  15. data/lib/html5/html5parser/in_body_phase.rb +607 -0
  16. data/lib/html5/html5parser/in_caption_phase.rb +68 -0
  17. data/lib/html5/html5parser/in_cell_phase.rb +78 -0
  18. data/lib/html5/html5parser/in_column_group_phase.rb +55 -0
  19. data/lib/html5/html5parser/in_frameset_phase.rb +57 -0
  20. data/lib/html5/html5parser/in_head_phase.rb +138 -0
  21. data/lib/html5/html5parser/in_row_phase.rb +87 -0
  22. data/lib/html5/html5parser/in_select_phase.rb +84 -0
  23. data/lib/html5/html5parser/in_table_body_phase.rb +83 -0
  24. data/lib/html5/html5parser/in_table_phase.rb +110 -0
  25. data/lib/html5/html5parser/initial_phase.rb +134 -0
  26. data/lib/html5/html5parser/phase.rb +158 -0
  27. data/lib/html5/html5parser/root_element_phase.rb +42 -0
  28. data/lib/html5/html5parser/trailing_end_phase.rb +35 -0
  29. data/lib/html5/html5parser.rb +248 -0
  30. data/lib/html5/inputstream.rb +654 -0
  31. data/lib/html5/liberalxmlparser.rb +158 -0
  32. data/lib/html5/sanitizer.rb +188 -0
  33. data/lib/html5/serializer/htmlserializer.rb +180 -0
  34. data/lib/html5/serializer/xhtmlserializer.rb +20 -0
  35. data/lib/html5/serializer.rb +2 -0
  36. data/lib/html5/tokenizer.rb +968 -0
  37. data/lib/html5/treebuilders/base.rb +334 -0
  38. data/lib/html5/treebuilders/hpricot.rb +231 -0
  39. data/lib/html5/treebuilders/rexml.rb +208 -0
  40. data/lib/html5/treebuilders/simpletree.rb +185 -0
  41. data/lib/html5/treebuilders.rb +24 -0
  42. data/lib/html5/treewalkers/base.rb +154 -0
  43. data/lib/html5/treewalkers/hpricot.rb +48 -0
  44. data/lib/html5/treewalkers/rexml.rb +48 -0
  45. data/lib/html5/treewalkers/simpletree.rb +48 -0
  46. data/lib/html5/treewalkers.rb +26 -0
  47. data/lib/html5.rb +13 -0
  48. data/parse.rb +217 -0
  49. data/tests/preamble.rb +82 -0
  50. data/tests/test_encoding.rb +35 -0
  51. data/tests/test_lxp.rb +263 -0
  52. data/tests/test_parser.rb +68 -0
  53. data/tests/test_sanitizer.rb +142 -0
  54. data/tests/test_serializer.rb +68 -0
  55. data/tests/test_stream.rb +62 -0
  56. data/tests/test_tokenizer.rb +94 -0
  57. data/tests/test_treewalkers.rb +116 -0
  58. data/tests/tokenizer_test_parser.rb +63 -0
  59. metadata +120 -0
@@ -0,0 +1,654 @@
1
+ require 'stringio'
2
+ require 'html5/constants'
3
+
4
+ module HTML5
5
+
6
+ # Provides a unicode stream of characters to the HTMLTokenizer.
7
+
8
+ # This class takes care of character encoding and removing or replacing
9
+ # incorrect byte-sequences and also provides column and line tracking.
10
+
11
+ class HTMLInputStream
12
+
13
+ attr_accessor :queue, :char_encoding, :errors
14
+
15
# Builds a character stream over +source+ for the tokenizer.
#
# source  - an object responding to #read (used as-is), or anything else,
#           which open_stream wraps in a StringIO. A String is therefore
#           treated as markup, not as a filename.
# options - Hash of settings. Every key/value pair is stored in the instance
#           variable named after the key. The ones read in this class:
#           encoding   - encoding name; when it is a valid encoding it is
#                        used as-is and no detection takes place
#           parse_meta - look for a <meta> encoding declaration (default true)
#           chardet    - fall back to the chardet detector (default true)
def initialize(source, options = {})
  # Defaults first, so that +options+ can override them.
  @encoding = nil
  @parse_meta = true
  @chardet = true
  options.each do |key, setting|
    instance_variable_set("@#{key}", setting)
  end

  @raw_stream = open_stream(source)

  # Sizes in bytes: <meta> pre-scan window, chardet chunk, read block.
  @NUM_BYTES_META = 512
  @NUM_BYTES_CHARDET = 256
  @NUM_BYTES_BUFFER = 1024

  # Used when neither a BOM, a <meta> declaration nor a guess is available.
  @DEFAULT_ENCODING = 'windows-1252'

  # An explicit ("transport level") valid encoding wins; otherwise sniff.
  explicit = !@encoding.nil? && HTML5.is_valid_encoding(@encoding)
  @char_encoding = explicit ? @encoding : detect_encoding

  # Prime the buffer. windows-1252 and utf-8 are decoded byte by byte in
  # #char; every other encoding is converted to utf-8 up front with Iconv.
  @buffer = @raw_stream.read(@NUM_BYTES_BUFFER) || ''
  if @char_encoding == 'windows-1252'
    @win1252 = true
  elsif @char_encoding != 'utf-8'
    begin
      require 'iconv'
      begin
        # Iconv needs the complete document, so slurp the remainder.
        @buffer << @raw_stream.read unless @raw_stream.eof?
        @buffer = Iconv.iconv('utf-8', @char_encoding, @buffer).first
      rescue
        # Conversion failed: decode as windows-1252 instead.
        @win1252 = true
      end
    rescue LoadError
      # Iconv is not available: decode as windows-1252 instead.
      @win1252 = true
    end
  end

  # Pushed-back characters and recorded stream errors.
  @queue = []
  @errors = []

  # Read offset into @buffer plus line/column bookkeeping.
  @tell = 0
  @line = @col = 0
  @line_lengths = []
end
84
+
85
# Returns the object raw bytes are read from, remembering it in @stream.
#
# Anything that responds to #read is used unchanged; any other source is
# handed to StringIO.new.
def open_stream(source)
  @stream = source.respond_to?(:read) ? source : StringIO.new(source)
end
98
+
99
# Works out which encoding the raw stream should be decoded with.
#
# Sources are tried in order: byte order mark, <meta> declaration (only when
# @parse_meta is set), the chardet detector (only when @chardet is set and
# the gem can be loaded), and finally @DEFAULT_ENCODING.
#
# Returns the encoding name; 'iso-8859-1' is reported as 'windows-1252'.
def detect_encoding
  # A BOM is authoritative. detect_bom also positions the stream past it.
  encoding = detect_bom

  # No BOM: look for a <meta> element carrying encoding information.
  encoding = detect_encoding_meta if encoding.nil? && @parse_meta

  # Guess with chardet, if available.
  if encoding.nil? && @chardet
    begin
      require 'rubygems'
      require 'UniversalDetector' # gem install chardet
      chunks = []
      detector = UniversalDetector::Detector.instance
      detector.reset
      until @raw_stream.eof?
        chunk = @raw_stream.read(@NUM_BYTES_CHARDET)
        break if !chunk || chunk.empty?
        chunks << chunk
        detector.feed(chunk)
        break if detector.instance_eval { @done }
        # The detector's last-char state is turned back into a String when it
        # holds an integer. Integer (not Fixnum) keeps this working on Rubies
        # where Fixnum no longer exists.
        detector.instance_eval do
          @_mLastChar = @_mLastChar.chr if Integer === @_mLastChar
        end
      end
      detector.close
      encoding = detector.result['encoding']
      # Hand everything that was consumed back to the stream.
      seek(chunks.join(''), 0)
    rescue LoadError
      # chardet is not installed: fall through to the default encoding.
    end
  end

  # If all else fails use the default encoding.
  encoding = @DEFAULT_ENCODING if encoding.nil?

  # Substitute for equivalent encodings.
  encoding_sub = {'iso-8859-1' => 'windows-1252'}
  encoding_sub.fetch(encoding.downcase, encoding)
end
150
+
151
# Attempts to detect a byte order mark in the first four bytes of the stream.
#
# Returns the encoding name implied by the BOM, or nil when there is none
# (or the stream is empty). Unless the stream was empty, the consumed bytes
# are handed to #seek so that reading resumes just past the BOM when one was
# found and at the first byte otherwise.
def detect_bom
  # Checked in order: UTF-8, then UTF-32 before UTF-16, because the UTF-32LE
  # mark starts with the UTF-16LE mark.
  signatures = [
    [[0xEF, 0xBB, 0xBF],       'utf-8'],
    [[0xFF, 0xFE, 0x00, 0x00], 'utf-32le'],
    [[0x00, 0x00, 0xFE, 0xFF], 'utf-32be'],
    [[0xFF, 0xFE],             'utf-16le'],
    [[0xFE, 0xFF],             'utf-16be']
  ]

  head = @raw_stream.read(4)
  return nil unless head

  # Compare byte values rather than strings so the outcome does not depend
  # on the encoding label the runtime attaches to either string.
  bytes = head.unpack('C*')
  mark, encoding = signatures.find do |signature, _name|
    bytes[0, signature.length] == signature
  end

  seek(head, mark ? mark.length : 0)
  encoding
end
186
+
187
# Rewinds the raw stream so that the next read starts at buffer[n].
#
# buffer - the bytes that were just read from @raw_stream
# n      - offset within +buffer+ at which reading should resume
#
# Strategies, in order:
#   1. the stream already supports #unget: push buffer[n..-1] back
#   2. the stream supports #seek: seek to offset n (this treats +buffer+ as
#      having been read from the very start of the stream)
#   3. otherwise (or when seeking raises Errno::ESPIPE): wrap the stream in
#      a SimpleDelegator that keeps pushed-back bytes in front of it
def seek(buffer, n)
  if @raw_stream.respond_to?(:unget)
    @raw_stream.unget(buffer[n..-1])
    return
  end

  if @raw_stream.respond_to?(:seek)
    begin
      @raw_stream.seek(n)
      return
    rescue Errno::ESPIPE
      # Not seekable (pipe, socket, ...): use the push-back wrapper below.
    end
  end

  require 'delegate'
  @raw_stream = SimpleDelegator.new(@raw_stream)

  class << @raw_stream
    # Serves pushed-back bytes first, then reads from the wrapped stream.
    def read(chars = -1)
      @data ||= ''
      if chars == -1 || chars > @data.length
        result = @data
        @data = ''
        return result if __getobj__.eof?
        return result + __getobj__.read.to_s if chars == -1
        return result + __getobj__.read(chars - result.length).to_s
      elsif @data.empty?
        return __getobj__.read(chars)
      else
        # Take the FIRST +chars+ bytes of the pushed-back data.
        result = @data[0...chars]
        @data = @data[chars..-1]
        return result
      end
    end

    # Pushes +data+ back so that it is returned before anything still
    # pending: it was read earlier, so it goes in front.
    def unget(data)
      @data = data.to_s + @data.to_s
    end

    # The stream is only exhausted once the pushed-back bytes are gone too.
    def eof?
      (@data.nil? || @data.empty?) && __getobj__.eof?
    end
  end

  @raw_stream.unget(buffer[n..-1])
end
232
+
233
# Reports the encoding declared by a <meta> element, if any.
#
# Reads up to @NUM_BYTES_META bytes, hands them to EncodingParser and then
# gives them back to the stream through #seek. Returns the encoding name
# found by the parser, or nil.
def detect_encoding_meta
  # read returns nil at end of stream; scan an empty string in that case so
  # that #seek never receives nil.
  buffer = @raw_stream.read(@NUM_BYTES_META) || ''
  parser = EncodingParser.new(buffer)
  seek(buffer, 0)
  parser.get_encoding
end
240
+
241
# Reports the current position in the stream as a [line, column] pair; the
# line number is 1-based.
#
# Characters waiting in @queue have already been counted in @line/@col, so
# they are walked backwards and un-counted here. Raises RuntimeError when a
# queued newline is reached while the column is not 0.
def position
  row = @line
  column = @col
  @queue.reverse_each do |queued|
    if queued != "\n"
      column -= 1
      next
    end
    # Stepping back over a newline: continue from the end of the line before.
    row -= 1
    raise RuntimeError.new("col=#{column}") unless column == 0
    column = @line_lengths[row]
  end
  [row + 1, column]
end
255
+
256
# Returns the next character of the stream, or :EOF once it is exhausted.
#
# Characters pushed back onto @queue are returned first. Otherwise one
# character is decoded from @buffer, which is topped up from @raw_stream in
# blocks of @NUM_BYTES_BUFFER bytes:
#   * CR and CR LF are normalised to a single "\n"; @line, @col and
#     @line_lengths are updated for bytes 0x01-0x7F
#   * with @win1252 set, bytes 0x80-0xFF are translated to UTF-8
#   * otherwise a well-formed UTF-8 sequence is returned whole and any other
#     byte >= 0x80 becomes U+FFFD
#   * a NUL byte is recorded in @errors and replaced by U+FFFD
#
# NOTE(review): @buffer is indexed byte-wise, so on Ruby 1.9+ it is assumed
# to carry a binary encoding (as returned by read(length)) - confirm.
def char
  return @queue.shift unless @queue.empty?

  # Keep at least four bytes ahead of the cursor (the longest UTF-8
  # sequence) for as long as the stream can still deliver data.
  if @tell + 4 > @buffer.length && !@raw_stream.eof?
    @buffer = (@buffer[@tell..-1] || '') + @raw_stream.read(@NUM_BYTES_BUFFER).to_s
    @tell = 0
  end

  c = @buffer[@tell]
  # Ruby 1.8 yields the byte value; later versions yield a one-byte String.
  c = c.unpack('C').first if c.is_a?(String)
  @tell += 1

  case c
  when 0x01..0x7F
    if c == 0x0D
      # Normalize newlines: swallow the LF of a CR LF pair.
      following = @buffer[@tell]
      @tell += 1 if following == 0x0A || following == "\n"
      c = 0x0A
    end

    # Update position in stream.
    if c == 0x0A
      @line_lengths << @col
      @line += 1
      @col = 0
    else
      @col += 1
    end

    c.chr

  when 0x80..0xBF
    if !@win1252
      [0xFFFD].pack('U') # stray continuation byte: invalid utf-8
    elsif c <= 0x9F
      [ENTITIES_WINDOWS1252[c - 0x80]].pack('U')
    else
      [c].pack('U') # 0xA0-0xBF map straight onto U+00A0-U+00BF
    end

  when 0xC0..0xFF
    if @win1252
      [c].pack('U') # 0xC0-0xFF map straight onto U+00C0-U+00FF
    else
      # \A (not ^) so that only a sequence starting at the current byte can
      # match; /n keeps the pattern byte-oriented.
      window = @buffer[@tell - 1..@tell + 3]
      if window =~ /\A
          ( [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
          | \xE0[\xA0-\xBF][\x80-\xBF]         # excluding overlongs
          | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
          | \xED[\x80-\x9F][\x80-\xBF]         # excluding surrogates
          | \xF0[\x90-\xBF][\x80-\xBF]{2}      # planes 1-3
          | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
          | \xF4[\x80-\x8F][\x80-\xBF]{2}      # plane 16
          )/xn
        sequence = $1
        @tell += sequence.length - 1
        # Label the validated bytes as UTF-8 where strings carry an encoding.
        sequence.force_encoding('UTF-8') if sequence.respond_to?(:force_encoding)
        sequence
      else
        [0xFFFD].pack('U') # invalid utf-8
      end
    end

  when 0x00
    @errors.push('null character found in input stream, ' +
      'replaced with U+FFFD')
    [0xFFFD].pack('U') # null characters are invalid

  else
    :EOF
  end
end
327
+
328
# Collects characters from the stream into a String, stopping at :EOF or at
# the first character that is a member of +characters+ (with +opposite+ set:
# the first character that is NOT a member).
#
# characters - any object responding to #include?
# opposite   - invert the membership test (default false)
#
# The character that ended the run is put back at the front of @queue.
def chars_until(characters, opposite=false)
  collected = []
  current = char
  until current == :EOF
    break if characters.include?(current) != opposite
    collected << current
    current = char
  end

  @queue.insert(0, current) unless current == :EOF
  collected.join('')
end
345
+
346
# Puts +characters+ back at the front of @queue so that they are returned
# again by the next reads.
#
# characters - a single String (queued as one entry), a collection
#              responding to #to_a (each element is queued, order kept),
#              or nil / :EOF (ignored)
def unget(characters)
  return if characters.nil? || characters == :EOF
  # String#to_a is not available on Ruby 1.9+, so wrap strings explicitly.
  entries = characters.is_a?(String) ? [characters] : characters.to_a
  @queue.unshift(*entries)
end
349
+ end
350
+
351
# String-like object with an associated cursor (+position+) and helpers for
# scanning forwards. Reading at a position at or beyond the string length
# raises EOF.
class EncodingBytes < String

  # Index of the byte the cursor is on; -1 before iteration has started.
  attr_accessor :position

  def initialize(value)
    super(value)
    @position = -1
  end

  # Advances the cursor one step at a time and yields the element under it
  # (whatever String#[] returns for an index on the running Ruby). The block
  # may move the cursor itself; iteration resumes after the new position.
  # An EOF raised from the block ends the iteration quietly.
  def each
    # Stop before stepping onto the index one past the last byte.
    while @position + 1 < length
      @position += 1
      yield self[@position]
    end
  rescue EOF
  end

  # Returns the byte under the cursor as a one-character String.
  # Raises EOF when the cursor is at or beyond the end.
  def current_byte
    raise EOF if @position >= length
    return self[@position].chr
  end

  # Moves the cursor forward past any run of bytes contained in +chars+.
  # Raises EOF (through current_byte) when the end is reached.
  def skip(chars=SPACE_CHARACTERS)
    while chars.include?(current_byte)
      @position += 1
    end
  end

  # Tests whether +bytes+ occurs at the cursor. With +lower+ set, the data is
  # lower-cased before comparing.
  #
  # On a match returns true and moves the cursor to the byte after the match;
  # otherwise returns false and leaves the cursor alone.
  def match_bytes(bytes, lower=false)
    data = self[position...position + bytes.length]
    return false if data.nil? # cursor beyond the end: nothing can match
    data = data.downcase if lower
    return false unless data == bytes
    @position += bytes.length
    true
  end

  # Finds the next occurrence of +bytes+ at or after the cursor and moves
  # the cursor onto the last byte of that occurrence. Returns true.
  # Raises EOF when there is no further occurrence.
  def jump_to(bytes)
    remainder = self[position..-1]
    offset = remainder && remainder.index(bytes)
    raise EOF unless offset
    @position += (offset + bytes.length - 1)
    true
  end

  # Moves the cursor forward to the next byte contained in +byte_list+.
  # Raises EOF (through current_byte) when the end is reached first.
  def find_next(byte_list)
    until byte_list.include?(current_byte)
      @position += 1
    end
  end
end
413
+
414
+ # Mini parser for detecting character encoding from meta elements
415
+ class EncodingParser
416
+
417
# data - the bytes to scan for an encoding declaration; converted with
#        #to_s and wrapped in an EncodingBytes cursor
def initialize(data)
  @encoding = nil
  @data = EncodingBytes.new(data.to_s)
end
422
+
423
# Prefix / handler pairs consulted by get_encoding at every position. They
# are tried top to bottom and the first matching prefix wins, so the longer
# prefixes must stay ahead of the plain '<'.
@@method_dispatch = [
  %w[<!--  handle_comment],
  %w[<meta handle_meta],
  %w[</    handle_possible_end_tag],
  %w[<!    handle_other],
  %w[<?    handle_other],
  %w[<     handle_possible_start_tag]
].map { |prefix, handler| [prefix, handler.to_sym] }
431
+
432
# Scans the data for an encoding declaration.
#
# At each position the prefixes in @@method_dispatch are tried (lower-casing
# the data); the handler of the first match is invoked and scanning stops as
# soon as a handler returns a false value.
#
# Returns the encoding stored in @encoding with surrounding whitespace
# stripped, or nil when none was found.
def get_encoding
  @data.each do |_byte|
    matched = @@method_dispatch.find do |prefix, _handler|
      @data.match_bytes(prefix, true)
    end
    next unless matched
    break unless send(matched.last)
  end

  @encoding = @encoding.strip unless @encoding.nil?
  @encoding
end
446
+
447
# Handler for '<!--': moves the cursor to the end of the next '-->' and
# returns what jump_to returns.
def handle_comment
  @data.jump_to('-->')
end
451
+
452
# Handler for '<meta': inspects the element's attributes for an encoding.
#
# A 'charset' attribute is used directly; a 'content' attribute is run
# through ContentAttrParser. The first candidate accepted by
# HTML5.is_valid_encoding is stored in @encoding.
#
# Returns false when an encoding was found (stop scanning), true otherwise.
def handle_meta
  # '<meta' not followed by a space is some other element: keep going.
  return true unless SPACE_CHARACTERS.include?(@data.current_byte)

  while true
    pair = get_attribute
    return true if pair.nil? # no more attributes on this element

    name, value = pair[0], pair[1]
    if name == 'charset'
      candidate = value
    elsif name == 'content'
      candidate = ContentAttrParser.new(EncodingBytes.new(value)).parse
    else
      next
    end

    next unless HTML5.is_valid_encoding(candidate)
    @encoding = candidate
    return false
  end
end
479
+
480
# Handler for '<': treats what follows as a possible start tag.
def handle_possible_start_tag
  handle_possible_tag(false)
end
483
+
484
# Handler for '</': advances the cursor by one, then treats what follows as
# a possible end tag.
def handle_possible_end_tag
  @data.position += 1
  handle_possible_tag(true)
end
488
+
489
# Skips over a tag that is of no interest for encoding detection.
#
# end_tag - true when called for '</', false for '<'
#
# When the byte under the cursor is an ASCII letter, the cursor is moved to
# the end of the tag name and, unless a new '<' starts there, all attributes
# are consumed. When it is not a letter, a start tag is ignored while an end
# tag is stepped back by one and passed to handle_other.
#
# Always returns true (keep scanning).
def handle_possible_tag(end_tag)
  if ASCII_LETTERS.include?(@data.current_byte)
    @data.find_next(SPACE_CHARACTERS + ['<', '>'])

    if @data.current_byte == '<'
      # Step back so the '<' is processed again by the main loop.
      @data.position -= 1
    else
      # Read (and discard) all attributes.
      nil until get_attribute.nil?
    end
  elsif end_tag
    @data.position -= 1
    handle_other
  end

  true
end
513
+
514
# Handler for '<!' and '<?': moves the cursor to the next '>' and returns
# what jump_to returns.
def handle_other
  @data.jump_to('>')
end
517
+
518
# Reads the next attribute at the cursor.
#
# Returns a [name, value] pair (both lower-cased for ASCII upper-case
# letters; value is '' when the attribute has none), or nil when the tag
# ends ('>') or a new tag starts ('<', in which case the cursor is stepped
# back so the '<' is processed again). EOF raised by the cursor propagates
# to the caller.
def get_attribute
  # Lower-cases ASCII upper-case letters, leaves everything else alone.
  fold = lambda { |byte| ASCII_UPPERCASE.include?(byte) ? byte.downcase : byte }

  @data.skip(SPACE_CHARACTERS + ['/'])

  if @data.current_byte == '<'
    @data.position -= 1
    return nil
  elsif @data.current_byte == '>'
    return nil
  end

  attr_name = []
  attr_value = []
  space_found = false

  # Attribute name.
  while true
    byte = @data.current_byte
    # '=' only ends the name once the name is non-empty; a leading '='
    # is part of the name. (An empty Array is truthy in Ruby, so the
    # emptiness test has to be explicit.)
    if byte == '=' && !attr_name.empty?
      break
    elsif SPACE_CHARACTERS.include?(byte)
      space_found = true
      break
    elsif ['/', '<', '>'].include?(byte)
      return [attr_name.join(''), '']
    else
      attr_name.push(fold.call(byte))
    end
    @data.position += 1
  end

  # Whitespace after the name: a value only follows if '=' comes next.
  if space_found
    @data.skip
    unless @data.current_byte == '='
      @data.position -= 1
      return [attr_name.join(''), '']
    end
  end

  # Step over the '=' and any whitespace before the value.
  @data.position += 1
  @data.skip

  first = @data.current_byte
  if ["'", '"'].include?(first)
    # Quoted value: collect up to the matching quote.
    quote_char = first
    while true
      @data.position += 1
      byte = @data.current_byte
      if byte == quote_char
        @data.position += 1
        return [attr_name.join(''), attr_value.join('')]
      end
      attr_value.push(fold.call(byte))
    end
  elsif ['>', '<'].include?(first)
    return [attr_name.join(''), '']
  else
    attr_value.push(fold.call(first))
  end

  # Unquoted value: collect up to whitespace or a tag delimiter.
  while true
    @data.position += 1
    byte = @data.current_byte
    if (SPACE_CHARACTERS + ['>', '<']).include?(byte)
      return [attr_name.join(''), attr_value.join('')]
    end
    attr_value.push(fold.call(byte))
  end
end
600
+ end
601
+
602
# Extracts the charset parameter from the value of a <meta content="...">
# attribute, e.g. "text/html; charset=utf-8".
class ContentAttrParser
  # data - cursor object over the attribute value; it must offer position,
  #        position=, jump_to, skip, find_next, current_byte and slicing
  def initialize(data)
    @data = data
  end

  # Returns the charset value as found in the data (unquoted, or the text
  # between matching quote marks), or nil when there is no ';', no
  # 'charset', no '=' after it, or the data ends prematurely.
  def parse
    # Move past the first ';' and any whitespace after it.
    @data.position = 0
    @data.jump_to(';')
    @data.position += 1
    @data.skip

    # Move past 'charset'; without an '=' after it there is nothing to report.
    @data.jump_to('charset')
    @data.position += 1
    @data.skip
    return nil unless @data.current_byte == '='

    @data.position += 1
    @data.skip

    if ['"', "'"].include?(@data.current_byte)
      # Quoted: the value runs up to the matching quote mark.
      quote_mark = @data.current_byte
      @data.position += 1
      value_start = @data.position
      @data.jump_to(quote_mark)
      return @data[value_start ... @data.position]
    end

    # Unquoted: the value runs up to the next whitespace ...
    value_start = @data.position
    begin
      @data.find_next(SPACE_CHARACTERS)
      @data[value_start ... @data.position]
    rescue EOF
      # ... or to the end of the data.
      @data[value_start .. -1]
    end
  rescue EOF
    nil
  end
end
648
+
649
+ # Determine if a string is a supported encoding
650
+ def self.is_valid_encoding(encoding)
651
+ (not encoding.nil? and encoding.kind_of?(String) and ENCODINGS.include?(encoding.downcase.strip))
652
+ end
653
+
654
+ end