mullet 0.0.0 → 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. data/lib/mullet/container.rb +8 -4
  2. data/lib/mullet/default_model.rb +9 -9
  3. data/lib/mullet/default_nested_model.rb +7 -6
  4. data/lib/mullet/html/attribute_command.rb +8 -4
  5. data/lib/mullet/html/attributes.rb +22 -0
  6. data/lib/mullet/html/command.rb +4 -3
  7. data/lib/mullet/html/command_element_renderer.rb +19 -0
  8. data/lib/mullet/html/element.rb +41 -0
  9. data/lib/mullet/html/element_renderer.rb +261 -0
  10. data/lib/mullet/html/filtered_element_handler.rb +87 -0
  11. data/lib/mullet/html/for_element_renderer.rb +47 -0
  12. data/lib/mullet/html/if_element_renderer.rb +46 -0
  13. data/lib/mullet/html/layout.rb +48 -0
  14. data/lib/mullet/html/message.rb +55 -0
  15. data/lib/mullet/html/message_attribute_command.rb +30 -0
  16. data/lib/mullet/html/model_attribute_command.rb +30 -0
  17. data/lib/mullet/html/page_builder.rb +152 -0
  18. data/lib/mullet/html/parser/attribute.rb +8 -0
  19. data/lib/mullet/html/parser/constants.rb +1061 -0
  20. data/lib/mullet/html/parser/default_handler.rb +27 -0
  21. data/lib/mullet/html/parser/input_stream.rb +711 -0
  22. data/lib/mullet/html/parser/open_element.rb +77 -0
  23. data/lib/mullet/html/parser/simple_parser.rb +128 -0
  24. data/lib/mullet/html/parser/tokenizer.rb +1085 -0
  25. data/lib/mullet/html/remove_mode.rb +30 -0
  26. data/lib/mullet/html/static_text_renderer.rb +20 -0
  27. data/lib/mullet/html/template.rb +44 -0
  28. data/lib/mullet/html/template_builder.rb +208 -63
  29. data/lib/mullet/html/template_loader.rb +77 -39
  30. data/lib/mullet/html/template_parser.rb +48 -0
  31. data/lib/mullet/html/unless_element_renderer.rb +24 -0
  32. data/lib/mullet/model.rb +2 -5
  33. data/lib/mullet/render_context.rb +24 -18
  34. data/lib/mullet/tilt.rb +37 -0
  35. data/lib/mullet/version.rb +2 -1
  36. data/lib/mullet.rb +1 -0
  37. metadata +58 -11
@@ -0,0 +1,711 @@
1
+ require 'mullet/html/parser/constants'
2
+
3
+ module Mullet; module HTML; module Parser
4
+
5
+ # Provides a unicode stream of characters to the HTMLTokenizer.
6
+
7
+ # This class takes care of character encoding and removing or replacing
8
+ # incorrect byte-sequences and also provides column and line tracking.
9
+
10
+ class InputStream
11
+
12
+ attr_accessor :queue, :char_encoding, :errors
13
+
14
# Initialises the InputStream.
#
# source  - an IO-like object (anything responding to #read) or a String.
# options - Hash of settings; every key is copied verbatim into the
#           instance variable of the same name. The ones read here are:
#           :encoding   - encoding name to use regardless of any BOM or
#                         later declaration (such as in a meta element).
#           :parse_meta - look for a <meta> element containing encoding
#                         information (default true).
#           :chardet    - fall back to the optional chardet gem when
#                         nothing else identifies the encoding
#                         (default true).
def initialize(source, options = {})
  @encoding = nil
  @parse_meta = true
  @chardet = true

  options.each { |name, value| instance_variable_set("@#{name}", value) }

  # partial Ruby 1.9 support: tag the source with the requested encoding.
  # This is best effort only, so a failure (unknown encoding name, frozen
  # string, ...) is deliberately ignored.
  if @encoding && source.respond_to?(:force_encoding)
    begin
      source.force_encoding(@encoding)
    rescue StandardError
      nil
    end
  end

  # Raw Stream
  @raw_stream = open_stream(source)

  # Encoding Information
  # Number of bytes to use when looking for a meta element with
  # encoding information
  @NUM_BYTES_META = 512
  # Number of bytes to use when detecting encoding using chardet
  @NUM_BYTES_CHARDET = 256
  # Number of bytes to use when reading content
  @NUM_BYTES_BUFFER = 1024

  # Encoding to use if no other information can be found
  @DEFAULT_ENCODING = 'windows-1252'

  # Detect encoding iff no explicit "transport level" encoding is supplied
  @char_encoding = @encoding.nil? ? detect_encoding : @encoding

  # Read bytes from stream decoding them into Unicode
  @buffer = @raw_stream.read(@NUM_BYTES_BUFFER) || ''
  if @char_encoding == 'windows-1252'
    @win1252 = true
  elsif @char_encoding != 'utf-8'
    begin
      # Transcoding needs the whole document, not just the first block
      @buffer << @raw_stream.read unless @raw_stream.eof?
      if @buffer.respond_to?(:encode)
        # Ruby 1.9+: iconv is no longer part of the standard library
        @buffer = @buffer.encode('utf-8', @char_encoding)
      else
        require 'iconv'
        @buffer = Iconv.iconv('utf-8', @char_encoding, @buffer).first
      end
    rescue StandardError, LoadError
      # Conversion unavailable or failed: treat the bytes as windows-1252
      @win1252 = true
    end
  end

  @queue = []
  @errors = []

  # Reset position in the list to read from
  @tell = 0
  @line = @col = 0
  @line_lengths = []
end
84
+
85
# Produces a file object from source.
#
# source can be either an IO-like object (anything responding to #read),
# which is returned untouched, or a string, which is wrapped in a StringIO.
def open_stream(source)
  # Already an IO like object
  return source if source.respond_to?(:read)

  # StringIO is part of the standard library but is not loaded by default
  require 'stringio'
  StringIO.new(source)
end
97
+
98
# Works out the character encoding of the raw stream.
#
# Tries, in order: a byte order mark, a <meta> declaration (when
# @parse_meta is set), the optional chardet gem (when @chardet is set and
# the gem can be loaded) and finally @DEFAULT_ENCODING. An 'iso-8859-1'
# result is reported as 'windows-1252'.
#
# Returns the encoding name as a String.
def detect_encoding
  # First look for a BOM
  # This will also read past the BOM if present
  encoding = detect_bom

  # If there is no BOM need to look for meta elements with encoding
  # information
  encoding = detect_encoding_meta if encoding.nil? && @parse_meta

  # Guess with chardet, if available
  if encoding.nil? && @chardet
    begin
      require 'rubygems'
      require 'UniversalDetector' # gem install chardet
      buffers = []
      detector = UniversalDetector::Detector.instance
      detector.reset
      until @raw_stream.eof?
        buffer = @raw_stream.read(@NUM_BYTES_CHARDET)
        break if !buffer or buffer.empty?
        buffers << buffer
        detector.feed(buffer)
        break if detector.instance_eval { @done }
        detector.instance_eval {
          # Integer rather than Fixnum: Fixnum was removed from Ruby
          @_mLastChar = @_mLastChar.chr if Integer === @_mLastChar
        }
      end
      detector.close
      encoding = detector.result['encoding']
      # Put everything that was fed to the detector back on the stream
      seek(buffers*'', 0)
    rescue LoadError
      # chardet is optional; carry on without a guess
    end
  end

  # If all else fails use the default encoding
  encoding = @DEFAULT_ENCODING if encoding.nil?

  # Substitute for equivalent encoding
  encoding = 'windows-1252' if 'iso-8859-1' == encoding.downcase

  encoding
end
147
+
148
# Attempts to detect a BOM at the start of the stream. If an encoding can
# be determined from the BOM return the name of the encoding, otherwise
# return nil.
#
# Up to four bytes are read from @raw_stream; afterwards the stream is
# repositioned just past the BOM when one was found, or back at the start
# when none was. Nothing is repositioned when the read returns nil.
def detect_bom
  # Compare raw bytes: what #read returns and these literals may carry
  # different encoding tags, which would make them compare unequal even
  # when the bytes are identical.
  as_bytes = lambda do |text|
    text.respond_to?(:force_encoding) ? text.dup.force_encoding('BINARY') : text
  end

  # In match priority. UTF-32 must be tried before UTF-16 because the
  # UTF-32LE mark begins with the UTF-16LE one.
  signatures = [
    ["\xef\xbb\xbf", 'utf-8'],
    ["\xff\xfe\x00\x00", 'utf-32le'],
    ["\x00\x00\xfe\xff", 'utf-32be'],
    ["\xff\xfe", 'utf-16le'],
    ["\xfe\xff", 'utf-16be']
  ].map { |bytes, name| [as_bytes.call(bytes), name] }

  head = @raw_stream.read(4)
  return nil unless head

  probe = as_bytes.call(head)
  bom, encoding = signatures.find { |bytes, _name| probe[0, bytes.length] == bytes }

  # Set the read position past the BOM if one was found, otherwise
  # set it to the start of the stream
  seek(head, bom ? bom.length : 0)

  encoding
end
183
+
184
# Repositions @raw_stream so that the next read starts at byte n of
# buffer, where buffer holds bytes that were just read from the stream.
#
# Three strategies are tried in order:
# 1. the stream can push bytes back itself (#unget): hand it buffer[n..-1];
# 2. the stream is seekable: seek to absolute offset n;
# 3. otherwise wrap the stream in a SimpleDelegator that keeps the
#    pushed-back bytes in memory and serves them before the real stream.
def seek(buffer, n)
  if @raw_stream.respond_to?(:unget)
    @raw_stream.unget(buffer[n..-1])
    return
  end

  if @raw_stream.respond_to?(:seek)
    begin
      @raw_stream.seek(n)
      return
    rescue Errno::ESPIPE
      # Not seekable after all (e.g. a pipe); use the wrapper below
    end
  end

  require 'delegate'
  @raw_stream = SimpleDelegator.new(@raw_stream)

  class << @raw_stream
    # Serves pushed-back bytes first, then falls through to the wrapped
    # stream. chars == -1 means "read everything".
    def read(chars=-1)
      if chars == -1 or chars > @data.length
        result = @data
        @data = ''
        return result if __getobj__.eof?
        return result + __getobj__.read if chars == -1
        return result + __getobj__.read(chars-result.length)
      elsif @data.empty?
        return __getobj__.read(chars)
      else
        # Take from the very start of the pushed-back data
        result = @data[0...chars]
        @data = @data[chars..-1]
        return result
      end
    end

    # Pushes data back so it is returned by the next read. It goes in
    # front of anything still pending, since it was read before it.
    def unget(data)
      if !@data or @data.empty?
        @data = data
      else
        @data = data + @data
      end
    end
  end

  @raw_stream.unget(buffer[n .. -1])
end
230
+
231
# Report the encoding declared by the meta element.
#
# Reads up to @NUM_BYTES_META bytes, hands them to an EncodingParser, puts
# them back on the stream and returns whatever the parser's get_encoding
# reports.
def detect_encoding_meta
  # read returns nil on an exhausted stream; treat that as "no data" so the
  # push-back below never has to slice nil
  sample = @raw_stream.read(@NUM_BYTES_META) || ''
  parser = EncodingParser.new(sample)
  seek(sample, 0)
  parser.get_encoding
end
238
+
239
# Returns [line, col] of the current position in the stream.
#
# Characters still waiting in @queue have already been counted by #char,
# so they are walked backwards and subtracted again, unless the queue ends
# with :EOF. The line is reported 1-based; the column is returned as kept.
def position
  row = @line
  column = @col
  pending = (@queue && @queue.last != :EOF) ? @queue : []

  pending.reverse_each do |queued|
    if queued == "\n"
      row -= 1
      # a newline can only be un-read from the very start of a line
      raise RuntimeError, "col=#{column}" unless column == 0
      column = @line_lengths[row]
    else
      column -= 1
    end
  end

  [row + 1, column]
end
255
+
256
# Read one character from the stream or queue if available. Return
# :EOF when the end of the input is reached.
#
# Queued characters (see #unget) are returned first. Otherwise the next
# element of @buffer is taken, refilling the buffer from @raw_stream when
# fewer than three elements remain. A carriage return, optionally followed
# by a line feed, is returned as a single "\n". A NUL pushes
# "null-character" onto @errors and is replaced by U+FFFD. The String and
# 0x01..0x7F branches also maintain @line, @col and @line_lengths.
#
# Indexing a String yields a String on Ruby 1.9+ and an Integer on Ruby
# 1.8, which is why both kinds of branch exist.
def char
  return @queue.shift unless @queue.empty?

  if @tell + 3 > @buffer.length && !@raw_stream.eof?
    # read next block, keeping whatever is still unread in the current one
    @buffer = (@buffer[@tell..-1] || '') + @raw_stream.read(@NUM_BYTES_BUFFER).to_s
    @tell = 0
  end

  c = @buffer[@tell]
  @tell += 1

  case c

  when String
    # partial Ruby 1.9 support
    case c
    when "\0"
      @errors.push("null-character")
      c = "\uFFFD" # null characters are invalid
    when "\r"
      # normalize newlines
      @tell += 1 if @buffer[@tell] == "\n"
      c = "\n"
    when "\x80" .. "\x9F"
      c = ENTITIES_WINDOWS1252[c.ord-0x80].chr('utf-8')
    when "\xA0" .. "\xFF"
      if c.encoding == Encoding::ASCII_8BIT
        c = c.encode('utf-8','iso-8859-1')
      end
    end

    # update position in stream
    if c == "\x0a"
      @line_lengths << @col
      @line += 1
      @col = 0
    else
      @col += 1
    end

    c

  when 0x01..0x7F
    if c == 0x0D
      # normalize newlines
      @tell += 1 if @buffer[@tell] == 0x0A
      c = 0x0A
    end

    # update position in stream
    if c == 0x0a
      @line_lengths << @col
      @line += 1
      @col = 0
    else
      @col += 1
    end

    c.chr

  when 0x80..0xBF
    if !@win1252
      [0xFFFD].pack('U') # invalid utf-8
    elsif c <= 0x9f
      [ENTITIES_WINDOWS1252[c-0x80]].pack('U')
    else
      "\xC2" + c.chr # convert to utf-8
    end

  when 0xC0..0xFF
    if instance_variable_defined?("@win1252") && @win1252
      "\xC3" + (c - 64).chr # convert to utf-8
    # from http://www.w3.org/International/questions/qa-forms-utf-8.en.php
    # The n flag makes this a byte-wise pattern; without it the \x escapes
    # above 0x7F are rejected as an "invalid multibyte escape".
    elsif @buffer[@tell - 1..@tell + 3] =~ /^
          ( [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
          | \xE0[\xA0-\xBF][\x80-\xBF]         # excluding overlongs
          | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
          | \xED[\x80-\x9F][\x80-\xBF]         # excluding surrogates
          | \xF0[\x90-\xBF][\x80-\xBF]{2}      # planes 1-3
          | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
          | \xF4[\x80-\x8F][\x80-\xBF]{2}      # plane 16
          )/xn
      @tell += $1.length - 1
      $1
    else
      [0xFFFD].pack('U') # invalid utf-8
    end

  when 0x00
    @errors.push("null-character")
    [0xFFFD].pack('U') # null characters are invalid

  else
    :EOF
  end
end
362
+
363
# Returns a string of characters from the stream up to but not including
# any character in characters, or up to EOF. characters can be any
# container that responds to include?.
#
# With opposite set to true the test is inverted: reading continues only
# while the characters ARE in the container. The character that stopped
# the scan is put back at the front of the queue (unless it was :EOF).
def chars_until(characters, opposite=false)
  collected = []
  current = char

  until current == :EOF
    break if characters.include?(current) != opposite
    collected << current
    current = char
  end

  # Put the character stopped on back to the front of the queue
  # from where it came.
  @queue.insert(0, current) unless current == :EOF
  collected.join('')
end
380
+
381
# Puts characters back at the front of the queue so that #char returns
# them again, in their original order. :EOF is ignored.
#
# characters may be anything responding to to_a (its elements are
# re-queued) or a string (re-queued character by character).
def unget(characters)
  return if characters == :EOF

  unless characters.respond_to?(:to_a)
    # unshift one at a time from the back so the first character ends up first
    return characters.reverse.each_char { |ch| @queue.unshift(ch) }
  end

  @queue.unshift(*characters.to_a)
end
389
+ end
390
+
391
# String-like object with an associated position and various extra methods.
# Asking for the current byte once the position has reached the end of the
# string raises EOF.
class EncodingBytes < String

  attr_accessor :position

  def initialize(value)
    super(value)
    # each advances before yielding, so start one before the first byte
    @position = -1
  end

  # Advances the position one step at a time and yields the element there,
  # until the end of the string. The block may move the position itself.
  # An EOF raised by the block ends the iteration quietly.
  def each
    while @position + 1 < length
      @position += 1
      yield self[@position]
    end
  rescue EOF
  end

  # The one-character string at the current position; raises EOF at or
  # beyond the end of the string.
  def current_byte
    raise EOF if @position >= length
    return self[@position].chr
  end

  # Skip past a list of characters
  def skip(chars=SPACE_CHARACTERS)
    while chars.include?(current_byte)
      @position += 1
    end
  end

  # Look for a sequence of bytes at the current position. If the bytes
  # are found return true and advance the position to the byte after the
  # match. Otherwise return false and leave the position alone. With
  # lower set the data is lower-cased before the comparison.
  def match_bytes(bytes, lower=false)
    data = self[position ... position+bytes.length]
    # nil means the position is already past the end: nothing can match
    return false if data.nil?
    data.downcase! if lower
    rv = (data == bytes)
    @position += bytes.length if rv == true
    return rv
  end

  # Look for the next sequence of bytes matching a given sequence. If
  # a match is found advance the position to the last byte of the match
  # and return true; otherwise raise EOF.
  def jump_to(bytes)
    remainder = self[position .. -1]
    new_position = remainder && remainder.index(bytes)
    raise EOF unless new_position
    @position += (new_position + bytes.length-1)
    return true
  end

  # Move the pointer so it points to the next byte in a set of possible
  # bytes. Raises EOF (via current_byte) when none is found.
  def find_next(byte_list)
    until byte_list.include?(current_byte)
      @position += 1
    end
  end
end
453
+
454
+ # Mini parser for detecting character encoding from meta elements
455
+ class EncodingParser
456
+ ASCII_PUNCTUATION = %r{[\x09-\x0D\x20-\x2F\x3A-\x40\x5B-\x60\x7B-\x7E]}
457
+ # A (hopefully) temporary hack to deal with the fact that Ruby doesn't have a built-in encodings
458
+ # library
459
+ ENCODINGS = ['euc_jp', 'utf-8', "iso8859-2", "iso-8859-1", "utf-16", "UTF-16LE", "UTF-16BE"].inject({}){|m, v| m[v.downcase.gsub(ASCII_PUNCTUATION, '')] = v; m}
460
# data - the data to work on for encoding detection; it is converted with
#        to_s and wrapped in an EncodingBytes.
def initialize(data)
  @encoding = nil
  @data = EncodingBytes.new(data.to_s)
end
465
+
466
+ @@method_dispatch = [
467
+ ['<!--', :handle_comment],
468
+ ['<meta', :handle_meta],
469
+ ['</', :handle_possible_end_tag],
470
+ ['<!', :handle_other],
471
+ ['<?', :handle_other],
472
+ ['<', :handle_possible_start_tag]
473
+ ]
474
+
475
# Scans the data for an encoding declaration and returns its name, or nil
# when none is found.
#
# At every position the prefixes in @@method_dispatch are tried in order;
# the handler of the first one that matches is called, and scanning stops
# as soon as a handler returns a false value. A declared UTF-16 or UTF-32
# variant is reported as 'utf-8'.
def get_encoding
  @data.each do |_byte|
    matched = @@method_dispatch.find { |prefix, _handler| @data.match_bytes(prefix, true) }
    next if matched.nil?
    break unless send(matched.last)
  end

  unless @encoding.nil?
    @encoding = @encoding.strip
    wide = ["utf16", "utf16be", "utf16le", "utf32", "utf32be", "utf32le"]
    @encoding = 'utf-8' if wide.include?(@encoding.downcase.gsub(ASCII_PUNCTUATION, ''))
  end

  @encoding
end
496
+
497
# Skip over comments: move to the end of the next "-->" and report
# whatever @data.jump_to returns.
def handle_comment
  @data.jump_to('-->')
end
501
+
502
# Handles a "<meta" match. Returns false (stop parsing) once an attribute
# yields an encoding that codec_name recognises, which is then stored in
# @encoding; returns true (keep parsing) otherwise.
def handle_meta
  # if we have <meta not followed by a space so just keep going
  return true unless SPACE_CHARACTERS.include?(@data.current_byte)

  # We have a valid meta element we want to search for attributes
  loop do
    # Try to find the next attribute after the current position
    pair = get_attribute
    return true if pair.nil?

    candidate =
      case pair[0]
      when 'charset'
        pair[1]
      when 'content'
        ContentAttrParser.new(EncodingBytes.new(pair[1])).parse
      else
        next
      end

    codec = codec_name(candidate)
    next unless codec

    @encoding = codec
    return false
  end
end
530
+
531
# Handles a "<" match by treating what follows as a possible start tag.
def handle_possible_start_tag
  handle_possible_tag(false)
end
534
+
535
# Handles a "</" match: advances the position by one and then treats
# what follows as a possible end tag.
def handle_possible_end_tag
  @data.position += 1
  handle_possible_tag(true)
end
539
+
540
# Skips over something that looks like a tag. Always returns true, so
# parsing continues.
#
# end_tag - true when called for "</", false when called for "<".
def handle_possible_tag(end_tag)
  if ASCII_LETTERS.include?(@data.current_byte)
    @data.find_next(SPACE_CHARACTERS + ['<', '>'])

    if @data.current_byte == '<'
      # return to the first step in the overall "two step" algorithm
      # reprocessing the < byte
      @data.position -= 1
    else
      # Read all attributes
      nil until get_attribute.nil?
    end
  elsif end_tag
    # Not a tag name after "</": step back and treat it like handle_other
    @data.position -= 1
    handle_other
  end
  # (a "<" that is not followed by an ASCII letter is simply ignored)

  true
end
564
+
565
# Handles "<!", "<?" and malformed end tags: move to the next ">" and
# report whatever @data.jump_to returns.
def handle_other
  @data.jump_to('>')
end
568
+
569
# Return a [name, value] pair for the next attribute in the stream,
# if one is found, or nil.
#
# Leading spaces and slashes are skipped. nil is returned when the next
# byte is ">" or "<" (for "<" the position is also stepped back by one).
# ASCII capitals in names and values are lower-cased. An attribute without
# a value gets ''. Running off the end of the data surfaces as whatever
# @data.current_byte raises.
def get_attribute
  @data.skip(SPACE_CHARACTERS + ['/'])

  if @data.current_byte == '<'
    @data.position -= 1
    return nil
  elsif @data.current_byte == '>'
    return nil
  end

  fold = lambda { |byte| ASCII_UPPERCASE.include?(byte) ? byte.downcase : byte }

  attr_name = []
  attr_value = []
  space_found = false
  # Step 5 attribute name
  while true
    byte = @data.current_byte
    # "=" only ends the name once the name is non-empty; a leading "="
    # is part of the name. (An empty Array is truthy in Ruby, so it has
    # to be tested with empty?.)
    if byte == '=' and !attr_name.empty?
      break
    elsif SPACE_CHARACTERS.include?(byte)
      space_found = true
      break
    elsif ['/', '<', '>'].include?(byte)
      return [attr_name.join(''), '']
    else
      attr_name.push(fold.call(byte))
    end
    # Step 6
    @data.position += 1
  end
  # Step 7
  if space_found
    @data.skip
    # Step 8
    unless @data.current_byte == '='
      @data.position -= 1
      return [attr_name.join(''), '']
    end
  end
  # XXX need to advance position in both spaces and value case
  # Step 9
  @data.position += 1
  # Step 10
  @data.skip
  # Step 11
  if ["'", '"'].include?(@data.current_byte)
    # 11.1
    quote_char = @data.current_byte
    while true
      @data.position += 1
      # 11.3
      if @data.current_byte == quote_char
        @data.position += 1
        return [attr_name.join(''), attr_value.join('')]
      end
      # 11.4 / 11.5
      attr_value.push(fold.call(@data.current_byte))
    end
  elsif ['>', '<'].include?(@data.current_byte)
    return [attr_name.join(''), '']
  else
    attr_value.push(fold.call(@data.current_byte))
  end

  # Unquoted value: runs up to the next space, ">" or "<"
  while true
    @data.position += 1
    if (SPACE_CHARACTERS + ['>', '<']).include?(@data.current_byte)
      return [attr_name.join(''), attr_value.join('')]
    end
    attr_value.push(fold.call(@data.current_byte))
  end
end
651
+
652
# Maps an encoding label to the name registered in ENCODINGS.
#
# The label is lower-cased and stripped of everything matched by
# ASCII_PUNCTUATION before the lookup. Returns nil for anything that is
# not a String and for labels that are not in ENCODINGS.
def codec_name(encoding)
  return nil unless encoding.kind_of?(String)

  ENCODINGS[encoding.downcase.gsub(ASCII_PUNCTUATION, '')]
end
662
+ end
663
+
664
# Extracts the charset from the value of a meta element's content
# attribute, e.g. "text/html; charset=utf-8".
class ContentAttrParser
  # data - the attribute value, as an object offering position, jump_to,
  #        skip, current_byte, find_next and [] (EncodingBytes elsewhere
  #        in this file).
  def initialize(data)
    @data = data
  end

  # Returns the charset value found after the first ";", or nil when there
  # is none (including when the data runs out, signalled by EOF).
  def parse
    # Skip to the first ";"
    @data.position = 0
    @data.jump_to(';')
    @data.position += 1
    @data.skip
    # Move on to the next "charset", then just past it
    @data.jump_to('charset')
    @data.position += 1
    @data.skip
    # Without an = sign there is no value to report
    return nil unless @data.current_byte == '='
    @data.position += 1
    @data.skip

    opener = @data.current_byte
    if opener == '"' || opener == "'"
      # Look for an encoding between matching quote marks
      @data.position += 1
      start = @data.position
      @data.jump_to(opener)
      @data[start ... @data.position]
    else
      # Unquoted value
      start = @data.position
      begin
        @data.find_next(SPACE_CHARACTERS)
        @data[start ... @data.position]
      rescue EOF
        # Return the whole remaining value
        @data[start .. -1]
      end
    end
  rescue EOF
    nil
  end
end
710
+
711
+ end; end; end