cpjolicoeur-ClothBlue 0.2.2 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,14 +1,14 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "ClothBlue"
3
- s.version = "0.2.2"
4
- s.date = "Time.now"
3
+ s.version = "0.5.1"
4
+ s.date = "2008-11-25"
5
5
  s.summary = "HTML to Markdown converter"
6
6
  s.email = "cpjolicoeur@gmail.com"
7
7
  s.homepage = "http://github.com/cpjolicoeur/clothblue"
8
8
  s.description = "ClothBlue is BlueCloth's evil twin. It converts existing HTML into Markdown format for use with BlueCloth."
9
9
  s.has_rdoc = true
10
10
  s.authors = ["Craig P Jolicoeur"]
11
- s.files = ["README", "TODO", "clothblue.gemspec", "lib/clothblue.rb", "lib/README.rdoc", "test/README", "test/test_entities.rb", "test/test_formatting.rb", "test/test_headings.rb", "test/test_lists.rb", "test/test_structure.rb", "test/test_tables.rb"]
11
+ s.files = ["README", "TODO", "clothblue.gemspec", "lib/clothblue.rb", "lib/parsehtml/parsehtml.rb", "test/README", "test/test_entities.rb", "test/test_formatting.rb", "test/test_headings.rb", "test/test_lists.rb", "test/test_structure.rb", "test/test_tables.rb"]
12
12
  s.test_files = ["test/test_entities.rb", "test/test_formatting.rb", "test/test_headings.rb", "test/test_lists.rb", "test/test_structure.rb", "test/test_tables.rb"]
13
13
  s.rdoc_options = ["--main", "lib/README.rdoc"]
14
- end
14
+ end
@@ -1,124 +1,722 @@
1
- =begin rdoc
2
- Provides the methods to convert HTML into Markdown.
3
- *Please* *note*: ClothBlue creates UTF-8 output. To do so, it sets $KCODE to UTF-8. This will be globally available!
4
- #--
5
- TODO: enhance docs, as more methods become available
6
- #++
1
+ require 'parsehtml/parsehtml'
7
2
 
8
- Author:: Craig P Jolicoeur (mailto:cpjolicoeur@gmail.com)
9
- Copyright:: Copyright (c) 2008 Phillip Gawlowski
10
- License:: MIT
11
- =end
12
-
13
- require 'cgi'
14
- $KCODE = "U"
15
-
16
- class ClothBlue < String
17
- #--
18
- TEXT_FORMATTING = [
19
- ["<b>", "**"], ["</b>","**"], ["<em>","_"], ["</em>", "_"], ["<b>", "**"],
20
- ["</b>", "**"], ["<code>", "`"], ["<i>","_"], ["</i>", "_"],
21
- ["</code>", "`"], ["<strong>", "**"], ["</strong>", "**"]
22
- ]
23
-
24
- HEADINGS = [
25
- ["<h1>","# "], ["</h1>", " #\n\n"], ["<h2>","## "], ["</h2>", " ##\n\n"],
26
- ["<h3>","### "], ["</h3>", " ###\n\n"], ["<h4>","#### "], ["</h4>", " ####\n\n"],
27
- ["<h5>","##### "], ["</h5>", " #####\n\n"], ["<h6>","###### "], ["</h6>", " ######\n\n"]
28
- ]
29
-
30
- STRUCTURES = [
31
- ["<p>", "\n\n"],["</p>","\n\n"], ["<blockquote>", "> "], ["</blockquote>","\n"],
32
- ["<br />", "\n"], ["<br>", "\n"]
33
- ]
34
-
35
- ENTITIES = [
36
- ["&#8220;", '"'], ["&#8221;", '"'], ["&#8212;", "--"], ["&#8212;", "--"],
37
- ["&#8211;","-"], ["&#8230;", "..."], ["&#215;", " x "], ["&#8482;","(TM)"],
38
- ["&#174;","(R)"], ["&#169;","(C)"], ["&#8217;", "'"]
39
- ]
3
+ class ClothBlue
4
+
5
+ # Constants
6
+ LINKS_EACH_PARAGRAPH = false
7
+ BODYWIDTH = false
8
+ KEEPHTML = true
9
+ MIN_BODYWIDTH = 25
10
+
11
+ # tags which can be handled by markdown
12
+ IS_MARKDOWNABLE = {
13
+ 'p' => [],
14
+ 'ul' => [],
15
+ 'ol' => [],
16
+ 'li' => [],
17
+ 'br' => [],
18
+ 'blockquote' => [],
19
+ 'code' => [],
20
+ 'pre' => [],
21
+ 'a' => [{'href' => 'required'}, {'title' => 'optional'}],
22
+ 'strong' => [],
23
+ 'b' => [],
24
+ 'em' => [],
25
+ 'i' => [],
26
+ 'img' => [{'src' => 'required'}, {'alt' => 'optional'}, {'title' => 'optional'}],
27
+ 'h1' => [],
28
+ 'h2' => [],
29
+ 'h3' => [],
30
+ 'h4' => [],
31
+ 'h5' => [],
32
+ 'h6' => [],
33
+ 'hr' => []
34
+ }
35
+
36
+ # html tags to be ignored (content will be parsed)
37
+ IGNORE = %w(html body)
38
+
39
+ # html tags to be dropped (content will not be parsed!)
40
+ DROP = %w(script head style form area object param iframe)
40
41
 
41
- LISTS = [
42
- ["<ol>", ""], ["</ol>", "\n\n"], ["<ul>", ""], ["</ul>", "\n\n"], ["<li>", "+ "], ["</li>", "\n"]
42
+ # Markdown indents which could be wrapped
43
+ WRAPPABLE_INDENTS = [
44
+ '\* ', # ul
45
+ '\d. ', # ol
46
+ '\d\d. ', # ol
47
+ '> ', # blockquote
48
+ '' # p
43
49
  ]
44
-
45
- TABLES = [
46
- ["<table>","\n\n<table>"], ["</table>","</table>\n\n"]
50
+
51
+ # list of chars which have to be escaped in normal text
52
+ # TODO: what's with block chars/ sequences at the beginning of a block?
53
+ ESCAPE_IN_TEXT = [
54
+ {'([-*_])([ ]{0,2}\1){2,}' => '\\\\$0|'}, # hr
55
+ {'\*\*([^*\s]+)\*\*' => '\*\*$1\*\*'}, # strong
56
+ {'\*([^*\s]+)\*' => '\*$1\*'}, # em
57
+ {'__(?! |_)(.+)(?!<_| )__' => '\_\_$1\_\_'}, # em
58
+ {'_(?! |_)(.+)(?!<_| )_' => '\_$1\_'}, # em
59
+ {'`(.+)`' => '\`$1\`'}, # code
60
+ {'\[(.+)\](\s*\()' => '\[$1\]$2'}, # links: [text] (url) => [text\] (url)
61
+ {'\[(.+)\](\s*)\[(.*)\]' => '\[$1\]$2\[$3\]'}, # links: [text][id] => [text\][id\]
47
62
  ]
48
-
49
- def initialize (html)
50
- super(html)
51
- @workingcopy = html
63
+
64
+ # parseHTML parser
65
+ attr_accessor :parser
66
+
67
+ # markdown output
68
+ attr_reader :output
69
+
70
+ # stack with tags which were not converted to html
71
+ attr_reader :not_converted
72
+
73
+ # skip conversion to markdown
74
+ attr_reader :skip_conversion
75
+
76
+ # keep html tags which cannot be converted to markdown
77
+ attr_reader :keep_html
78
+
79
+ # wrap output, set to 0 to skip wrapping
80
+ attr_reader :body_width
81
+
82
+ # whether last processed node was a block tag or not
83
+ @last_was_block_tag = false
84
+ attr_reader :last_was_block_tag
85
+
86
+ # name of last closed tag
87
+ @last_closed_tag = ''
88
+ attr_reader :last_closed_tag
89
+
90
+ # list of chars which have to be escaped in normal text
91
+ @escape_in_text = {}
92
+ attr_reader :escape_in_text
93
+
94
+ # number of linebreaks before next inline output
95
+ @linebreaks = 0
96
+ attr_accessor :linebreaks
97
+
98
+ # stores current buffer
99
+ @buffer = []
100
+ attr_accessor :buffer
101
+
102
+ # current indentation
103
+ @indent = ''
104
+ attr_accessor :indent
105
+
106
+ # node stack, e.g. for <a> and <abbr> tags
107
+ @stack = {}
108
+ attr_accessor :stack
109
+
110
+ # Constructor
111
+ def initialize(text = '', links_after_each_paragraph = LINKS_EACH_PARAGRAPH, body_width = BODYWIDTH, keep_html = KEEPHTML)
112
+ @links_after_each_paragraph = links_after_each_paragraph
113
+ @keep_html = keep_html
114
+ @body_width = (body_width > MIN_BODYWIDTH) ? body_width.to_i : MIN_BODYWIDTH
115
+
116
+ @parser = HTML::Tokenizer.new(text)
117
+
118
+ @output = ''
119
+ @not_converted = []
120
+ @skip_conversion = false
121
+
122
+ @search, @replace = [], []
123
+ ESCAPE_IN_TEXT.each do |s,r|
124
+ @search << '/(?<!\\\)/' + s + '/U'
125
+ @replace << r
126
+ end
127
+
128
+ @escape_in_text = {'search' => @search, 'replace' => @replace}
52
129
  end
53
-
54
- #++
55
- #Call all necessary methods to convert a string of HTML into Markdown markup.
56
-
57
- def to_markdown
58
- headings(@workingcopy)
59
- structure(@workingcopy)
60
- text_formatting(@workingcopy)
61
- lists(@workingcopy)
62
- entities(@workingcopy)
63
- tables(@workingcopy)
64
- @workingcopy = CGI::unescapeHTML(@workingcopy)
65
- @workingcopy
130
+
131
+ # parse an HTML string
132
+ def parse_string
133
+ # @parser.html = html ## -> if we passed it in
134
+ parse
135
+ return @output
66
136
  end
67
-
68
- #--
69
- #The conversion methods themselves are private.
70
- private
71
-
72
- def text_formatting(text)
73
- TEXT_FORMATTING.each do |htmltag, markdowntag|
74
- text.gsub!(htmltag, markdowntag)
137
+
138
+ # iterate through the nodes and decide what to do with the current node
139
+ def parse
140
+ @output = ''
141
+ # drop tags that are in the DROP list
142
+ # TODO: implement dropping of @drop tags
143
+
144
+ while token = @parser.next_node
145
+ case @parser.node_type
146
+ when 'doctype', 'pi', 'comment'
147
+ if (@keep_html)
148
+ flush_linebreaks
149
+ out(@parser.node)
150
+ set_linebreaks(2)
151
+ end
152
+ when 'text'
153
+ handle_text
154
+ when 'tag'
155
+ next if IGNORE.include?(@parser.tag_name)
156
+ flush_linebreaks if (@parser.is_start_tag)
157
+ if (@skip_conversion)
158
+ is_markdownable # update notConverted
159
+ handle_tag_to_text
160
+ next
161
+ end
162
+ @parser.html = @parser.html.lstrip if (!@parser.keep_whitespace && @parser.is_block_element && @parser.is_start_tag)
163
+ if (is_markdownable)
164
+ if (@parser.is_block_element && @parser.is_start_tag && !@last_was_block_tag && !@output.empty?)
165
+ if (!@buffer.empty?)
166
+ str = @buffer[@buffer.size - 1]
167
+ else
168
+ str = @output
169
+ end
170
+ if (str.slice((@indent.size - 1) * -1) != "\n#{@indent}")
171
+ str << "\n" + @indent
172
+ end
173
+ func = "handle_tag_#{@parser.tag_name}"
174
+ self.send(func)
175
+
176
+ if (@links_after_each_paragraph && @parser.is_block_element && !@parser.is_start_tag)
177
+ flush_stacked
178
+ end
179
+ if(!@parser.is_start_tag)
180
+ @last_closed_tag = @parser.tag_name
181
+ end
182
+ end
183
+ else
184
+ handle_tag_to_text
185
+ @last_closed_tag = ''
186
+ end
187
+ else
188
+ # TODO: trigger error for invalid node type
189
+ end # end case
190
+
191
+ @last_was_block_tag = (@parser.node_type == 'tag' && @parser.is_start_tag && @parser.is_block_element)
192
+ end # end while
193
+
194
+ ### cleanup
195
+ tmp = @output.gsub('&gt;', '>')
196
+ tmp = tmp.gsub('&amp;', '&')
197
+ @output = tmp.rstrip
198
+ # end parsing, flush stacked tags
199
+ flush_stacked
200
+ @stack = {}
201
+ <<<<<<< HEAD:lib/clothblue_rewrite.rb
202
+ =======
203
+ end
204
+
205
+ # check if current tag can be converted to Markdown
206
+ def is_markdownable
207
+ return false unless (IS_MARKDOWNABLE.include?(@parser.tag_name))
208
+
209
+ if (@parser.is_start_tag)
210
+ ret = true
211
+ if (@keep_html)
212
+ diff = @parser.tag_attributes.reject { |a| @parser.tag_name.include?(a) }
213
+ ret = false unless diff.empty? # non markdownable attributes given
214
+ end
215
+ if (ret)
216
+ IS_MARKDOWNABLE.each do |attr, type|
217
+ if ((type == 'required') && @parser.tag_attributes[attr].nil?)
218
+ # required Markdown attribute not given
219
+ ret = false
220
+ break
221
+ end
222
+ end
223
+ end
224
+ unless (ret)
225
+ @not_converted << (@parser.tag_name + '::' + @parser.open_tags.join('/'))
226
+ end
227
+ return ret
228
+ else
229
+ if (!@not_converted.empty? && (@not_converted.last == (@parser.tag_name + '::' + @parser.open_tags.join('/'))))
230
+ @not_converted.pop
231
+ return false
232
+ end
233
+ return true
75
234
  end
76
- text
235
+ end
236
+
237
+ # flush enqueued linebreaks
238
+ def flush_linebreaks
239
+ if ((@linebreaks > 0) && !@output.empty?)
240
+ out("\n" * @linebreaks, true)
241
+ end
242
+ @linebreaks = 0
243
+ end
244
+
245
+ # output all stacked tags
246
+ def flush_stacked
247
+ # # links
248
+ # foreach ($this->stack as $tag => $a) {
249
+ # if (!empty($a)) {
250
+ # call_user_func(array(&$this, 'flushStacked_'.$tag));
251
+ # }
252
+ # }
77
253
  end
78
254
 
79
-
80
- def headings(text)
81
- HEADINGS.each do |htmltag, markdowntag|
82
- text.gsub!(htmltag, markdowntag)
255
+ # set number of line breaks before next start tag
256
+ def set_linebreaks(number)
257
+ @linebreaks = number if (@linebreaks < number)
258
+ end
259
+
260
+ # append string to the correct var, either directly to
261
+ # @output or to the current buffers
262
+ def out(put = '', nowrap = false)
263
+ return if put.empty?
264
+
265
+ if (!@buffer.empty?)
266
+ @buffer.last << put
267
+ else
268
+ if ((@body_width > 0) && !@parser.keep_whitespace) # wrap lines
269
+ # get last line
270
+ pos = @output.index("\n")
271
+ line = pos ? @output.slice(pos, @output.size - pos) : @output
272
+ end
273
+
274
+ if (nowrap)
275
+ if ((put[0,1] != "\n") && (line.size + put.size) > @body_width)
276
+ @output << "\n#{indent(put)}"
277
+ else
278
+ @output << put
279
+ end
280
+ return
281
+ else
282
+ put << "\n" # make sure we get all lines in the while below
283
+ line_len = line.size
284
+ while (pos = put.index("\n"))
285
+ put_line = put.slice(1, pos+1)
286
+ put_len = put_line.size
287
+ put = put.slice(pos+1, put.size - pos)
288
+ if (line_len + put_len < @body_width)
289
+ @output << put_line
290
+ line_len = put_len
291
+ else
292
+ # $split = preg_split('#^(.{0,'.($this->bodyWidth - $lineLen).'})\b#', $putLine, 2, PREG_SPLIT_OFFSET_CAPTURE | PREG_SPLIT_DELIM_CAPTURE);
293
+ # $this->output .= rtrim($split[1][0])."\n".$this->indent.$this->wordwrap(ltrim($split[2][0]), $this->bodyWidth, "\n".$this->indent, false);
294
+ end
295
+ end # end while
296
+ end
297
+ @output = @output(0, -1)
298
+ return
299
+ else
300
+ @output << put
83
301
  end
84
- text
302
+ >>>>>>> b6201584759afcc6f24a557ef9312597bd63f98f:lib/clothblue_rewrite.rb
85
303
  end
86
304
 
305
+ <<<<<<< HEAD:lib/clothblue_rewrite.rb
306
+ # check if current tag can be converted to Markdown
307
+ def is_markdownable
308
+ return false unless (IS_MARKDOWNABLE.include?(@parser.tag_name))
309
+ =======
310
+ # indent next output (start tag) or unindent (end tag)
311
+ def indent(str, output = true)
312
+ if (@parser.is_start_tag)
313
+ @indent << str
314
+ out(str, true) if @output
315
+ else
316
+ @indent = @indent.slice(0, (str.size * -1))
317
+ end
318
+ end
319
+
320
+ # handle plain text
321
+ def handle_text
322
+ if (has_parent('pre') && @parser.node.index("\n"))
323
+ @parser.node.gsub!("\n", "\n#{@indent}")
324
+ end
325
+ if (!has_parent('code') && !has_parent('pre'))
326
+ # entity decode
327
+ decode(@parser.node)
328
+ if (!@skip_conversion)
329
+ # escape some chars in normal text
330
+ @parser.node.gsub!(@escape_in_text['search'], @escape_in_text['replace'])
331
+ end
332
+ else
333
+ @parser.node.gsub!(['&quot;', '&apos'], ['"', '\''])
334
+ end
335
+ out(@parser.node)
336
+ @last_closed_tag = ''
337
+ end
87
338
 
88
- def lists(text)
89
- LISTS.each do |htmltag, markdowntag|
90
- text.gsub!(htmltag, markdowntag)
339
+ # handle non-Markdownable tags
340
+ def handle_tag_to_text
341
+ if (!@keep_html)
342
+ set_linebreaks(2) if (!@parser.is_start_tag && @parser.is_block_element)
343
+ else
344
+ # dont convert to markdown inside this tag
345
+ # TODO: markdown extra
346
+ if (!@parser.is_empty_tag)
347
+ if (@parser.is_start_tag)
348
+ unless (@skip_conversion)
349
+ @skip_conversion = @parser.tag_name + '::' + @parser.open_tags.join('/')
350
+ end
351
+ else
352
+ if (@skip_conversion == (@parser.tag_name + '::' + @parser.open_tags.join('/'))
353
+ @skip_conversion = false
354
+ end
355
+ end
356
+ end # end !@parser.is_empty_tag
357
+
358
+ if (@parser.is_block_element)
359
+ if (@parser.is_start_tag)
360
+ if (%w(ins del).include?(parent))
361
+ # looks like ins or del are block elements now
362
+ out("\n", true)
363
+ indent(' ')
364
+ end
365
+ if (@parser.tag_name != 'pre')
366
+ out(@parser.node + "\n" + @indent)
367
+ @parser.is_empty_tag ? set_linebreaks(1) : indent(' ')
368
+ @parser.html = @parser.html.lstrip
369
+ else
370
+ # dont indent inside <pre> tags
371
+ out(@parser.node)
372
+ @static_indent = @indent
373
+ @indent = ''
374
+ end
375
+ else
376
+ @output = rstrip(@output) unless @parser.keep_whitespace
377
+ if (@parser.tag_name != 'pre')
378
+ indent(' ')
379
+ out("\n" + @indent + @parser.node)
380
+ else
381
+ # reset indentation
382
+ out(@parser.node)
383
+ @indent = @static_indent
384
+ end
385
+
386
+ if (%w(ins del).include?(parent))
387
+ # ins or del was block element
388
+ out("\n")
389
+ indent(' ')
390
+ end
391
+
392
+ @parser.tag_name == 'li' ? set_linebreaks(1) : set_linebreaks(2)
393
+ end
394
+ else
395
+ out(@parser.node)
396
+ end
397
+
398
+ if (%w(code pre).include?(@parser.tag_name))
399
+ if (@parser.is_start_tag)
400
+ buffer
401
+ else
402
+ # add stuff so cleanup just reverses this
403
+ tmp = unbugger.gsub('&gt;', '&amp;gt;')
404
+ out(tmp.gsub('&lt;', '&amp;lt;'))
405
+ end
406
+ end
91
407
  end
92
- text
93
408
  end
94
-
95
-
96
- def entities(text)
97
- ENTITIES.each do |htmlentity, markdownentity|
98
- text.gsub!(htmlentity, markdownentity)
409
+
410
+ # get tag name of direct parent tag
411
+ def parent
412
+ @parser.open_tags.last
413
+ end
414
+
415
+ # check if current node has a tag as parent (somewhere, not just the direct parent)
416
+ def has_parent(tag)
417
+ @parser.open_tags.include?(tag)
418
+ end
419
+
420
+ # add current node to the stack (this only stores the attributes)
421
+ def stack
422
+ @stack[@parser.tag_name] = [] if (@stack[@parser.tag_name].nil?)
423
+ @stack[@parser.tag_name] << @parser.tag_attributes
424
+ end
425
+
426
+ # remove current tag from stack
427
+ def unstack
428
+ if (@stack[@parser.tag_name].nil? || !@stack[@parser.tag_name].is_a?(Array))
429
+ # TODO: trigger an error
430
+ raise "somebody set us up the bomb"
99
431
  end
100
- text
432
+ @stack[@parser.tag_name].pop
101
433
  end
102
-
103
-
104
- def structure(text)
105
- STRUCTURES.each do |htmltag, markdowntag|
106
- text.gsub!(htmltag, markdowntag)
434
+
435
+ # get last stacked element of type tag
436
+ def get_stacked(tag)
437
+ @stack[tag][@stack[tag].size-1]
438
+ end
439
+
440
+ # buffer next parser output until unbuffer is called
441
+ def buffer
442
+ @buffer << ''
443
+ end
444
+
445
+ # end current buffer and return buffered output
446
+ def unbuffer
447
+ @buffer.pop
448
+ end
449
+
450
+ # wordwrap for utf8 encoded strings
451
+ def wordwrap(str, width, brk, cut = false)
452
+ >>>>>>> b6201584759afcc6f24a557ef9312597bd63f98f:lib/clothblue_rewrite.rb
453
+
454
+ if (@parser.is_start_tag)
455
+ ret = true
456
+ if (@keep_html)
457
+ diff = @parser.tag_attributes.reject { |a| @parser.tag_name.include?(a) }
458
+ ret = false unless diff.empty? # non markdownable attributes given
459
+ end
460
+ if (ret)
461
+ IS_MARKDOWNABLE.each do |attr, type|
462
+ if ((type == 'required') && @parser.tag_attributes[attr].nil?)
463
+ # required Markdown attribute not given
464
+ ret = false
465
+ break
466
+ end
467
+ end
468
+ end
469
+ unless (ret)
470
+ @not_converted << (@parser.tag_name + '::' + @parser.open_tags.join('/'))
471
+ end
472
+ return ret
473
+ else
474
+ if (!@not_converted.empty? && (@not_converted.last == (@parser.tag_name + '::' + @parser.open_tags.join('/'))))
475
+ @not_converted.pop
476
+ return false
477
+ end
478
+ return true
107
479
  end
108
- text
109
480
  end
110
-
111
- def tables(text)
112
- TABLES.each do |htmltag, markdowntag|
113
- text.gsub!(htmltag, markdowntag)
481
+
482
+ <<<<<<< HEAD:lib/clothblue_rewrite.rb
483
+ # flush enqueued linebreaks
484
+ def flush_linebreaks
485
+ if ((@linebreaks > 0) && !@output.empty?)
486
+ out("\n" * @linebreaks, true)
114
487
  end
115
- text
488
+ @linebreaks = 0
489
+ end
490
+
491
+ # output all stacked tags
492
+ def flush_stacked
493
+ # # links
494
+ # foreach ($this->stack as $tag => $a) {
495
+ # if (!empty($a)) {
496
+ # call_user_func(array(&$this, 'flushStacked_'.$tag));
497
+ # }
498
+ # }
116
499
  end
117
500
 
501
+ # set number of line breaks before next start tag
502
+ def set_linebreaks(number)
503
+ @linebreaks = number if (@linebreaks < number)
504
+ end
505
+
506
+ # append string to the correct var, either directly to
507
+ # @output or to the current buffers
508
+ def out(put = '', nowrap = false)
509
+ return if put.empty?
510
+
511
+ if (!@buffer.empty?)
512
+ @buffer.last << put
513
+ else
514
+ if ((@body_width > 0) && !@parser.keep_whitespace) # wrap lines
515
+ # get last line
516
+ pos = @output.index("\n")
517
+ line = pos ? @output.slice(pos, @output.size - pos) : @output
518
+ end
519
+
520
+ if (nowrap)
521
+ if ((put[0,1] != "\n") && (line.size + put.size) > @body_width)
522
+ @output << "\n#{indent(put)}"
523
+ else
524
+ @output << put
525
+ end
526
+ return
527
+ else
528
+ put << "\n" # make sure we get all lines in the while below
529
+ line_len = line.size
530
+ while (pos = put.index("\n"))
531
+ put_line = put.slice(1, pos+1)
532
+ put_len = put_line.size
533
+ put = put.slice(pos+1, put.size - pos)
534
+ if (line_len + put_len < @body_width)
535
+ @output << put_line
536
+ line_len = put_len
537
+ else
538
+ # $split = preg_split('#^(.{0,'.($this->bodyWidth - $lineLen).'})\b#', $putLine, 2, PREG_SPLIT_OFFSET_CAPTURE | PREG_SPLIT_DELIM_CAPTURE);
539
+ # $this->output .= rtrim($split[1][0])."\n".$this->indent.$this->wordwrap(ltrim($split[2][0]), $this->bodyWidth, "\n".$this->indent, false);
540
+ end
541
+ end # end while
542
+ end
543
+ @output = @output(0, -1)
544
+ return
545
+ else
546
+ @output << put
547
+ end
548
+ end
549
+
550
+ # indent next output (start tag) or unindent (end tag)
551
+ def indent(str, output = true)
552
+ if (@parser.is_start_tag)
553
+ @indent << str
554
+ out(str, true) if @output
555
+ else
556
+ @indent = @indent.slice(0, (str.size * -1))
557
+ end
558
+ end
559
+
560
+ # handle plain text
561
+ def handle_text
562
+ if (has_parent('pre') && @parser.node.index("\n"))
563
+ @parser.node.gsub!("\n", "\n#{@indent}")
564
+ end
565
+ if (!has_parent('code') && !has_parent('pre'))
566
+ # entity decode
567
+ decode(@parser.node)
568
+ if (!@skip_conversion)
569
+ # escape some chars in normal text
570
+ @parser.node.gsub!(@escape_in_text['search'], @escape_in_text['replace'])
571
+ end
572
+ else
573
+ @parser.node.gsub!(['&quot;', '&apos'], ['"', '\''])
574
+ end
575
+ out(@parser.node)
576
+ @last_closed_tag = ''
577
+ end
578
+
579
+ # handle non-Markdownable tags
580
+ def handle_tag_to_text
581
+ if (!@keep_html)
582
+ set_linebreaks(2) if (!@parser.is_start_tag && @parser.is_block_element)
583
+ else
584
+ # dont convert to markdown inside this tag
585
+ # TODO: markdown extra
586
+ if (!@parser.is_empty_tag)
587
+ if (@parser.is_start_tag)
588
+ unless (@skip_conversion)
589
+ @skip_conversion = @parser.tag_name + '::' + @parser.open_tags.join('/')
590
+ end
591
+ else
592
+ if (@skip_conversion == (@parser.tag_name + '::' + @parser.open_tags.join('/'))
593
+ @skip_conversion = false
594
+ end
595
+ end
596
+ end # end !@parser.is_empty_tag
597
+
598
+ if (@parser.is_block_element)
599
+ if (@parser.is_start_tag)
600
+ if (%w(ins del).include?(parent))
601
+ # looks like ins or del are block elements now
602
+ out("\n", true)
603
+ indent(' ')
604
+ end
605
+ if (@parser.tag_name != 'pre')
606
+ out(@parser.node + "\n" + @indent)
607
+ @parser.is_empty_tag ? set_linebreaks(1) : indent(' ')
608
+ @parser.html = @parser.html.lstrip
609
+ else
610
+ # dont indent inside <pre> tags
611
+ out(@parser.node)
612
+ @static_indent = @indent
613
+ @indent = ''
614
+ end
615
+ else
616
+ @output = rstrip(@output) unless @parser.keep_whitespace
617
+ if (@parser.tag_name != 'pre')
618
+ indent(' ')
619
+ out("\n" + @indent + @parser.node)
620
+ else
621
+ # reset indentation
622
+ out(@parser.node)
623
+ @indent = @static_indent
624
+ end
625
+
626
+ if (%w(ins del).include?(parent))
627
+ # ins or del was block element
628
+ out("\n")
629
+ indent(' ')
630
+ end
631
+
632
+ @parser.tag_name == 'li' ? set_linebreaks(1) : set_linebreaks(2)
633
+ end
634
+ else
635
+ out(@parser.node)
636
+ end
637
+
638
+ if (%w(code pre).include?(@parser.tag_name))
639
+ if (@parser.is_start_tag)
640
+ buffer
641
+ else
642
+ # add stuff so cleanup just reverses this
643
+ tmp = unbugger.gsub('&gt;', '&amp;gt;')
644
+ out(tmp.gsub('&lt;', '&amp;lt;'))
645
+ end
646
+ end
647
+ end
648
+ end
649
+
650
+ # get tag name of direct parent tag
651
+ def parent
652
+ @parser.open_tags.last
653
+ end
654
+
655
+ # check if current not has a tag as parent (somewhere, not just the direct parent)
656
+ def has_parent(tag)
657
+ @parser.open_tags.include?(tag)
658
+ end
659
+
660
+ # add current node to the stack (this only stores the attributes)
661
+ def stack
662
+ @stack[@parser.tag_name] = [] if (@stack[@parser.tag_name].nil?)
663
+ @stack[@parser.tag_name] << @parser.tag_attributes
664
+ end
665
+
666
+ # remove current tag from stack
667
+ def unstack
668
+ if (@stack[@parser.tag_name].nil? || !@stack[@parser.tag_name].is_a?(Array))
669
+ # TODO: trigger and error
670
+ raise "somebody set us up the bomb"
671
+ end
672
+ @stack[@parser.tag_name].pop
673
+ end
674
+
675
+ # get last stacked element of type tag
676
+ def get_stacked(tag)
677
+ @stack[tag][@stack[tag].size-1]
678
+ end
679
+
680
+ # buffer next parser output until unbuffer is called
681
+ def buffer
682
+ @buffer << ''
683
+ end
684
+
685
+ # end current buffer and return buffered output
686
+ def unbuffer
687
+ @buffer.pop
688
+ end
689
+
690
+ # wordwrap for utf8 encoded strings
691
+ def wordwrap(str, width, brk, cut = false)
692
+ # TODO: implement wordwrap for utf8 code
693
+ end
694
+
695
+ =======
696
+ >>>>>>> b6201584759afcc6f24a557ef9312597bd63f98f:lib/clothblue_rewrite.rb
697
+ # decode email address
698
+ def decode(text, quoted_style = '')
699
+ # TODO: implement decode method
118
700
 
119
- def css_styles(text)
120
- #TODO: Translate CSS-styles
121
- text
701
+ # @author derernst@gmx.ch <http://www.php.net/manual/en/function.html-entity-decode.php#68536>
702
+ # @author Milian Wolff <http://milianw.de>
703
+ # if (version_compare(PHP_VERSION, '5', '>=')) {
704
+ # # UTF-8 is only supported in PHP 5.x.x and above
705
+ # $text = html_entity_decode($text, $quote_style, 'UTF-8');
706
+ # } else {
707
+ # if (function_exists('html_entity_decode')) {
708
+ # $text = html_entity_decode($text, $quote_style, 'ISO-8859-1');
709
+ # } else {
710
+ # static $trans_tbl;
711
+ # if (!isset($trans_tbl)) {
712
+ # $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, $quote_style));
713
+ # }
714
+ # $text = strtr($text, $trans_tbl);
715
+ # }
716
+ # $text = preg_replace_callback('~&#x([0-9a-f]+);~i', array(&$this, '_decode_hex'), $text);
717
+ # $text = preg_replace_callback('~&#(\d{2,5});~', array(&$this, '_decode_numeric'), $text);
718
+ # }
719
+ # return $text;
122
720
  end
123
- #++
124
- end # end class ClothBlue
721
+
722
+ end