cpjolicoeur-ClothBlue 0.2.2 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,14 +1,14 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "ClothBlue"
3
- s.version = "0.2.2"
4
- s.date = "Time.now"
3
+ s.version = "0.5.1"
4
+ s.date = "2008-11-25"
5
5
  s.summary = "HTML to Markdown converter"
6
6
  s.email = "cpjolicoeur@gmail.com"
7
7
  s.homepage = "http://github.com/cpjolicoeur/clothblue"
8
8
  s.description = "ClothBlue is BlueCloth's evil twin. It converts existing HTML into Markdown format for use with BlueCloth."
9
9
  s.has_rdoc = true
10
10
  s.authors = ["Craig P Jolicoeur"]
11
- s.files = ["README", "TODO", "clothblue.gemspec", "lib/clothblue.rb", "lib/README.rdoc", "test/README", "test/test_entities.rb", "test/test_formatting.rb", "test/test_headings.rb", "test/test_lists.rb", "test/test_structure.rb", "test/test_tables.rb"]
11
+ s.files = ["README", "TODO", "clothblue.gemspec", "lib/clothblue.rb", "lib/parsehtml/parsehtml.rb", "test/README", "test/test_entities.rb", "test/test_formatting.rb", "test/test_headings.rb", "test/test_lists.rb", "test/test_structure.rb", "test/test_tables.rb"]
12
12
  s.test_files = ["test/test_entities.rb", "test/test_formatting.rb", "test/test_headings.rb", "test/test_lists.rb", "test/test_structure.rb", "test/test_tables.rb"]
13
13
  s.rdoc_options = ["--main", "lib/README.rdoc"]
14
- end
14
+ end
@@ -1,124 +1,722 @@
1
- =begin rdoc
2
- Provides the methods to convert HTML into Markdown.
3
- *Please* *note*: ClothBlue creates UTF-8 output. To do so, it sets $KCODE to UTF-8. This will be globally available!
4
- #--
5
- TODO: enhance docs, as more methods come availlable
6
- #++
1
+ require 'parsehtml/parsehtml'
7
2
 
8
- Author:: Craig P Jolicoeur (mailto:cpjolicoeur@gmail.com)
9
- Copyright:: Copyright (c) 2008 Phillip Gawlowski
10
- License:: MIT
11
- =end
12
-
13
- require 'cgi'
14
- $KCODE = "U"
15
-
16
- class ClothBlue < String
17
- #--
18
- TEXT_FORMATTING = [
19
- ["<b>", "**"], ["</b>","**"], ["<em>","_"], ["</em>", "_"], ["<b>", "**"],
20
- ["</b>", "**"], ["<code>", "`"], ["<i>","_"], ["</i>", "_"],
21
- ["</code>", "`"], ["<strong>", "**"], ["</strong>", "**"]
22
- ]
23
-
24
- HEADINGS = [
25
- ["<h1>","# "], ["</h1>", " #\n\n"], ["<h2>","## "], ["</h2>", " ##\n\n"],
26
- ["<h3>","### "], ["</h3>", " ###\n\n"], ["<h4>","#### "], ["</h4>", " ####\n\n"],
27
- ["<h5>","##### "], ["</h5>", " #####\n\n"], ["<h6>","###### "], ["</h6>", " ######\n\n"]
28
- ]
29
-
30
- STRUCTURES = [
31
- ["<p>", "\n\n"],["</p>","\n\n"], ["<blockquote>", "> "], ["</blockquote>","\n"],
32
- ["<br />", "\n"], ["<br>", "\n"]
33
- ]
34
-
35
- ENTITIES = [
36
- ["&#8220;", '"'], ["&#8221;", '"'], ["&#8212;", "--"], ["&#8212;", "--"],
37
- ["&#8211;","-"], ["&#8230;", "..."], ["&#215;", " x "], ["&#8482;","(TM)"],
38
- ["&#174;","(R)"], ["&#169;","(C)"], ["&#8217;", "'"]
39
- ]
3
+ class ClothBlue
4
+
5
+ # Constants
6
+ LINKS_EACH_PARAGRAPH = false
7
+ BODYWIDTH = false
8
+ KEEPHTML = true
9
+ MIN_BODYWIDTH = 25
10
+
11
+ # tags which can be handled by markdown
12
+ IS_MARKDOWNABLE = {
13
+ 'p' => [],
14
+ 'ul' => [],
15
+ 'ol' => [],
16
+ 'li' => [],
17
+ 'br' => [],
18
+ 'blockquote' => [],
19
+ 'code' => [],
20
+ 'pre' => [],
21
+ 'a' => [{'href' => 'required'}, {'title' => 'optional'}],
22
+ 'strong' => [],
23
+ 'b' => [],
24
+ 'em' => [],
25
+ 'i' => [],
26
+ 'img' => [{'src' => 'required'}, {'alt' => 'optional'}, {'title' => 'optional'}],
27
+ 'h1' => [],
28
+ 'h2' => [],
29
+ 'h3' => [],
30
+ 'h4' => [],
31
+ 'h5' => [],
32
+ 'h6' => [],
33
+ 'hr' => []
34
+ }
35
+
36
+ # html tags to be ignored (content will be parsed)
37
+ IGNORE = %w(html body)
38
+
39
+ # html tags to be dropped (content will not be parsed!)
40
+ DROP = %w(script head style form area object param iframe)
40
41
 
41
- LISTS = [
42
- ["<ol>", ""], ["</ol>", "\n\n"], ["<ul>", ""], ["</ul>", "\n\n"], ["<li>", "+ "], ["</li>", "\n"]
42
+ # Markdown indents which could be wrapped
43
+ WRAPPABLE_INDENTS = [
44
+ '\* ', # ul
45
+ '\d. ', # ol
46
+ '\d\d. ', # ol
47
+ '> ', # blockquote
48
+ '' # p
43
49
  ]
44
-
45
- TABLES = [
46
- ["<table>","\n\n<table>"], ["</table>","</table>\n\n"]
50
+
51
+ # list of chars which have to be escaped in normal text
52
+ # TODO: what's with block chars/ sequences at the beginning of a block?
53
+ ESCAPE_IN_TEXT = [
54
+ {'([-*_])([ ]{0,2}\1){2,}' => '\\\\$0|'}, # hr
55
+ {'\*\*([^*\s]+)\*\*' => '\*\*$1\*\*'}, # strong
56
+ {'\*([^*\s]+)\*' => '\*$1\*'}, # em
57
+ {'__(?! |_)(.+)(?!<_| )__' => '\_\_$1\_\_'}, # em
58
+ {'_(?! |_)(.+)(?!<_| )_' => '\_$1\_'}, # em
59
+ {'`(.+)`' => '\`$1\`'}, # code
60
+ {'\[(.+)\](\s*\()' => '\[$1\]$2'}, # links: [text] (url) => [text\] (url)
61
+ {'\[(.+)\](\s*)\[(.*)\]' => '\[$1\]$2\[$3\]'}, # links: [text][id] => [text\][id\]
47
62
  ]
48
-
49
- def initialize (html)
50
- super(html)
51
- @workingcopy = html
63
+
64
+ # parseHTML parser
65
+ attr_accessor :parser
66
+
67
+ # markdown output
68
+ attr_reader :output
69
+
70
+ # stack of tags which could not be converted to Markdown
71
+ attr_reader :not_converted
72
+
73
+ # skip conversion to markdown
74
+ attr_reader :skip_conversion
75
+
76
+ # keep html tags which cannot be converted to markdown
77
+ attr_reader :keep_html
78
+
79
+ # wrap output, set to 0 to skip wrapping
80
+ attr_reader :body_width
81
+
82
+ # whether last processed node was a block tag or not
83
+ @last_was_block_tag = false
84
+ attr_reader :last_was_block_tag
85
+
86
+ # name of last closed tag
87
+ @last_closed_tag = ''
88
+ attr_reader :last_closed_tag
89
+
90
+ # list of chars which have to be escaped in normal text
91
+ @escape_in_text = {}
92
+ attr_reader :escape_in_text
93
+
94
+ # number of linebreaks before next inline output
95
+ @linebreaks = 0
96
+ attr_accessor :linebreaks
97
+
98
+ # stores current buffer
99
+ @buffer = []
100
+ attr_accessor :buffer
101
+
102
+ # current indentation
103
+ @indent = ''
104
+ attr_accessor :indent
105
+
106
+ # node stack, e.g. for <a> and <abbr> tags
107
+ @stack = {}
108
+ attr_accessor :stack
109
+
110
+ # Constructor
111
+ def initialize(text = '', links_after_each_paragraph = LINKS_EACH_PARAGRAPH, body_width = BODYWIDTH, keep_html = KEEPHTML)
112
+ @links_after_each_paragraph = links_after_each_paragraph
113
+ @keep_html = keep_html
114
+ @body_width = (body_width > MIN_BODYWIDTH) ? body_width.to_i : MIN_BODYWIDTH
115
+
116
+ @parser = HTML::Tokenizer.new(text)
117
+
118
+ @output = ''
119
+ @not_converted = []
120
+ @skip_conversion = false
121
+
122
+ @search, @replace = [], []
123
+ ESCAPE_IN_TEXT.each do |s,r|
124
+ @search << '/(?<!\\\)/' + s + '/U'
125
+ @replace << r
126
+ end
127
+
128
+ @escape_in_text = {'search' => @search, 'replace' => @replace}
52
129
  end
53
-
54
- #++
55
- #Call all necessary methods to convert a string of HTML into Markdown markup.
56
-
57
- def to_markdown
58
- headings(@workingcopy)
59
- structure(@workingcopy)
60
- text_formatting(@workingcopy)
61
- lists(@workingcopy)
62
- entities(@workingcopy)
63
- tables(@workingcopy)
64
- @workingcopy = CGI::unescapeHTML(@workingcopy)
65
- @workingcopy
130
+
131
+ # parse an HTML string
132
+ def parse_string
133
+ # @parser.html = html ## -> if we passed it in
134
+ parse
135
+ return @output
66
136
  end
67
-
68
- #--
69
- #The conversion methods themselves are private.
70
- private
71
-
72
- def text_formatting(text)
73
- TEXT_FORMATTING.each do |htmltag, markdowntag|
74
- text.gsub!(htmltag, markdowntag)
137
+
138
+ # iterate through the nodes and decide what to do with the current node
139
+ def parse
140
+ @output = ''
141
+ # drop tags that are in the DROP list
142
+ # TODO: implement dropping of @drop tags
143
+
144
+ while token = @parser.next_node
145
+ case @parser.node_type
146
+ when 'doctype', 'pi', 'comment'
147
+ if (@keep_html)
148
+ flush_linebreaks
149
+ out(@parser.node)
150
+ set_linebreaks(2)
151
+ end
152
+ when 'text'
153
+ handle_text
154
+ when 'tag'
155
+ next if IGNORE.include?(@parser.tag_name)
156
+ flush_linebreaks if (@parser.is_start_tag)
157
+ if (@skip_conversion)
158
+ is_markdownable # update notConverted
159
+ handle_tag_to_text
160
+ next
161
+ end
162
+ @parser.html = @parser.html.lstrip if (!@parser.keep_whitespace && @parser.is_block_element && @parser.is_start_tag)
163
+ if (is_markdownable)
164
+ if (@parser.is_block_element && @parser.is_start_tag && !@last_was_block_tag && !@output.empty?)
165
+ if (!@buffer.empty?)
166
+ str = @buffer[@buffer.size - 1]
167
+ else
168
+ str = @output
169
+ end
170
+ if (str.slice((@indent.size - 1) * -1) != "\n#{@indent}")
171
+ str << "\n" + @indent
172
+ end
173
+ func = "handle_tag_#{@parser.tag_name}"
174
+ self.send(func)
175
+
176
+ if (@links_after_each_paragraph && @parser.is_block_element && !@parser.is_start_tag)
177
+ flush_stacked
178
+ end
179
+ if(!@parser.is_start_tag)
180
+ @last_closed_tag = @parser.tag_name
181
+ end
182
+ end
183
+ else
184
+ handle_tag_to_text
185
+ @last_closed_tag = ''
186
+ end
187
+ else
188
+ # TODO: trigger error for invalid node type
189
+ end # end case
190
+
191
+ @last_was_block_tag = (@parser.node_type == 'tag' && @parser.is_start_tag && @parser.is_block_element)
192
+ end # end while
193
+
194
+ ### cleanup
195
+ tmp = @output.gsub('&gt;', '>')
196
+ tmp = tmp.gsub('&amp;', '&')
197
+ @output = tmp.rstrip
198
+ # end parsing, flush stacked tags
199
+ flush_stacked
200
+ @stack = {}
201
+ <<<<<<< HEAD:lib/clothblue_rewrite.rb
202
+ =======
203
+ end
204
+
205
+ # check if current tag can be converted to Markdown
206
+ def is_markdownable
207
+ return false unless (IS_MARKDOWNABLE.include?(@parser.tag_name))
208
+
209
+ if (@parser.is_start_tag)
210
+ ret = true
211
+ if (@keep_html)
212
+ diff = @parser.tag_attributes.reject { |a| @parser.tag_name.include?(a) }
213
+ ret = false unless diff.empty? # non markdownable attributes given
214
+ end
215
+ if (ret)
216
+ IS_MARKDOWNABLE.each do |attr, type|
217
+ if ((type == 'required') && @parser.tag_attributes[attr].nil?)
218
+ # required Markdown attribute not given
219
+ ret = false
220
+ break
221
+ end
222
+ end
223
+ end
224
+ unless (ret)
225
+ @not_converted << (@parser.tag_name + '::' + @parser.open_tags.join('/'))
226
+ end
227
+ return ret
228
+ else
229
+ if (!@not_converted.empty? && (@not_converted.last == (@parser.tag_name + '::' + @parser.open_tags.join('/'))))
230
+ @not_converted.pop
231
+ return false
232
+ end
233
+ return true
75
234
  end
76
- text
235
+ end
236
+
237
+ # flush enqueued linebreaks
238
+ def flush_linebreaks
239
+ if ((@linebreaks > 0) && !@output.empty?)
240
+ out("\n" * @linebreaks, true)
241
+ end
242
+ @linebreaks = 0
243
+ end
244
+
245
+ # output all stacked tags
246
+ def flush_stacked
247
+ # # links
248
+ # foreach ($this->stack as $tag => $a) {
249
+ # if (!empty($a)) {
250
+ # call_user_func(array(&$this, 'flushStacked_'.$tag));
251
+ # }
252
+ # }
77
253
  end
78
254
 
79
-
80
- def headings(text)
81
- HEADINGS.each do |htmltag, markdowntag|
82
- text.gsub!(htmltag, markdowntag)
255
+ # set number of line breaks before next start tag
256
+ def set_linebreaks(number)
257
+ @linebreaks = number if (@linebreaks < number)
258
+ end
259
+
260
+ # append string to the correct var, either directly to
261
+ # @output or to the current buffers
262
+ def out(put = '', nowrap = false)
263
+ return if put.empty?
264
+
265
+ if (!@buffer.empty?)
266
+ @buffer.last << put
267
+ else
268
+ if ((@body_width > 0) && !@parser.keep_whitespace) # wrap lines
269
+ # get last line
270
+ pos = @output.index("\n")
271
+ line = pos ? @output.slice(pos, @output.size - pos) : @output
272
+ end
273
+
274
+ if (nowrap)
275
+ if ((put[0,1] != "\n") && (line.size + put.size) > @body_width)
276
+ @output << "\n#{indent(put)}"
277
+ else
278
+ @output << put
279
+ end
280
+ return
281
+ else
282
+ put << "\n" # make sure we get all lines in the while below
283
+ line_len = line.size
284
+ while (pos = put.index("\n"))
285
+ put_line = put.slice(1, pos+1)
286
+ put_len = put_line.size
287
+ put = put.slice(pos+1, put.size - pos)
288
+ if (line_len + put_len < @body_width)
289
+ @output << put_line
290
+ line_len = put_len
291
+ else
292
+ # $split = preg_split('#^(.{0,'.($this->bodyWidth - $lineLen).'})\b#', $putLine, 2, PREG_SPLIT_OFFSET_CAPTURE | PREG_SPLIT_DELIM_CAPTURE);
293
+ # $this->output .= rtrim($split[1][0])."\n".$this->indent.$this->wordwrap(ltrim($split[2][0]), $this->bodyWidth, "\n".$this->indent, false);
294
+ end
295
+ end # end while
296
+ end
297
+ @output = @output(0, -1)
298
+ return
299
+ else
300
+ @output << put
83
301
  end
84
- text
302
+ >>>>>>> b6201584759afcc6f24a557ef9312597bd63f98f:lib/clothblue_rewrite.rb
85
303
  end
86
304
 
305
+ <<<<<<< HEAD:lib/clothblue_rewrite.rb
306
+ # check if current tag can be converted to Markdown
307
+ def is_markdownable
308
+ return false unless (IS_MARKDOWNABLE.include?(@parser.tag_name))
309
+ =======
310
+ # indent next output (start tag) or unindent (end tag)
311
+ def indent(str, output = true)
312
+ if (@parser.is_start_tag)
313
+ @indent << str
314
+ out(str, true) if @output
315
+ else
316
+ @indent = @indent.slice(0, (str.size * -1))
317
+ end
318
+ end
319
+
320
+ # handle plain text
321
+ def handle_text
322
+ if (has_parent('pre') && @parser.node.index("\n"))
323
+ @parser.node.gsub!("\n", "\n#{@indent}")
324
+ end
325
+ if (!has_parent('code') && !has_parent('pre'))
326
+ # entity decode
327
+ decode(@parser.node)
328
+ if (!@skip_conversion)
329
+ # escape some chars in normal text
330
+ @parser.node.gsub!(@escape_in_text['search'], @escape_in_text['replace'])
331
+ end
332
+ else
333
+ @parser.node.gsub!(['&quot;', '&apos'], ['"', '\''])
334
+ end
335
+ out(@parser.node)
336
+ @last_closed_tag = ''
337
+ end
87
338
 
88
- def lists(text)
89
- LISTS.each do |htmltag, markdowntag|
90
- text.gsub!(htmltag, markdowntag)
339
+ # handle non-Markdownable tags
340
+ def handle_tag_to_text
341
+ if (!@keep_html)
342
+ set_linebreaks(2) if (!@parser.is_start_tag && @parser.is_block_element)
343
+ else
344
+ # dont convert to markdown inside this tag
345
+ # TODO: markdown extra
346
+ if (!@parser.is_empty_tag)
347
+ if (@parser.is_start_tag)
348
+ unless (@skip_conversion)
349
+ @skip_conversion = @parser.tag_name + '::' + @parser.open_tags.join('/')
350
+ end
351
+ else
352
+ if (@skip_conversion == (@parser.tag_name + '::' + @parser.open_tags.join('/'))
353
+ @skip_conversion = false
354
+ end
355
+ end
356
+ end # end !@parser.is_empty_tag
357
+
358
+ if (@parser.is_block_element)
359
+ if (@parser.is_start_tag)
360
+ if (%w(ins del).include?(parent))
361
+ # looks like ins or del are block elements now
362
+ out("\n", true)
363
+ indent(' ')
364
+ end
365
+ if (@parser.tag_name != 'pre')
366
+ out(@parser.node + "\n" + @indent)
367
+ @parser.is_empty_tag ? set_linebreaks(1) : indent(' ')
368
+ @parser.html = @parser.html.lstrip
369
+ else
370
+ # dont indent inside <pre> tags
371
+ out(@parser.node)
372
+ @static_indent = @indent
373
+ @indent = ''
374
+ end
375
+ else
376
+ @output = rstrip(@output) unless @parser.keep_whitespace
377
+ if (@parser.tag_name != 'pre')
378
+ indent(' ')
379
+ out("\n" + @indent + @parser.node)
380
+ else
381
+ # reset indentation
382
+ out(@parser.node)
383
+ @indent = @static_indent
384
+ end
385
+
386
+ if (%w(ins del).include?(parent))
387
+ # ins or del was block element
388
+ out("\n")
389
+ indent(' ')
390
+ end
391
+
392
+ @parser.tag_name == 'li' ? set_linebreaks(1) : set_linebreaks(2)
393
+ end
394
+ else
395
+ out(@parser.node)
396
+ end
397
+
398
+ if (%w(code pre).include?(@parser.tag_name))
399
+ if (@parser.is_start_tag)
400
+ buffer
401
+ else
402
+ # add stuff so cleanup just reverses this
403
+ tmp = unbugger.gsub('&gt;', '&amp;gt;')
404
+ out(tmp.gsub('&lt;', '&amp;lt;'))
405
+ end
406
+ end
91
407
  end
92
- text
93
408
  end
94
-
95
-
96
- def entities(text)
97
- ENTITIES.each do |htmlentity, markdownentity|
98
- text.gsub!(htmlentity, markdownentity)
409
+
410
+ # get tag name of direct parent tag
411
+ def parent
412
+ @parser.open_tags.last
413
+ end
414
+
415
+ # check if current node has a tag as parent (somewhere, not just the direct parent)
416
+ def has_parent(tag)
417
+ @parser.open_tags.include?(tag)
418
+ end
419
+
420
+ # add current node to the stack (this only stores the attributes)
421
+ def stack
422
+ @stack[@parser.tag_name] = [] if (@stack[@parser.tag_name].nil?)
423
+ @stack[@parser.tag_name] << @parser.tag_attributes
424
+ end
425
+
426
+ # remove current tag from stack
427
+ def unstack
428
+ if (@stack[@parser.tag_name].nil? || !@stack[@parser.tag_name].is_a?(Array))
429
+ # TODO: trigger an error
430
+ raise "somebody set us up the bomb"
99
431
  end
100
- text
432
+ @stack[@parser.tag_name].pop
101
433
  end
102
-
103
-
104
- def structure(text)
105
- STRUCTURES.each do |htmltag, markdowntag|
106
- text.gsub!(htmltag, markdowntag)
434
+
435
+ # get last stacked element of type tag
436
+ def get_stacked(tag)
437
+ @stack[tag][@stack[tag].size-1]
438
+ end
439
+
440
+ # buffer next parser output until unbuffer is called
441
+ def buffer
442
+ @buffer << ''
443
+ end
444
+
445
+ # end current buffer and return buffered output
446
+ def unbuffer
447
+ @buffer.pop
448
+ end
449
+
450
+ # wordwrap for utf8 encoded strings
451
+ def wordwrap(str, width, brk, cut = false)
452
+ >>>>>>> b6201584759afcc6f24a557ef9312597bd63f98f:lib/clothblue_rewrite.rb
453
+
454
+ if (@parser.is_start_tag)
455
+ ret = true
456
+ if (@keep_html)
457
+ diff = @parser.tag_attributes.reject { |a| @parser.tag_name.include?(a) }
458
+ ret = false unless diff.empty? # non markdownable attributes given
459
+ end
460
+ if (ret)
461
+ IS_MARKDOWNABLE.each do |attr, type|
462
+ if ((type == 'required') && @parser.tag_attributes[attr].nil?)
463
+ # required Markdown attribute not given
464
+ ret = false
465
+ break
466
+ end
467
+ end
468
+ end
469
+ unless (ret)
470
+ @not_converted << (@parser.tag_name + '::' + @parser.open_tags.join('/'))
471
+ end
472
+ return ret
473
+ else
474
+ if (!@not_converted.empty? && (@not_converted.last == (@parser.tag_name + '::' + @parser.open_tags.join('/'))))
475
+ @not_converted.pop
476
+ return false
477
+ end
478
+ return true
107
479
  end
108
- text
109
480
  end
110
-
111
- def tables(text)
112
- TABLES.each do |htmltag, markdowntag|
113
- text.gsub!(htmltag, markdowntag)
481
+
482
+ <<<<<<< HEAD:lib/clothblue_rewrite.rb
483
+ # flush enqueued linebreaks
484
+ def flush_linebreaks
485
+ if ((@linebreaks > 0) && !@output.empty?)
486
+ out("\n" * @linebreaks, true)
114
487
  end
115
- text
488
+ @linebreaks = 0
489
+ end
490
+
491
+ # output all stacked tags
492
+ def flush_stacked
493
+ # # links
494
+ # foreach ($this->stack as $tag => $a) {
495
+ # if (!empty($a)) {
496
+ # call_user_func(array(&$this, 'flushStacked_'.$tag));
497
+ # }
498
+ # }
116
499
  end
117
500
 
501
+ # set number of line breaks before next start tag
502
+ def set_linebreaks(number)
503
+ @linebreaks = number if (@linebreaks < number)
504
+ end
505
+
506
+ # append string to the correct var, either directly to
507
+ # @output or to the current buffers
508
+ def out(put = '', nowrap = false)
509
+ return if put.empty?
510
+
511
+ if (!@buffer.empty?)
512
+ @buffer.last << put
513
+ else
514
+ if ((@body_width > 0) && !@parser.keep_whitespace) # wrap lines
515
+ # get last line
516
+ pos = @output.index("\n")
517
+ line = pos ? @output.slice(pos, @output.size - pos) : @output
518
+ end
519
+
520
+ if (nowrap)
521
+ if ((put[0,1] != "\n") && (line.size + put.size) > @body_width)
522
+ @output << "\n#{indent(put)}"
523
+ else
524
+ @output << put
525
+ end
526
+ return
527
+ else
528
+ put << "\n" # make sure we get all lines in the while below
529
+ line_len = line.size
530
+ while (pos = put.index("\n"))
531
+ put_line = put.slice(1, pos+1)
532
+ put_len = put_line.size
533
+ put = put.slice(pos+1, put.size - pos)
534
+ if (line_len + put_len < @body_width)
535
+ @output << put_line
536
+ line_len = put_len
537
+ else
538
+ # $split = preg_split('#^(.{0,'.($this->bodyWidth - $lineLen).'})\b#', $putLine, 2, PREG_SPLIT_OFFSET_CAPTURE | PREG_SPLIT_DELIM_CAPTURE);
539
+ # $this->output .= rtrim($split[1][0])."\n".$this->indent.$this->wordwrap(ltrim($split[2][0]), $this->bodyWidth, "\n".$this->indent, false);
540
+ end
541
+ end # end while
542
+ end
543
+ @output = @output(0, -1)
544
+ return
545
+ else
546
+ @output << put
547
+ end
548
+ end
549
+
550
+ # indent next output (start tag) or unindent (end tag)
551
+ def indent(str, output = true)
552
+ if (@parser.is_start_tag)
553
+ @indent << str
554
+ out(str, true) if @output
555
+ else
556
+ @indent = @indent.slice(0, (str.size * -1))
557
+ end
558
+ end
559
+
560
+ # handle plain text
561
+ def handle_text
562
+ if (has_parent('pre') && @parser.node.index("\n"))
563
+ @parser.node.gsub!("\n", "\n#{@indent}")
564
+ end
565
+ if (!has_parent('code') && !has_parent('pre'))
566
+ # entity decode
567
+ decode(@parser.node)
568
+ if (!@skip_conversion)
569
+ # escape some chars in normal text
570
+ @parser.node.gsub!(@escape_in_text['search'], @escape_in_text['replace'])
571
+ end
572
+ else
573
+ @parser.node.gsub!(['&quot;', '&apos'], ['"', '\''])
574
+ end
575
+ out(@parser.node)
576
+ @last_closed_tag = ''
577
+ end
578
+
579
+ # handle non-Markdownable tags
580
+ def handle_tag_to_text
581
+ if (!@keep_html)
582
+ set_linebreaks(2) if (!@parser.is_start_tag && @parser.is_block_element)
583
+ else
584
+ # dont convert to markdown inside this tag
585
+ # TODO: markdown extra
586
+ if (!@parser.is_empty_tag)
587
+ if (@parser.is_start_tag)
588
+ unless (@skip_conversion)
589
+ @skip_conversion = @parser.tag_name + '::' + @parser.open_tags.join('/')
590
+ end
591
+ else
592
+ if (@skip_conversion == (@parser.tag_name + '::' + @parser.open_tags.join('/'))
593
+ @skip_conversion = false
594
+ end
595
+ end
596
+ end # end !@parser.is_empty_tag
597
+
598
+ if (@parser.is_block_element)
599
+ if (@parser.is_start_tag)
600
+ if (%w(ins del).include?(parent))
601
+ # looks like ins or del are block elements now
602
+ out("\n", true)
603
+ indent(' ')
604
+ end
605
+ if (@parser.tag_name != 'pre')
606
+ out(@parser.node + "\n" + @indent)
607
+ @parser.is_empty_tag ? set_linebreaks(1) : indent(' ')
608
+ @parser.html = @parser.html.lstrip
609
+ else
610
+ # dont indent inside <pre> tags
611
+ out(@parser.node)
612
+ @static_indent = @indent
613
+ @indent = ''
614
+ end
615
+ else
616
+ @output = rstrip(@output) unless @parser.keep_whitespace
617
+ if (@parser.tag_name != 'pre')
618
+ indent(' ')
619
+ out("\n" + @indent + @parser.node)
620
+ else
621
+ # reset indentation
622
+ out(@parser.node)
623
+ @indent = @static_indent
624
+ end
625
+
626
+ if (%w(ins del).include?(parent))
627
+ # ins or del was block element
628
+ out("\n")
629
+ indent(' ')
630
+ end
631
+
632
+ @parser.tag_name == 'li' ? set_linebreaks(1) : set_linebreaks(2)
633
+ end
634
+ else
635
+ out(@parser.node)
636
+ end
637
+
638
+ if (%w(code pre).include?(@parser.tag_name))
639
+ if (@parser.is_start_tag)
640
+ buffer
641
+ else
642
+ # add stuff so cleanup just reverses this
643
+ tmp = unbugger.gsub('&gt;', '&amp;gt;')
644
+ out(tmp.gsub('&lt;', '&amp;lt;'))
645
+ end
646
+ end
647
+ end
648
+ end
649
+
650
+ # get tag name of direct parent tag
651
+ def parent
652
+ @parser.open_tags.last
653
+ end
654
+
655
+ # check if current not has a tag as parent (somewhere, not just the direct parent)
656
+ def has_parent(tag)
657
+ @parser.open_tags.include?(tag)
658
+ end
659
+
660
+ # add current node to the stack (this only stores the attributes)
661
+ def stack
662
+ @stack[@parser.tag_name] = [] if (@stack[@parser.tag_name].nil?)
663
+ @stack[@parser.tag_name] << @parser.tag_attributes
664
+ end
665
+
666
+ # remove current tag from stack
667
+ def unstack
668
+ if (@stack[@parser.tag_name].nil? || !@stack[@parser.tag_name].is_a?(Array))
669
+ # TODO: trigger and error
670
+ raise "somebody set us up the bomb"
671
+ end
672
+ @stack[@parser.tag_name].pop
673
+ end
674
+
675
+ # get last stacked element of type tag
676
+ def get_stacked(tag)
677
+ @stack[tag][@stack[tag].size-1]
678
+ end
679
+
680
+ # buffer next parser output until unbuffer is called
681
+ def buffer
682
+ @buffer << ''
683
+ end
684
+
685
+ # end current buffer and return buffered output
686
+ def unbuffer
687
+ @buffer.pop
688
+ end
689
+
690
+ # wordwrap for utf8 encoded strings
691
+ def wordwrap(str, width, brk, cut = false)
692
+ # TODO: implement wordwrap for utf8 code
693
+ end
694
+
695
+ =======
696
+ >>>>>>> b6201584759afcc6f24a557ef9312597bd63f98f:lib/clothblue_rewrite.rb
697
+ # decode email address
698
+ def decode(text, quoted_style = '')
699
+ # TODO: implement decode method
118
700
 
119
- def css_styles(text)
120
- #TODO: Translate CSS-styles
121
- text
701
+ # @author derernst@gmx.ch <http://www.php.net/manual/en/function.html-entity-decode.php#68536>
702
+ # @author Milian Wolff <http://milianw.de>
703
+ # if (version_compare(PHP_VERSION, '5', '>=')) {
704
+ # # UTF-8 is only supported in PHP 5.x.x and above
705
+ # $text = html_entity_decode($text, $quote_style, 'UTF-8');
706
+ # } else {
707
+ # if (function_exists('html_entity_decode')) {
708
+ # $text = html_entity_decode($text, $quote_style, 'ISO-8859-1');
709
+ # } else {
710
+ # static $trans_tbl;
711
+ # if (!isset($trans_tbl)) {
712
+ # $trans_tbl = array_flip(get_html_translation_table(HTML_ENTITIES, $quote_style));
713
+ # }
714
+ # $text = strtr($text, $trans_tbl);
715
+ # }
716
+ # $text = preg_replace_callback('~&#x([0-9a-f]+);~i', array(&$this, '_decode_hex'), $text);
717
+ # $text = preg_replace_callback('~&#(\d{2,5});~', array(&$this, '_decode_numeric'), $text);
718
+ # }
719
+ # return $text;
122
720
  end
123
- #++
124
- end # end class ClothBlue
721
+
722
+ end