auto_paragraph 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/auto_paragraph.rb +335 -0
  3. metadata +46 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1ec07b8849e86618e4f7408753f47d7465daaa01
4
+ data.tar.gz: db510925c1a4a89d4a36a7348c10ee1edc2a01a6
5
+ SHA512:
6
+ metadata.gz: 376ee810f0b881b127f6c245d30abec8ca83a6083b42b5af411bfc1b00e34db86b2bc9dca7a03828490ac3b7be3e8d99670ef32a25fea4863e86e27abf342e6b
7
+ data.tar.gz: ad41dbf903a765dbfc037b5692207c62d2a47f86866da0ef2a584dcbd05b49c5df08bb8f3659508acd6fc1de28bebb9206217d4cadb25d9425c8e4efccfc0e35
@@ -0,0 +1,335 @@
1
# Ruby port of WordPress' +wpautop+: turns double line breaks into <p>
# elements and (optionally) remaining single line breaks into <br /> tags.
#
# Ported from
# https://github.com/WordPress/WordPress/blob/4.3-branch/wp-includes/formatting.php
#
# @example
#   AutoParagraph.new.execute("a\n\nb") # => "<p>a</p>\n<p>b</p>\n"
class AutoParagraph
  # Regex source (alternation of tag names) for elements treated as block level.
  BLOCK_LEVEL_TAGS = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)'.freeze

  # Splits text into (text-before-element, element) pairs. An element is an
  # HTML comment, a CDATA section or an ordinary tag; an unterminated element
  # swallows the rest of the input.
  HTML_ELEMENT_SPLIT_REGEX = %r{
    ([^<]*)                  # text preceding the element
    (                        # capture the element itself
      <                      # start of element
      (?:
        (?=!--)              # is this a comment
        !                    # start of comment
        (?:                  # unrolled loop, consume until the terminator
          -(?!->)            # dash not followed by end of comment
          [^\-]*+            # consume non-dashes
        )*+                  # loop possessively
        (?:-->)?             # end of comment, optional so all input may match
      |
        (?=!\[CDATA\[)       # is this a CDATA section
        !\[CDATA\[           # start of CDATA
        [^\]]*+              # consume anything but a closing bracket
        (?:                  # unrolled loop, consume until the terminator
          \](?!\]>)          # one closing bracket not followed by end of CDATA
          [^\]]*+            # consume anything but a closing bracket
        )*+                  # loop possessively
        (?:\]\]>)?           # end of CDATA, optional so all input may match
      |
        [^>]*>?              # ordinary element, optional end so all input may match
      )
    )
  }mx.freeze

  # @param insert_line_breaks [Boolean] when true, single newlines that
  #   remain after paragraph wrapping are converted to <br /> tags.
  def initialize(insert_line_breaks: true)
    @pre_tags = {}
    @insert_line_breaks = insert_line_breaks
  end

  # Formats +input+ and returns the resulting HTML.
  #
  # The caller's string is never modified (a private copy is processed), so
  # frozen strings are accepted. +nil+ and whitespace-only input yield ''.
  #
  # @param input [#to_s, nil] raw post content
  # @return [String] content with <p> / <br /> tags inserted
  def execute(input)
    text = input.to_s
    return '' if text.strip.empty?

    # Work on a copy: the pipeline below relies on in-place gsub!.
    @input = text.dup
    @pre_tags = {}

    # <pre> blocks are swapped for placeholders *before* any other rewriting
    # (same order as wpautop) so that their content is left untouched.
    add_placeholders
    setup_input_string

    add_p_tags
    remove_extraneous_p_tags
    insert_and_cleanup_br_tags

    replace_more_with_clear_both

    restore_placeholders

    @input
  end

  private

  # For testing: read access to the working buffer.
  def input_hook
    @input
  end

  # For testing: replace the working buffer.
  def input_hook=(input)
    @input = input
  end

  # Pads the buffer with a trailing newline and hides <pre> blocks.
  def add_placeholders
    pad_newline
    replace_pre_with_placeholders
  end

  # Normalises line breaks around block-level and media elements.
  def setup_input_string
    multiple_brs_into_two_line_breaks
    add_single_line_break_above_block_level_opening_tags
    add_double_break_below_block_level_closing_tags
    standardize_newline_to_backslash_n
    replace_newlines_in_elements_with_placeholders
    collapse_line_breaks_around_option_elements
    collapse_line_breaks_inside_object_before_param_or_embed
    collapse_line_breaks_inside_audio_video_around_source_track
    remove_more_than_two_contiguous_line_breaks
  end

  def add_p_tags
    add_p_tags_at_doule_linebreaks
  end

  # Removes or relocates <p> tags that ended up around block-level elements.
  def remove_extraneous_p_tags
    remove_p_with_only_whitespace
    add_closing_p_inside_div_address_form
    unwrap_opening_closing_element_from_p
    unwrap_li_from_p
    unwrap_blockquote_from_p
    remove_preceeding_p_from_block_element_tag
    remove_following_p_from_block_element_tag
  end

  def insert_and_cleanup_br_tags
    insert_line_breaks
    remove_br_after_opening_closing_block_tag
    remove_br_before_some_block_tags
  end

  def restore_placeholders
    restore_pre_with_placeholders
    restore_newlines_in_elements_with_placeholders
  end

  # Guarantees the buffer ends with a newline; this also ensures that a
  # trailing "</pre>" leaves a final segment when the buffer is split on it.
  def pad_newline
    @input += "\n"
  end

  # Pre tags shouldn't be touched by autop: replace each <pre>...</pre> with a
  # placeholder and remember the original so it can be restored afterwards.
  def replace_pre_with_placeholders
    return @input unless @input.include?('<pre')

    @pre_tags = {}
    segments = @input.split('</pre>')
    tail = segments.pop

    rebuilt = segments.each_with_index.map do |segment, index|
      start = segment.index('<pre')

      # Malformed html (closing tag without an opening one): keep the text.
      next segment unless start

      placeholder = "<pre wp-pre-tag-#{index}></pre>"
      @pre_tags[placeholder] = "#{segment[start..-1]}</pre>"

      # [0, start] (not 0..start-1) so a segment *starting* with <pre
      # contributes no leading text instead of the whole segment.
      segment[0, start] + placeholder
    end

    @input = rebuilt.join + tail
  end

  def multiple_brs_into_two_line_breaks
    @input.gsub!(%r{<br\s*/?>\s*<br\s*/?>}, "\n\n")
  end

  def add_single_line_break_above_block_level_opening_tags
    @input.gsub!(/(<#{BLOCK_LEVEL_TAGS}[^>]*>)/, "\n\\1")
  end

  def add_double_break_below_block_level_closing_tags
    @input.gsub!(%r{(</#{BLOCK_LEVEL_TAGS}>)}, "\\1\n\n")
  end

  # Converts "\r\n" and lone "\r" to "\n".
  def standardize_newline_to_backslash_n
    @input.gsub!(/\r\n?/, "\n")
  end

  # Newlines inside tags (e.g. between attributes) are hidden behind a
  # placeholder so they are not mistaken for paragraph or line breaks.
  def replace_newlines_in_elements_with_placeholders
    @input = replace_in_html_tags(@input, { "\n" => " <!-- wpnl --> " })
  end

  def collapse_line_breaks_around_option_elements
    return unless @input.include?('<option')

    @input.gsub!(/\s*<option/, '<option')
    @input.gsub!(%r{</option>\s*}, '</option>')
  end

  # Collapse line breaks inside <object> elements, before <param> and <embed> elements.
  def collapse_line_breaks_inside_object_before_param_or_embed
    return unless @input.include?('</object>')

    @input.gsub!(/(<object[^>]*>)\s*/, "\\1")
    @input.gsub!(%r{\s*</object>}, '</object>')
    @input.gsub!(%r{\s*(</?(?:param|embed)[^>]*>)\s*}, "\\1")
  end

  # Collapse line breaks inside <audio> and <video> elements,
  # before and after <source> and <track> elements.
  def collapse_line_breaks_inside_audio_video_around_source_track
    return unless @input.include?('<source') || @input.include?('<track')

    @input.gsub!(%r{([<\[](?:audio|video)[^>\]]*[>\]])\s*}, "\\1")
    @input.gsub!(%r{\s*([<\[]/(?:audio|video)[>\]])}, "\\1")
    @input.gsub!(%r{\s*(<(?:source|track)[^>]*>)\s*}, "\\1")
  end

  def remove_more_than_two_contiguous_line_breaks
    @input.gsub!(/\n\n+/, "\n\n")
  end

  # Splits the buffer at double line breaks and wraps every chunk in <p>...</p>.
  def add_p_tags_at_doule_linebreaks
    paragraphs = @input.split(/\n\s*\n/).map do |chunk|
      trimmed = chunk.sub(/^\n+/, '').sub(/\n+$/, '')
      "<p>#{trimmed}</p>\n"
    end
    @input = paragraphs.join
  end

  # Under certain strange conditions it could create a P of entirely whitespace.
  def remove_p_with_only_whitespace
    @input.gsub!(%r{<p>\s*</p>}, '')
  end

  # Add a closing </p> inside <div>, <address>, or <form> tag if missing.
  def add_closing_p_inside_div_address_form
    @input.gsub!(%r{<p>([^<]+)</(div|address|form)>}, "<p>\\1</p></\\2>")
  end

  # If an opening or closing block element tag is wrapped in a <p>, unwrap it.
  def unwrap_opening_closing_element_from_p
    @input.gsub!(%r{<p>\s*(</?#{BLOCK_LEVEL_TAGS}[^>]*>)\s*</p>}, "\\1")
  end

  # In some cases <li> may get wrapped in <p>, fix them.
  def unwrap_li_from_p
    @input.gsub!(%r{<p>(<li.+?)</p>}, "\\1")
  end

  # If a <blockquote> is wrapped with a <p>, move the <p> inside the <blockquote>.
  def unwrap_blockquote_from_p
    @input.gsub!(/<p><blockquote([^>]*)>/i, "<blockquote\\1><p>")
    @input.gsub!('</blockquote></p>', '</p></blockquote>')
  end

  # If an opening or closing block element tag is preceded by an opening <p> tag, remove it.
  def remove_preceeding_p_from_block_element_tag
    @input.gsub!(%r{<p>\s*(</?#{BLOCK_LEVEL_TAGS}[^>]*>)}, "\\1")
  end

  # If an opening or closing block element tag is followed by a closing </p> tag, remove it.
  def remove_following_p_from_block_element_tag
    @input.gsub!(%r{(</?#{BLOCK_LEVEL_TAGS}[^>]*>)\s*</p>}, "\\1")
  end

  # Optionally converts remaining newlines into <br /> tags.
  def insert_line_breaks
    return unless @insert_line_breaks

    # Replace newlines that shouldn't be touched with a placeholder.
    @input.gsub!(%r{<(script|style).*?</\1>}m) do |element|
      element.gsub("\n", '<WPPreserveNewline />')
    end

    # Normalize <br>
    @input.gsub!(Regexp.union('<br>', '<br/>'), '<br />')

    # Replace any new line characters that aren't preceded by a <br /> with a <br />.
    @input.gsub!(%r{(?<!<br />)\s*\n}, "<br />\n")

    # Replace newline placeholders with newlines.
    @input.gsub!('<WPPreserveNewline />', "\n")
  end

  # If a <br /> tag is after an opening or closing block tag, remove it.
  def remove_br_after_opening_closing_block_tag
    @input.gsub!(%r{(</?#{BLOCK_LEVEL_TAGS}[^>]*>)\s*<br />}, "\\1")
  end

  # If a <br /> tag is before a subset of opening or closing block tags, remove it.
  def remove_br_before_some_block_tags
    @input.gsub!(%r{<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)}, "\\1")
    @input.gsub!(%r{\n</p>$}, '</p>')
  end

  # Replaces "<!--more ...-->" markers with a clearing div.
  def replace_more_with_clear_both
    @input.gsub!(/<!--more(.*?)?-->/, '<div class="clear-both"></div>')
  end

  # Replace placeholder <pre> tags with their original content.
  def restore_pre_with_placeholders
    @pre_tags.each do |placeholder, original|
      # Block form: the saved markup is inserted verbatim. A replacement
      # *string* would have its backslashes interpreted as back-references.
      @input.gsub!(placeholder) { original }
    end
  end

  # Restore newlines in all elements.
  def restore_newlines_in_elements_with_placeholders
    @input.gsub!(Regexp.union(' <!-- wpnl --> ', '<!-- wpnl -->'), "\n")
  end

  # @return [Regexp] the pattern used to separate text from HTML elements
  def split_html_elements_regex
    HTML_ELEMENT_SPLIT_REGEX
  end

  # Splits +text+ so that every third entry, starting at index 2, is an HTML
  # element (tag, comment or CDATA section).
  def split_html_elements(text)
    text.split(split_html_elements_regex)
  end

  # Returns every third element starting at the third:
  # ["","data","<tag>","","more data","</closetag>"] => ["<tag>","</closetag>"]
  def every_tag_only(source)
    source.drop(2).each_slice(3).map(&:first)
  end

  # Applies +replace_pairs+ (literal search string => replacement) inside HTML
  # elements only; text between elements is left alone.
  #
  # @param haystack [String]
  # @param replace_pairs [Hash{String => String}]
  # @return [String] +haystack+ itself when nothing was replaced, otherwise a
  #   new string
  def replace_in_html_tags(haystack, replace_pairs)
    pieces = split_html_elements(haystack)

    # Keys are literal strings, so escape them rather than joining raw.
    needles = Regexp.union(replace_pairs.keys)

    # gsub! edits the strings held in +pieces+ and returns nil when nothing
    # matched; +map+ (not +any?+) so that every tag is processed.
    changed = every_tag_only(pieces).map { |tag| tag.gsub!(needles, replace_pairs) }.any?

    changed ? pieces.join : haystack
  end
end
metadata ADDED
@@ -0,0 +1,46 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: auto_paragraph
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - David Peterson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-01-11 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Same as Wordpress' wpautop() function
14
+ email: dp@vivitec.com.au
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/auto_paragraph.rb
20
+ homepage: https://github.com/dippysan/auto_paragraph
21
+ licenses:
22
+ - MIT
23
+ metadata: {}
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ requirements: []
39
+ rubyforge_project:
40
+ rubygems_version: 2.2.2
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: 'Formats Wordpress post content in Ruby: Replaces double line breaks with
44
+ paragraph elements'
45
+ test_files: []
46
+ has_rdoc: