auto_paragraph 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/auto_paragraph.rb +335 -0
  3. metadata +46 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 1ec07b8849e86618e4f7408753f47d7465daaa01
4
+ data.tar.gz: db510925c1a4a89d4a36a7348c10ee1edc2a01a6
5
+ SHA512:
6
+ metadata.gz: 376ee810f0b881b127f6c245d30abec8ca83a6083b42b5af411bfc1b00e34db86b2bc9dca7a03828490ac3b7be3e8d99670ef32a25fea4863e86e27abf342e6b
7
+ data.tar.gz: ad41dbf903a765dbfc037b5692207c62d2a47f86866da0ef2a584dcbd05b49c5df08bb8f3659508acd6fc1de28bebb9206217d4cadb25d9425c8e4efccfc0e35
@@ -0,0 +1,335 @@
1
class AutoParagraph
  # Ruby port of WordPress' wpautop: turns double line breaks into <p> elements
  # and (optionally) single line breaks into <br /> tags.
  # From https://github.com/WordPress/WordPress/blob/4.3-branch/wp-includes/formatting.php
  #
  # Usage:
  #   AutoParagraph.new.execute("Hello\n\nWorld")
  #   # => "<p>Hello</p>\n<p>World</p>\n"

  # Regex source (not a compiled Regexp) listing every tag name treated as
  # block level. It is interpolated into the patterns used below.
  BLOCK_LEVEL_TAGS = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)'.freeze

  # @param insert_line_breaks [Boolean] when true, remaining single newlines
  #   are converted to "<br />\n" after paragraphs have been added.
  def initialize(insert_line_breaks: true)
    @pre_tags = {}
    @insert_line_breaks = insert_line_breaks
  end

  # Formats +input+ and returns the resulting HTML string.
  #
  # @param input [#to_s] raw post content; nil is treated as an empty string.
  # @return [String] formatted HTML, or '' when the input is blank.
  def execute(input)
    text = input.to_s
    return '' if text.strip.empty?

    # Work on a private copy: every step below mutates @input in place, and the
    # caller's string (which may be frozen) must be left untouched.
    @input = text.dup
    @pre_tags = {}

    # <pre> blocks are swapped for placeholders *before* any other rewriting so
    # that their content is never touched.
    add_placeholders
    setup_input_string

    add_p_tags
    remove_extraneous_p_tags
    insert_and_cleanup_br_tags

    replace_more_with_clear_both

    restore_placeholders

    @input
  end

  private

  # For testing: reads the working buffer.
  def input_hook
    @input
  end

  # For testing: replaces the working buffer.
  def input_hook=(input)
    @input = input
  end

  # Pads the buffer and hides <pre> blocks behind placeholders.
  def add_placeholders
    pad_newline
    replace_pre_with_placeholders
  end

  # Normalizes line breaks around block-level markup so that paragraphs can be
  # found by splitting on blank lines.
  def setup_input_string
    multiple_brs_into_two_line_breaks
    add_single_line_break_above_block_level_opening_tags
    add_double_break_below_block_level_closing_tags
    standardize_newline_to_backslash_n
    replace_newlines_in_elements_with_placeholders
    collapse_line_breaks_around_option_elements
    collapse_line_breaks_inside_object_before_param_or_embed
    collapse_line_breaks_inside_audio_video_around_source_track
    remove_more_than_two_contiguous_line_breaks
  end

  def add_p_tags
    add_p_tags_at_doule_linebreaks
  end

  # Removes or relocates <p> tags that ended up around block-level markup.
  def remove_extraneous_p_tags
    remove_p_with_only_whitespace
    add_closing_p_inside_div_address_form
    unwrap_opening_closing_element_from_p
    unwrap_li_from_p
    unwrap_blockquote_from_p
    remove_preceeding_p_from_block_element_tag
    remove_following_p_from_block_element_tag
  end

  def insert_and_cleanup_br_tags
    insert_line_breaks
    remove_br_after_opening_closing_block_tag
    remove_br_before_some_block_tags
  end

  def restore_placeholders
    restore_pre_with_placeholders
    restore_newlines_in_elements_with_placeholders
  end

  # Just to make things a little easier, pad the end.
  def pad_newline
    @input += "\n"
  end

  # Pre tags shouldn't be touched by autop.
  # Replace pre tags with placeholders and bring them back after autop.
  # Returns the (possibly rewritten) working buffer.
  def replace_pre_with_placeholders
    return @input unless @input.include?('<pre')

    @pre_tags = {}

    parts = @input.split('</pre>')
    tail = parts.pop.to_s

    rebuilt = parts.each_with_index.map do |part, index|
      start_position = part.index('<pre')

      # Malformed html? Keep the fragment as it is.
      next part unless start_position

      placeholder_name = "<pre wp-pre-tag-#{index}></pre>"
      @pre_tags[placeholder_name] = part[start_position..-1] + '</pre>'

      # part[0, n] is '' when the fragment starts with <pre; a 0..n-1 range
      # would become 0..-1 there and copy the whole fragment.
      part[0, start_position] + placeholder_name
    end.join

    @input = rebuilt + tail
  end

  # Change multiple <br>s into two line breaks, which will turn into paragraphs.
  def multiple_brs_into_two_line_breaks
    @input.gsub!(%r{<br\s*/?>\s*<br\s*/?>}, "\n\n")
  end

  # Add a single line break above block-level opening tags.
  def add_single_line_break_above_block_level_opening_tags
    @input.gsub!(%r{(<#{BLOCK_LEVEL_TAGS}[^>]*>)}, "\n\\1")
  end

  # Add a double line break below block-level closing tags.
  def add_double_break_below_block_level_closing_tags
    @input.gsub!(%r{(</#{BLOCK_LEVEL_TAGS}>)}, "\\1\n\n")
  end

  # Standardize newline characters ("\r\n" and "\r") to "\n".
  def standardize_newline_to_backslash_n
    ["\r\n", "\r"].each do |from|
      @input.gsub!(from, "\n")
    end
  end

  # Find newlines inside tags and swap them for a comment placeholder so the
  # later newline handling leaves them alone.
  def replace_newlines_in_elements_with_placeholders
    @input = replace_in_html_tags(@input, { "\n" => " <!-- wpnl --> " })
  end

  # Collapse line breaks before and after <option> elements so they don't get autop'd.
  def collapse_line_breaks_around_option_elements
    return unless @input.include?('<option')

    @input.gsub!(/\s*<option/, '<option')
    @input.gsub!(/<\/option>\s*/, '</option>')
  end

  # Collapse line breaks inside <object> elements, before <param> and <embed> elements.
  def collapse_line_breaks_inside_object_before_param_or_embed
    return unless @input.include?('</object>')

    @input.gsub!(/(<object[^>]*>)\s*/, "\\1")
    @input.gsub!(/\s*<\/object>/, '</object>')
    @input.gsub!(/\s*(<\/?(?:param|embed)[^>]*>)\s*/, "\\1")
  end

  # Collapse line breaks inside <audio> and <video> elements,
  # before and after <source> and <track> elements.
  def collapse_line_breaks_inside_audio_video_around_source_track
    return unless @input.include?('<source') || @input.include?('<track')

    @input.gsub!(%r{([<\[](?:audio|video)[^>\]]*[>\]])\s*}, "\\1")
    @input.gsub!(%r{\s*([<\[]/(?:audio|video)[>\]])}, "\\1")
    @input.gsub!(%r{\s*(<(?:source|track)[^>]*>)\s*}, "\\1")
  end

  # Remove more than two contiguous line breaks.
  def remove_more_than_two_contiguous_line_breaks
    @input.gsub!(/\n\n+/, "\n\n")
  end

  # Split the content on blank lines and wrap every piece in <p>...</p>,
  # trimming newlines from both ends of each piece.
  def add_p_tags_at_doule_linebreaks
    paragraphs = @input.split(/\n\s*\n/).map do |para|
      '<p>' + para.sub(/^\n+/, '').sub(/\n+$/, '') + "</p>\n"
    end
    @input = paragraphs.join
  end

  # Under certain strange conditions it could create a P of entirely whitespace.
  def remove_p_with_only_whitespace
    @input.gsub!(%r{<p>\s*</p>}, '')
  end

  # Add a closing </p> inside <div>, <address>, or <form> tag if missing.
  def add_closing_p_inside_div_address_form
    @input.gsub!(%r{<p>([^<]+)</(div|address|form)>}, "<p>\\1</p></\\2>")
  end

  # If an opening or closing block element tag is wrapped in a <p>, unwrap it.
  def unwrap_opening_closing_element_from_p
    @input.gsub!(%r{<p>\s*(</?#{BLOCK_LEVEL_TAGS}[^>]*>)\s*</p>}, "\\1")
  end

  # In some cases <li> may get wrapped in <p>, fix them.
  def unwrap_li_from_p
    @input.gsub!(%r{<p>(<li.+?)</p>}, "\\1")
  end

  # If a <blockquote> is wrapped with a <p>, move it inside the <blockquote>.
  def unwrap_blockquote_from_p
    @input.gsub!(%r{<p><blockquote([^>]*)>}i, "<blockquote\\1><p>")
    @input.gsub!("</blockquote></p>", "</p></blockquote>")
  end

  # If an opening or closing block element tag is preceded by an opening <p> tag, remove it.
  def remove_preceeding_p_from_block_element_tag
    @input.gsub!(%r{<p>\s*(</?#{BLOCK_LEVEL_TAGS}[^>]*>)}, "\\1")
  end

  # If an opening or closing block element tag is followed by a closing </p> tag, remove it.
  def remove_following_p_from_block_element_tag
    @input.gsub!(%r{(</?#{BLOCK_LEVEL_TAGS}[^>]*>)\s*</p>}, "\\1")
  end

  # Optionally insert line breaks.
  def insert_line_breaks
    return unless @insert_line_breaks

    # Replace newlines that shouldn't be touched with a placeholder.
    @input.gsub!(%r{<(script|style).*?</\1>}m) do |match|
      match.gsub("\n", "<WPPreserveNewline />")
    end

    # Normalize <br>
    @input.gsub!(Regexp.union('<br>', '<br/>'), '<br />')

    # Replace any new line characters that aren't preceded by a <br /> with a <br />.
    @input.gsub!(%r{(?<!<br />)\s*\n}, "<br />\n")

    # Replace newline placeholders with newlines.
    @input.gsub!('<WPPreserveNewline />', "\n")
  end

  # If a <br /> tag is after an opening or closing block tag, remove it.
  def remove_br_after_opening_closing_block_tag
    @input.gsub!(%r{(</?#{BLOCK_LEVEL_TAGS}[^>]*>)\s*<br />}, "\\1")
  end

  # If a <br /> tag is before a subset of opening or closing block tags, remove it.
  def remove_br_before_some_block_tags
    @input.gsub!(%r{<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)}, "\\1")
    @input.gsub!(%r{\n</p>$}, "</p>")
  end

  # Replace the <!--more--> marker with a clearing div.
  def replace_more_with_clear_both
    @input.gsub!(%r{<!--more(.*?)?-->}, '<div class="clear-both"></div>')
  end

  # Replace placeholder <pre> tags with their original content.
  def restore_pre_with_placeholders
    @pre_tags.each do |placeholder, original|
      # Block form: a replacement *string* would have its backslash sequences
      # (\\, \0, \1, ...) interpreted, corrupting code inside <pre>.
      @input.gsub!(placeholder) { original }
    end
  end

  # Restore newlines in all elements.
  def restore_newlines_in_elements_with_placeholders
    @input.gsub!(Regexp.union(' <!-- wpnl --> ', '<!-- wpnl -->'), "\n")
  end

  # Builds (once per instance) the regex used to split text into
  # "text before a tag" (capture 1) and "the tag itself" (capture 2).
  # Comments and CDATA sections are each matched as a single tag, even when
  # they contain ">".
  def split_html_elements_regex
    @split_html_elements_regex ||= begin
      comments =
        '!' +           # Start of comment, after the <.
        '(?:' +         # Unroll the loop: Consume everything until --> is found.
          '-(?!->)' +   # Dash not followed by end of comment.
          '[^\-]*+' +   # Consume non-dashes.
        ')*+' +         # Loop possessively.
        '(?:-->)?'      # End of comment. If not found, match all input.

      cdata =
        '!\[CDATA\[' +  # Start of CDATA, after the <.
        '[^\]]*+' +     # Consume non-].
        '(?:' +         # Unroll the loop: Consume everything until ]]> is found.
          '\](?!\]>)' + # One ] not followed by end of CDATA.
          '[^\]]*+' +   # Consume non-].
        ')*+' +         # Loop possessively.
        '(?:\]\]>)?'    # End of CDATA. If not found, match all input.

      pattern =
        '([^<]*)' +             # Text in front of the element.
        '(' +                   # Capture the tag.
          '<' +                 # Find start of element.
          '(?:' +               # The alternatives all hang off the same "<".
            '(?=!--)' +         # Is this a comment?
            comments +          # Find end of comment.
          '|' +
            '(?=!\[CDATA\[)' +  # Is this a CDATA section?
            cdata +             # Find end of CDATA.
          '|' +
            '[^>]*>?' +         # Find end of element. If not found, match all input.
          ')' +
        ')'

      Regexp.new(pattern, Regexp::MULTILINE)
    end
  end

  # Splits +text+ into ["", text, tag, "", text, tag, ..., trailing text].
  def split_html_elements(text)
    text.split(split_html_elements_regex)
  end

  # Returns every third element starting at 3: ["","data","<tag>","","more data","</closetag>"] => ["<tag>","</closetag>"]
  def every_tag_only(source)
    source.drop(2).each_slice(3).map(&:first)
  end

  # Applies +replace_pairs+ (search string => replacement string) inside HTML
  # tags only; text between tags is left alone.
  #
  # @param haystack [String] text to search.
  # @param replace_pairs [Hash{String=>String}] literal replacements.
  # @return [String] +haystack+ itself when nothing changed, otherwise a new string.
  def replace_in_html_tags(haystack, replace_pairs)
    # find all elements
    tags_split = split_html_elements(haystack)

    # Regexp.union escapes the keys, so they are always matched literally.
    keys = Regexp.union(replace_pairs.keys)

    changed = false
    # Loop through every third element (html tags only)
    every_tag_only(tags_split).each do |tag|
      # Changes existing string, so replaces inside tags_split array
      changed = true if tag.gsub!(keys, replace_pairs)
    end

    changed ? tags_split.join : haystack
  end
end
metadata ADDED
@@ -0,0 +1,46 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: auto_paragraph
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - David Peterson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2016-01-11 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Same as Wordpress' wpautop() function
14
+ email: dp@vivitec.com.au
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/auto_paragraph.rb
20
+ homepage: https://github.com/dippysan/auto_paragraph
21
+ licenses:
22
+ - MIT
23
+ metadata: {}
24
+ post_install_message:
25
+ rdoc_options: []
26
+ require_paths:
27
+ - lib
28
+ required_ruby_version: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ required_rubygems_version: !ruby/object:Gem::Requirement
34
+ requirements:
35
+ - - ">="
36
+ - !ruby/object:Gem::Version
37
+ version: '0'
38
+ requirements: []
39
+ rubyforge_project:
40
+ rubygems_version: 2.2.2
41
+ signing_key:
42
+ specification_version: 4
43
+ summary: 'Formats Wordpress post content in Ruby: Replaces double line breaks with
44
+ paragraph elements'
45
+ test_files: []
46
+ has_rdoc: