auto_paragraph 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/auto_paragraph.rb +335 -0
- metadata +46 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 1ec07b8849e86618e4f7408753f47d7465daaa01
|
4
|
+
data.tar.gz: db510925c1a4a89d4a36a7348c10ee1edc2a01a6
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 376ee810f0b881b127f6c245d30abec8ca83a6083b42b5af411bfc1b00e34db86b2bc9dca7a03828490ac3b7be3e8d99670ef32a25fea4863e86e27abf342e6b
|
7
|
+
data.tar.gz: ad41dbf903a765dbfc037b5692207c62d2a47f86866da0ef2a584dcbd05b49c5df08bb8f3659508acd6fc1de28bebb9206217d4cadb25d9425c8e4efccfc0e35
|
@@ -0,0 +1,335 @@
|
|
1
|
+
# Ruby port of WordPress' wpautop: wraps text separated by blank lines in
# <p> elements and (optionally) turns the remaining single newlines into
# <br /> tags, while leaving <pre> blocks untouched.
#
# Same as Wordpress' wpautop
# From https://github.com/WordPress/WordPress/blob/4.3-branch/wp-includes/formatting.php
#
# Usage:
#   AutoParagraph.new.execute("one\n\ntwo")  # => "<p>one</p>\n<p>two</p>\n"
class AutoParagraph
  # Regex fragment (a non-capturing alternation) naming every tag that is
  # treated as block level. It is interpolated into the patterns below.
  BLOCK_LEVEL_TAGS = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)'.freeze

  # Matches one run of text followed by one HTML element. Capture 1 is the
  # text in front of the element, capture 2 is the element itself. Comments
  # and CDATA sections are consumed up to their own terminator so that a
  # stray closing angle bracket inside them does not end the element early.
  HTML_ELEMENT_REGEX = /
    ([^<]*)                      # text in front of the element
    (
      <                          # every element starts with an angle bracket
      (?:
        (?=!--)                  # comment
        !(?:-(?!->)[^\-]*+)*+    # consume up to the comment terminator
        (?:-->)?                 # terminator is optional, else match all input
      |
        (?=!\[CDATA\[)           # CDATA section
        !\[CDATA\[[^\]]*+
        (?:\](?!\]>)[^\]]*+)*+   # consume up to the CDATA terminator
        (?:\]\]>)?               # terminator is optional, else match all input
      |
        [^>]*>?                  # any other element
      )
    )
  /mx

  # @param insert_line_breaks [Boolean] when true (the default) single
  #   newlines left inside paragraphs are converted to <br /> tags.
  def initialize(insert_line_breaks: true)
    @pre_tags = {}
    @insert_line_breaks = insert_line_breaks
  end

  # Formats +input+ and returns the resulting HTML.
  #
  # @param input [#to_s] raw post content; nil is treated as an empty string.
  # @return [String] the formatted markup, or '' when the input is blank.
  #   The argument itself is never modified.
  def execute(input)
    # Work on a private copy: every step below mutates @input in place, which
    # must neither leak into the caller's string nor fail on frozen input.
    @input = input.to_s.dup
    # Forget placeholders left over from a previous call on this instance.
    @pre_tags = {}

    return '' if @input.strip.empty?

    # <pre> blocks are swapped out first so that none of the later rewriting
    # steps can alter their content.
    add_placeholders
    setup_input_string

    add_p_tags
    remove_extraneous_p_tags
    insert_and_cleanup_br_tags

    replace_more_with_clear_both

    restore_placeholders

    @input
  end

  private

  # For testing
  def input_hook
    @input
  end

  def input_hook=(input)
    @input = input
  end

  # Appends the trailing newline and hides <pre> blocks behind placeholders.
  def add_placeholders
    pad_newline
    replace_pre_with_placeholders
  end

  # Normalises line breaks around block-level markup ahead of paragraph
  # splitting.
  def setup_input_string
    multiple_brs_into_two_line_breaks
    add_single_line_break_above_block_level_opening_tags
    add_double_break_below_block_level_closing_tags
    standardize_newline_to_backslash_n
    replace_newlines_in_elements_with_placeholders
    collapse_line_breaks_around_option_elements
    collapse_line_breaks_inside_object_before_param_or_embed
    collapse_line_breaks_inside_audio_video_around_source_track
    remove_more_than_two_contiguous_line_breaks
  end

  def add_p_tags
    add_p_tags_at_doule_linebreaks
  end

  # Removes the <p> tags that the naive wrapping step put around block-level
  # markup.
  def remove_extraneous_p_tags
    remove_p_with_only_whitespace
    add_closing_p_inside_div_address_form
    unwrap_opening_closing_element_from_p
    unwrap_li_from_p
    unwrap_blockquote_from_p
    remove_preceeding_p_from_block_element_tag
    remove_following_p_from_block_element_tag
  end

  def insert_and_cleanup_br_tags
    insert_line_breaks
    remove_br_after_opening_closing_block_tag
    remove_br_before_some_block_tags
  end

  def restore_placeholders
    restore_pre_with_placeholders
    restore_newlines_in_elements_with_placeholders
  end

  def pad_newline
    @input << "\n"
  end

  # Pre tags shouldn't be touched by autop.
  # Replace pre tags with placeholders and bring them back after autop.
  def replace_pre_with_placeholders
    return @input unless @input.include?('<pre')

    @pre_tags = {}

    # A limit of -1 keeps trailing empty fields, so an input that ends exactly
    # with "</pre>" still yields its final <pre> block as a segment.
    *segments, tail = @input.split('</pre>', -1)

    head = segments.each_with_index.map do |segment, index|
      pre_start = segment.index('<pre')

      # Malformed html? A closing tag without an opening one: keep the text.
      next segment unless pre_start

      placeholder = "<pre wp-pre-tag-#{index}></pre>"
      @pre_tags[placeholder] = "#{segment[pre_start..-1]}</pre>"

      # Exclusive range: with pre_start == 0 this is the empty string
      # (0..pre_start-1 would be 0..-1, i.e. the whole segment).
      "#{segment[0...pre_start]}#{placeholder}"
    end.join

    @input = head + tail
  end

  def multiple_brs_into_two_line_breaks
    @input.gsub!(%r{<br\s*/?>\s*<br\s*/?>}, "\n\n")
  end

  def add_single_line_break_above_block_level_opening_tags
    @input.gsub!(%r{(<#{BLOCK_LEVEL_TAGS}[^>]*>)}, "\n\\1")
  end

  def add_double_break_below_block_level_closing_tags
    @input.gsub!(%r{(</#{BLOCK_LEVEL_TAGS}>)}, "\\1\n\n")
  end

  def standardize_newline_to_backslash_n
    # "\r\n" must be handled before the lone "\r".
    ["\r\n", "\r"].each { |line_ending| @input.gsub!(line_ending, "\n") }
  end

  # Newlines inside an element (for example between attributes) must not be
  # mistaken for paragraph or line breaks, so they are hidden behind a marker.
  def replace_newlines_in_elements_with_placeholders
    @input = replace_in_html_tags(@input, "\n" => ' <!-- wpnl --> ')
  end

  def collapse_line_breaks_around_option_elements
    return unless @input.include?('<option')

    @input.gsub!(/\s*<option/, '<option')
    @input.gsub!(%r{</option>\s*}, '</option>')
  end

  # Collapse line breaks inside <object> elements, before <param> and <embed> elements
  def collapse_line_breaks_inside_object_before_param_or_embed
    return unless @input.include?('</object>')

    @input.gsub!(/(<object[^>]*>)\s*/, '\\1')
    @input.gsub!(%r{\s*</object>}, '</object>')
    @input.gsub!(%r{\s*(</?(?:param|embed)[^>]*>)\s*}, '\\1')
  end

  # Collapse line breaks inside <audio> and <video> elements,
  # before and after <source> and <track> elements.
  def collapse_line_breaks_inside_audio_video_around_source_track
    return unless @input.include?('<source') || @input.include?('<track')

    @input.gsub!(%r{([<\[](?:audio|video)[^>\]]*[>\]])\s*}, '\\1')
    @input.gsub!(%r{\s*([<\[]/(?:audio|video)[>\]])}, '\\1')
    @input.gsub!(%r{\s*(<(?:source|track)[^>]*>)\s*}, '\\1')
  end

  def remove_more_than_two_contiguous_line_breaks
    @input.gsub!(/\n\n+/, "\n\n")
  end

  # Split up the contents into an array of strings, separated by double line
  # breaks, and wrap each one in <p>...</p>.
  def add_p_tags_at_doule_linebreaks
    paragraphs = @input.split(/\n\s*\n/).map do |para|
      # Trim newlines at the very start and end of the chunk only.
      "<p>#{para.sub(/\A\n+/, '').sub(/\n+\z/, '')}</p>\n"
    end
    @input = paragraphs.join
  end

  # Under certain strange conditions it could create a P of entirely whitespace.
  def remove_p_with_only_whitespace
    @input.gsub!(%r{<p>\s*</p>}, '')
  end

  # Add a closing <p> inside <div>, <address>, or <form> tag if missing.
  def add_closing_p_inside_div_address_form
    @input.gsub!(%r{<p>([^<]+)</(div|address|form)>}, '<p>\\1</p></\\2>')
  end

  # If an opening or closing block element tag is wrapped in a <p>, unwrap it.
  def unwrap_opening_closing_element_from_p
    @input.gsub!(%r{<p>\s*(</?#{BLOCK_LEVEL_TAGS}[^>]*>)\s*</p>}, '\\1')
  end

  # In some cases <li> may get wrapped in <p>, fix them.
  def unwrap_li_from_p
    @input.gsub!(%r{<p>(<li.+?)</p>}, '\\1')
  end

  # If a <blockquote> is wrapped with a <p>, move it inside the <blockquote>.
  def unwrap_blockquote_from_p
    @input.gsub!(%r{<p><blockquote([^>]*)>}i, '<blockquote\\1><p>')
    @input.gsub!('</blockquote></p>', '</p></blockquote>')
  end

  # If an opening or closing block element tag is preceded by an opening <p> tag, remove it.
  def remove_preceeding_p_from_block_element_tag
    @input.gsub!(%r{<p>\s*(</?#{BLOCK_LEVEL_TAGS}[^>]*>)}, '\\1')
  end

  # If an opening or closing block element tag is followed by a closing <p> tag, remove it.
  def remove_following_p_from_block_element_tag
    @input.gsub!(%r{(</?#{BLOCK_LEVEL_TAGS}[^>]*>)\s*</p>}, '\\1')
  end

  # Optionally insert line breaks.
  def insert_line_breaks
    return unless @insert_line_breaks

    # Replace newlines that shouldn't be touched with a placeholder.
    @input.gsub!(%r{<(script|style).*?</\1>}m) do |element|
      element.gsub("\n", '<WPPreserveNewline />')
    end

    # Normalize <br>
    @input.gsub!(Regexp.union('<br>', '<br/>'), '<br />')

    # Replace any new line characters that aren't preceded by a <br /> with a <br />.
    @input.gsub!(%r{(?<!<br />)\s*\n}, "<br />\n")

    # Replace newline placeholders with newlines.
    @input.gsub!('<WPPreserveNewline />', "\n")
  end

  # If a <br /> tag is after an opening or closing block tag, remove it.
  def remove_br_after_opening_closing_block_tag
    @input.gsub!(%r{(</?#{BLOCK_LEVEL_TAGS}[^>]*>)\s*<br />}, '\\1')
  end

  # If a <br /> tag is before a subset of opening or closing block tags, remove it.
  def remove_br_before_some_block_tags
    @input.gsub!(%r{<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)}, '\\1')
    # Note: `$` anchors at the end of every line here, not only at the end of
    # the whole string.
    @input.gsub!(%r{\n</p>$}, '</p>')
  end

  # Replaces the <!--more--> marker (with optional text after "more") by a
  # clearing div.
  def replace_more_with_clear_both
    @input.gsub!(/<!--more(.*?)?-->/, '<div class="clear-both"></div>')
  end

  # Replace placeholder <pre> tags with their original content.
  def restore_pre_with_placeholders
    @pre_tags.each do |placeholder, original|
      # Block form: the original markup is inserted verbatim. Passed as a
      # replacement string, backslash sequences in it (e.g. "\1" or "\\" in a
      # code sample) would be interpreted by gsub.
      @input.gsub!(placeholder) { original }
    end
  end

  # Restore newlines in all elements.
  def restore_newlines_in_elements_with_placeholders
    @input.gsub!(Regexp.union(' <!-- wpnl --> ', '<!-- wpnl -->'), "\n")
  end

  # @return [Regexp] pattern used to cut a document into text and elements.
  def split_html_elements_regex
    HTML_ELEMENT_REGEX
  end

  # Splits +text+ into a flat array of the form
  # ["", text, element, "", text, element, ..., trailing text].
  def split_html_elements(text)
    text.split(split_html_elements_regex)
  end

  # Returns every third element starting at 3: ["","data","<tag>","","more data","</closetag>"] => ["<tag>","</closetag>"]
  def every_tag_only(source)
    source.drop(2).each_slice(3).map(&:first)
  end

  # Applies +replace_pairs+ (literal search string => replacement) inside HTML
  # elements only; text between elements is left alone.
  #
  # @param haystack [String] the document to process (not modified).
  # @param replace_pairs [Hash{String => String}]
  # @return [String] the rewritten document, or +haystack+ itself when no
  #   element contained any of the search strings.
  def replace_in_html_tags(haystack, replace_pairs)
    pieces = split_html_elements(haystack)
    # Regexp.union escapes the keys, so they are always matched literally.
    needles = Regexp.union(replace_pairs.keys)

    changed = false
    every_tag_only(pieces).each do |tag|
      # gsub! edits the string held in +pieces+ and returns nil when nothing
      # was replaced.
      changed = true if tag.gsub!(needles, replace_pairs)
    end

    changed ? pieces.join : haystack
  end
end
|
metadata
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: auto_paragraph
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- David Peterson
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2016-01-11 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Same as Wordpress' wpautop() function
|
14
|
+
email: dp@vivitec.com.au
|
15
|
+
executables: []
|
16
|
+
extensions: []
|
17
|
+
extra_rdoc_files: []
|
18
|
+
files:
|
19
|
+
- lib/auto_paragraph.rb
|
20
|
+
homepage: https://github.com/dippysan/auto_paragraph
|
21
|
+
licenses:
|
22
|
+
- MIT
|
23
|
+
metadata: {}
|
24
|
+
post_install_message:
|
25
|
+
rdoc_options: []
|
26
|
+
require_paths:
|
27
|
+
- lib
|
28
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
29
|
+
requirements:
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
34
|
+
requirements:
|
35
|
+
- - ">="
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
38
|
+
requirements: []
|
39
|
+
rubyforge_project:
|
40
|
+
rubygems_version: 2.2.2
|
41
|
+
signing_key:
|
42
|
+
specification_version: 4
|
43
|
+
summary: 'Formats Wordpress post content in Ruby: Replaces double line breaks with
|
44
|
+
paragraph elements'
|
45
|
+
test_files: []
|
46
|
+
has_rdoc:
|