wraptext 0.1.2 → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/wraptext/parser.rb +30 -5
- data/spec/data/in/siri-car.txt +6 -0
- data/spec/data/out/4ced9bc198db7477220005c3.txt +4 -4
- data/spec/data/out/siri-car.txt +4 -0
- data/spec/wraptext/parser_spec.rb +17 -27
- data/wraptext.gemspec +1 -1
- metadata +44 -54
data/lib/wraptext/parser.rb
CHANGED
@@ -27,10 +27,11 @@ module Wraptext
|
|
27
27
|
end
|
28
28
|
@root = Nokogiri::HTML "<body></body>"
|
29
29
|
reparent_nodes @root.xpath("/html/body").first, @doc.xpath("/html/body").first
|
30
|
+
replace_single_breaks
|
30
31
|
strip_empty_paragraphs!
|
31
32
|
end
|
32
33
|
|
33
|
-
def to_html
|
34
|
+
def to_html
|
34
35
|
@html ||= @root.xpath("/html/body").inner_html
|
35
36
|
end
|
36
37
|
|
@@ -40,7 +41,31 @@ module Wraptext
|
|
40
41
|
|
41
42
|
private
|
42
43
|
|
43
|
-
def
|
44
|
+
def replace_single_breaks
|
45
|
+
nodes = @root.xpath("//p//text()")
|
46
|
+
nodes.each do |node|
|
47
|
+
frag = Nokogiri::HTML::DocumentFragment.parse node.content.gsub(/(\r\n|\n)/, "<br />")
|
48
|
+
node.swap frag
|
49
|
+
end
|
50
|
+
|
51
|
+
@root.xpath("//p").each do |node|
|
52
|
+
if child = node.children[0] and child.name == "br"
|
53
|
+
child.remove
|
54
|
+
end
|
55
|
+
if child = node.children[node.children.length - 1] and child.name == "br"
|
56
|
+
child.remove
|
57
|
+
end
|
58
|
+
# This is done after the BR removals intentionally.
|
59
|
+
if child = node.children[0] and child.text?
|
60
|
+
child.content = child.content.lstrip
|
61
|
+
end
|
62
|
+
#if child = node.children[node.children.length - 1] and child.text?
|
63
|
+
# child.content = child.content.rstrip
|
64
|
+
# end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def strip_empty_paragraphs!
|
44
69
|
@root.xpath("//p").each do |n|
|
45
70
|
if n.inner_html.strip == ''
|
46
71
|
n.remove
|
@@ -49,7 +74,7 @@ module Wraptext
|
|
49
74
|
# n.remove
|
50
75
|
end
|
51
76
|
end
|
52
|
-
end
|
77
|
+
end
|
53
78
|
|
54
79
|
# This traverses the entire document, and where it finds double newlines in text,
|
55
80
|
# it replaces them with <p> tags. This is a document-oriented approach to this
|
@@ -64,7 +89,7 @@ module Wraptext
|
|
64
89
|
# If we hit a block-level tag, we need to unwind any <p> tags we've inserted; block level elements are
|
65
90
|
# siblings to <p> tags, not children.
|
66
91
|
top = top.parent while top.name == "p"
|
67
|
-
|
92
|
+
|
68
93
|
# Some tags we don't want to traverse into, like <pre> and <script>. Just copy them into the doc.
|
69
94
|
if STRAIGHT_COPY_TAGS_LOOKUP.has_key? node.name
|
70
95
|
top.add_child node.clone
|
@@ -108,7 +133,7 @@ module Wraptext
|
|
108
133
|
end
|
109
134
|
end
|
110
135
|
end
|
111
|
-
|
136
|
+
|
112
137
|
# If this isn't a block or text node, we need to copy it into the new document. If it's a <p> node, then
|
113
138
|
# we just copy it in directly. Else, wrap it in a <p> tag and copy it in.
|
114
139
|
# This allows things like "<em>Foo</em> Bar Baz" to be wrapped in a single tag, as the <em> tag will be
|
@@ -0,0 +1,6 @@
|
|
1
|
+
The other day I got a text on my iPhone while I was sitting in a meeting at work. Not an unusual occurrence for me, but the recipient was. It was from my car. Thanks, SmartStart.
|
2
|
+
|
3
|
+
<strong>Subject</strong>: Alert for Andrea's Rav
|
4
|
+
<strong>Message</strong>: Command received at vehicle. Speed: 0
|
5
|
+
|
6
|
+
My car was telling me someone (my son) had started it using the Viper SmartStart Remote. I hit the <a href="http://mashable.com/follow/topics/gps/">GPS</a> button on the SmartStart app on my iPhone (also available for Android and Blackberry), and was able to tell he was driving from my house to where he works. Or in that general direction. If I wanted to, I could check back in a little while and see if he actually went there.
|
@@ -35,10 +35,10 @@ The Mashable Awards Gala at Cirque du Soleil Zumanity (Vegas)
|
|
35
35
|
<hr>
|
36
36
|
<p>In partnership with Cirque du Soleil, The Mashable Awards Gala event will bring together the winners and nominees, the Mashable community, partners, media, the marketing community, consumer electronics and technology brands and attendees from the 2011 International CES Convention to Las Vegas on Thursday, January 6, 2011. Together, we will celebrate the winners and the community of the Mashable Awards at the Cirque du Soleil Zumanity stage in the beautiful New York New York Hotel. The event will include acts and performances from our partner Cirque du Soleil Zumanity. In addition, there will be special guest presenters and appearances.</p>
|
37
37
|
<p>
|
38
|
-
<strong>Date</strong>: Thursday, January 6th, 2011 (during International CES Convention week)
|
39
|
-
<strong>Time</strong>: 7:00 - 10:00 p.m. PT
|
40
|
-
<strong>Location</strong>: Cirque du Soleil Zumanity, New York New York Hotel, Las Vegas
|
41
|
-
<strong>Agenda</strong>: Networking, Open Bars, Acts, Surprises and the Mashable Awards Gala presentations
|
38
|
+
<strong>Date</strong>: Thursday, January 6th, 2011 (during International CES Convention week)<br>
|
39
|
+
<strong>Time</strong>: 7:00 - 10:00 p.m. PT<br>
|
40
|
+
<strong>Location</strong>: Cirque du Soleil Zumanity, New York New York Hotel, Las Vegas<br>
|
41
|
+
<strong>Agenda</strong>: Networking, Open Bars, Acts, Surprises and the Mashable Awards Gala presentations<br>
|
42
42
|
<strong>Socialize</strong>: <a href="http://www.facebook.com/event.php?eid=123960060989680&ref=ts" target="_blank">Facebook</a>, <a href="http://foursquare.com/venue/8822146" target="_blank">Foursquare</a>, <a href="http://www.meetup.com/mashable/22947/" target="_blank">Meetup</a>, <a href="http://plancast.com/p/27by" target="_blank">Plancast</a>, <a href="http://search.twitter.com/search?q=%23mashableawards" target="_blank">Twitter</a> (Hashtag: #MashableAwards)</p>
|
43
43
|
<p>
|
44
44
|
<strong><em>Mashable Awards Category Sponsor:</em></strong></p>
|
@@ -0,0 +1,4 @@
|
|
1
|
+
<p>The other day I got a text on my iPhone while I was sitting in a meeting at work. Not an unusual occurrence for me, but the recipient was. It was from my car. Thanks, SmartStart.</p>
|
2
|
+
<p><strong>Subject</strong>: Alert for Andrea's Rav<br />
|
3
|
+
<strong>Message</strong>: Command received at vehicle. Speed: 0</p>
|
4
|
+
<p>My car was telling me someone (my son) had started it using the Viper SmartStart Remote. I hit the <a href="http://mashable.com/follow/topics/gps/">GPS</a> button on the SmartStart app on my iPhone (also available for Android and Blackberry), and was able to tell he was driving from my house to where he works. Or in that general direction. If I wanted to, I could check back in a little while and see if he actually went there.</p>
|
@@ -26,7 +26,7 @@ describe Wraptext::Parser do
|
|
26
26
|
|
27
27
|
it "should return a Nokogiri::XML::Element from #to_doc" do
|
28
28
|
@doc.to_doc.should be_a(Nokogiri::XML::Element)
|
29
|
-
end
|
29
|
+
end
|
30
30
|
end
|
31
31
|
|
32
32
|
context "given a set of plain text" do
|
@@ -42,8 +42,7 @@ describe Wraptext::Parser do
|
|
42
42
|
it "should convert plain text to p-wrapped text" do
|
43
43
|
expects = <<-EOF
|
44
44
|
<p>This is some text.</p>
|
45
|
-
<p>
|
46
|
-
</p>
|
45
|
+
<p>This is some more text.</p>
|
47
46
|
EOF
|
48
47
|
@doc.to_html.should == expects.strip
|
49
48
|
end
|
@@ -57,18 +56,15 @@ This is some text
|
|
57
56
|
This is some text after the block element
|
58
57
|
EOF
|
59
58
|
expects = <<-EOF
|
60
|
-
<p>This is some text
|
61
|
-
</p>
|
59
|
+
<p>This is some text</p>
|
62
60
|
<div><p>This is a block level element</p></div>
|
63
|
-
<p>
|
64
|
-
This is some text after the block element
|
65
|
-
</p>
|
61
|
+
<p>This is some text after the block element</p>
|
66
62
|
EOF
|
67
63
|
Wraptext::Parser.new(doc).to_html.should == expects.strip
|
68
64
|
end
|
69
65
|
end
|
70
66
|
|
71
|
-
|
67
|
+
|
72
68
|
context "given plain text with some p-peer tags" do
|
73
69
|
it "should not inject p tags directly inside p-peer tags" do
|
74
70
|
doc = <<-EOF
|
@@ -77,16 +73,13 @@ This is some text
|
|
77
73
|
This is some text after the block element
|
78
74
|
EOF
|
79
75
|
expects = <<-EOF
|
80
|
-
<p>This is some text
|
81
|
-
</p>
|
76
|
+
<p>This is some text</p>
|
82
77
|
<h1>This is a p-peer element</h1>
|
83
|
-
<p>
|
84
|
-
This is some text after the block element
|
85
|
-
</p>
|
78
|
+
<p>This is some text after the block element</p>
|
86
79
|
EOF
|
87
80
|
Wraptext::Parser.new(doc).to_html.should == expects.strip
|
88
81
|
end
|
89
|
-
end
|
82
|
+
end
|
90
83
|
|
91
84
|
context "given a <script> tag" do
|
92
85
|
it "should not perform any transformation inside the tag" do
|
@@ -102,18 +95,17 @@ And another line
|
|
102
95
|
EOF
|
103
96
|
expects = <<-EOF
|
104
97
|
<p>This is some precursor text</p>
|
105
|
-
<p>And another line
|
106
|
-
</p>
|
98
|
+
<p>And another line</p>
|
107
99
|
<script>
|
108
100
|
var elem = 'this is some javascript';
|
109
101
|
|
110
102
|
elem = elem.toUpperCase();
|
111
103
|
</script>
|
112
104
|
EOF
|
113
|
-
Wraptext::Parser.new(doc).to_html.should == expects.strip
|
105
|
+
Wraptext::Parser.new(doc).to_html.should == expects.strip
|
114
106
|
end
|
115
107
|
end
|
116
|
-
|
108
|
+
|
117
109
|
context "given Wordpress datasets" do
|
118
110
|
before :all do
|
119
111
|
@in = File.expand_path(File.join(__FILE__, "..", "..", "data", "in"))
|
@@ -125,9 +117,9 @@ EOF
|
|
125
117
|
gsub(/>/, ">\n").
|
126
118
|
gsub(" />", ">").
|
127
119
|
split(/\n/).
|
128
|
-
map(&:strip).
|
120
|
+
map(&:strip).
|
129
121
|
join("\n").strip
|
130
|
-
end
|
122
|
+
end
|
131
123
|
|
132
124
|
def test_datafile(file)
|
133
125
|
data_in = File.read(file)
|
@@ -153,11 +145,9 @@ EOF
|
|
153
145
|
EOF
|
154
146
|
|
155
147
|
expects = <<-EOF
|
156
|
-
<p>
|
157
|
-
|
158
|
-
<p> And here is <i>another</i> line
|
159
|
-
</p>
|
148
|
+
<p>This is some <em>emphasized</em> text</p>
|
149
|
+
<p>And here is <i>another</i> line</p>
|
160
150
|
EOF
|
161
|
-
Wraptext::Parser.new(doc).to_html.should == expects.strip
|
162
|
-
end
|
151
|
+
Wraptext::Parser.new(doc).to_html.should == expects.strip
|
152
|
+
end
|
163
153
|
end
|
data/wraptext.gemspec
CHANGED
metadata
CHANGED
@@ -1,46 +1,41 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: wraptext
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.3
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 1
|
9
|
-
- 2
|
10
|
-
version: 0.1.2
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Chris Heald
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2012-10-23 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
21
15
|
name: nokogiri
|
22
|
-
|
23
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
24
17
|
none: false
|
25
|
-
requirements:
|
26
|
-
- -
|
27
|
-
- !ruby/object:Gem::Version
|
28
|
-
|
29
|
-
segments:
|
30
|
-
- 0
|
31
|
-
version: "0"
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
32
22
|
type: :runtime
|
33
|
-
|
34
|
-
|
35
|
-
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
description: Wraps bare text nodes from an HTML document in <p> tags and splits text
|
31
|
+
nodes on double newlines. Conveniently serves to format Wordpress post content properly
|
32
|
+
as a side effect.
|
33
|
+
email:
|
36
34
|
- cheald@gmail.com
|
37
35
|
executables: []
|
38
|
-
|
39
36
|
extensions: []
|
40
|
-
|
41
37
|
extra_rdoc_files: []
|
42
|
-
|
43
|
-
files:
|
38
|
+
files:
|
44
39
|
- .gitignore
|
45
40
|
- .rspec
|
46
41
|
- Gemfile
|
@@ -53,58 +48,53 @@ files:
|
|
53
48
|
- spec/data/in/4ced9bf198db7477220006f1.txt
|
54
49
|
- spec/data/in/4ced9bf798db74772200070f.txt
|
55
50
|
- spec/data/in/mobile-games.txt
|
51
|
+
- spec/data/in/siri-car.txt
|
56
52
|
- spec/data/out/4ced9bc198db7477220005c3.txt
|
57
53
|
- spec/data/out/4ced9bf198db7477220006f1.txt
|
58
54
|
- spec/data/out/4ced9bf798db74772200070f.txt
|
59
55
|
- spec/data/out/mobile-games.txt
|
56
|
+
- spec/data/out/siri-car.txt
|
60
57
|
- spec/data/wordpress_autop.php
|
61
58
|
- spec/spec_helper.rb
|
62
59
|
- spec/wraptext/parser_spec.rb
|
63
60
|
- wraptext.gemspec
|
64
|
-
homepage:
|
61
|
+
homepage: ''
|
65
62
|
licenses: []
|
66
|
-
|
67
63
|
post_install_message:
|
68
64
|
rdoc_options: []
|
69
|
-
|
70
|
-
require_paths:
|
65
|
+
require_paths:
|
71
66
|
- lib
|
72
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
67
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
73
68
|
none: false
|
74
|
-
requirements:
|
75
|
-
- -
|
76
|
-
- !ruby/object:Gem::Version
|
77
|
-
|
78
|
-
|
79
|
-
- 0
|
80
|
-
version: "0"
|
81
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
69
|
+
requirements:
|
70
|
+
- - ! '>='
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
version: '0'
|
73
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
82
74
|
none: false
|
83
|
-
requirements:
|
84
|
-
- -
|
85
|
-
- !ruby/object:Gem::Version
|
86
|
-
|
87
|
-
segments:
|
88
|
-
- 0
|
89
|
-
version: "0"
|
75
|
+
requirements:
|
76
|
+
- - ! '>='
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '0'
|
90
79
|
requirements: []
|
91
|
-
|
92
80
|
rubyforge_project:
|
93
|
-
rubygems_version: 1.8.
|
81
|
+
rubygems_version: 1.8.24
|
94
82
|
signing_key:
|
95
83
|
specification_version: 3
|
96
|
-
summary: Wraps bare text nodes from an HTML document in <p> tags and splits text nodes
|
97
|
-
|
84
|
+
summary: Wraps bare text nodes from an HTML document in <p> tags and splits text nodes
|
85
|
+
on double newlines.
|
86
|
+
test_files:
|
98
87
|
- spec/data/convert.php
|
99
88
|
- spec/data/in/4ced9bc198db7477220005c3.txt
|
100
89
|
- spec/data/in/4ced9bf198db7477220006f1.txt
|
101
90
|
- spec/data/in/4ced9bf798db74772200070f.txt
|
102
91
|
- spec/data/in/mobile-games.txt
|
92
|
+
- spec/data/in/siri-car.txt
|
103
93
|
- spec/data/out/4ced9bc198db7477220005c3.txt
|
104
94
|
- spec/data/out/4ced9bf198db7477220006f1.txt
|
105
95
|
- spec/data/out/4ced9bf798db74772200070f.txt
|
106
96
|
- spec/data/out/mobile-games.txt
|
97
|
+
- spec/data/out/siri-car.txt
|
107
98
|
- spec/data/wordpress_autop.php
|
108
99
|
- spec/spec_helper.rb
|
109
100
|
- spec/wraptext/parser_spec.rb
|
110
|
-
has_rdoc:
|