feed_yamlizer 0.0.9 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/feed_yamlizer/html_listener.rb +9 -16
- data/lib/feed_yamlizer/version.rb +1 -1
- data/lib/feed_yamlizer.rb +12 -5
- metadata +2 -3
- data/lib/feed_yamlizer/textifier.rb +0 -16
data/lib/feed_yamlizer/html_listener.rb
CHANGED
@@ -28,11 +28,9 @@ class FeedYamlizer
|
|
28
28
|
|
29
29
|
digits = @links.size.to_s.size
|
30
30
|
|
31
|
+
x = Nokogiri::HTML.parse(x).inner_text
|
31
32
|
# wrap the text
|
32
|
-
x = format(x)
|
33
|
-
|
34
|
-
# delete extra blank lines
|
35
|
-
x = x.split(/\n\n+/).join("\n\n")
|
33
|
+
x = FeedYamlizer.format(x)
|
36
34
|
|
37
35
|
# format the blockquotes
|
38
36
|
line_buffer = []
|
@@ -44,7 +42,7 @@ class FeedYamlizer
|
|
44
42
|
elsif line == '[/blockquote]'
|
45
43
|
inblock = false
|
46
44
|
block = blockquote_buffer.join("\n")
|
47
|
-
line_buffer << format(block, '-c')
|
45
|
+
line_buffer << ( FeedYamlizer.format(block, '-c') )
|
48
46
|
blockquote_buffer = []
|
49
47
|
else
|
50
48
|
if inblock
|
@@ -55,8 +53,9 @@ class FeedYamlizer
|
|
55
53
|
end
|
56
54
|
end
|
57
55
|
x = line_buffer.join("\n")
|
56
|
+
|
58
57
|
|
59
|
-
x + "\n\n" + @links.map {|x|
|
58
|
+
res = x + "\n\n" + @links.map {|x|
|
60
59
|
gutter = x[:index].to_s.rjust(digits)
|
61
60
|
if x[:content] && x[:content].strip.length > 0
|
62
61
|
%Q|#{gutter}. "#{x[:content].gsub(/[\r\n]+/, ' ').strip}"\n#{' ' * (digits + 2)}#{x[:href]}|
|
@@ -64,6 +63,7 @@ class FeedYamlizer
|
|
64
63
|
"#{gutter}. #{x[:href]}"
|
65
64
|
end
|
66
65
|
}.join("\n")
|
66
|
+
res
|
67
67
|
end
|
68
68
|
|
69
69
|
def strip_empty_tags(line)
|
@@ -91,7 +91,7 @@ class FeedYamlizer
|
|
91
91
|
when 'ul', 'ol', 'dl'
|
92
92
|
@content << "<#{name}>\n"
|
93
93
|
when 'li', 'dt', 'dd'
|
94
|
-
@content[
|
94
|
+
@content += ["[blockquote]", "", "* "]
|
95
95
|
when 'strong', 'em'
|
96
96
|
@content[-1] << "<#{name}>"
|
97
97
|
when *BLOCK_TAGS
|
@@ -112,11 +112,11 @@ class FeedYamlizer
|
|
112
112
|
when *HEADER_TAGS
|
113
113
|
@content[-1] << "</#{UNIFORM_HEADER_TAG}>"
|
114
114
|
when 'blockquote'
|
115
|
-
@content
|
115
|
+
@content += ["","[/blockquote]"]
|
116
116
|
when 'ul', 'ol', 'dl'
|
117
117
|
@content[-1] << "</#{name}>"
|
118
118
|
when 'li', 'dt', 'dd'
|
119
|
-
@content
|
119
|
+
@content += ["", "[/blockquote]"]
|
120
120
|
when 'strong', 'em'
|
121
121
|
@content[-1] << "</#{name}>"
|
122
122
|
when *BLOCK_TAGS
|
@@ -149,13 +149,6 @@ class FeedYamlizer
|
|
149
149
|
@nested_tags.join('/')
|
150
150
|
end
|
151
151
|
|
152
|
-
def format(x, flags='')
|
153
|
-
IO.popen("fmt #{flags}", "r+") do |pipe|
|
154
|
-
pipe.puts x
|
155
|
-
pipe.close_write
|
156
|
-
pipe.read
|
157
|
-
end
|
158
|
-
end
|
159
152
|
|
160
153
|
end
|
161
154
|
end
|
data/lib/feed_yamlizer.rb
CHANGED
@@ -10,7 +10,6 @@ require 'feed_yamlizer/feed_parser'
|
|
10
10
|
require 'feed_yamlizer/html_listener'
|
11
11
|
require 'feed_yamlizer/html_cleaner'
|
12
12
|
require 'nokogiri'
|
13
|
-
require 'feed_yamlizer/textifier'
|
14
13
|
require 'fileutils'
|
15
14
|
require 'yaml'
|
16
15
|
require 'htmlentities'
|
@@ -18,6 +17,13 @@ require 'string_ext'
|
|
18
17
|
|
19
18
|
class FeedYamlizer
|
20
19
|
include FileUtils::Verbose
|
20
|
+
def self.format(x, flags='')
|
21
|
+
IO.popen("fmt #{flags}", "r+") do |pipe|
|
22
|
+
pipe.puts x
|
23
|
+
pipe.close_write
|
24
|
+
pipe.read
|
25
|
+
end
|
26
|
+
end
|
21
27
|
|
22
28
|
def initialize(feed)
|
23
29
|
@feed = feed
|
@@ -64,14 +70,15 @@ class FeedYamlizer
|
|
64
70
|
@result[:items][-1][:content] = {:html => content}
|
65
71
|
# TODO check if HTML or plain text!
|
66
72
|
simplified = HtmlCleaner.new(content).output
|
67
|
-
textified = Textifier.new(simplified).output
|
68
73
|
#@result[:items][-1][:content][:simplified] = simplified
|
69
|
-
textified =
|
74
|
+
textified = simplified.gsub(FeedYamlizer::NEWLINE_PLACEHOLDER, "\n").
|
70
75
|
gsub(SPACE_PLACEHOLDER, " ").
|
71
76
|
gsub(TAB_PLACEHOLDER, " ").
|
72
77
|
gsub(/^\s+$/, "").
|
73
|
-
# eliminate extra blank lines
|
74
|
-
gsub(/\n{3,}/, "\n\n")
|
78
|
+
# eliminate extra blank lines
|
79
|
+
#gsub(/\n{3,}(?!\s)/, "awdkljalwkdjalwkjd lawkdj GOLD klajw d\n\n")
|
80
|
+
#gsub(/\n{3,}(?!\s)/m, "\n\n").
|
81
|
+
gsub(/\n *\n *\n *$/ , "\n\n")
|
75
82
|
# next two lines are dev lines
|
76
83
|
#puts textified
|
77
84
|
#exit
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
+
- 1
|
7
8
|
- 0
|
8
|
-
|
9
|
-
version: 0.0.9
|
9
|
+
version: 0.1.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Daniel Choi
|
@@ -64,7 +64,6 @@ files:
|
|
64
64
|
- lib/feed_yamlizer/feed_parser.rb
|
65
65
|
- lib/feed_yamlizer/html_cleaner.rb
|
66
66
|
- lib/feed_yamlizer/html_listener.rb
|
67
|
-
- lib/feed_yamlizer/textifier.rb
|
68
67
|
- lib/feed_yamlizer/version.rb
|
69
68
|
has_rdoc: true
|
70
69
|
homepage: https://github.com/danchoi/feed_yamlizer
|
data/lib/feed_yamlizer/textifier.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
# just takes simplified HTML and converts it to plain text
|
2
|
-
class FeedYamlizer
|
3
|
-
class Textifier
|
4
|
-
def initialize(html)
|
5
|
-
@doc = Nokogiri::HTML.parse(html)
|
6
|
-
end
|
7
|
-
|
8
|
-
# TODO beef this up with real effects
|
9
|
-
|
10
|
-
def output
|
11
|
-
@doc.inner_text
|
12
|
-
end
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
|