feed_yamlizer 0.0.9 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/feed_yamlizer/html_listener.rb +9 -16
- data/lib/feed_yamlizer/version.rb +1 -1
- data/lib/feed_yamlizer.rb +12 -5
- metadata +2 -3
- data/lib/feed_yamlizer/textifier.rb +0 -16
data/lib/feed_yamlizer/html_listener.rb
CHANGED
@@ -28,11 +28,9 @@ class FeedYamlizer
|
|
28
28
|
|
29
29
|
digits = @links.size.to_s.size
|
30
30
|
|
31
|
+
x = Nokogiri::HTML.parse(x).inner_text
|
31
32
|
# wrap the text
|
32
|
-
x = format(x)
|
33
|
-
|
34
|
-
# delete extra blank lines
|
35
|
-
x = x.split(/\n\n+/).join("\n\n")
|
33
|
+
x = FeedYamlizer.format(x)
|
36
34
|
|
37
35
|
# format the blockquotes
|
38
36
|
line_buffer = []
|
@@ -44,7 +42,7 @@ class FeedYamlizer
|
|
44
42
|
elsif line == '[/blockquote]'
|
45
43
|
inblock = false
|
46
44
|
block = blockquote_buffer.join("\n")
|
47
|
-
line_buffer << format(block, '-c')
|
45
|
+
line_buffer << ( FeedYamlizer.format(block, '-c') )
|
48
46
|
blockquote_buffer = []
|
49
47
|
else
|
50
48
|
if inblock
|
@@ -55,8 +53,9 @@ class FeedYamlizer
|
|
55
53
|
end
|
56
54
|
end
|
57
55
|
x = line_buffer.join("\n")
|
56
|
+
|
58
57
|
|
59
|
-
x + "\n\n" + @links.map {|x|
|
58
|
+
res = x + "\n\n" + @links.map {|x|
|
60
59
|
gutter = x[:index].to_s.rjust(digits)
|
61
60
|
if x[:content] && x[:content].strip.length > 0
|
62
61
|
%Q|#{gutter}. "#{x[:content].gsub(/[\r\n]+/, ' ').strip}"\n#{' ' * (digits + 2)}#{x[:href]}|
|
@@ -64,6 +63,7 @@ class FeedYamlizer
|
|
64
63
|
"#{gutter}. #{x[:href]}"
|
65
64
|
end
|
66
65
|
}.join("\n")
|
66
|
+
res
|
67
67
|
end
|
68
68
|
|
69
69
|
def strip_empty_tags(line)
|
@@ -91,7 +91,7 @@ class FeedYamlizer
|
|
91
91
|
when 'ul', 'ol', 'dl'
|
92
92
|
@content << "<#{name}>\n"
|
93
93
|
when 'li', 'dt', 'dd'
|
94
|
-
@content[
|
94
|
+
@content += ["[blockquote]", "", "* "]
|
95
95
|
when 'strong', 'em'
|
96
96
|
@content[-1] << "<#{name}>"
|
97
97
|
when *BLOCK_TAGS
|
@@ -112,11 +112,11 @@ class FeedYamlizer
|
|
112
112
|
when *HEADER_TAGS
|
113
113
|
@content[-1] << "</#{UNIFORM_HEADER_TAG}>"
|
114
114
|
when 'blockquote'
|
115
|
-
@content
|
115
|
+
@content += ["","[/blockquote]"]
|
116
116
|
when 'ul', 'ol', 'dl'
|
117
117
|
@content[-1] << "</#{name}>"
|
118
118
|
when 'li', 'dt', 'dd'
|
119
|
-
@content
|
119
|
+
@content += ["", "[/blockquote]"]
|
120
120
|
when 'strong', 'em'
|
121
121
|
@content[-1] << "</#{name}>"
|
122
122
|
when *BLOCK_TAGS
|
@@ -149,13 +149,6 @@ class FeedYamlizer
|
|
149
149
|
@nested_tags.join('/')
|
150
150
|
end
|
151
151
|
|
152
|
-
def format(x, flags='')
|
153
|
-
IO.popen("fmt #{flags}", "r+") do |pipe|
|
154
|
-
pipe.puts x
|
155
|
-
pipe.close_write
|
156
|
-
pipe.read
|
157
|
-
end
|
158
|
-
end
|
159
152
|
|
160
153
|
end
|
161
154
|
end
|
data/lib/feed_yamlizer.rb
CHANGED
@@ -10,7 +10,6 @@ require 'feed_yamlizer/feed_parser'
|
|
10
10
|
require 'feed_yamlizer/html_listener'
|
11
11
|
require 'feed_yamlizer/html_cleaner'
|
12
12
|
require 'nokogiri'
|
13
|
-
require 'feed_yamlizer/textifier'
|
14
13
|
require 'fileutils'
|
15
14
|
require 'yaml'
|
16
15
|
require 'htmlentities'
|
@@ -18,6 +17,13 @@ require 'string_ext'
|
|
18
17
|
|
19
18
|
class FeedYamlizer
|
20
19
|
include FileUtils::Verbose
|
20
|
+
def self.format(x, flags='')
|
21
|
+
IO.popen("fmt #{flags}", "r+") do |pipe|
|
22
|
+
pipe.puts x
|
23
|
+
pipe.close_write
|
24
|
+
pipe.read
|
25
|
+
end
|
26
|
+
end
|
21
27
|
|
22
28
|
def initialize(feed)
|
23
29
|
@feed = feed
|
@@ -64,14 +70,15 @@ class FeedYamlizer
|
|
64
70
|
@result[:items][-1][:content] = {:html => content}
|
65
71
|
# TODO check if HTML or plain text!
|
66
72
|
simplified = HtmlCleaner.new(content).output
|
67
|
-
textified = Textifier.new(simplified).output
|
68
73
|
#@result[:items][-1][:content][:simplified] = simplified
|
69
|
-
textified =
|
74
|
+
textified = simplified.gsub(FeedYamlizer::NEWLINE_PLACEHOLDER, "\n").
|
70
75
|
gsub(SPACE_PLACEHOLDER, " ").
|
71
76
|
gsub(TAB_PLACEHOLDER, " ").
|
72
77
|
gsub(/^\s+$/, "").
|
73
|
-
# eliminate extra blank lines
|
74
|
-
gsub(/\n{3,}/, "\n\n")
|
78
|
+
# eliminate extra blank lines
|
79
|
+
#gsub(/\n{3,}(?!\s)/, "awdkljalwkdjalwkjd lawkdj GOLD klajw d\n\n")
|
80
|
+
#gsub(/\n{3,}(?!\s)/m, "\n\n").
|
81
|
+
gsub(/\n *\n *\n *$/ , "\n\n")
|
75
82
|
# next two lines are dev lines
|
76
83
|
#puts textified
|
77
84
|
#exit
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 0
|
7
|
+
- 1
|
7
8
|
- 0
|
8
|
-
|
9
|
-
version: 0.0.9
|
9
|
+
version: 0.1.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Daniel Choi
|
@@ -64,7 +64,6 @@ files:
|
|
64
64
|
- lib/feed_yamlizer/feed_parser.rb
|
65
65
|
- lib/feed_yamlizer/html_cleaner.rb
|
66
66
|
- lib/feed_yamlizer/html_listener.rb
|
67
|
-
- lib/feed_yamlizer/textifier.rb
|
68
67
|
- lib/feed_yamlizer/version.rb
|
69
68
|
has_rdoc: true
|
70
69
|
homepage: https://github.com/danchoi/feed_yamlizer
|
data/lib/feed_yamlizer/textifier.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
# just takes simplified HTML and converts it to plain text
|
2
|
-
class FeedYamlizer
|
3
|
-
class Textifier
|
4
|
-
def initialize(html)
|
5
|
-
@doc = Nokogiri::HTML.parse(html)
|
6
|
-
end
|
7
|
-
|
8
|
-
# TODO beef this up with real effects
|
9
|
-
|
10
|
-
def output
|
11
|
-
@doc.inner_text
|
12
|
-
end
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
|