feed_yamlizer 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
data/bin/feed2yaml CHANGED
@@ -8,7 +8,7 @@ rescue LoadError
8
8
  end
9
9
  require 'open-uri'
10
10
 
11
- # for testing
11
+ # just prints the text, not yaml
12
12
  def print_text(res)
13
13
  res[:items].each {|x|
14
14
  puts '-' * 30
@@ -18,13 +18,19 @@ def print_text(res)
18
18
  }
19
19
  end
20
20
 
21
+ if ARGV.first == '-t' # text
22
+ puts "Printing text form"
23
+ @text = true
24
+ ARGV.shift
25
+ end
26
+
21
27
  result = if STDIN.tty?
22
28
  FeedYamlizer.process_url ARGV.first
23
29
  else
24
30
  FeedYamlizer.process_xml STDIN.read
25
31
  end
26
32
 
27
- if ENV['TEST']
33
+ if @text
28
34
  print_text result
29
35
  else
30
36
  puts result.to_yaml
@@ -43,7 +43,7 @@ class FeedYamlizer
43
43
  #output = IO.popen("tidy -q -n -wrap 120 -asxml -latin1", "r+") do |pipe|
44
44
  #output = IO.popen("tidy -q -wrap 120 -raw -asxml ", "r+") do |pipe| # if from latin1
45
45
 
46
- tidy = "tidy -q -wrap 120 -n -utf8 -asxml 2>/dev/null"
46
+ tidy = "tidy -q -wrap 120 -n -raw -utf8 -asxml 2>/dev/null"
47
47
  output = IO.popen(tidy, "r+") do |pipe|
48
48
  input = <<-END
49
49
  <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
@@ -24,8 +24,34 @@ class FeedYamlizer
24
24
 
25
25
  digits = @links.size.to_s.size
26
26
 
27
+ # wrap the text
27
28
  x = format(x)
28
29
 
30
+ # delete extra blank lines
31
+ x = x.split(/\n\n+/).join("\n\n")
32
+
33
+ # format the blockquotes
34
+ line_buffer = []
35
+ blockquote_buffer = []
36
+ inblock = false
37
+ x.split(/\n/).each do |line|
38
+ if line == '[blockquote]'
39
+ inblock = true
40
+ elsif line == '[/blockquote]'
41
+ inblock = false
42
+ block = blockquote_buffer.join("\n")
43
+ line_buffer << format(block, '-c')
44
+ blockquote_buffer = []
45
+ else
46
+ if inblock
47
+ blockquote_buffer << " " * 4 + line.to_s
48
+ else
49
+ line_buffer << line
50
+ end
51
+ end
52
+ end
53
+ x = line_buffer.join("\n")
54
+
29
55
  x + "\n\n" + @links.map {|x|
30
56
  gutter = x[:index].to_s.rjust(digits)
31
57
  if x[:content] && x[:content].strip.length > 0
@@ -48,7 +74,7 @@ class FeedYamlizer
48
74
  @in_link = true
49
75
  when 'img'
50
76
  text = attrs['alt'] || attrs['title']
51
- chunk = ['img', text].join(':')
77
+ chunk = "[img:#{text}] "
52
78
  @content[-1] << chunk
53
79
  when *HEADER_TAGS
54
80
  @content << "<#{UNIFORM_HEADER_TAG}>"
@@ -57,7 +83,7 @@ class FeedYamlizer
57
83
  # @content << ""
58
84
  @content[-1] += " "
59
85
  when 'blockquote'
60
- @content << "[blockquote]\n"
86
+ @content += ["[blockquote]", ""]
61
87
  when 'ul', 'ol', 'dl'
62
88
  @content << "<#{name}>"
63
89
  when 'li', 'dt', 'dd'
@@ -102,7 +128,6 @@ class FeedYamlizer
102
128
  return
103
129
  end
104
130
 
105
- # probably slow, but ok for now
106
131
  @content[-1] << text
107
132
  end
108
133
 
@@ -114,8 +139,8 @@ class FeedYamlizer
114
139
  @nested_tags.join('/')
115
140
  end
116
141
 
117
- def format(x)
118
- IO.popen("fmt", "r+") do |pipe|
142
+ def format(x, flags='')
143
+ IO.popen("fmt #{flags}", "r+") do |pipe|
119
144
  pipe.puts x
120
145
  pipe.close_write
121
146
  pipe.read
@@ -1,3 +1,3 @@
1
1
  class FeedYamlizer
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
data/lib/feed_yamlizer.rb CHANGED
@@ -14,6 +14,7 @@ require 'feed_yamlizer/textifier'
14
14
  require 'fileutils'
15
15
  require 'yaml'
16
16
  require 'htmlentities'
17
+ require 'string_ext'
17
18
 
18
19
  class FeedYamlizer
19
20
  include FileUtils::Verbose
@@ -70,16 +71,11 @@ class FeedYamlizer
70
71
 
71
72
  class << self
72
73
  def xml_encoding(rawxml)
73
- x = rawxml.scan(/encoding=["']([^"']+)["']/)
74
- encoding = x && x[0] && x[0][0]
74
+ encoding = rawxml[/encoding=["']([^"']+)["']/,1]
75
75
  STDERR.puts "xml encoding: #{encoding.inspect}"
76
76
  encoding
77
77
  end
78
78
 
79
- def to_utf(x, encoding = 'ISO-8859-1')
80
- x = Iconv.conv("UTF-8//TRANSLIT//IGNORE", encoding, x)
81
- end
82
-
83
79
  def check_for_tidy
84
80
  if `which tidy` == ''
85
81
  abort "Please install tidy"
@@ -87,9 +83,9 @@ class FeedYamlizer
87
83
  end
88
84
 
89
85
  # main method
90
- def run(feed_xml, encoding)
86
+ def run(feed_xml, encoding='UTF-8')
91
87
  check_for_tidy
92
- feed_xml = to_utf feed_xml, encoding
88
+ feed_xml = Iconv.conv("UTF-8//TRANSLIT//IGNORE", encoding, feed_xml)
93
89
  parsed_data = FeedYamlizer::FeedParser.new(feed_xml).result
94
90
  result = FeedYamlizer.new(parsed_data).result
95
91
  result
@@ -104,7 +100,7 @@ class FeedYamlizer
104
100
  charset = response.charset
105
101
  #STDERR.puts "charset: #{charset}"
106
102
  xml = response.read
107
- encoding = charset || xml_encoding(xml) || "ISO-8859-1"
103
+ encoding = charset || xml_encoding(xml) || "UTF-8"
108
104
  run xml, encoding
109
105
  end
110
106
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 5
9
- version: 0.0.5
8
+ - 6
9
+ version: 0.0.6
10
10
  platform: ruby
11
11
  authors:
12
12
  - Daniel Choi
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-02-19 00:00:00 -05:00
17
+ date: 2011-02-20 00:00:00 -05:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency