feed_yamlizer 0.0.5 → 0.0.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
data/bin/feed2yaml CHANGED
@@ -8,7 +8,7 @@ rescue LoadError
8
8
  end
9
9
  require 'open-uri'
10
10
 
11
- # for testing
11
+ # just prints the text, not yaml
12
12
  def print_text(res)
13
13
  res[:items].each {|x|
14
14
  puts '-' * 30
@@ -18,13 +18,19 @@ def print_text(res)
18
18
  }
19
19
  end
20
20
 
21
+ if ARGV.first == '-t' # text
22
+ puts "Printing text form"
23
+ @text = true
24
+ ARGV.shift
25
+ end
26
+
21
27
  result = if STDIN.tty?
22
28
  FeedYamlizer.process_url ARGV.first
23
29
  else
24
30
  FeedYamlizer.process_xml STDIN.read
25
31
  end
26
32
 
27
- if ENV['TEST']
33
+ if @text
28
34
  print_text result
29
35
  else
30
36
  puts result.to_yaml
@@ -43,7 +43,7 @@ class FeedYamlizer
43
43
  #output = IO.popen("tidy -q -n -wrap 120 -asxml -latin1", "r+") do |pipe|
44
44
  #output = IO.popen("tidy -q -wrap 120 -raw -asxml ", "r+") do |pipe| # if from latin1
45
45
 
46
- tidy = "tidy -q -wrap 120 -n -utf8 -asxml 2>/dev/null"
46
+ tidy = "tidy -q -wrap 120 -n -raw -utf8 -asxml 2>/dev/null"
47
47
  output = IO.popen(tidy, "r+") do |pipe|
48
48
  input = <<-END
49
49
  <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
@@ -24,8 +24,34 @@ class FeedYamlizer
24
24
 
25
25
  digits = @links.size.to_s.size
26
26
 
27
+ # wrap the text
27
28
  x = format(x)
28
29
 
30
+ # delete extra blank lines
31
+ x = x.split(/\n\n+/).join("\n\n")
32
+
33
+ # format the blockquotes
34
+ line_buffer = []
35
+ blockquote_buffer = []
36
+ inblock = false
37
+ x.split(/\n/).each do |line|
38
+ if line == '[blockquote]'
39
+ inblock = true
40
+ elsif line == '[/blockquote]'
41
+ inblock = false
42
+ block = blockquote_buffer.join("\n")
43
+ line_buffer << format(block, '-c')
44
+ blockquote_buffer = []
45
+ else
46
+ if inblock
47
+ blockquote_buffer << " " * 4 + line.to_s
48
+ else
49
+ line_buffer << line
50
+ end
51
+ end
52
+ end
53
+ x = line_buffer.join("\n")
54
+
29
55
  x + "\n\n" + @links.map {|x|
30
56
  gutter = x[:index].to_s.rjust(digits)
31
57
  if x[:content] && x[:content].strip.length > 0
@@ -48,7 +74,7 @@ class FeedYamlizer
48
74
  @in_link = true
49
75
  when 'img'
50
76
  text = attrs['alt'] || attrs['title']
51
- chunk = ['img', text].join(':')
77
+ chunk = "[img:#{text}] "
52
78
  @content[-1] << chunk
53
79
  when *HEADER_TAGS
54
80
  @content << "<#{UNIFORM_HEADER_TAG}>"
@@ -57,7 +83,7 @@ class FeedYamlizer
57
83
  # @content << ""
58
84
  @content[-1] += " "
59
85
  when 'blockquote'
60
- @content << "[blockquote]\n"
86
+ @content += ["[blockquote]", ""]
61
87
  when 'ul', 'ol', 'dl'
62
88
  @content << "<#{name}>"
63
89
  when 'li', 'dt', 'dd'
@@ -102,7 +128,6 @@ class FeedYamlizer
102
128
  return
103
129
  end
104
130
 
105
- # probably slow, but ok for now
106
131
  @content[-1] << text
107
132
  end
108
133
 
@@ -114,8 +139,8 @@ class FeedYamlizer
114
139
  @nested_tags.join('/')
115
140
  end
116
141
 
117
- def format(x)
118
- IO.popen("fmt", "r+") do |pipe|
142
+ def format(x, flags='')
143
+ IO.popen("fmt #{flags}", "r+") do |pipe|
119
144
  pipe.puts x
120
145
  pipe.close_write
121
146
  pipe.read
@@ -1,3 +1,3 @@
1
1
  class FeedYamlizer
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.6"
3
3
  end
data/lib/feed_yamlizer.rb CHANGED
@@ -14,6 +14,7 @@ require 'feed_yamlizer/textifier'
14
14
  require 'fileutils'
15
15
  require 'yaml'
16
16
  require 'htmlentities'
17
+ require 'string_ext'
17
18
 
18
19
  class FeedYamlizer
19
20
  include FileUtils::Verbose
@@ -70,16 +71,11 @@ class FeedYamlizer
70
71
 
71
72
  class << self
72
73
  def xml_encoding(rawxml)
73
- x = rawxml.scan(/encoding=["']([^"']+)["']/)
74
- encoding = x && x[0] && x[0][0]
74
+ encoding = rawxml[/encoding=["']([^"']+)["']/,1]
75
75
  STDERR.puts "xml encoding: #{encoding.inspect}"
76
76
  encoding
77
77
  end
78
78
 
79
- def to_utf(x, encoding = 'ISO-8859-1')
80
- x = Iconv.conv("UTF-8//TRANSLIT//IGNORE", encoding, x)
81
- end
82
-
83
79
  def check_for_tidy
84
80
  if `which tidy` == ''
85
81
  abort "Please install tidy"
@@ -87,9 +83,9 @@ class FeedYamlizer
87
83
  end
88
84
 
89
85
  # main method
90
- def run(feed_xml, encoding)
86
+ def run(feed_xml, encoding='UTF-8')
91
87
  check_for_tidy
92
- feed_xml = to_utf feed_xml, encoding
88
+ feed_xml = Iconv.conv("UTF-8//TRANSLIT//IGNORE", encoding, feed_xml)
93
89
  parsed_data = FeedYamlizer::FeedParser.new(feed_xml).result
94
90
  result = FeedYamlizer.new(parsed_data).result
95
91
  result
@@ -104,7 +100,7 @@ class FeedYamlizer
104
100
  charset = response.charset
105
101
  #STDERR.puts "charset: #{charset}"
106
102
  xml = response.read
107
- encoding = charset || xml_encoding(xml) || "ISO-8859-1"
103
+ encoding = charset || xml_encoding(xml) || "UTF-8"
108
104
  run xml, encoding
109
105
  end
110
106
  end
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 5
9
- version: 0.0.5
8
+ - 6
9
+ version: 0.0.6
10
10
  platform: ruby
11
11
  authors:
12
12
  - Daniel Choi
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-02-19 00:00:00 -05:00
17
+ date: 2011-02-20 00:00:00 -05:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency