dash-bees 0.20 → 0.21

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,11 @@
1
+ 2010-09-20 v0.21 Improvements and bug fixes for feed source
2
+
3
+ Truncates long contents.
4
+
5
+ Now accepting feed: URLs and long input box.
6
+
7
+ Handles a few more error cases.
8
+
1
9
  2010-09-20 v0.20 Added data source for Atom feeds
2
10
 
3
11
  2010-09-20 v0.19 Fixed sometimes missing identity for Github
data/lib/dash-fu/bee.rb CHANGED
@@ -10,7 +10,7 @@ module DashFu
10
10
  # The README covers it all.
11
11
  module Bee
12
12
 
13
- VERSION = "0.20"
13
+ VERSION = "0.21"
14
14
 
15
15
  class << self
16
16
  attr_accessor :logger
@@ -203,6 +203,32 @@ module DashFu
203
203
  end
204
204
  alias :h :escape_html
205
205
 
206
+
207
+ # Truncates HTML without unbalanced elements/entities.
208
+ def truncate_html(html, limit = 30, ellipsis = "...")
209
+ # Inspiration: http://pastie.textmate.org/145402
210
+ doc = Nokogiri::XML(html)
211
+ ellipsis_length = Nokogiri::XML(ellipsis).text.length
212
+ content_length = doc.text.length
213
+ return html if content_length <= limit
214
+ truncate = lambda do |node, max|
215
+ break node if node.text.length <= max
216
+ node.children.each do |child|
217
+ if max <= 0
218
+ child.remove
219
+ elsif child.text?
220
+ child.content = child.text[0,max]
221
+ max -= child.content.length
222
+ else
223
+ truncate[child, max]
224
+ end
225
+ end
226
+ node
227
+ end
228
+ truncate[doc, limit - ellipsis_length].to_html.strip + ellipsis
229
+ end
230
+
231
+
206
232
  # Returns API key for this source. May return string or hash, depending on
207
233
  # the API.
208
234
  def api_key
@@ -12,7 +12,8 @@ module DashFu::Bee
12
12
  end
13
13
 
14
14
  def validate(source)
15
- uri = URI.parse(source["url"]) rescue nil
15
+ unfeed = source["url"].gsub(/^feed:\/\//, "http://").gsub(/^feed:/, "")
16
+ uri = URI.parse(unfeed) rescue nil
16
17
  raise "Not a valid URL" unless uri && uri.absolute?
17
18
  raise "Only HTTP/S URLs supported" unless uri.scheme == "http" || uri.scheme == "https"
18
19
  begin
@@ -20,11 +21,11 @@ module DashFu::Bee
20
21
  code = io.status.first
21
22
  raise "Cannot read this feed, got status code #{code}" unless code == "200"
22
23
  feed = (Nokogiri::XML(io.read)>"feed").first
23
- source["title"] = get_text(feed>"title").strip
24
- source["source.name"] = source["title"] if source["title"].length > 2
25
- permalink = (feed>"link[rel=alternate]").first
26
- source["permalink"] = permalink["href"] if permalink
24
+ source["source.name"] = source["title"] = get_text(feed>"title").strip
25
+ alt = (feed>"link[rel=alternate]").first
26
+ source["permalink"] = alt["href"] if alt
27
27
  source["logo"] = (feed>"logo").text
28
+ source["url"] = uri.to_s
28
29
  end
29
30
  rescue
30
31
  raise "Cannot read this feed: is it down for you or just for us?"
@@ -43,7 +44,7 @@ module DashFu::Bee
43
44
  updated = Time.iso8601((feed>"updated").text) rescue Time.now
44
45
  if source["updated"].nil? || updated > source["updated"]
45
46
  (feed>"entry").each do |entry|
46
- published = Time.iso8601((entry>"published").text) rescue Time.now
47
+ published = Time.iso8601(entry.css("published,updated").first.text) rescue Time.now
47
48
  break unless source["updated"].nil? || updated >= published
48
49
  if author = (entry>"author").first
49
50
  person = { fullname: (author>"name").text, email: (author>"email").text,
@@ -90,14 +91,15 @@ module DashFu::Bee
90
91
  end
91
92
  end
92
93
 
93
- def get_html(elements)
94
+ def get_html(elements, length = 250)
94
95
  element = elements.first
95
96
  return unless element
96
- case element["type"]
97
- when "html" ; Sanitize.clean(element.text, Sanitize::Config::BASIC).strip
98
- when "xhtml" ; Sanitize.clean(element.to_xhtml, Sanitize::Config::BASIC).strip
99
- when "text"; element.text.strip
97
+ html = case element["type"]
98
+ when "html" ; element.text
99
+ when "xhtml" ; element.to_html
100
+ when "text"; element.text
100
101
  end
102
+ Sanitize.clean(truncate_html(html.to_s.strip, length), Sanitize::Config::BASIC.merge(output: :html)).strip
101
103
  end
102
104
 
103
105
  end
@@ -1,7 +1,7 @@
1
1
  en:
2
2
  description: "Gather activities from an Atom feed"
3
3
  inputs: |-
4
- <label>Feed URL <input type="text" name="source[url]" size="30"></label>
4
+ <label>Feed URL <input type="text" name="source[url]" size="50"></label>
5
5
  notes: |-
6
6
  <ul>
7
7
  </ul>
@@ -81,3 +81,59 @@
81
81
  </content>
82
82
  </entry>
83
83
  </feed>
84
+ - !ruby/struct:VCR::HTTPInteraction
85
+ request: !ruby/struct:VCR::Request
86
+ method: :get
87
+ uri: http://example.org:80/update.xml
88
+ response: !ruby/struct:VCR::Response
89
+ status: !ruby/struct:VCR::ResponseStatus
90
+ code: 200
91
+ message: OK
92
+ body: |-
93
+ <?xml version="1.0" encoding="utf-8"?>
94
+ <feed xmlns="http://www.w3.org/2005/Atom">
95
+ <title type="text">dive into mark</title>
96
+ <entry>
97
+ <title>Atom draft-07 snapshot</title>
98
+ <updated>2003-12-13T08:29:29-04:00</updated>
99
+ </entry>
100
+ </feed>
101
+ - !ruby/struct:VCR::HTTPInteraction
102
+ request: !ruby/struct:VCR::Request
103
+ method: :get
104
+ uri: http://example.org:80/html.xml
105
+ response: !ruby/struct:VCR::Response
106
+ status: !ruby/struct:VCR::ResponseStatus
107
+ code: 200
108
+ message: OK
109
+ body: |-
110
+ <?xml version="1.0" encoding="utf-8"?>
111
+ <feed xmlns="http://www.w3.org/2005/Atom">
112
+ <title type="text">dive into mark</title>
113
+ <entry>
114
+ <title>Atom draft-07 snapshot</title>
115
+ <content type="html">&lt;i&gt;HTML &lt;br&gt; content&lt;/i&gt;</content>
116
+ </entry>
117
+ </feed>
118
+ - !ruby/struct:VCR::HTTPInteraction
119
+ request: !ruby/struct:VCR::Request
120
+ method: :get
121
+ uri: http://example.org:80/truncated.xml
122
+ response: !ruby/struct:VCR::Response
123
+ status: !ruby/struct:VCR::ResponseStatus
124
+ code: 200
125
+ message: OK
126
+ body: |-
127
+ <?xml version="1.0" encoding="utf-8"?>
128
+ <feed xmlns="http://www.w3.org/2005/Atom">
129
+ <title type="text">dive into mark</title>
130
+ <entry>
131
+ <title>Atom draft-07 snapshot</title>
132
+ <content type="xhtml">
133
+ <p><em>Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated
134
+ Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated
135
+ Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated
136
+ </em></p>
137
+ </content>
138
+ </entry>
139
+ </feed>
data/test/feed_test.rb CHANGED
@@ -34,8 +34,19 @@ test DashFu::Bee::Feed do
34
34
  source.setup "url"=>"http://example.org/feed.xml"
35
35
  assert_equal "The Awesome Feed", source.name
36
36
  end
37
+
38
+ should "accept feed URL (short form)" do
39
+ source.setup "url"=>"feed://example.org/feed.xml"
40
+ assert_equal "dive into mark", source.name
41
+ end
42
+
43
+ should "accept feed URL (long form)" do
44
+ source.setup "url"=>"feed:http://example.org/feed.xml"
45
+ assert_equal "dive into mark", source.name
46
+ end
37
47
  end
38
48
 
49
+
39
50
  context "update" do
40
51
  setup { source.setup "url"=>"http://example.org/feed.xml" }
41
52
 
@@ -119,8 +130,52 @@ posted <a href=\"http://example.org/2005/04/02/atom\">Atom draft-07 snapshot</a>
119
130
  end
120
131
  end
121
132
 
133
+ context "with update but no published date" do
134
+ setup do
135
+ source.setup "url"=>"http://example.org/update.xml"
136
+ source.update
137
+ end
138
+ subject { source.activity }
139
+
140
+ should "capture published date" do
141
+ assert_equal Time.parse("2003-12-13T08:29:29-04:00"), subject.timestamp
142
+ end
143
+ end
144
+
145
+ context "with HTML content" do
146
+ setup do
147
+ source.setup "url"=>"http://example.org/html.xml"
148
+ source.update
149
+ end
150
+ subject { source.activity }
151
+
152
+ should "capture title and summary" do
153
+ assert_equal <<-HTML.strip, subject.html
154
+ posted <a href=\"\">Atom draft-07 snapshot</a>:
155
+ <blockquote><i>HTML <br> content</i></blockquote>
156
+ HTML
157
+ end
158
+ end
159
+
160
+ context "with truncated content" do
161
+ setup do
162
+ source.setup "url"=>"http://example.org/truncated.xml"
163
+ source.update
164
+ end
165
+ subject { source.activity }
166
+
167
+ should "capture title and summary" do
168
+ assert_equal <<-HTML.strip, subject.html
169
+ posted <a href=\"\">Atom draft-07 snapshot</a>:
170
+ <blockquote><p><em>Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated Truncated\n Truncated Truncated Truncated Truncated Truncated Truncat</em></p>
171
+ ...</blockquote>
172
+ HTML
173
+ end
174
+ end
175
+
122
176
  end
123
177
 
178
+
124
179
  context "meta" do
125
180
  setup { source.setup "url"=>"http://example.org/feed.xml" }
126
181
  subject { source.meta }
@@ -129,4 +184,5 @@ posted <a href=\"http://example.org/2005/04/02/atom\">Atom draft-07 snapshot</a>
129
184
  assert_contains subject, :title=>"Source", :text=>"dive into mark", :url=>"http://example.org/"
130
185
  end
131
186
  end
187
+
132
188
  end
data/test/test.log CHANGED
@@ -1008,3 +1008,46 @@ RubyGems: 500
1008
1008
  #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1009
1009
  #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1010
1010
  #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1011
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1012
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1013
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1014
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1015
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1016
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1017
+ #<NoMethodError: undefined method `[]' for nil:NilClass>
1018
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1019
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1020
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1021
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1022
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1023
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1024
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1025
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1026
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1027
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1028
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1029
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1030
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1031
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1032
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1033
+ #<NoMethodError: undefined method `children=' for #<Nokogiri::XML::Element:0x000001015ba0e8>>
1034
+ #<NoMethodError: undefined method `children=' for #<Nokogiri::XML::Element:0x000001012e51b0>>
1035
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1036
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1037
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1038
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1039
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1040
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1041
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1042
+ #<NoMethodError: undefined method `>' for nil:NilClass>
1043
+ #<NoMethodError: undefined method `>' for nil:NilClass>
1044
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1045
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1046
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1047
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1048
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1049
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1050
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1051
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1052
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
1053
+ #<NoMethodError: undefined method `namespaces' for nil:NilClass>
metadata CHANGED
@@ -4,8 +4,8 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 0
7
- - 20
8
- version: "0.20"
7
+ - 21
8
+ version: "0.21"
9
9
  platform: ruby
10
10
  authors:
11
11
  - Assaf Arkin
@@ -146,7 +146,7 @@ licenses: []
146
146
  post_install_message:
147
147
  rdoc_options:
148
148
  - --title
149
- - DashFu::Bee 0.20
149
+ - DashFu::Bee 0.21
150
150
  - --main
151
151
  - README.rdoc
152
152
  - --webcvs