cdamian-feedlib 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +0 -1
- data/VERSION +1 -1
- data/lib/feedlib.rb +2 -0
- data/lib/feedlib/parser.rb +0 -42
- data/lib/feedlib/parser/atom.rb +2 -2
- data/lib/feedlib/parser/rss.rb +5 -5
- metadata +1 -11
data/Rakefile
CHANGED
@@ -13,7 +13,6 @@ begin
|
|
13
13
|
gem.files = FileList['lib/**/*.rb', '[A-Z]*', 'test/**/*'].to_a
|
14
14
|
gem.add_dependency("hpricot", ">= 0.8.1")
|
15
15
|
gem.add_dependency("chardet", ">= 0.9.0")
|
16
|
-
gem.add_dependency("htmlentities", ">= 4.0.0")
|
17
16
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
18
17
|
end
|
19
18
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.1
|
1
|
+
0.0.2
|
data/lib/feedlib.rb
CHANGED
data/lib/feedlib/parser.rb
CHANGED
@@ -5,46 +5,4 @@ class FeedParser
|
|
5
5
|
@source_xml = source_xml
|
6
6
|
parse_feed
|
7
7
|
end
|
8
|
-
|
9
|
-
protected
|
10
|
-
def sanitize(html)
|
11
|
-
whitelist = %w(em i strong u)
|
12
|
-
attrs = {}
|
13
|
-
blacklist = %w(script)
|
14
|
-
|
15
|
-
whitelist += attrs.keys
|
16
|
-
|
17
|
-
html.gsub!('&lt;', '<')
|
18
|
-
html.gsub!('&gt;', '>')
|
19
|
-
html.gsub!('&amp;', '&')
|
20
|
-
html.gsub!('&apos;', "'")
|
21
|
-
html.gsub!('&quot;', '"')
|
22
|
-
html.gsub!('<![CDATA[<![CDATA[', '')
|
23
|
-
html.gsub!('<![CDATA[', '')
|
24
|
-
html.gsub!(']]>', '')
|
25
|
-
html.gsub!(']>', '')
|
26
|
-
page = Hpricot(html)
|
27
|
-
|
28
|
-
page.search("*").each do |e|
|
29
|
-
if e.elem?
|
30
|
-
tagname = e.name.downcase
|
31
|
-
if blacklist.include?(tagname)
|
32
|
-
e.swap("")
|
33
|
-
elsif !whitelist.include?(tagname)
|
34
|
-
e.parent.replace_child(e, e.children)
|
35
|
-
elsif attrs.has_key?(tagname)
|
36
|
-
e.attributes.delete_if { |key,val| !attrs[tagname].include?(key.downcase)}
|
37
|
-
else
|
38
|
-
e.attributes = {}
|
39
|
-
end
|
40
|
-
elsif e.comment?
|
41
|
-
# HTML comments can contain executable scripts, depending on the browser, so we'll
|
42
|
-
# be paranoid and just get rid of all of them
|
43
|
-
# e.g. <!--[if lt IE 7]><script type="text/javascript">h4x0r();</script><![endif]-->
|
44
|
-
e.swap('')
|
45
|
-
end
|
46
|
-
end
|
47
|
-
coder = HTMLEntities.new
|
48
|
-
coder.decode(page.to_s)
|
49
|
-
end
|
50
8
|
end
|
data/lib/feedlib/parser/atom.rb
CHANGED
@@ -16,8 +16,8 @@ class FeedAtomParser < FeedParser
|
|
16
16
|
new_entry = FeedEntryAtom.new
|
17
17
|
new_entry.link = (entry/:link).attr('href')
|
18
18
|
new_entry.author = (entry/:author/:name).inner_html
|
19
|
-
new_entry.title =
|
20
|
-
new_entry.content = (entry/:content).inner_html.blank? ?
|
19
|
+
new_entry.title = (entry/:title).inner_html
|
20
|
+
new_entry.content = (entry/:content).inner_html.blank? ? (entry/:content).inner_html : (entry/:summary).inner_html
|
21
21
|
new_entry.published_at = (entry/:published).inner_html
|
22
22
|
new_entry
|
23
23
|
end
|
data/lib/feedlib/parser/rss.rb
CHANGED
@@ -14,11 +14,11 @@ class FeedRssParser < FeedParser
|
|
14
14
|
protected
|
15
15
|
def parse_entry(entry)
|
16
16
|
new_entry = FeedEntryRss.new
|
17
|
-
new_entry.link = (
|
18
|
-
new_entry.author = (
|
19
|
-
new_entry.title =
|
20
|
-
new_entry.content =
|
21
|
-
new_entry.published_at = (
|
17
|
+
new_entry.link = (entry/:link).inner_html
|
18
|
+
new_entry.author = (entry/:author).inner_html
|
19
|
+
new_entry.title = (entry/:title).inner_html
|
20
|
+
new_entry.content = (entry/:description).inner_html
|
21
|
+
new_entry.published_at = (entry/:pubDate).inner_html
|
22
22
|
new_entry
|
23
23
|
end
|
24
24
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdamian-feedlib
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Damian Caruso
|
@@ -32,16 +32,6 @@ dependencies:
|
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: 0.9.0
|
34
34
|
version:
|
35
|
-
- !ruby/object:Gem::Dependency
|
36
|
-
name: htmlentities
|
37
|
-
type: :runtime
|
38
|
-
version_requirement:
|
39
|
-
version_requirements: !ruby/object:Gem::Requirement
|
40
|
-
requirements:
|
41
|
-
- - ">="
|
42
|
-
- !ruby/object:Gem::Version
|
43
|
-
version: 4.0.0
|
44
|
-
version:
|
45
35
|
description: Feed library for building and parsing Atom and RSS feeds
|
46
36
|
email: damian.caruso@gmail.com
|
47
37
|
executables: []
|