cdamian-feedlib 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +0 -1
- data/VERSION +1 -1
- data/lib/feedlib.rb +2 -0
- data/lib/feedlib/parser.rb +0 -42
- data/lib/feedlib/parser/atom.rb +2 -2
- data/lib/feedlib/parser/rss.rb +5 -5
- metadata +1 -11
data/Rakefile
CHANGED
@@ -13,7 +13,6 @@ begin
|
|
13
13
|
gem.files = FileList['lib/**/*.rb', '[A-Z]*', 'test/**/*'].to_a
|
14
14
|
gem.add_dependency("hpricot", ">= 0.8.1")
|
15
15
|
gem.add_dependency("chardet", ">= 0.9.0")
|
16
|
-
gem.add_dependency("htmlentities", ">= 4.0.0")
|
17
16
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
18
17
|
end
|
19
18
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.1
|
1
|
+
0.0.2
|
data/lib/feedlib.rb
CHANGED
data/lib/feedlib/parser.rb
CHANGED
@@ -5,46 +5,4 @@ class FeedParser
|
|
5
5
|
@source_xml = source_xml
|
6
6
|
parse_feed
|
7
7
|
end
|
8
|
-
|
9
|
-
protected
|
10
|
-
def sanitize(html)
|
11
|
-
whitelist = %w(em i strong u)
|
12
|
-
attrs = {}
|
13
|
-
blacklist = %w(script)
|
14
|
-
|
15
|
-
whitelist += attrs.keys
|
16
|
-
|
17
|
-
html.gsub!('&lt;', '<')
|
18
|
-
html.gsub!('&gt;', '>')
|
19
|
-
html.gsub!('&amp;', '&')
|
20
|
-
html.gsub!('&#39;', "'")
|
21
|
-
html.gsub!('&quot;', '"')
|
22
|
-
html.gsub!('<![CDATA[<![CDATA[', '')
|
23
|
-
html.gsub!('<![CDATA[', '')
|
24
|
-
html.gsub!(']]>', '')
|
25
|
-
html.gsub!(']>', '')
|
26
|
-
page = Hpricot(html)
|
27
|
-
|
28
|
-
page.search("*").each do |e|
|
29
|
-
if e.elem?
|
30
|
-
tagname = e.name.downcase
|
31
|
-
if blacklist.include?(tagname)
|
32
|
-
e.swap("")
|
33
|
-
elsif !whitelist.include?(tagname)
|
34
|
-
e.parent.replace_child(e, e.children)
|
35
|
-
elsif attrs.has_key?(tagname)
|
36
|
-
e.attributes.delete_if { |key,val| !attrs[tagname].include?(key.downcase)}
|
37
|
-
else
|
38
|
-
e.attributes = {}
|
39
|
-
end
|
40
|
-
elsif e.comment?
|
41
|
-
# HTML comments can contain executable scripts, depending on the browser, so we'll
|
42
|
-
# be paranoid and just get rid of all of them
|
43
|
-
# e.g. <!--[if lt IE 7]><script type="text/javascript">h4x0r();</script><![endif]-->
|
44
|
-
e.swap('')
|
45
|
-
end
|
46
|
-
end
|
47
|
-
coder = HTMLEntities.new
|
48
|
-
coder.decode(page.to_s)
|
49
|
-
end
|
50
8
|
end
|
data/lib/feedlib/parser/atom.rb
CHANGED
@@ -16,8 +16,8 @@ class FeedAtomParser < FeedParser
|
|
16
16
|
new_entry = FeedEntryAtom.new
|
17
17
|
new_entry.link = (entry/:link).attr('href')
|
18
18
|
new_entry.author = (entry/:author/:name).inner_html
|
19
|
-
new_entry.title =
|
20
|
-
new_entry.content = (entry/:content).inner_html.blank? ?
|
19
|
+
new_entry.title = (entry/:title).inner_html
|
20
|
+
new_entry.content = (entry/:content).inner_html.blank? ? (entry/:content).inner_html : (entry/:summary).inner_html
|
21
21
|
new_entry.published_at = (entry/:published).inner_html
|
22
22
|
new_entry
|
23
23
|
end
|
data/lib/feedlib/parser/rss.rb
CHANGED
@@ -14,11 +14,11 @@ class FeedRssParser < FeedParser
|
|
14
14
|
protected
|
15
15
|
def parse_entry(entry)
|
16
16
|
new_entry = FeedEntryRss.new
|
17
|
-
new_entry.link = (
|
18
|
-
new_entry.author = (
|
19
|
-
new_entry.title =
|
20
|
-
new_entry.content =
|
21
|
-
new_entry.published_at = (
|
17
|
+
new_entry.link = (entry/:link).inner_html
|
18
|
+
new_entry.author = (entry/:author).inner_html
|
19
|
+
new_entry.title = (entry/:title).inner_html
|
20
|
+
new_entry.content = (entry/:description).inner_html
|
21
|
+
new_entry.published_at = (entry/:pubDate).inner_html
|
22
22
|
new_entry
|
23
23
|
end
|
24
24
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdamian-feedlib
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Damian Caruso
|
@@ -32,16 +32,6 @@ dependencies:
|
|
32
32
|
- !ruby/object:Gem::Version
|
33
33
|
version: 0.9.0
|
34
34
|
version:
|
35
|
-
- !ruby/object:Gem::Dependency
|
36
|
-
name: htmlentities
|
37
|
-
type: :runtime
|
38
|
-
version_requirement:
|
39
|
-
version_requirements: !ruby/object:Gem::Requirement
|
40
|
-
requirements:
|
41
|
-
- - ">="
|
42
|
-
- !ruby/object:Gem::Version
|
43
|
-
version: 4.0.0
|
44
|
-
version:
|
45
35
|
description: Feed library for building and parsing Atom and RSS feeds
|
46
36
|
email: damian.caruso@gmail.com
|
47
37
|
executables: []
|