cdamian-feedlib 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -13,7 +13,6 @@ begin
13
13
  gem.files = FileList['lib/**/*.rb', '[A-Z]*', 'test/**/*'].to_a
14
14
  gem.add_dependency("hpricot", ">= 0.8.1")
15
15
  gem.add_dependency("chardet", ">= 0.9.0")
16
- gem.add_dependency("htmlentities", ">= 4.0.0")
17
16
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
18
17
  end
19
18
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.0.2
data/lib/feedlib.rb CHANGED
@@ -1,4 +1,6 @@
1
1
  require 'open-uri'
2
+ require 'iconv'
3
+ require 'rubygems'
2
4
  require 'hpricot'
3
5
  require 'UniversalDetector'
4
6
  require 'feedlib/errors'
@@ -5,46 +5,4 @@ class FeedParser
5
5
  @source_xml = source_xml
6
6
  parse_feed
7
7
  end
8
-
9
- protected
10
- def sanitize(html)
11
- whitelist = %w(em i strong u)
12
- attrs = {}
13
- blacklist = %w(script)
14
-
15
- whitelist += attrs.keys
16
-
17
- html.gsub!('&lt;', '<')
18
- html.gsub!('&gt;', '>')
19
- html.gsub!('&amp;', '&')
20
- html.gsub!('&#39;', "'")
21
- html.gsub!('&quot;', '"')
22
- html.gsub!('<![CDATA[<![CDATA[', '')
23
- html.gsub!('<![CDATA[', '')
24
- html.gsub!(']]>', '')
25
- html.gsub!(']>', '')
26
- page = Hpricot(html)
27
-
28
- page.search("*").each do |e|
29
- if e.elem?
30
- tagname = e.name.downcase
31
- if blacklist.include?(tagname)
32
- e.swap("")
33
- elsif !whitelist.include?(tagname)
34
- e.parent.replace_child(e, e.children)
35
- elsif attrs.has_key?(tagname)
36
- e.attributes.delete_if { |key,val| !attrs[tagname].include?(key.downcase)}
37
- else
38
- e.attributes = {}
39
- end
40
- elsif e.comment?
41
- # HTML comments can contain executable scripts, depending on the browser, so we'll
42
- # be paranoid and just get rid of all of them
43
- # e.g. <!--[if lt IE 7]><script type="text/javascript">h4x0r();</script><![endif]-->
44
- e.swap('')
45
- end
46
- end
47
- coder = HTMLEntities.new
48
- coder.decode(page.to_s)
49
- end
50
8
  end
@@ -16,8 +16,8 @@ class FeedAtomParser < FeedParser
16
16
  new_entry = FeedEntryAtom.new
17
17
  new_entry.link = (entry/:link).attr('href')
18
18
  new_entry.author = (entry/:author/:name).inner_html
19
- new_entry.title = sanitize((entry/:title).inner_html)
20
- new_entry.content = (entry/:content).inner_html.blank? ? sanitize((entry/:content).inner_html) : sanitize((entry/:summary).inner_html)
19
+ new_entry.title = (entry/:title).inner_html
20
+ new_entry.content = (entry/:content).inner_html.blank? ? (entry/:content).inner_html : (entry/:summary).inner_html
21
21
  new_entry.published_at = (entry/:published).inner_html
22
22
  new_entry
23
23
  end
@@ -14,11 +14,11 @@ class FeedRssParser < FeedParser
14
14
  protected
15
15
  def parse_entry(entry)
16
16
  new_entry = FeedEntryRss.new
17
- new_entry.link = (item/:link).inner_html
18
- new_entry.author = (item/:author).inner_html
19
- new_entry.title = sanitize((item/:title).inner_html)
20
- new_entry.content = sanitize((item/:description).inner_html)
21
- new_entry.published_at = (item/:pubDate).inner_html
17
+ new_entry.link = (entry/:link).inner_html
18
+ new_entry.author = (entry/:author).inner_html
19
+ new_entry.title = (entry/:title).inner_html
20
+ new_entry.content = (entry/:description).inner_html
21
+ new_entry.published_at = (entry/:pubDate).inner_html
22
22
  new_entry
23
23
  end
24
24
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdamian-feedlib
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damian Caruso
@@ -32,16 +32,6 @@ dependencies:
32
32
  - !ruby/object:Gem::Version
33
33
  version: 0.9.0
34
34
  version:
35
- - !ruby/object:Gem::Dependency
36
- name: htmlentities
37
- type: :runtime
38
- version_requirement:
39
- version_requirements: !ruby/object:Gem::Requirement
40
- requirements:
41
- - - ">="
42
- - !ruby/object:Gem::Version
43
- version: 4.0.0
44
- version:
45
35
  description: Feed library for building and parsing Atom and RSS feeds
46
36
  email: damian.caruso@gmail.com
47
37
  executables: []