cdamian-feedlib 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -13,7 +13,6 @@ begin
13
13
  gem.files = FileList['lib/**/*.rb', '[A-Z]*', 'test/**/*'].to_a
14
14
  gem.add_dependency("hpricot", ">= 0.8.1")
15
15
  gem.add_dependency("chardet", ">= 0.9.0")
16
- gem.add_dependency("htmlentities", ">= 4.0.0")
17
16
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
18
17
  end
19
18
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.0.2
data/lib/feedlib.rb CHANGED
@@ -1,4 +1,6 @@
1
1
  require 'open-uri'
2
+ require 'iconv'
3
+ require 'rubygems'
2
4
  require 'hpricot'
3
5
  require 'UniversalDetector'
4
6
  require 'feedlib/errors'
@@ -5,46 +5,4 @@ class FeedParser
5
5
  @source_xml = source_xml
6
6
  parse_feed
7
7
  end
8
-
9
- protected
10
- def sanitize(html)
11
- whitelist = %w(em i strong u)
12
- attrs = {}
13
- blacklist = %w(script)
14
-
15
- whitelist += attrs.keys
16
-
17
- html.gsub!('&lt;', '<')
18
- html.gsub!('&gt;', '>')
19
- html.gsub!('&amp;', '&')
20
- html.gsub!('&#39;', "'")
21
- html.gsub!('&quot;', '"')
22
- html.gsub!('<![CDATA[<![CDATA[', '')
23
- html.gsub!('<![CDATA[', '')
24
- html.gsub!(']]>', '')
25
- html.gsub!(']>', '')
26
- page = Hpricot(html)
27
-
28
- page.search("*").each do |e|
29
- if e.elem?
30
- tagname = e.name.downcase
31
- if blacklist.include?(tagname)
32
- e.swap("")
33
- elsif !whitelist.include?(tagname)
34
- e.parent.replace_child(e, e.children)
35
- elsif attrs.has_key?(tagname)
36
- e.attributes.delete_if { |key,val| !attrs[tagname].include?(key.downcase)}
37
- else
38
- e.attributes = {}
39
- end
40
- elsif e.comment?
41
- # HTML comments can contain executable scripts, depending on the browser, so we'll
42
- # be paranoid and just get rid of all of them
43
- # e.g. <!--[if lt IE 7]><script type="text/javascript">h4x0r();</script><![endif]-->
44
- e.swap('')
45
- end
46
- end
47
- coder = HTMLEntities.new
48
- coder.decode(page.to_s)
49
- end
50
8
  end
@@ -16,8 +16,8 @@ class FeedAtomParser < FeedParser
16
16
  new_entry = FeedEntryAtom.new
17
17
  new_entry.link = (entry/:link).attr('href')
18
18
  new_entry.author = (entry/:author/:name).inner_html
19
- new_entry.title = sanitize((entry/:title).inner_html)
20
- new_entry.content = (entry/:content).inner_html.blank? ? sanitize((entry/:content).inner_html) : sanitize((entry/:summary).inner_html)
19
+ new_entry.title = (entry/:title).inner_html
20
+ new_entry.content = (entry/:content).inner_html.blank? ? (entry/:content).inner_html : (entry/:summary).inner_html
21
21
  new_entry.published_at = (entry/:published).inner_html
22
22
  new_entry
23
23
  end
@@ -14,11 +14,11 @@ class FeedRssParser < FeedParser
14
14
  protected
15
15
  def parse_entry(entry)
16
16
  new_entry = FeedEntryRss.new
17
- new_entry.link = (item/:link).inner_html
18
- new_entry.author = (item/:author).inner_html
19
- new_entry.title = sanitize((item/:title).inner_html)
20
- new_entry.content = sanitize((item/:description).inner_html)
21
- new_entry.published_at = (item/:pubDate).inner_html
17
+ new_entry.link = (entry/:link).inner_html
18
+ new_entry.author = (entry/:author).inner_html
19
+ new_entry.title = (entry/:title).inner_html
20
+ new_entry.content = (entry/:description).inner_html
21
+ new_entry.published_at = (entry/:pubDate).inner_html
22
22
  new_entry
23
23
  end
24
24
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cdamian-feedlib
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damian Caruso
@@ -32,16 +32,6 @@ dependencies:
32
32
  - !ruby/object:Gem::Version
33
33
  version: 0.9.0
34
34
  version:
35
- - !ruby/object:Gem::Dependency
36
- name: htmlentities
37
- type: :runtime
38
- version_requirement:
39
- version_requirements: !ruby/object:Gem::Requirement
40
- requirements:
41
- - - ">="
42
- - !ruby/object:Gem::Version
43
- version: 4.0.0
44
- version:
45
35
  description: Feed library for building and parsing Atom and RSS feeds
46
36
  email: damian.caruso@gmail.com
47
37
  executables: []