gozap_rss 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,18 @@
1
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"

# Runtime dependencies of the gem.
gem "logger"
gem "mysql2", ">= 0.3.7"
gem "sanitize", ">= 2.0.3"

# Dependencies needed only to develop the gem.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "rspec", "~> 2.7.0"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.6.4"
  gem "simplecov", ">= 0"
end
data/Gemfile.lock ADDED
@@ -0,0 +1,40 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ diff-lcs (1.1.3)
5
+ git (1.2.5)
6
+ jeweler (1.6.4)
7
+ bundler (~> 1.0)
8
+ git (>= 1.2.5)
9
+ rake
10
+ logger (1.2.8)
11
+ multi_json (1.0.4)
12
+ mysql2 (0.3.11)
13
+ nokogiri (1.5.0)
14
+ rake (0.9.2.2)
15
+ rspec (2.7.0)
16
+ rspec-core (~> 2.7.0)
17
+ rspec-expectations (~> 2.7.0)
18
+ rspec-mocks (~> 2.7.0)
19
+ rspec-core (2.7.1)
20
+ rspec-expectations (2.7.0)
21
+ diff-lcs (~> 1.1.2)
22
+ rspec-mocks (2.7.0)
23
+ sanitize (2.0.3)
24
+ nokogiri (< 1.6, >= 1.4.4)
25
+ simplecov (0.5.4)
26
+ multi_json (~> 1.0.3)
27
+ simplecov-html (~> 0.5.3)
28
+ simplecov-html (0.5.3)
29
+
30
+ PLATFORMS
31
+ ruby
32
+
33
+ DEPENDENCIES
34
+ bundler (~> 1.0.0)
35
+ jeweler (~> 1.6.4)
36
+ logger
37
+ mysql2 (>= 0.3.7)
38
+ rspec (~> 2.7.0)
39
+ sanitize (>= 2.0.3)
40
+ simplecov
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2012 王明华
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,19 @@
1
+ = gozap_rss
2
+
3
+ Description goes here.
4
+
5
+ == Contributing to gozap_rss
6
+
7
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
8
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
9
+ * Fork the project
10
+ * Start a feature/bugfix branch
11
+ * Commit and push until you are happy with your contribution
12
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or if changing them is otherwise necessary, that is fine, but please isolate the change in its own commit so I can cherry-pick around it.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2012 王明华. See LICENSE.txt for
18
+ further details.
19
+
data/Rakefile ADDED
@@ -0,0 +1,54 @@
1
# encoding: utf-8

require 'rubygems'
require 'bundler'

# Make lib/ loadable so the gem version can be read before Bundler is set up.
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), 'lib'))
require 'gozap_rss/version'

# Activate the bundle; on failure print the reason and exit with Bundler's status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => bundler_error
  $stderr.puts bundler_error.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit bundler_error.status_code
end

require 'rake'
require 'jeweler'

# Gem packaging tasks.
Jeweler::Tasks.new do |gemspec|
  # gemspec is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gemspec.name        = "gozap_rss"
  gemspec.version     = GozapRss::VERSION
  gemspec.license     = "MIT"
  gemspec.homepage    = "http://github.com/wangmh/gozap_rss"
  gemspec.summary     = "gozap公司用来抓取rss的服务"
  gemspec.description = "抓取RSS服务的简单应用"
  gemspec.authors     = ["王明华"]
  gemspec.email       = "wangmh.bit@gmail.com"
  gemspec.files       = FileList['lib/**/*.rb', '[A-Z]*', 'spec/**/*'].to_a

  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

require 'rspec/core'
require 'rspec/core/rake_task'

# `rake spec` runs every *_spec.rb file under spec/.
RSpec::Core::RakeTask.new(:spec) do |t|
  t.pattern = FileList['spec/**/*_spec.rb']
end

# `rake rcov` runs the same specs with rcov enabled.
# NOTE(review): rcov is not listed in the Gemfile (simplecov is) - confirm it
# is installed before relying on this task.
RSpec::Core::RakeTask.new(:rcov) do |t|
  t.pattern = 'spec/**/*_spec.rb'
  t.rcov = true
end

task :default => :spec

require 'rdoc/task'

# API documentation is generated into rdoc/ from the README and lib/.
Rake::RDocTask.new do |doc|
  doc.rdoc_dir = 'rdoc'
  doc.title = "gozap_rss #{GozapRss::VERSION}"
  doc.rdoc_files.include('README*')
  doc.rdoc_files.include('lib/**/*.rb')
end
@@ -0,0 +1,116 @@
1
# encoding: utf-8

# Timeout::Error is named in a rescue clause below, so make sure it is loaded.
require 'timeout'

module GozapRss
  # Behaviour shared by feeds and feed items: logger access plus the common
  # read-only attributes.
  class ChoutiRssBase
    attr_reader :url, :description, :title, :pub_date
    attr_accessor :http_headers_option

    # Returns the logger for this class: the one assigned through +logger=+ on
    # this exact class (the assignment is stored per class and is not seen by
    # subclasses), otherwise GozapRss.logger, otherwise a new STDOUT logger.
    def self.logger
      @logger || GozapRss.logger || Logger.new(STDOUT)
    end

    # Assigns the logger for this class. A nil argument is ignored.
    def self.logger=(logger)
      @logger = logger if logger
    end

    # Logs +e+ at error level, followed by its backtrace (one frame per line)
    # when the exception carries one.
    def self.logger_exception(e)
      logger.error e
      frames = e.backtrace
      logger.error frames.join("\n") if frames
    end

    # Instance-level shortcut for the class logger.
    def logger
      self.class.logger
    end

    # Instance-level shortcut for the class-level exception logger.
    def logger_exception(e)
      self.class.logger_exception e
    end
  end

  # A feed downloaded from a URL and parsed into ChoutiRssItem objects.
  class ChoutiRss < ChoutiRssBase
    # Valid items of the feed, newest first; items without a date come last.
    attr_reader :rss_items

    # Downloads +uri+ and parses the response. Download and parse errors are
    # logged rather than raised.
    def initialize(uri)
      @http_headers_option = {"User-Agent"=>"Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.75 Safari/535.7"}
      @url = uri
      @rss_items = []
      parse_rss(get_feed_content(uri))
    end

    private

    # Fills title, description, pub_date and rss_items from the feed text.
    # Does nothing for nil or empty content; parse errors are logged.
    def parse_rss(content)
      return if content.nil? || content.empty?
      begin
        rss = RSS::Parser.parse(content, false)
        channel = rss.channel
        @title = channel.title.to_s.html_format
        @description = channel.description.to_s.html_format
        @pub_date = channel.pubDate
        @rss_items = []
        # ChoutiRssItem.new always returns an object, so invalid items have to
        # be filtered out explicitly with valid?.
        parsed = rss.items.map { |item| ChoutiRssItem.new(item) }
        @rss_items = parsed.select { |entry| entry.valid? }
        @rss_items = newest_first(@rss_items)
      rescue StandardError => e
        logger_exception e
      end
    end

    # Orders items by pub_date, newest first. Items without a date cannot be
    # compared, so they are kept (in their original order) after the dated ones.
    def newest_first(items)
      dated, undated = items.partition { |entry| entry.pub_date }
      dated.sort { |a, b| b.pub_date <=> a.pub_date } + undated
    end

    # Downloads the feed and returns its text, or "" when the download fails.
    # Some sites refuse RSS robots, so the request is sent with the browser-like
    # User-Agent stored in @http_headers_option.
    #
    # NOTE(review): +open+ is handed +uri+ unchecked; depending on how open-uri
    # and Kernel#open are set up it may also accept non-URL arguments, so only
    # pass trusted values - confirm against callers.
    def get_feed_content(uri)
      content = ""
      begin
        open(uri, @http_headers_option) { |stream| content = stream.read }
        # Anything Kconv does not recognise as UTF-8 is converted from GB2312.
        unless Kconv.isutf8(content)
          content = Iconv.iconv("UTF-8//IGNORE", "GB2312//IGNORE", content)[0]
        end
      rescue StandardError, Timeout::Error => e
        # Timeout::Error is listed explicitly because on Ruby 1.8 it descends
        # from Interrupt rather than StandardError.
        logger_exception e
      end
      content
    end
  end

  # One entry of a feed.
  class ChoutiRssItem < ChoutiRssBase
    # Readers tried, in order, to find the item's publication date.
    DATE_FIELDS = [:pubDate, :lastBuildDate].freeze

    # MD5 hex digest of the item's URL.
    attr_reader :url_md5

    # Copies title, date, description and link from the parsed feed +item+.
    # An item that is missing its title, description or url is reported
    # through the logger; use valid? to test for it.
    def initialize(item)
      @title = item.title.to_s.html_format
      @pub_date = first_available_date(item)
      @description = item.description.to_s.html_format
      @url = item.link.to_s.strip
      @url_md5 = Digest::MD5.hexdigest(@url)
      return if valid?
      logger.error "parser item error -- title=>#{@title}, pub_date=>#{@pub_date} description=>#{@description}, url=>#{@url}"
    end

    # True when url, description and title are all present and non-empty.
    def valid?
      [@url, @description, @title].none? { |field| field.nil? || field.empty? }
    end

    private

    # Kept for code that still calls the former private name.
    def validate
      valid?
    end

    # Returns the first non-nil value among DATE_FIELDS, skipping readers the
    # item does not provide, or nil when none is available.
    def first_available_date(item)
      DATE_FIELDS.each do |field|
        next unless item.respond_to?(field)
        value = item.send(field)
        return value if value
      end
      nil
    end
  end
end
@@ -0,0 +1,6 @@
1
# Core extension used by the feed classes to normalise text fields.
class String
  # Returns a copy of the string that has been passed through Sanitize.clean
  # and then had every whitespace character removed.
  def html_format
    Sanitize.clean(self).gsub(/[\s]+?/, "")
  end
end
@@ -0,0 +1,3 @@
1
module GozapRss
  # Version string of the gozap_rss gem.
  VERSION = '0.0.1'
end
data/lib/gozap_rss.rb ADDED
@@ -0,0 +1,52 @@
1
+ require 'rubygems'
2
+ require 'logger'
3
+ require 'json'
4
+ require "mysql2"
5
+ require 'kconv'
6
+ require 'iconv'
7
+ require 'rss/1.0'
8
+ require 'rss/2.0'
9
+ require 'open-uri'
10
+ require "digest/md5"
11
+ require "sanitize"
12
+
13
+
14
+
15
# Put this file's directory on the load path, unless it is already there in
# either its relative or its expanded form.
__DIR__ = File.dirname(__FILE__)
known_forms = [__DIR__, File.expand_path(__DIR__)]
$LOAD_PATH.unshift(__DIR__) if known_forms.none? { |dir| $LOAD_PATH.include?(dir) }
19
+
20
+ require "gozap_rss/gozap_ext"
21
+ require "gozap_rss/version.rb"
22
+ require "gozap_rss/chouti_rss"
23
+
24
+
25
module GozapRss
  class << self
    # Replaces the module-wide logger returned by +logger+.
    #
    # The logger is kept in an instance variable of the module rather than a
    # class variable, so it is not shared with (or clobbered by) classes that
    # include GozapRss.
    attr_writer :logger

    # Joins +path+ onto the gem's data directory as reported by
    # Gem.datadir("gozap_rss"). Returns "" when no data directory is reported.
    def data_dir(path)
      datadir = Gem.datadir("gozap_rss")
      datadir ? File.join(datadir, path) : ""
    end

    # Returns the module-wide logger, creating a STDOUT logger on first use.
    def logger
      @logger ||= Logger.new(STDOUT)
    end
  end
end
45
+
46
+
47
+
48
+
49
+
50
+
51
+
52
+
@@ -0,0 +1,7 @@
1
require File.expand_path(File.join(File.dirname(__FILE__), 'spec_helper'))

# Smoke test: the gem loads and exposes the expected version string.
describe "GozapRss" do
  it "hello" do
    GozapRss::VERSION.should == "0.0.1"
  end
end
@@ -0,0 +1,12 @@
1
# Make spec/ and lib/ loadable, with spec/ ahead of lib/ on the load path.
spec_dir = File.dirname(__FILE__)
lib_dir = File.join(spec_dir, '..', 'lib')
$LOAD_PATH.unshift(spec_dir, lib_dir)

require 'rspec'
require 'gozap_rss'

# Requires supporting files with custom matchers and macros, etc,
# in ./support/ and its subdirectories.
Dir["#{spec_dir}/support/**/*.rb"].each { |support_file| require support_file }

RSpec.configure do |config|
end
metadata ADDED
@@ -0,0 +1,144 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: gozap_rss
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.1
6
+ platform: ruby
7
+ authors:
8
+ - "\xE7\x8E\x8B\xE6\x98\x8E\xE5\x8D\x8E"
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2012-03-18 00:00:00 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: logger
17
+ requirement: &id001 !ruby/object:Gem::Requirement
18
+ none: false
19
+ requirements:
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: "0"
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: *id001
26
+ - !ruby/object:Gem::Dependency
27
+ name: mysql2
28
+ requirement: &id002 !ruby/object:Gem::Requirement
29
+ none: false
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.3.7
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: *id002
37
+ - !ruby/object:Gem::Dependency
38
+ name: sanitize
39
+ requirement: &id003 !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ">="
43
+ - !ruby/object:Gem::Version
44
+ version: 2.0.3
45
+ type: :runtime
46
+ prerelease: false
47
+ version_requirements: *id003
48
+ - !ruby/object:Gem::Dependency
49
+ name: rspec
50
+ requirement: &id004 !ruby/object:Gem::Requirement
51
+ none: false
52
+ requirements:
53
+ - - ~>
54
+ - !ruby/object:Gem::Version
55
+ version: 2.7.0
56
+ type: :development
57
+ prerelease: false
58
+ version_requirements: *id004
59
+ - !ruby/object:Gem::Dependency
60
+ name: bundler
61
+ requirement: &id005 !ruby/object:Gem::Requirement
62
+ none: false
63
+ requirements:
64
+ - - ~>
65
+ - !ruby/object:Gem::Version
66
+ version: 1.0.0
67
+ type: :development
68
+ prerelease: false
69
+ version_requirements: *id005
70
+ - !ruby/object:Gem::Dependency
71
+ name: jeweler
72
+ requirement: &id006 !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ~>
76
+ - !ruby/object:Gem::Version
77
+ version: 1.6.4
78
+ type: :development
79
+ prerelease: false
80
+ version_requirements: *id006
81
+ - !ruby/object:Gem::Dependency
82
+ name: simplecov
83
+ requirement: &id007 !ruby/object:Gem::Requirement
84
+ none: false
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: "0"
89
+ type: :development
90
+ prerelease: false
91
+ version_requirements: *id007
92
+ description: "\xE6\x8A\x93\xE5\x8F\x96RSS\xE6\x9C\x8D\xE5\x8A\xA1\xE7\x9A\x84\xE7\xAE\x80\xE5\x8D\x95\xE5\xBA\x94\xE7\x94\xA8"
93
+ email: wangmh.bit@gmail.com
94
+ executables: []
95
+
96
+ extensions: []
97
+
98
+ extra_rdoc_files:
99
+ - LICENSE.txt
100
+ - README.rdoc
101
+ files:
102
+ - Gemfile
103
+ - Gemfile.lock
104
+ - LICENSE.txt
105
+ - README.rdoc
106
+ - Rakefile
107
+ - lib/gozap_rss.rb
108
+ - lib/gozap_rss/chouti_rss.rb
109
+ - lib/gozap_rss/gozap_ext.rb
110
+ - lib/gozap_rss/version.rb
111
+ - spec/gozap_rss_spec.rb
112
+ - spec/spec_helper.rb
113
+ homepage: http://github.com/wangmh/gozap_rss
114
+ licenses:
115
+ - MIT
116
+ post_install_message:
117
+ rdoc_options: []
118
+
119
+ require_paths:
120
+ - lib
121
+ required_ruby_version: !ruby/object:Gem::Requirement
122
+ none: false
123
+ requirements:
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ hash: 419284605
127
+ segments:
128
+ - 0
129
+ version: "0"
130
+ required_rubygems_version: !ruby/object:Gem::Requirement
131
+ none: false
132
+ requirements:
133
+ - - ">="
134
+ - !ruby/object:Gem::Version
135
+ version: "0"
136
+ requirements: []
137
+
138
+ rubyforge_project:
139
+ rubygems_version: 1.8.12
140
+ signing_key:
141
+ specification_version: 3
142
+ summary: "gozap\xE5\x85\xAC\xE5\x8F\xB8\xE7\x94\xA8\xE6\x9D\xA5\xE6\x8A\x93\xE5\x8F\x96rss\xE7\x9A\x84\xE6\x9C\x8D\xE5\x8A\xA1"
143
+ test_files: []
144
+