feedtools 0.2.10 → 0.2.11

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,11 @@
1
+ == FeedTools 0.2.11
2
+ * ruby -w shouldn't produce nearly as many warnings for feed_tools.rb anymore
3
+ * you can now force the open method to only pull from the cache
4
+ * the global cache_only option has been removed
5
+ * now queries the cache after each redirection
6
+ * attribute dictionary stuff removed for being unnecessary and pythonic
7
+ * better file structure -- no more 5000 line files
8
+ * schema files are formatted better to keep them from looking weird in rdoc
1
9
  == FeedTools 0.2.10
2
10
  * http error messages now sport a list of redirections to simplify debugging
3
11
  * automatic table creation removed
@@ -1,10 +1,11 @@
1
- CREATE TABLE `feeds` (
2
- `id` int(10) unsigned NOT NULL auto_increment,
3
- `url` varchar(255) default NULL,
4
- `title` varchar(255) default NULL,
5
- `link` varchar(255) default NULL,
6
- `xml_data` longtext default NULL,
7
- `http_headers` text default NULL,
8
- `last_retrieved` datetime default NULL,
9
- PRIMARY KEY (`id`)
10
- )
1
+ -- Example MySQL schema
2
+ CREATE TABLE `feeds` (
3
+ `id` int(10) unsigned NOT NULL auto_increment,
4
+ `url` varchar(255) default NULL,
5
+ `title` varchar(255) default NULL,
6
+ `link` varchar(255) default NULL,
7
+ `xml_data` longtext default NULL,
8
+ `http_headers` text default NULL,
9
+ `last_retrieved` datetime default NULL,
10
+ PRIMARY KEY (`id`)
11
+ )
@@ -1,9 +1,10 @@
1
- CREATE TABLE feeds (
2
- id SERIAL PRIMARY KEY NOT NULL,
3
- url varchar(255) default NULL,
4
- title varchar(255) default NULL,
5
- link varchar(255) default NULL,
6
- xml_data text default NULL,
7
- http_headers text default NULL,
8
- last_retrieved timestamp default NULL
9
- );
1
+ -- Example PostgreSQL schema
2
+ CREATE TABLE feeds (
3
+ id SERIAL PRIMARY KEY NOT NULL,
4
+ url varchar(255) default NULL,
5
+ title varchar(255) default NULL,
6
+ link varchar(255) default NULL,
7
+ xml_data text default NULL,
8
+ http_headers text default NULL,
9
+ last_retrieved timestamp default NULL
10
+ );
@@ -1,9 +1,10 @@
1
- CREATE TABLE 'feeds' (
2
- 'id' INTEGER PRIMARY KEY NOT NULL,
3
- 'url' VARCHAR(255) DEFAULT NULL,
4
- 'title' VARCHAR(255) DEFAULT NULL,
5
- 'link' VARCHAR(255) DEFAULT NULL,
6
- 'xml_data' TEXT DEFAULT NULL,
7
- 'http_headers' TEXT DEFAULT NULL,
8
- 'last_retrieved' DATETIME DEFAULT NULL
9
- );
1
+ -- Example Sqlite schema
2
+ CREATE TABLE feeds (
3
+ id INTEGER PRIMARY KEY NOT NULL,
4
+ url VARCHAR(255) DEFAULT NULL,
5
+ title VARCHAR(255) DEFAULT NULL,
6
+ link VARCHAR(255) DEFAULT NULL,
7
+ xml_data TEXT DEFAULT NULL,
8
+ http_headers TEXT DEFAULT NULL,
9
+ last_retrieved DATETIME DEFAULT NULL
10
+ );
@@ -32,7 +32,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
32
32
  ENV['RAILS_ENV'] ||
33
33
  'production' # :nodoc:
34
34
 
35
- FEED_TOOLS_VERSION = "0.2.10"
35
+ FEED_TOOLS_VERSION = "0.2.11"
36
36
 
37
37
  $:.unshift(File.dirname(__FILE__))
38
38
  $:.unshift(File.dirname(__FILE__) + "/feed_tools/vendor")
@@ -77,7 +77,9 @@ require 'yaml'
77
77
  require_gem('activerecord', '>= 1.10.1')
78
78
  require_gem('uuidtools', '>= 0.1.2')
79
79
 
80
- require 'database_feed_cache'
80
+ require 'feed_tools/feed'
81
+ require 'feed_tools/feed_item'
82
+ require 'feed_tools/database_feed_cache'
81
83
 
82
84
  #= feed_tools.rb
83
85
  #
@@ -96,99 +98,17 @@ require 'database_feed_cache'
96
98
  # => "43,37,28,23,11,3,1"
97
99
  module FeedTools
98
100
 
99
- # Error raised when a feed cannot be retrieved
100
- class FeedAccessError < StandardError
101
- end
102
-
103
- # Quick method of enabling small classes to have their attributes
104
- # accessible as a dictionary. These methods should not be used whenever
105
- # performance is going to be an issue. They exist almost entirely for the
106
- # purposes of aesthetics and/or debugging.
107
- module AttributeDictionary
108
- # Access the attributes as a dictionary.
109
- def [](key)
110
- return nil unless self.keys.include? key
111
- return self.send(key)
112
- end
113
-
114
- # Access the attributes as a dictionary.
115
- def []=(key, value)
116
- pseudo_key = key
117
- pseudo_key = key[0..-2] if key[-1..-1] == "?"
118
- return nil unless self.method(pseudo_key + "=").arity == 1
119
- local_keys = self.keys
120
- unless local_keys.include?(key) || local_keys.include?(pseudo_key)
121
- return nil
122
- end
123
- return self.send(pseudo_key + "=", value)
124
- end
125
-
126
- # Access the attributes as a dictionary.
127
- def keys
128
- key_methods = []
129
- for key in self.methods
130
- # Quick-n-dirty hack to speed things up and keep the list clean
131
- if self.method(key).arity == 0 && key[-1..-1] != "=" &&
132
- key[-1..-1] != "!" && key[0..1] != "__" &&
133
- key[0..2] != "to_" && key[-5..-1] != "_node" &&
134
- key != "cache_object" && key != "save" && key != "xml" &&
135
- key != "xml_data" && key != "expired?" && key != "live?" &&
136
- key != "feed"
137
- superfluous_ancestors = self.class.ancestors
138
- superfluous_ancestors = superfluous_ancestors[1..-1]
139
- superfluous = false
140
- for ancestor in superfluous_ancestors
141
- if ancestor.instance_methods.include? key
142
- superfluous = true
143
- break
144
- end
145
- end
146
- next if superfluous
147
- key_methods << key
148
- end
149
- end
150
- return key_methods.sort
151
- end
152
-
153
- # Access the attributes as a dictionary.
154
- # Please note that this method may cause a nearly complete parse of a
155
- # feed. This will be very slow.
156
- def values
157
- return self.keys.map { |key| self[key] }
158
- end
159
-
160
- # Access the attributes as a dictionary.
161
- # Please note that this method may cause a complete parse of a feed.
162
- # This will be very slow.
163
- def to_hash
164
- attribute_hash = {}
165
- for key in keys
166
- value = self[key]
167
- if value.respond_to? :to_hash
168
- value = value.to_hash
169
- end
170
- if value.respond_to? :to_ary
171
- new_value = []
172
- for item in value.to_ary
173
- if item.respond_to? :to_hash
174
- new_value << item.to_hash
175
- else
176
- new_value << item
177
- end
178
- end
179
- value = new_value
180
- end
181
- attribute_hash[key] = value
182
- end
183
- return attribute_hash
184
- end
185
- end
186
-
101
+ @force_tidy_enabled = true
102
+ @tidy_enabled = false
187
103
  @feed_cache = DatabaseFeedCache
188
104
  @user_agent = "FeedTools/#{FEED_TOOLS_VERSION} " +
189
105
  "+http://www.sporkmonger.com/projects/feedtools/"
190
106
  @no_content_string = "[no description]"
191
107
 
108
+ # Error raised when a feed cannot be retrieved
109
+ class FeedAccessError < StandardError
110
+ end
111
+
192
112
  # Returns the current caching mechanism.
193
113
  def FeedTools.feed_cache
194
114
  return @feed_cache
@@ -222,23 +142,7 @@ module FeedTools
222
142
  # ==================================================================
223
143
  @feed_cache = new_feed_cache
224
144
  end
225
-
226
- # Returns true if FeedTools should only retrieve from the cache and avoid
227
- # pulling feeds from their remote location.
228
- def FeedTools.cache_only?
229
- @cache_only = false if @cache_only.nil?
230
- return @cache_only
231
- end
232
-
233
- # Sets whether or not FeedTools should retrieve feeds from remote locations
234
- # or if it should rely on the cache only.
235
- def FeedTools.cache_only=(new_cache_only)
236
- if new_cache_only != true && new_cache_only != false
237
- raise ArgumentError, "Must be either true or false."
238
- end
239
- @cache_only = new_cache_only
240
- end
241
-
145
+
242
146
  # Returns true if FeedTools.feed_cache is not nil and a connection with
243
147
  # the cache has been successfully established. Also returns false if an
244
148
  # error is raised while trying to determine the status of the cache.
@@ -649,3651 +553,6 @@ module FeedTools
649
553
  end
650
554
  return merged_feed
651
555
  end
652
-
653
- class Feed
654
- include REXML # :nodoc:
655
- include AttributeDictionary
656
-
657
- # Represents a feed/feed item's category
658
- class Category
659
- include AttributeDictionary
660
-
661
- # The category term value
662
- attr_accessor :term
663
- # The categorization scheme
664
- attr_accessor :scheme
665
- # A human-readable description of the category
666
- attr_accessor :label
667
-
668
- alias_method :value, :term
669
- alias_method :category, :term
670
- alias_method :domain, :scheme
671
- end
672
-
673
- # Represents a feed/feed item's author
674
- class Author
675
- include AttributeDictionary
676
-
677
- # The author's real name
678
- attr_accessor :name
679
- # The author's email address
680
- attr_accessor :email
681
- # The url of the author's homepage
682
- attr_accessor :url
683
- # The raw value of the author tag if present
684
- attr_accessor :raw
685
- end
686
-
687
- # Represents a feed's image
688
- class Image
689
- include AttributeDictionary
690
-
691
- # The image's title
692
- attr_accessor :title
693
- # The image's description
694
- attr_accessor :description
695
- # The image's url
696
- attr_accessor :url
697
- # The url to link the image to
698
- attr_accessor :link
699
- # The width of the image
700
- attr_accessor :width
701
- # The height of the image
702
- attr_accessor :height
703
- # The style of the image
704
- # Possible values are "icon", "image", or "image-wide"
705
- attr_accessor :style
706
- end
707
-
708
- # Represents a feed's text input element.
709
- # Be aware that this will be ignored for feed generation. It's a
710
- # pointless element that aggregators usually ignore and it doesn't have an
711
- # equivalent in all feed types.
712
- class TextInput
713
- include AttributeDictionary
714
-
715
- # The label of the Submit button in the text input area.
716
- attr_accessor :title
717
- # The description explains the text input area.
718
- attr_accessor :description
719
- # The URL of the CGI script that processes text input requests.
720
- attr_accessor :link
721
- # The name of the text object in the text input area.
722
- attr_accessor :name
723
- end
724
-
725
- # Represents a feed's cloud.
726
- # Be aware that this will be ignored for feed generation.
727
- class Cloud
728
- include AttributeDictionary
729
-
730
- # The domain of the cloud.
731
- attr_accessor :domain
732
- # The path for the cloud.
733
- attr_accessor :path
734
- # The port the cloud is listening on.
735
- attr_accessor :port
736
- # The web services protocol the cloud uses.
737
- # Possible values are either "xml-rpc" or "soap".
738
- attr_accessor :protocol
739
- # The procedure to use to request notification.
740
- attr_accessor :register_procedure
741
- end
742
-
743
- # Represents a simple hyperlink
744
- class Link
745
- include AttributeDictionary
746
-
747
- # The url that is being linked to
748
- attr_accessor :url
749
- # The content of the hyperlink
750
- attr_accessor :value
751
-
752
- alias_method :href, :url
753
- end
754
-
755
- # Loads the feed specified by the url, pulling the data from the cache if it hasn't expired.
756
- def Feed.open(url)
757
- # clean up the url
758
- url = FeedTools.normalize_url(url)
759
-
760
- # create and load the new feed
761
- feed = Feed.new
762
- feed.url = url
763
- feed.update!
764
- return feed
765
- end
766
-
767
- # Loads the feed from the remote url if the feed has expired from the cache or cannot be
768
- # retrieved from the cache for some reason.
769
- def update!
770
- if self.http_headers.nil? && !(self.cache_object.nil?) &&
771
- !(self.cache_object.http_headers.nil?)
772
- @http_headers = YAML.load(self.cache_object.http_headers)
773
- @http_headers = {} unless @http_headers.kind_of? Hash
774
- end
775
- if FeedTools.cache_only? || self.expired? == false
776
- @live = false
777
- else
778
- load_remote_feed!
779
- end
780
- end
781
-
782
- # Attempts to load the feed from the remote location. Requires the url
783
- # field to be set. If an etag or the last_modified date has been set,
784
- # attempts to use them to prevent unnecessary reloading of identical
785
- # content.
786
- def load_remote_feed!
787
- @live = true
788
- if self.http_headers.nil? && !(self.cache_object.nil?) &&
789
- !(self.cache_object.http_headers.nil?)
790
- @http_headers = YAML.load(self.cache_object.http_headers)
791
- end
792
-
793
- if (self.url =~ /^feed:/) == 0
794
- # Woah, Nelly, how'd that happen? You should've already been
795
- # corrected. So let's fix that url. And please,
796
- # just use less crappy browsers instead of badly defined
797
- # pseudo-protocol hacks.
798
- self.url = FeedTools.normalize_url(self.url)
799
- end
800
-
801
- # Find out what method we're going to be using to obtain this feed.
802
- uri = URI.parse(self.url)
803
- retrieval_method = "http"
804
- case uri.scheme
805
- when "http"
806
- retrieval_method = "http"
807
- when "ftp"
808
- retrieval_method = "ftp"
809
- when "file"
810
- retrieval_method = "file"
811
- when nil
812
- raise FeedAccessError,
813
- "No protocol was specified in the url."
814
- else
815
- raise FeedAccessError,
816
- "Cannot retrieve feed using unrecognized protocol: " + uri.scheme
817
- end
818
-
819
- # No need for http headers unless we're actually doing http
820
- if retrieval_method == "http"
821
- # Set up the appropriate http headers
822
- headers = {}
823
- unless self.http_headers.nil?
824
- headers["If-None-Match"] =
825
- self.http_headers['etag'] unless self.http_headers['etag'].nil?
826
- headers["If-Modified-Since"] =
827
- self.http_headers['last-modified'] unless
828
- self.http_headers['last-modified'].nil?
829
- end
830
- headers["User-Agent"] =
831
- FeedTools.user_agent unless FeedTools.user_agent.nil?
832
-
833
- # The http feed access method
834
- http_fetch = lambda do |feed_url, http_headers, redirect_limit,
835
- response_chain, no_headers|
836
- raise FeedAccessError, 'Redirect too deep' if redirect_limit == 0
837
- feed_uri = nil
838
- begin
839
- feed_uri = URI.parse(feed_url)
840
- rescue URI::InvalidURIError
841
- # Uh, maybe try to fix it?
842
- feed_uri = URI.parse(FeedTools.normalize_url(feed_url))
843
- end
844
-
845
- # Borrowed from open-uri:
846
- # According to RFC2616 14.23, Host: request-header field should be
847
- # set to an origin server.
848
- # But net/http wrongly set a proxy server if an absolute URI is
849
- # specified as a request URI.
850
- # So override it here explicitly.
851
- http_headers['Host'] = feed_uri.host
852
- http_headers['Host'] += ":#{feed_uri.port}" if feed_uri.port
853
-
854
- Net::HTTP.start(feed_uri.host, (feed_uri.port or 80)) do |http|
855
- final_uri = feed_uri.path
856
- final_uri += ('?' + feed_uri.query) if feed_uri.query
857
- http_headers = {} if no_headers
858
- response = http.request_get(final_uri, http_headers)
859
-
860
- case response
861
- when Net::HTTPSuccess
862
- # We've reached the final destination, process all previous
863
- # redirections, and see if we need to update the url.
864
- for redirected_response in response_chain
865
- if redirected_response.last.code.to_i == 301
866
- # Reset the cache object or we may get duplicate entries
867
- self.cache_object = nil
868
- self.url = redirected_response.last['location']
869
- else
870
- # Jump out as soon as we hit anything that isn't a
871
- # permanently moved redirection.
872
- break
873
- end
874
- end
875
- response
876
- when Net::HTTPRedirection
877
- if response.code.to_i == 304
878
- response.error!
879
- else
880
- if response['location'].nil?
881
- raise FeedAccessError,
882
- "No location to redirect to supplied: " + response.code
883
- end
884
- response_chain << [feed_url, response]
885
- new_location = response['location']
886
- if response_chain.assoc(new_location) != nil
887
- raise FeedAccessError, "Redirection loop detected."
888
- end
889
- # TODO: deal with stupid people using relative urls
890
- # in Location header
891
- # =================================================
892
- http_fetch.call(new_location, http_headers,
893
- redirect_limit - 1, response_chain, no_headers)
894
- end
895
- else
896
- class << response
897
- def response_chain
898
- return @response_chain
899
- end
900
- end
901
- response.instance_variable_set("@response_chain",
902
- response_chain)
903
- response.error!
904
- end
905
- end
906
- end
907
-
908
- begin
909
- begin
910
- @http_response = http_fetch.call(self.url, headers, 10, [], false)
911
- rescue => error
912
- if error.respond_to?(:response)
913
- # You might not believe this, but...
914
- #
915
- # Under certain circumstances, web servers will try to block
916
- # based on the User-Agent header. This is *retarded*. But
917
- # we won't let their stupid error stop us!
918
- #
919
- # This is, of course, a quick-n-dirty hack. But at least
920
- # we get to blame other people's bad software and/or bad
921
- # configuration files.
922
- if error.response.code.to_i == 404 &&
923
- FeedTools.user_agent != nil
924
- @http_response = http_fetch.call(self.url, {}, 10, [], true)
925
- if @http_response != nil && @http_response.code.to_i == 200
926
- warn("The server appears to be blocking based on the " +
927
- "User-Agent header. This is stupid, and you should " +
928
- "inform the webmaster of this.")
929
- end
930
- else
931
- raise error
932
- end
933
- else
934
- raise error
935
- end
936
- end
937
- @http_headers = {}
938
- self.http_response.each_header do |header|
939
- self.http_headers[header.first.downcase] = header.last
940
- end
941
- self.last_retrieved = Time.now
942
- self.xml_data = self.http_response.body
943
- rescue FeedAccessError
944
- @live = false
945
- if self.xml_data.nil?
946
- raise
947
- end
948
- rescue Timeout::Error
949
- # if we time out, do nothing, it should fall back to the xml_data
950
- # stored in the cache.
951
- @live = false
952
- if self.xml_data.nil?
953
- raise
954
- end
955
- rescue Errno::ECONNRESET
956
- # if the connection gets reset by peer, oh well, fall back to the
957
- # xml_data stored in the cache
958
- @live = false
959
- if self.xml_data.nil?
960
- raise
961
- end
962
- rescue => error
963
- # heck, if anything at all bad happens, fall back to the xml_data
964
- # stored in the cache.
965
-
966
- # If we can, get the HTTPResponse...
967
- @http_response = nil
968
- if error.respond_to?(:each_header)
969
- @http_response = error
970
- end
971
- if error.respond_to?(:response) &&
972
- error.response.respond_to?(:each_header)
973
- @http_response = error.response
974
- end
975
- if @http_response != nil
976
- @http_headers = {}
977
- self.http_response.each_header do |header|
978
- self.http_headers[header.first] = header.last
979
- end
980
- if self.http_response.code.to_i == 304
981
- self.last_retrieved = Time.now
982
- end
983
- end
984
- @live = false
985
- if self.xml_data.nil?
986
- if error.respond_to?(:response) &&
987
- error.response.respond_to?(:response_chain)
988
- redirects = error.response.response_chain.map do |pair|
989
- pair.first
990
- end
991
- error.message << (" - Redirects: " + redirects.inspect)
992
- end
993
- raise error
994
- end
995
- end
996
- elsif retrieval_method == "https"
997
- # Not supported... yet
998
- elsif retrieval_method == "ftp"
999
- # Not supported... yet
1000
- # Technically, CDF feeds are supposed to be able to be accessed directly
1001
- # from an ftp server. This is silly, but we'll humor Microsoft.
1002
- #
1003
- # Eventually.
1004
- elsif retrieval_method == "file"
1005
- # Now that we've gone to all that trouble to ensure the url begins
1006
- # with 'file://', strip the 'file://' off the front of the url.
1007
- file_name = self.url.gsub(/^file:\/\//, "")
1008
- begin
1009
- open(file_name) do |file|
1010
- @http_response = nil
1011
- @http_headers = {}
1012
- self.last_retrieved = Time.now
1013
- self.xml_data = file.read
1014
- end
1015
- rescue
1016
- @live = false
1017
- # In this case, pulling from the cache is probably not going
1018
- # to help at all, and the user should probably be immediately
1019
- # apprised of the problem. Raise the exception.
1020
- raise
1021
- end
1022
- end
1023
- unless self.cache_object.nil?
1024
- begin
1025
- self.save
1026
- rescue
1027
- end
1028
- end
1029
- end
1030
-
1031
- # Returns the relevant information from an http request.
1032
- def http_response
1033
- return @http_response
1034
- end
1035
-
1036
- # Returns a hash of the http headers from the response.
1037
- def http_headers
1038
- return @http_headers
1039
- end
1040
-
1041
- # Returns the feed's raw xml data.
1042
- def xml_data
1043
- if @xml_data.nil?
1044
- unless self.cache_object.nil?
1045
- @xml_data = self.cache_object.xml_data
1046
- end
1047
- end
1048
- return @xml_data
1049
- end
1050
-
1051
- # Sets the feed's xml data.
1052
- def xml_data=(new_xml_data)
1053
- @xml_data = new_xml_data
1054
- unless self.cache_object.nil?
1055
- self.cache_object.xml_data = new_xml_data
1056
- end
1057
- end
1058
-
1059
- # Returns a REXML Document of the xml_data
1060
- def xml
1061
- if @xml_doc.nil?
1062
- begin
1063
- # TODO: :ignore_whitespace_nodes => :all
1064
- # Add that?
1065
- # ======================================
1066
- @xml_doc = Document.new(xml_data)
1067
- rescue
1068
- # Something failed, attempt to repair the xml with htree.
1069
- @xml_doc = HTree.parse(xml_data).to_rexml
1070
- end
1071
- end
1072
- return @xml_doc
1073
- end
1074
-
1075
- # Returns the first node within the channel_node that matches the xpath query.
1076
- def find_node(xpath)
1077
- return XPath.first(channel_node, xpath)
1078
- end
1079
-
1080
- # Returns all nodes within the channel_node that match the xpath query.
1081
- def find_all_nodes(xpath)
1082
- return XPath.match(channel_node, xpath)
1083
- end
1084
-
1085
- # Returns the root node of the feed.
1086
- def root_node
1087
- if @root_node.nil?
1088
- # TODO: Fix this so that added content at the end of the file doesn't
1089
- # break this stuff.
1090
- # E.g.: http://smogzer.tripod.com/smog.rdf
1091
- # ===================================================================
1092
- @root_node = xml.root
1093
- end
1094
- return @root_node
1095
- end
1096
-
1097
- # Returns the channel node of the feed.
1098
- def channel_node
1099
- if @channel_node.nil? && root_node != nil
1100
- @channel_node = XPath.first(root_node, "channel")
1101
- if @channel_node == nil
1102
- @channel_node = XPath.first(root_node, "CHANNEL")
1103
- end
1104
- if @channel_node == nil
1105
- @channel_node = XPath.first(root_node, "feedinfo")
1106
- end
1107
- if @channel_node == nil
1108
- @channel_node = root_node
1109
- end
1110
- end
1111
- return @channel_node
1112
- end
1113
-
1114
- # The cache object that handles the feed persistence.
1115
- def cache_object
1116
- unless FeedTools.feed_cache.nil?
1117
- if @cache_object.nil?
1118
- begin
1119
- if @id != nil
1120
- @cache_object = FeedTools.feed_cache.find_by_id(@id)
1121
- elsif @url != nil
1122
- @cache_object = FeedTools.feed_cache.find_by_url(@url)
1123
- end
1124
- if @cache_object.nil?
1125
- @cache_object = FeedTools.feed_cache.new
1126
- end
1127
- rescue
1128
- end
1129
- end
1130
- end
1131
- return @cache_object
1132
- end
1133
-
1134
- # Sets the cache object for this feed.
1135
- #
1136
- # This can be any object, but it must accept the following messages:
1137
- # url
1138
- # url=
1139
- # title
1140
- # title=
1141
- # link
1142
- # link=
1143
- # xml_data
1144
- # xml_data=
1145
- # etag
1146
- # etag=
1147
- # last_modified
1148
- # last_modified=
1149
- # save
1150
- def cache_object=(new_cache_object)
1151
- @cache_object = new_cache_object
1152
- end
1153
-
1154
- # Returns the type of feed
1155
- # Possible values:
1156
- # "rss", "atom", "cdf", "!okay/news"
1157
- def feed_type
1158
- if @feed_type.nil?
1159
- case self.root_node.name.downcase
1160
- when "feed"
1161
- @feed_type = "atom"
1162
- when "rdf:rdf"
1163
- @feed_type = "rss"
1164
- when "rdf"
1165
- @feed_type = "rss"
1166
- when "rss"
1167
- @feed_type = "rss"
1168
- when "channel"
1169
- @feed_type = "cdf"
1170
- end
1171
- end
1172
- return @feed_type
1173
- end
1174
-
1175
- # Sets the default feed type
1176
- def feed_type=(new_feed_type)
1177
- @feed_type = new_feed_type
1178
- end
1179
-
1180
- # Returns the version number of the feed type.
1181
- # Intentionally does not differentiate between the Netscape and Userland
1182
- # versions of RSS 0.91.
1183
- def feed_version
1184
- if @feed_version.nil?
1185
- version = nil
1186
- begin
1187
- version = XPath.first(root_node, "@version").to_s.strip.to_f
1188
- rescue
1189
- end
1190
- version = nil if version == 0.0
1191
- default_namespace = XPath.first(root_node, "@xmlns").to_s.strip
1192
- case self.feed_type
1193
- when "atom"
1194
- if default_namespace == "http://www.w3.org/2005/Atom"
1195
- @feed_version = 1.0
1196
- elsif version != nil
1197
- @feed_version = version
1198
- elsif default_namespace == "http://purl.org/atom/ns#"
1199
- @feed_version = 0.3
1200
- end
1201
- when "rss"
1202
- if default_namespace == "http://my.netscape.com/rdf/simple/0.9/"
1203
- @feed_version = 0.9
1204
- elsif default_namespace == "http://purl.org/rss/1.0/"
1205
- @feed_version = 1.0
1206
- elsif default_namespace == "http://purl.org/net/rss1.1#"
1207
- @feed_version = 1.1
1208
- elsif version != nil
1209
- case version
1210
- when 2.1
1211
- @feed_version = 2.0
1212
- when 2.01
1213
- @feed_version = 2.0
1214
- else
1215
- @feed_version = version
1216
- end
1217
- end
1218
- when "cdf"
1219
- @feed_version = 0.4
1220
- when "!okay/news"
1221
- @feed_version = nil
1222
- end
1223
- end
1224
- return @feed_version
1225
- end
1226
-
1227
- # Sets the default feed version
1228
- def feed_version=(new_feed_version)
1229
- @feed_version = new_feed_version
1230
- end
1231
-
1232
- # Returns the feed's unique id
1233
- def id
1234
- if @id.nil?
1235
- unless channel_node.nil?
1236
- @id = XPath.first(channel_node, "id/text()").to_s
1237
- if @id == ""
1238
- @id = XPath.first(channel_node, "guid/text()").to_s
1239
- end
1240
- end
1241
- unless root_node.nil?
1242
- if @id == "" || @id.nil?
1243
- @id = XPath.first(root_node, "id/text()").to_s
1244
- end
1245
- if @id == ""
1246
- @id = XPath.first(root_node, "guid/text()").to_s
1247
- end
1248
- end
1249
- @id = nil if @id == ""
1250
- end
1251
- return @id
1252
- end
1253
-
1254
- # Sets the feed's unique id
1255
- def id=(new_id)
1256
- @id = new_id
1257
- end
1258
-
1259
- # Returns the feed url.
1260
- def url
1261
- if @url.nil? && self.xml_data != nil
1262
- @url = XPath.first(channel_node, "link[@rel='self']/@href").to_s
1263
- @url = nil if @url == ""
1264
- end
1265
- return @url
1266
- end
1267
-
1268
- # Sets the feed url and prepares the cache_object if necessary.
1269
- def url=(new_url)
1270
- @url = FeedTools.normalize_url(new_url)
1271
- self.cache_object.url = new_url unless self.cache_object.nil?
1272
- end
1273
-
1274
- # Returns the feed title
1275
- def title
1276
- if @title.nil?
1277
- unless channel_node.nil?
1278
- repair_entities = false
1279
- title_node = XPath.first(channel_node, "title")
1280
- if title_node.nil?
1281
- title_node = XPath.first(channel_node, "dc:title")
1282
- end
1283
- if title_node.nil?
1284
- title_node = XPath.first(channel_node, "TITLE")
1285
- end
1286
- end
1287
- if title_node.nil?
1288
- return nil
1289
- end
1290
- if XPath.first(title_node, "@type").to_s == "xhtml" ||
1291
- XPath.first(title_node, "@mode").to_s == "xhtml" ||
1292
- XPath.first(title_node, "@type").to_s == "xml" ||
1293
- XPath.first(title_node, "@mode").to_s == "xml" ||
1294
- XPath.first(title_node, "@type").to_s == "application/xhtml+xml"
1295
- @title = title_node.inner_xml
1296
- elsif XPath.first(title_node, "@type").to_s == "escaped" ||
1297
- XPath.first(title_node, "@mode").to_s == "escaped"
1298
- @title = FeedTools.unescape_entities(
1299
- XPath.first(title_node, "text()").to_s)
1300
- else
1301
- @title = title_node.inner_xml
1302
- repair_entities = true
1303
- end
1304
- unless @title.nil?
1305
- @title = FeedTools.sanitize_html(@title, :strip)
1306
- @title = FeedTools.unescape_entities(@title) if repair_entities
1307
- @title = FeedTools.tidy_html(@title)
1308
- end
1309
- @title.gsub!(/\n/, " ")
1310
- @title.strip!
1311
- @title = nil if @title == ""
1312
- self.cache_object.title = @title unless self.cache_object.nil?
1313
- end
1314
- return @title
1315
- end
1316
-
1317
- # Sets the feed title
1318
- def title=(new_title)
1319
- @title = new_title
1320
- self.cache_object.title = new_title unless self.cache_object.nil?
1321
- end
1322
-
1323
- # Returns the feed description
1324
- def description
1325
- if @description.nil?
1326
- unless channel_node.nil?
1327
- repair_entities = false
1328
- description_node = XPath.first(channel_node, "description")
1329
- if description_node.nil?
1330
- description_node = XPath.first(channel_node, "tagline")
1331
- end
1332
- if description_node.nil?
1333
- description_node = XPath.first(channel_node, "subtitle")
1334
- end
1335
- if description_node.nil?
1336
- description_node = XPath.first(channel_node, "summary")
1337
- end
1338
- if description_node.nil?
1339
- description_node = XPath.first(channel_node, "abstract")
1340
- end
1341
- if description_node.nil?
1342
- description_node = XPath.first(channel_node, "ABSTRACT")
1343
- end
1344
- if description_node.nil?
1345
- description_node = XPath.first(channel_node, "info")
1346
- end
1347
- if description_node.nil?
1348
- description_node = XPath.first(channel_node, "content:encoded")
1349
- @bozo = true unless description_node.nil?
1350
- end
1351
- if description_node.nil?
1352
- description_node = XPath.first(channel_node, "content")
1353
- @bozo = true unless description_node.nil?
1354
- end
1355
- if description_node.nil?
1356
- description_node = XPath.first(channel_node, "xhtml:body")
1357
- @bozo = true unless description_node.nil?
1358
- end
1359
- if description_node.nil?
1360
- description_node = XPath.first(channel_node, "body")
1361
- @bozo = true unless description_node.nil?
1362
- end
1363
- end
1364
- if description_node.nil?
1365
- return nil
1366
- end
1367
- unless description_node.nil?
1368
- if XPath.first(description_node, "@encoding").to_s != ""
1369
- @description =
1370
- "[Embedded data objects are not currently supported.]"
1371
- elsif XPath.first(description_node, "@type").to_s == "xhtml" ||
1372
- XPath.first(description_node, "@mode").to_s == "xhtml" ||
1373
- XPath.first(description_node, "@type").to_s == "xml" ||
1374
- XPath.first(description_node, "@mode").to_s == "xml" ||
1375
- XPath.first(description_node, "@type").to_s ==
1376
- "application/xhtml+xml"
1377
- @description = description_node.inner_xml
1378
- elsif XPath.first(description_node, "@type").to_s == "escaped" ||
1379
- XPath.first(description_node, "@mode").to_s == "escaped"
1380
- @description = FeedTools.unescape_entities(
1381
- description_node.inner_xml)
1382
- else
1383
- @description = description_node.inner_xml
1384
- repair_entities = true
1385
- end
1386
- end
1387
- if @description == ""
1388
- @description = self.itunes_summary
1389
- @description = "" if @description.nil?
1390
- end
1391
- if @description == ""
1392
- @description = self.itunes_subtitle
1393
- @description = "" if @description.nil?
1394
- end
1395
-
1396
- unless @description.nil?
1397
- @description = FeedTools.sanitize_html(@description, :strip)
1398
- @description = FeedTools.unescape_entities(@description) if repair_entities
1399
- @description = FeedTools.tidy_html(@description)
1400
- end
1401
-
1402
- @description = @description.strip unless @description.nil?
1403
- @description = nil if @description == ""
1404
- end
1405
- return @description
1406
- end
1407
-
1408
- # Sets the feed description
1409
- def description=(new_description)
1410
- @description = new_description
1411
- end
1412
-
1413
- # Returns the contents of the itunes:summary element
1414
- def itunes_summary
1415
- if @itunes_summary.nil?
1416
- unless channel_node.nil?
1417
- @itunes_summary = FeedTools.unescape_entities(XPath.first(channel_node,
1418
- "itunes:summary/text()").to_s)
1419
- end
1420
- unless root_node.nil?
1421
- if @itunes_summary == "" || @itunes_summary.nil?
1422
- @itunes_summary = FeedTools.unescape_entities(XPath.first(root_node,
1423
- "itunes:summary/text()").to_s)
1424
- end
1425
- end
1426
- if @itunes_summary == ""
1427
- @itunes_summary = nil
1428
- end
1429
- @itunes_summary =
1430
- FeedTools.sanitize_html(@itunes_summary) unless @itunes_summary.nil?
1431
- end
1432
- return @itunes_summary
1433
- end
1434
-
1435
- # Sets the contents of the itunes:summary element
1436
- def itunes_summary=(new_itunes_summary)
1437
- @itunes_summary = new_itunes_summary
1438
- end
1439
-
1440
- # Returns the contents of the itunes:subtitle element
1441
- def itunes_subtitle
1442
- if @itunes_subtitle.nil?
1443
- unless channel_node.nil?
1444
- @itunes_subtitle = FeedTools.unescape_entities(XPath.first(channel_node,
1445
- "itunes:subtitle/text()").to_s)
1446
- end
1447
- unless root_node.nil?
1448
- if @itunes_subtitle == "" || @itunes_subtitle.nil?
1449
- @itunes_subtitle = FeedTools.unescape_entities(XPath.first(root_node,
1450
- "itunes:subtitle/text()").to_s)
1451
- end
1452
- end
1453
- if @itunes_subtitle == ""
1454
- @itunes_subtitle = nil
1455
- end
1456
- unless @itunes_subtitle.nil?
1457
- @itunes_subtitle = FeedTools.sanitize_html(@itunes_subtitle)
1458
- end
1459
- end
1460
- return @itunes_subtitle
1461
- end
1462
-
1463
- # Sets the contents of the itunes:subtitle element
1464
- def itunes_subtitle=(new_itunes_subtitle)
1465
- @itunes_subtitle = new_itunes_subtitle
1466
- end
1467
-
1468
- # Returns the feed link
1469
- def link
1470
- if @link.nil?
1471
- unless channel_node.nil?
1472
- # get the feed link from the xml document
1473
- @link = XPath.first(channel_node, "link[@rel='alternate' @type='text/html']/@href").to_s
1474
- if @link == ""
1475
- @link = XPath.first(channel_node, "link[@rel='alternate']/@href").to_s
1476
- end
1477
- if @link == ""
1478
- @link = XPath.first(channel_node, "link/@href").to_s
1479
- end
1480
- if @link == ""
1481
- @link = XPath.first(channel_node, "link/text()").to_s
1482
- end
1483
- if @link == ""
1484
- @link = XPath.first(channel_node, "@href").to_s
1485
- end
1486
- if @link == ""
1487
- @link = XPath.first(channel_node, "@HREF").to_s
1488
- end
1489
- if @link == ""
1490
- @link = XPath.first(channel_node, "a/@href").to_s
1491
- end
1492
- if @link == ""
1493
- @link = XPath.first(channel_node, "A/@HREF").to_s
1494
- end
1495
- end
1496
- if @link == "" || @link.nil?
1497
- if FeedTools.is_uri? self.guid
1498
- @link = self.guid
1499
- end
1500
- end
1501
- if @link == "" && channel_node != nil
1502
- # Technically, we shouldn't use the base attribute for this, but if the href attribute
1503
- # is missing, it's already a given that we're looking at a messed up CDF file. We can
1504
- # always pray it's correct.
1505
- @link = XPath.first(channel_node, "@base").to_s
1506
- end
1507
- @link = FeedTools.normalize_url(@link)
1508
- unless self.cache_object.nil?
1509
- self.cache_object.link = @link
1510
- end
1511
- end
1512
- return @link
1513
- end
1514
-
1515
- # Sets the feed link
1516
- def link=(new_link)
1517
- @link = new_link
1518
- unless self.cache_object.nil?
1519
- self.cache_object.link = new_link
1520
- end
1521
- end
1522
-
1523
- # Returns the url to the icon file for this feed.
1524
- #
1525
- # This method uses the url from the link field in order to avoid grabbing
1526
- # the favicon for services like feedburner.
1527
- def icon
1528
- if @icon.nil?
1529
- icon_node = XPath.first(channel_node, "link[@rel='icon']")
1530
- if icon_node.nil?
1531
- icon_node = XPath.first(channel_node, "link[@rel='shortcut icon']")
1532
- end
1533
- if icon_node.nil?
1534
- icon_node = XPath.first(channel_node, "link[@type='image/x-icon']")
1535
- end
1536
- if icon_node.nil?
1537
- icon_node = XPath.first(channel_node, "icon")
1538
- end
1539
- if icon_node.nil?
1540
- icon_node = XPath.first(channel_node, "logo[@style='icon']")
1541
- end
1542
- if icon_node.nil?
1543
- icon_node = XPath.first(channel_node, "LOGO[@STYLE='ICON']")
1544
- end
1545
- unless icon_node.nil?
1546
- @icon = FeedTools.unescape_entities(
1547
- XPath.first(icon_node, "@href").to_s)
1548
- if @icon == ""
1549
- @icon = FeedTools.unescape_entities(
1550
- XPath.first(icon_node, "text()").to_s)
1551
- unless FeedTools.is_uri? @icon
1552
- @icon = ""
1553
- end
1554
- end
1555
- if @icon == "" && self.link != nil && self.link != ""
1556
- link_uri = URI.parse(FeedTools.normalize_url(self.link))
1557
- @icon =
1558
- link_uri.scheme + "://" + link_uri.host + "/favicon.ico"
1559
- end
1560
- @icon = nil if @icon == ""
1561
- end
1562
- end
1563
- return @icon
1564
- end
1565
-
1566
- # Returns the feed author
1567
- def author
1568
- if @author.nil?
1569
- @author = FeedTools::Feed::Author.new
1570
- unless channel_node.nil?
1571
- author_node = XPath.first(channel_node, "author")
1572
- if author_node.nil?
1573
- author_node = XPath.first(channel_node, "managingEditor")
1574
- end
1575
- if author_node.nil?
1576
- author_node = XPath.first(channel_node, "dc:author")
1577
- end
1578
- if author_node.nil?
1579
- author_node = XPath.first(channel_node, "dc:creator")
1580
- end
1581
- if author_node.nil?
1582
- author_node = XPath.first(channel_node, "atom:author")
1583
- end
1584
- end
1585
- unless author_node.nil?
1586
- @author.raw = FeedTools.unescape_entities(
1587
- XPath.first(author_node, "text()").to_s)
1588
- @author.raw = nil if @author.raw == ""
1589
- unless @author.raw.nil?
1590
- raw_scan = @author.raw.scan(
1591
- /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
1592
- if raw_scan.nil? || raw_scan.size == 0
1593
- raw_scan = @author.raw.scan(
1594
- /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
1595
- author_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0
1596
- else
1597
- author_raw_pair = raw_scan.first
1598
- end
1599
- if raw_scan.nil? || raw_scan.size == 0
1600
- email_scan = @author.raw.scan(
1601
- /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
1602
- if email_scan != nil && email_scan.size > 0
1603
- @author.email = email_scan.first.strip
1604
- end
1605
- end
1606
- unless author_raw_pair.nil? || author_raw_pair.size == 0
1607
- @author.name = author_raw_pair.first.strip
1608
- @author.email = author_raw_pair.last.strip
1609
- else
1610
- unless @author.raw.include?("@")
1611
- # We can be reasonably sure we are looking at something
1612
- # that the creator didn't intend to contain an email address if
1613
- # it got through the preceeding regexes and it doesn't
1614
- # contain the tell-tale '@' symbol.
1615
- @author.name = @author.raw
1616
- end
1617
- end
1618
- end
1619
- @author.name = "" if @author.name.nil?
1620
- if @author.name == ""
1621
- @author.name = FeedTools.unescape_entities(
1622
- XPath.first(author_node, "name/text()").to_s)
1623
- end
1624
- if @author.name == ""
1625
- @author.name = FeedTools.unescape_entities(
1626
- XPath.first(author_node, "@name").to_s)
1627
- end
1628
- if @author.email == ""
1629
- @author.email = FeedTools.unescape_entities(
1630
- XPath.first(author_node, "email/text()").to_s)
1631
- end
1632
- if @author.email == ""
1633
- @author.email = FeedTools.unescape_entities(
1634
- XPath.first(author_node, "@email").to_s)
1635
- end
1636
- if @author.url == ""
1637
- @author.url = FeedTools.unescape_entities(
1638
- XPath.first(author_node, "url/text()").to_s)
1639
- end
1640
- if @author.url == ""
1641
- @author.url = FeedTools.unescape_entities(
1642
- XPath.first(author_node, "@url").to_s)
1643
- end
1644
- @author.name = nil if @author.name == ""
1645
- @author.raw = nil if @author.raw == ""
1646
- @author.email = nil if @author.email == ""
1647
- @author.url = nil if @author.url == ""
1648
- end
1649
- # Fallback on the itunes module if we didn't find an author name
1650
- begin
1651
- @author.name = self.itunes_author if @author.name.nil?
1652
- rescue
1653
- @author.name = nil
1654
- end
1655
- end
1656
- return @author
1657
- end
1658
-
1659
- # Sets the feed author
1660
- def author=(new_author)
1661
- if new_author.respond_to?(:name) &&
1662
- new_author.respond_to?(:email) &&
1663
- new_author.respond_to?(:url)
1664
- # It's a complete author object, just set it.
1665
- @author = new_author
1666
- else
1667
- # We're not looking at an author object, this is probably a string,
1668
- # default to setting the author's name.
1669
- if @author.nil?
1670
- @author = FeedTools::Feed::Author.new
1671
- end
1672
- @author.name = new_author
1673
- end
1674
- end
1675
-
1676
- # Returns the feed publisher
1677
- def publisher
1678
- if @publisher.nil?
1679
- @publisher = FeedTools::Feed::Author.new
1680
-
1681
- # Set the author name
1682
- @publisher.raw = FeedTools.unescape_entities(
1683
- XPath.first(channel_node, "dc:publisher/text()").to_s)
1684
- if @publisher.raw == ""
1685
- @publisher.raw = FeedTools.unescape_entities(
1686
- XPath.first(channel_node, "webMaster/text()").to_s)
1687
- end
1688
- unless @publisher.raw == ""
1689
- raw_scan = @publisher.raw.scan(
1690
- /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
1691
- if raw_scan.nil? || raw_scan.size == 0
1692
- raw_scan = @publisher.raw.scan(
1693
- /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
1694
- unless raw_scan.size == 0
1695
- publisher_raw_pair = raw_scan.first.reverse
1696
- end
1697
- else
1698
- publisher_raw_pair = raw_scan.first
1699
- end
1700
- if raw_scan.nil? || raw_scan.size == 0
1701
- email_scan = @publisher.raw.scan(
1702
- /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
1703
- if email_scan != nil && email_scan.size > 0
1704
- @publisher.email = email_scan.first.strip
1705
- end
1706
- end
1707
- unless publisher_raw_pair.nil? || publisher_raw_pair.size == 0
1708
- @publisher.name = publisher_raw_pair.first.strip
1709
- @publisher.email = publisher_raw_pair.last.strip
1710
- else
1711
- unless @publisher.raw.include?("@")
1712
- # We can be reasonably sure we are looking at something
1713
- # that the creator didn't intend to contain an email address if
1714
- # it got through the preceeding regexes and it doesn't
1715
- # contain the tell-tale '@' symbol.
1716
- @publisher.name = @publisher.raw
1717
- end
1718
- end
1719
- end
1720
-
1721
- @publisher.name = nil if @publisher.name == ""
1722
- @publisher.raw = nil if @publisher.raw == ""
1723
- @publisher.email = nil if @publisher.email == ""
1724
- @publisher.url = nil if @publisher.url == ""
1725
- end
1726
- return @publisher
1727
- end
1728
-
1729
- # Sets the feed publisher
1730
- def publisher=(new_publisher)
1731
- if new_publisher.respond_to?(:name) &&
1732
- new_publisher.respond_to?(:email) &&
1733
- new_publisher.respond_to?(:url)
1734
- # It's a complete Author object, just set it.
1735
- @publisher = new_publisher
1736
- else
1737
- # We're not looking at an Author object, this is probably a string,
1738
- # default to setting the publisher's name.
1739
- if @publisher.nil?
1740
- @publisher = FeedTools::Feed::Author.new
1741
- end
1742
- @publisher.name = new_publisher
1743
- end
1744
- end
1745
-
1746
- # Returns the contents of the itunes:author element
1747
- #
1748
- # Returns any incorrectly placed channel-level itunes:author
1749
- # elements. They're actually amazingly common. People don't read specs.
1750
- # There is no setter for this, since this is a "bozo" attribute.
1751
- def itunes_author
1752
- if @itunes_author.nil?
1753
- @itunes_author = FeedTools.unescape_entities(XPath.first(channel_node,
1754
- "itunes:author/text()").to_s)
1755
- @itunes_author = nil if @itunes_author == ""
1756
- @bozo = true unless @itunes_author.nil?
1757
- end
1758
- return @itunes_author
1759
- end
1760
-
1761
- # Returns the feed item time
1762
- def time
1763
- if @time.nil?
1764
- unless channel_node.nil?
1765
- time_string = XPath.first(channel_node, "pubDate/text()").to_s
1766
- if time_string == ""
1767
- time_string = XPath.first(channel_node, "dc:date/text()").to_s
1768
- end
1769
- if time_string == ""
1770
- time_string = XPath.first(channel_node, "issued/text()").to_s
1771
- end
1772
- if time_string == ""
1773
- time_string = XPath.first(channel_node, "updated/text()").to_s
1774
- end
1775
- if time_string == ""
1776
- time_string = XPath.first(channel_node, "time/text()").to_s
1777
- end
1778
- end
1779
- begin
1780
- if time_string != nil && time_string != ""
1781
- @time = Time.parse(time_string) rescue self.succ_time
1782
- elsif time_string == nil
1783
- @time = self.succ_time
1784
- end
1785
- if @time == nil
1786
- @time = Time.now
1787
- end
1788
- rescue
1789
- @time = Time.now
1790
- end
1791
- end
1792
- return @time
1793
- end
1794
-
1795
- # Sets the feed item time
1796
- def time=(new_time)
1797
- @time = new_time
1798
- end
1799
-
1800
- # Returns 1 second after the previous item's time.
1801
- def succ_time #:nodoc:
1802
- begin
1803
- if feed.nil?
1804
- return nil
1805
- end
1806
- feed.items
1807
- unsorted_items = feed.instance_variable_get("@items")
1808
- item_index = unsorted_items.index(self)
1809
- if item_index.nil?
1810
- return nil
1811
- end
1812
- if item_index <= 0
1813
- return Time.now
1814
- end
1815
- previous_item = unsorted_items[item_index - 1]
1816
- return previous_item.time.succ
1817
- rescue
1818
- return nil
1819
- end
1820
- end
1821
- private :succ_time
1822
-
1823
- # Returns the feed item updated time
1824
- def updated
1825
- if @updated.nil?
1826
- unless channel_node.nil?
1827
- updated_string = XPath.first(channel_node, "updated/text()").to_s
1828
- if updated_string == ""
1829
- updated_string = XPath.first(channel_node, "modified/text()").to_s
1830
- end
1831
- end
1832
- if updated_string != nil && updated_string != ""
1833
- @updated = Time.parse(updated_string) rescue nil
1834
- else
1835
- @updated = nil
1836
- end
1837
- end
1838
- return @updated
1839
- end
1840
-
1841
- # Sets the feed item updated time
1842
- def updated=(new_updated)
1843
- @updated = new_updated
1844
- end
1845
-
1846
- # Returns the feed item issued time
1847
- def issued
1848
- if @issued.nil?
1849
- unless channel_node.nil?
1850
- issued_string = XPath.first(channel_node, "issued/text()").to_s
1851
- if issued_string == ""
1852
- issued_string = XPath.first(channel_node, "pubDate/text()").to_s
1853
- end
1854
- if issued_string == ""
1855
- issued_string = XPath.first(channel_node, "dc:date/text()").to_s
1856
- end
1857
- if issued_string == ""
1858
- issued_string = XPath.first(channel_node, "published/text()").to_s
1859
- end
1860
- end
1861
- if issued_string != nil && issued_string != ""
1862
- @issued = Time.parse(issued_string) rescue nil
1863
- else
1864
- @issued = nil
1865
- end
1866
- end
1867
- return @issued
1868
- end
1869
-
1870
- # Sets the feed item issued time
1871
- def issued=(new_issued)
1872
- @issued = new_issued
1873
- end
1874
-
1875
- # Returns the feed item published time
1876
- def published
1877
- if @published.nil?
1878
- unless channel_node.nil?
1879
- published_string = XPath.first(channel_node, "published/text()").to_s
1880
- if published_string == ""
1881
- published_string = XPath.first(channel_node, "pubDate/text()").to_s
1882
- end
1883
- if published_string == ""
1884
- published_string = XPath.first(channel_node, "dc:date/text()").to_s
1885
- end
1886
- if published_string == ""
1887
- published_string = XPath.first(channel_node, "issued/text()").to_s
1888
- end
1889
- end
1890
- if published_string != nil && published_string != ""
1891
- @published = Time.parse(published_string) rescue nil
1892
- else
1893
- @published = nil
1894
- end
1895
- end
1896
- return @published
1897
- end
1898
-
1899
- # Sets the feed item published time
1900
- def published=(new_published)
1901
- @published = new_published
1902
- end
1903
-
1904
- # Returns a list of the feed's categories
1905
- def categories
1906
- if @categories.nil?
1907
- @categories = []
1908
- category_nodes = XPath.match(channel_node, "category")
1909
- if category_nodes.nil? || category_nodes.empty?
1910
- category_nodes = XPath.match(channel_node, "dc:subject")
1911
- end
1912
- unless category_nodes.nil?
1913
- for category_node in category_nodes
1914
- category = FeedTools::Feed::Category.new
1915
- category.term = XPath.first(category_node, "@term").to_s
1916
- if category.term == ""
1917
- category.term = XPath.first(category_node, "text()").to_s
1918
- end
1919
- category.term.strip! unless category.term.nil?
1920
- category.term = nil if category.term == ""
1921
- category.label = XPath.first(category_node, "@label").to_s
1922
- category.label.strip! unless category.label.nil?
1923
- category.label = nil if category.label == ""
1924
- category.scheme = XPath.first(category_node, "@scheme").to_s
1925
- if category.scheme == ""
1926
- category.scheme = XPath.first(category_node, "@domain").to_s
1927
- end
1928
- category.scheme.strip! unless category.scheme.nil?
1929
- category.scheme = nil if category.scheme == ""
1930
- @categories << category
1931
- end
1932
- end
1933
- end
1934
- return @categories
1935
- end
1936
-
1937
- # Returns a list of the feed's images
1938
- def images
1939
- if @images.nil?
1940
- @images = []
1941
- unless channel_node.nil?
1942
- image_nodes = XPath.match(channel_node, "image")
1943
- if image_nodes.nil? || image_nodes.empty?
1944
- image_nodes = XPath.match(channel_node, "link")
1945
- end
1946
- if image_nodes.nil? || image_nodes.empty?
1947
- image_nodes = XPath.match(channel_node, "logo")
1948
- end
1949
- if image_nodes.nil? || image_nodes.empty?
1950
- image_nodes = XPath.match(channel_node, "LOGO")
1951
- end
1952
- unless image_nodes.nil?
1953
- for image_node in image_nodes
1954
- image = FeedTools::Feed::Image.new
1955
- image.url = XPath.first(image_node, "url/text()").to_s
1956
- if image.url == ""
1957
- image.url = XPath.first(image_node, "@rdf:resource").to_s
1958
- end
1959
- if image.url == "" && (image_node.name == "logo" ||
1960
- (image_node.attributes['type'] =~ /^image/) == 0)
1961
- image.url = XPath.first(image_node, "@href").to_s
1962
- end
1963
- if image.url == "" && image_node.name == "LOGO"
1964
- image.url = XPath.first(image_node, "@HREF").to_s
1965
- end
1966
- image.url.strip! unless image.url.nil?
1967
- image.url = nil if image.url == ""
1968
- image.title = XPath.first(image_node, "title/text()").to_s
1969
- image.title.strip! unless image.title.nil?
1970
- image.title = nil if image.title == ""
1971
- image.description =
1972
- XPath.first(image_node, "description/text()").to_s
1973
- image.description.strip! unless image.description.nil?
1974
- image.description = nil if image.description == ""
1975
- image.link = XPath.first(image_node, "link/text()").to_s
1976
- image.link.strip! unless image.link.nil?
1977
- image.link = nil if image.link == ""
1978
- image.height = XPath.first(image_node, "height/text()").to_s.to_i
1979
- image.height = nil if image.height <= 0
1980
- image.width = XPath.first(image_node, "width/text()").to_s.to_i
1981
- image.width = nil if image.width <= 0
1982
- image.style = XPath.first(image_node, "@style").to_s.downcase
1983
- if image.style == ""
1984
- image.style = XPath.first(image_node, "@STYLE").to_s.downcase
1985
- end
1986
- image.style.strip! unless image.style.nil?
1987
- image.style = nil if image.style == ""
1988
- @images << image
1989
- end
1990
- end
1991
- end
1992
- end
1993
- return @images
1994
- end
1995
-
1996
- # Returns the feed's text input field
1997
- def text_input
1998
- if @text_input.nil?
1999
- @text_input = FeedTools::Feed::TextInput.new
2000
- text_input_node = XPath.first(channel_node, "textInput")
2001
- unless text_input_node.nil?
2002
- @text_input.title =
2003
- XPath.first(text_input_node, "title/text()").to_s
2004
- @text_input.title = nil if @text_input.title == ""
2005
- @text_input.description =
2006
- XPath.first(text_input_node, "description/text()").to_s
2007
- @text_input.description = nil if @text_input.description == ""
2008
- @text_input.link =
2009
- XPath.first(text_input_node, "link/text()").to_s
2010
- @text_input.link = nil if @text_input.link == ""
2011
- @text_input.name =
2012
- XPath.first(text_input_node, "name/text()").to_s
2013
- @text_input.name = nil if @text_input.name == ""
2014
- end
2015
- end
2016
- return @text_input
2017
- end
2018
-
2019
- # Returns the feed's copyright information
2020
- def copyright
2021
- if @copyright.nil?
2022
- unless channel_node.nil?
2023
- @copyright = XPath.first(channel_node, "copyright/text()").to_s
2024
- if @copyright == ""
2025
- @copyright = XPath.first(channel_node, "rights/text()").to_s
2026
- end
2027
- if @copyright == ""
2028
- @copyright = XPath.first(channel_node, "dc:rights/text()").to_s
2029
- end
2030
- if @copyright == ""
2031
- @copyright = XPath.first(channel_node, "copyrights/text()").to_s
2032
- end
2033
- @copyright = FeedTools.sanitize_html(@copyright, :strip)
2034
- @copyright = nil if @copyright == ""
2035
- end
2036
- end
2037
- return @copyright
2038
- end
2039
-
2040
- # Sets the feed's copyright information
2041
- def copyright=(new_copyright)
2042
- @copyright = new_copyright
2043
- end
2044
-
2045
- # Returns the number of seconds before the feed should expire
2046
- def time_to_live
2047
- if @time_to_live.nil?
2048
- unless channel_node.nil?
2049
- # get the feed time to live from the xml document
2050
- update_frequency = XPath.first(channel_node, "syn:updateFrequency/text()").to_s
2051
- if update_frequency != ""
2052
- update_period = XPath.first(channel_node, "syn:updatePeriod/text()").to_s
2053
- if update_period == "daily"
2054
- @time_to_live = update_frequency.to_i.day
2055
- elsif update_period == "weekly"
2056
- @time_to_live = update_frequency.to_i.week
2057
- elsif update_period == "monthly"
2058
- @time_to_live = update_frequency.to_i.month
2059
- elsif update_period == "yearly"
2060
- @time_to_live = update_frequency.to_i.year
2061
- else
2062
- # hourly
2063
- @time_to_live = update_frequency.to_i.hour
2064
- end
2065
- end
2066
- if @time_to_live.nil?
2067
- # usually expressed in minutes
2068
- update_frequency = XPath.first(channel_node, "ttl/text()").to_s
2069
- if update_frequency != ""
2070
- update_span = XPath.first(channel_node, "ttl/@span").to_s
2071
- if update_span == "seconds"
2072
- @time_to_live = update_frequency.to_i
2073
- elsif update_span == "minutes"
2074
- @time_to_live = update_frequency.to_i.minute
2075
- elsif update_span == "hours"
2076
- @time_to_live = update_frequency.to_i.hour
2077
- elsif update_span == "days"
2078
- @time_to_live = update_frequency.to_i.day
2079
- elsif update_span == "weeks"
2080
- @time_to_live = update_frequency.to_i.week
2081
- elsif update_span == "months"
2082
- @time_to_live = update_frequency.to_i.month
2083
- elsif update_span == "years"
2084
- @time_to_live = update_frequency.to_i.year
2085
- elsif update_frequency.to_i >= 3000
2086
- # Normally, this should default to minutes, but realistically,
2087
- # if they meant minutes, you're rarely going to see a value higher
2088
- # than 120. If we see >= 3000, we're either dealing with a stupid
2089
- # pseudo-spec that decided to use seconds, or we're looking at
2090
- # someone who only has weekly updated content. Worst case, we
2091
- # misreport the time, and we update too often. Best case, we
2092
- # avoid accidentally updating the feed only once a year. In the
2093
- # interests of being pragmatic, and since the problem we avoid
2094
- # is a far greater one than the one we cause, just run the check
2095
- # and hope no one actually gets hurt.
2096
- @time_to_live = update_frequency.to_i
2097
- else
2098
- @time_to_live = update_frequency.to_i.minute
2099
- end
2100
- end
2101
- end
2102
- if @time_to_live.nil?
2103
- @time_to_live = 0
2104
- update_frequency_days =
2105
- XPath.first(channel_node, "schedule/intervaltime/@days").to_s
2106
- update_frequency_hours =
2107
- XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
2108
- update_frequency_minutes =
2109
- XPath.first(channel_node, "schedule/intervaltime/@min").to_s
2110
- update_frequency_seconds =
2111
- XPath.first(channel_node, "schedule/intervaltime/@sec").to_s
2112
- if update_frequency_days != ""
2113
- @time_to_live = @time_to_live + update_frequency_days.to_i.day
2114
- end
2115
- if update_frequency_hours != ""
2116
- @time_to_live = @time_to_live + update_frequency_hours.to_i.hour
2117
- end
2118
- if update_frequency_minutes != ""
2119
- @time_to_live = @time_to_live + update_frequency_minutes.to_i.minute
2120
- end
2121
- if update_frequency_seconds != ""
2122
- @time_to_live = @time_to_live + update_frequency_seconds.to_i
2123
- end
2124
- if @time_to_live == 0
2125
- @time_to_live = 1.hour
2126
- end
2127
- end
2128
- end
2129
- end
2130
- if @time_to_live.nil? || @time_to_live == 0
2131
- # Default to one hour
2132
- @time_to_live = 1.hour
2133
- end
2134
- @time_to_live = @time_to_live.round
2135
- return @time_to_live
2136
- end
2137
-
2138
- # Sets the feed time to live
2139
- def time_to_live=(new_time_to_live)
2140
- @time_to_live = new_time_to_live.round
2141
- @time_to_live = 1.hour if @time_to_live < 1.hour
2142
- end
2143
-
2144
- # Returns the feed's cloud
2145
- def cloud
2146
- if @cloud.nil?
2147
- @cloud = FeedTools::Feed::Cloud.new
2148
- @cloud.domain = XPath.first(channel_node, "cloud/@domain").to_s
2149
- @cloud.port = XPath.first(channel_node, "cloud/@port").to_s
2150
- @cloud.path = XPath.first(channel_node, "cloud/@path").to_s
2151
- @cloud.register_procedure =
2152
- XPath.first(channel_node, "cloud/@registerProcedure").to_s
2153
- @cloud.protocol =
2154
- XPath.first(channel_node, "cloud/@protocol").to_s.downcase
2155
- @cloud.domain = nil if @cloud.domain == ""
2156
- @cloud.port = nil if @cloud.port == ""
2157
- @cloud.port = @cloud.port.to_i unless @cloud.port.nil?
2158
- @cloud.port = nil if @cloud.port == 0
2159
- @cloud.path = nil if @cloud.path == ""
2160
- @cloud.register_procedure = nil if @cloud.register_procedure == ""
2161
- @cloud.protocol = nil if @cloud.protocol == ""
2162
- end
2163
- return @cloud
2164
- end
2165
-
2166
- # Sets the feed's cloud
2167
- def cloud=(new_cloud)
2168
- @cloud = new_cloud
2169
- end
2170
-
2171
- # Returns the feed generator
2172
- def generator
2173
- if @generator.nil?
2174
- @generator = XPath.first(channel_node, "generator/text()").to_s
2175
- @generator = FeedTools.strip_html(@generator)
2176
- @generator = nil if @generator == ""
2177
- end
2178
- return @generator
2179
- end
2180
-
2181
- # Sets the feed generator
2182
- def generator=(new_generator)
2183
- @generator = new_generator
2184
- end
2185
-
2186
- # Returns the feed docs
2187
- def docs
2188
- if @docs.nil?
2189
- @docs = XPath.first(channel_node, "docs/text()").to_s
2190
- @docs = FeedTools.strip_html(@docs)
2191
- @docs = nil if @docs == ""
2192
- end
2193
- return @docs
2194
- end
2195
-
2196
- # Sets the feed docs
2197
- def docs=(new_docs)
2198
- @docs = new_docs
2199
- end
2200
-
2201
- # Returns the feed language
2202
- def language
2203
- if @language.nil?
2204
- unless channel_node.nil?
2205
- @language = XPath.first(channel_node, "language/text()").to_s
2206
- if @language == ""
2207
- @language = XPath.first(channel_node, "dc:language/text()").to_s
2208
- end
2209
- if @language == ""
2210
- @language = XPath.first(channel_node, "xml:lang/text()").to_s
2211
- end
2212
- if @language == ""
2213
- @language = XPath.first(root_node, "xml:lang/text()").to_s
2214
- end
2215
- end
2216
- if @language == "" || @language.nil?
2217
- @language = "en-us"
2218
- end
2219
- @language = @language.downcase
2220
- @language = nil if @language == ""
2221
- end
2222
- return @language
2223
- end
2224
-
2225
- # Sets the feed language
2226
- def language=(new_language)
2227
- @language = new_language
2228
- end
2229
-
2230
- # Returns true if this feed contains explicit material.
2231
- def explicit?
2232
- if @explicit.nil?
2233
- if XPath.first(channel_node,
2234
- "media:adult/text()").to_s.downcase == "true" ||
2235
- XPath.first(channel_node,
2236
- "itunes:explicit/text()").to_s.downcase == "yes" ||
2237
- XPath.first(channel_node,
2238
- "itunes:explicit/text()").to_s.downcase == "true"
2239
- @explicit = true
2240
- else
2241
- @explicit = false
2242
- end
2243
- end
2244
- return @explicit
2245
- end
2246
-
2247
- # Sets whether or not the feed contains explicit material
2248
- def explicit=(new_explicit)
2249
- @explicit = (new_explicit ? true : false)
2250
- end
2251
-
2252
- # Returns the feed items
2253
- def items
2254
- if @items.nil?
2255
- unless root_node.nil?
2256
- raw_items = XPath.match(root_node, "item")
2257
- if raw_items == nil || raw_items == []
2258
- raw_items = XPath.match(channel_node, "item")
2259
- end
2260
- if raw_items == nil || raw_items == []
2261
- raw_items = XPath.match(channel_node, "ITEM")
2262
- end
2263
- if raw_items == nil || raw_items == []
2264
- raw_items = XPath.match(root_node, "ITEM")
2265
- end
2266
- if raw_items == nil || raw_items == []
2267
- raw_items = XPath.match(channel_node, "entry")
2268
- end
2269
- if raw_items == nil || raw_items == []
2270
- raw_items = XPath.match(root_node, "entry")
2271
- end
2272
- end
2273
-
2274
- # create the individual feed items
2275
- @items = []
2276
- if raw_items != nil
2277
- for item_node in raw_items
2278
- new_item = FeedItem.new
2279
- new_item.xml_data = item_node.to_s
2280
- new_item.feed = self
2281
- @items << new_item
2282
- end
2283
- end
2284
- end
2285
-
2286
- # Sort the items
2287
- @items = @items.sort do |a,b|
2288
- (b.time or Time.mktime(1970)) <=> (a.time or Time.mktime(1970))
2289
- end
2290
- return @items
2291
- end
2292
-
2293
- # The time that the feed was last requested from the remote server. Nil if it has
2294
- # never been pulled, or if it was created from scratch.
2295
- def last_retrieved
2296
- unless self.cache_object.nil?
2297
- @last_retrieved = self.cache_object.last_retrieved
2298
- end
2299
- return @last_retrieved
2300
- end
2301
-
2302
- # Sets the time that the feed was last updated.
2303
- def last_retrieved=(new_last_retrieved)
2304
- @last_retrieved = new_last_retrieved
2305
- unless self.cache_object.nil?
2306
- self.cache_object.last_retrieved = new_last_retrieved
2307
- end
2308
- end
2309
-
2310
- # True if this feed contains audio content enclosures
2311
- def podcast?
2312
- podcast = false
2313
- self.items.each do |item|
2314
- item.enclosures.each do |enclosure|
2315
- podcast = true if enclosure.audio?
2316
- end
2317
- end
2318
- return podcast
2319
- end
2320
-
2321
- # True if this feed contains video content enclosures
2322
- def vidlog?
2323
- vidlog = false
2324
- self.items.each do |item|
2325
- item.enclosures.each do |enclosure|
2326
- vidlog = true if enclosure.video?
2327
- end
2328
- end
2329
- return vidlog
2330
- end
2331
-
2332
- # True if this feed is malformed somehow
2333
- def bozo?
2334
- if @bozo.nil?
2335
- @bozo = false
2336
- end
2337
- return @bozo
2338
- end
2339
-
2340
- # True if the feed was not last retrieved from the cache.
2341
- def live?
2342
- return @live
2343
- end
2344
-
2345
- # True if the feed has expired and must be reacquired from the remote server.
2346
- def expired?
2347
- return self.last_retrieved == nil || (self.last_retrieved + self.time_to_live.hour) < Time.now
2348
- end
2349
-
2350
- # Forces this feed to expire.
2351
- def expire!
2352
- self.last_retrieved = Time.mktime(1970)
2353
- self.save
2354
- end
2355
-
2356
- # A hook method that is called during the feed generation process. Overriding this method
2357
- # will enable additional content to be inserted into the feed.
2358
def build_xml_hook(feed_type, version, xml_builder)
  # Default implementation is a no-op; subclasses override it to write
  # extra elements through xml_builder.
  nil
end
2361
-
2362
- # Generates xml based on the content of the feed
2363
def build_xml(feed_type=(self.feed_type or "rss"), version=nil,
    xml_builder=Builder::XmlMarkup.new(:indent => 2))
  # Serializes the feed through xml_builder and returns whatever the builder
  # returns for the root element.
  #
  # feed_type::   "rss" or "atom"; defaults to the feed's own type, or "rss".
  # version::     0.9, 1.0 and 1.1 select the RDF-based rss format, any other
  #               rss version produces rss 2.0; atom handles 0.3 and 1.0.
  #               nil or 0.0 selects 1.0 for rss and 0.3 for atom.
  # xml_builder:: the builder object the markup is written to.
  #
  # Returns nil without writing anything when the feed_type/version pair is
  # not handled by any branch.  Raises a RuntimeError when an rdf item has
  # no link, or when an atom 1.0 feed has no usable unique id.
  if version == nil || version == 0.0
    if feed_type == "rss"
      version = 1.0
    elsif feed_type == "atom"
      version = 0.3
    end
  end

  # Shared by every format: each item appends its own markup.
  write_items = lambda do
    unless items.nil?
      items.each do |item|
        item.build_xml(feed_type, version, xml_builder)
      end
    end
  end

  # Shared by both atom formats: the author element always carries a name,
  # falling back to "n/a"; email and url are optional.
  write_author = lambda do
    xml_builder.author do
      if self.author.nil? || self.author.name.nil?
        xml_builder.name("n/a")
      else
        xml_builder.name(self.author.name)
      end
      unless self.author.nil? || self.author.email.nil?
        xml_builder.email(self.author.email)
      end
      unless self.author.nil? || self.author.url.nil?
        xml_builder.url(self.author.url)
      end
    end
  end

  if feed_type == "rss" && (version == 0.9 || version == 1.0 ||
      version == 1.1)
    # RDF-based rss format
    return xml_builder.tag!("rdf:RDF",
        "xmlns" => "http://purl.org/rss/1.0/",
        "xmlns:rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
        "xmlns:dc" => "http://purl.org/dc/elements/1.1/",
        "xmlns:syn" => "http://purl.org/rss/1.0/modules/syndication/",
        "xmlns:taxo" => "http://purl.org/rss/1.0/modules/taxonomy/",
        "xmlns:itunes" => "http://www.itunes.com/DTDs/Podcast-1.0.dtd",
        "xmlns:media" => "http://search.yahoo.com/mrss") do
      # The channel's image reference and the image element must name the
      # same resource, so the image is picked once and used for both: the
      # first image that has a link, otherwise simply the first image.
      best_image = nil
      unless images.nil? || images.empty?
        best_image = images.detect { |image| image.link != nil }
        best_image = images.first if best_image.nil?
      end

      channel_attributes = {}
      unless self.link.nil?
        channel_attributes["rdf:about"] = CGI.escapeHTML(self.link)
      end
      xml_builder.channel(channel_attributes) do
        # title, link and description are always emitted for this format,
        # as empty elements when there is no value.
        if title.nil? || title == ""
          xml_builder.title
        else
          xml_builder.title(title)
        end
        if link.nil? || link == ""
          xml_builder.link
        else
          xml_builder.link(link)
        end
        unless best_image.nil?
          xml_builder.image("rdf:resource" => CGI.escapeHTML(
            best_image.url))
        end
        if description.nil? || description == ""
          xml_builder.description
        else
          xml_builder.description(description)
        end
        unless language.nil? || language == ""
          xml_builder.tag!("dc:language", language)
        end
        xml_builder.tag!("syn:updatePeriod", "hourly")
        xml_builder.tag!("syn:updateFrequency", (time_to_live / 1.hour).to_s)
        xml_builder.tag!("syn:updateBase", Time.mktime(1970).iso8601)
        xml_builder.items do
          xml_builder.tag!("rdf:Seq") do
            unless items.nil?
              items.each do |item|
                if item.link.nil?
                  raise "Cannot generate an rdf-based feed with a nil item link field."
                end
                xml_builder.tag!("rdf:li", "rdf:resource" => CGI.escapeHTML(item.link))
              end
            end
          end
        end
        build_xml_hook(feed_type, version, xml_builder)
      end
      unless best_image.nil?
        xml_builder.image("rdf:about" => CGI.escapeHTML(best_image.url)) do
          # Image title and link fall back to the feed's own values.
          if best_image.title != nil && best_image.title != ""
            xml_builder.title(best_image.title)
          elsif self.title != nil && self.title != ""
            xml_builder.title(self.title)
          else
            xml_builder.title
          end
          unless best_image.url.nil? || best_image.url == ""
            xml_builder.url(best_image.url)
          end
          if best_image.link != nil && best_image.link != ""
            xml_builder.link(best_image.link)
          elsif self.link != nil && self.link != ""
            xml_builder.link(self.link)
          else
            xml_builder.link
          end
        end
      end
      write_items.call
    end
  elsif feed_type == "rss"
    # normal rss format
    return xml_builder.rss("version" => "2.0",
        "xmlns:rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
        "xmlns:dc" => "http://purl.org/dc/elements/1.1/",
        "xmlns:taxo" => "http://purl.org/rss/1.0/modules/taxonomy/",
        "xmlns:trackback" =>
          "http://madskills.com/public/xml/rss/module/trackback/",
        "xmlns:itunes" => "http://www.itunes.com/DTDs/Podcast-1.0.dtd",
        "xmlns:media" => "http://search.yahoo.com/mrss") do
      xml_builder.channel do
        xml_builder.title(title) unless title.nil? || title == ""
        xml_builder.link(link) unless link.nil? || link == ""
        unless description.nil? || description == ""
          xml_builder.description(description)
        end
        xml_builder.ttl((time_to_live / 1.minute).to_s)
        xml_builder.generator(
          "http://www.sporkmonger.com/projects/feedtools")
        build_xml_hook(feed_type, version, xml_builder)
        write_items.call
      end
    end
  elsif feed_type == "atom" && version == 0.3
    # normal atom format
    return xml_builder.feed("xmlns" => "http://purl.org/atom/ns#",
        "version" => version,
        "xml:lang" => language) do
      unless title.nil? || title == ""
        xml_builder.title(title,
            "mode" => "escaped",
            "type" => "text/html")
      end
      write_author.call
      unless link.nil? || link == ""
        xml_builder.link("href" => link,
            "rel" => "alternate",
            "type" => "text/html",
            "title" => title)
      end
      unless description.nil? || description == ""
        xml_builder.tagline(description,
            "mode" => "escaped",
            "type" => "text/html")
      end
      xml_builder.generator("FeedTools",
          "url" => "http://www.sporkmonger.com/projects/feedtools")
      build_xml_hook(feed_type, version, xml_builder)
      write_items.call
    end
  elsif feed_type == "atom" && version == 1.0
    # normal atom format
    return xml_builder.feed("xmlns" => "http://www.w3.org/2005/Atom",
        "xml:lang" => language) do
      unless title.nil? || title == ""
        xml_builder.title(title,
            "type" => "html")
      end
      write_author.call
      unless self.url.nil? || self.url == ""
        xml_builder.link("href" => self.url,
            "rel" => "self",
            "type" => "application/atom+xml")
      end
      unless self.link.nil? || self.link == ""
        xml_builder.link("href" => self.link,
            "rel" => "alternate",
            "type" => "text/html",
            "title" => self.title)
      end
      if description.nil? || description == ""
        xml_builder.subtitle(FeedTools.no_content_string,
            "type" => "html")
      else
        xml_builder.subtitle(description,
            "type" => "html")
      end
      if self.updated != nil
        xml_builder.updated(self.updated.iso8601)
      elsif self.time != nil
        # Not technically correct, but a heck of a lot better
        # than the Time.now fall-back.
        xml_builder.updated(self.time.iso8601)
      else
        xml_builder.updated(Time.now.iso8601)
      end
      unless self.published.nil?
        xml_builder.published(self.published.iso8601)
      end
      xml_builder.generator("FeedTools - " +
          "http://www.sporkmonger.com/projects/feedtools")
      # The id element needs a URI: use the feed id when it is one,
      # otherwise derive a urn from the link, otherwise give up.
      if self.id != nil && FeedTools.is_uri?(self.id)
        xml_builder.id(self.id)
      elsif self.link != nil
        xml_builder.id(FeedTools.build_urn_uri(self.link))
      elsif self.id != nil
        raise "The unique id must be a valid URI."
      else
        raise "Cannot build feed, missing feed unique id."
      end
      build_xml_hook(feed_type, version, xml_builder)
      write_items.call
    end
  end
end
2611
-
2612
- # Persists the current feed state to the cache.
2613
def save
  # Copies url, title, link, xml_data, last_retrieved and (when an
  # http_response exists) the YAML-dumped http_headers onto the cache
  # object, then returns the result of saving it.  Raises a RuntimeError
  # when caching is disabled or a required value is missing; the checks run
  # in the order below.
  if FeedTools.feed_cache.nil?
    raise "Caching is currently disabled. Cannot save to cache."
  end
  if self.url.nil?
    raise "The url field must be set to save to the cache."
  end
  if self.xml_data.nil?
    raise "The xml_data field must be set to save to the cache."
  end
  if self.cache_object.nil?
    raise "The cache_object is currently nil. Cannot save to cache."
  end

  record = self.cache_object
  record.url = self.url
  record.title = self.title
  record.link = self.link
  record.xml_data = self.xml_data
  record.http_headers = self.http_headers.to_yaml unless self.http_response.nil?
  record.last_retrieved = self.last_retrieved
  record.save
end
2634
-
2635
# Alternate names for existing accessors, as [alias, existing method].
[
  [:tagline,   :description],
  [:tagline=,  :description=],
  [:subtitle,  :description],
  [:subtitle=, :description=],
  [:abstract,  :description],
  [:abstract=, :description=],
  [:content,   :description],
  [:content=,  :description=],
  [:ttl,       :time_to_live],
  [:ttl=,      :time_to_live=],
  [:guid,      :id],
  [:guid=,     :id=],
  [:entries,   :items]
].each do |alias_name, existing_name|
  alias_method alias_name, existing_name
end
2648
-
2649
- # passes missing methods to the cache_object
2650
def method_missing(msg, *params, &block)
  # Forwards any unknown message to the cache object.
  #
  # msg::    the name of the missing method
  # params:: the arguments of the original call; they are splatted back out
  #          so the cache object receives them as separate arguments rather
  #          than as one array
  # block::  the block given to the original call, passed through as well
  #
  # Raises NoMethodError when no cache object is attached.
  if self.cache_object.nil?
    raise NoMethodError, "Invalid method #{msg.to_s}"
  end
  return self.cache_object.send(msg, *params, &block)
end
2656
-
2657
- # passes missing methods to the FeedTools.feed_cache
2658
def Feed.method_missing(msg, *params, &block)
  # Forwards any unknown class-level message to FeedTools.feed_cache.
  #
  # The arguments are splatted back out (and the block passed along) so
  # the cache receives them exactly as the caller wrote them.  When the
  # call returns a single feed_cache instance, it is replaced by the Feed
  # opened from that record's url; any other result is returned as is.
  #
  # Raises NoMethodError when no feed cache is configured.
  if FeedTools.feed_cache.nil?
    raise NoMethodError, "Invalid method Feed.#{msg.to_s}"
  end
  result = FeedTools.feed_cache.send(msg, *params, &block)
  if result.kind_of? FeedTools.feed_cache
    result = Feed.open(result.url)
  end
  return result
end
2668
-
2669
- # Returns a simple representation of the feed object's state.
2670
def inspect
  # Short form: class name, object id in hex, and the feed url.
  "#<FeedTools::Feed:0x%s URL:%s>" % [self.object_id.to_s(16), self.url]
end
2673
- end
2674
-
2675
- class FeedItem
2676
- include REXML
2677
- include AttributeDictionary
2678
-
2679
- # This class stores information about a feed item's file enclosures.
2680
class Enclosure
  include AttributeDictionary

  # The url for the enclosure
  attr_accessor :url
  # The MIME type of the file referenced by the enclosure
  attr_accessor :type
  # The size of the file referenced by the enclosure
  attr_accessor :file_size
  # The total play time of the file referenced by the enclosure
  attr_accessor :duration
  # The height in pixels of the enclosed media
  attr_accessor :height
  # The width in pixels of the enclosed media
  attr_accessor :width
  # The bitrate of the enclosed media
  attr_accessor :bitrate
  # The framerate of the enclosed media
  attr_accessor :framerate
  # The thumbnail for this enclosure
  attr_accessor :thumbnail
  # The categories for this enclosure
  attr_accessor :categories
  # A hash of the enclosed file
  #
  # NOTE(review): this accessor replaces Object#hash, so instances are
  # probably unsafe as Hash keys -- kept because callers rely on the name.
  attr_accessor :hash
  # A website containing some kind of media player instead of a direct
  # link to the media file.
  attr_accessor :player
  # A list of credits for the enclosed media
  attr_accessor :credits
  # A text rendition of the enclosed media
  attr_accessor :text
  # A list of alternate version of the enclosed media file
  attr_accessor :versions
  # The default version of the enclosed media file
  attr_accessor :default_version

  # Returns true if this is the default enclosure
  def is_default?
    return @is_default
  end

  # Sets whether this is the default enclosure for the media group
  def is_default=(new_is_default)
    @is_default = new_is_default
  end

  # Returns true if the enclosure contains explicit material
  def explicit?
    return @explicit
  end

  # Sets the explicit attribute on the enclosure
  def explicit=(new_explicit)
    @explicit = new_explicit
  end

  # Determines if the object is a sample, or the full version of the
  # object, or if it is a stream.
  # Possible values are 'sample', 'full', 'nonstop'.
  def expression
    return @expression
  end

  # Sets the expression attribute on the enclosure.
  # Allowed values are 'sample', 'full', 'nonstop', in any letter case; the
  # value is stored lowercased.  Anything else, including nil, raises an
  # ArgumentError.
  def expression=(new_expression)
    # to_s keeps nil from blowing up with a NoMethodError before the
    # validation below can reject it.
    normalized_expression = new_expression.to_s.downcase
    unless ['sample', 'full', 'nonstop'].include? normalized_expression
      raise ArgumentError,
        "Permitted values are 'sample', 'full', 'nonstop'."
    end
    @expression = normalized_expression
  end

  # Returns true if this enclosure contains audio content, judged first by
  # an "audio" MIME type and then by the ending of the url.
  def audio?
    unless self.type.nil?
      return true if (self.type =~ /^audio/) != nil
    end
    # TODO: create a more complete list
    # =================================
    audio_extensions = ['mp3', 'm4a', 'm4p', 'wav', 'ogg', 'wma']
    # Matched case-insensitively so that e.g. "SHOW.MP3" is recognized.
    return audio_extensions.any? do |extension|
      (url =~ /#{extension}$/i) != nil
    end
  end

  # Returns true if this enclosure contains video content, judged first by
  # a "video" (or "image/mov") MIME type and then by the ending of the url.
  def video?
    unless self.type.nil?
      return true if (self.type =~ /^video/) != nil
      return true if self.type == "image/mov"
    end
    # TODO: create a more complete list
    # =================================
    video_extensions = ['mov', 'mp4', 'avi', 'wmv', 'asf']
    # Matched case-insensitively so that e.g. "CLIP.MOV" is recognized.
    return video_extensions.any? do |extension|
      (url =~ /#{extension}$/i) != nil
    end
  end

  alias_method :link, :url
  alias_method :link=, :url=
end
2790
-
2791
- # TODO: Make these actual classes instead of structs
2792
- # ==================================================
2793
# Lightweight value holders used by Enclosure.
EnclosureHash =
  Struct.new("EnclosureHash", :hash, :type)
EnclosurePlayer =
  Struct.new("EnclosurePlayer", :url, :height, :width)
EnclosureCredit =
  Struct.new("EnclosureCredit", :name, :role)
EnclosureThumbnail =
  Struct.new("EnclosureThumbnail", :url, :height, :width)
2798
-
2799
- # Returns the parent feed of this feed item
2800
def feed
  # Plain reader; nil until a parent feed has been assigned.
  @feed
end
2803
-
2804
- # Sets the parent feed of this feed item
2805
def feed=(new_feed)
  # Stores the reference as given; nothing else is updated.
  @feed = new_feed
end
2808
-
2809
- # Returns the feed item's raw xml data.
2810
def xml_data
  # Plain reader for the raw xml the item was built from.
  @xml_data
end
2813
-
2814
- # Sets the feed item's xml data.
2815
def xml_data=(new_xml_data)
  # Only the raw data is replaced; values already parsed and memoized
  # from the previous xml are not reset here.
  @xml_data = new_xml_data
end
2818
-
2819
- # Returns a REXML Document of the xml_data
2820
def xml
  # Parses xml_data on first use and memoizes the resulting document.
  #
  # TODO: :ignore_whitespace_nodes => :all
  # Add that?
  # ======================================
  @xml_doc = Document.new(xml_data) if @xml_doc.nil?
  @xml_doc
end
2829
-
2830
- # Returns the first node within the root_node that matches the xpath query.
2831
def find_node(xpath)
  # The query is evaluated relative to root_node; nil when nothing matches.
  XPath.first(root_node, xpath)
end
2834
-
2835
- # Returns all nodes within the root_node that match the xpath query.
2836
def find_all_nodes(xpath)
  # The query is evaluated relative to root_node; the result is an array.
  XPath.match(root_node, xpath)
end
2839
-
2840
- # Returns the root node of the feed item.
2841
def root_node
  # Memoizes the root element of the parsed document.
  @root_node = xml.root if @root_node.nil?
  @root_node
end
2847
-
2848
-     # Returns the feed item's unique id
2849
def id
  # Memoized lookup: the text of the id element, falling back to the guid
  # element.  nil when neither yields any text.
  return @id unless @id.nil?
  unless root_node.nil?
    ["id/text()", "guid/text()"].each do |query|
      @id = XPath.first(root_node, query).to_s
      break unless @id == ""
    end
  end
  @id = nil if @id == ""
  @id
end
2861
-
2862
- # Sets the feed item's unique id
2863
def id=(new_id)
  # A non-nil value stored here is returned by id without consulting
  # the xml.
  @id = new_id
end
2866
-
2867
- # Returns the feed item title
2868
def title
  # Lazily extracts, cleans and memoizes the item's title.  Returns nil
  # when there is no title element or the cleaned title is empty.
  return @title unless @title.nil?

  repair_entities = false
  title_node = nil
  unless root_node.nil?
    # First match wins: plain, Dublin Core, then upper-case element name.
    ["title", "dc:title", "TITLE"].each do |query|
      title_node = XPath.first(root_node, query)
      break unless title_node.nil?
    end
  end
  return nil if title_node.nil?

  content_type = XPath.first(title_node, "@type").to_s
  content_mode = XPath.first(title_node, "@mode").to_s
  if content_type == "xhtml" || content_mode == "xhtml" ||
      content_type == "xml" || content_mode == "xml" ||
      content_type == "application/xhtml+xml"
    # Inline markup: take it verbatim.
    @title = title_node.inner_xml
  elsif content_type == "escaped" || content_mode == "escaped"
    @title = FeedTools.unescape_entities(
      XPath.first(title_node, "text()").to_s)
  else
    # Undeclared content: entities are unescaped after sanitizing.
    @title = title_node.inner_xml
    repair_entities = true
  end

  unless @title.nil?
    @title = FeedTools.sanitize_html(@title, :strip)
    @title = FeedTools.unescape_entities(@title) if repair_entities
    @title = FeedTools.tidy_html(@title)
  end
  if @title != ""
    # Some blogging tools include the number of comments in a post
    # in the title... this is supremely ugly, and breaks any
    # applications which expect the title to be static, so we're
    # gonna strip them out.
    #
    # If for some incredibly weird reason you need the actual
    # unstripped title, just use find_node("title/text()").to_s
    @title = @title.strip.gsub(/\[\d*\]$/, "").strip
  end
  @title.gsub!(/\n/, " ")
  @title.strip!
  @title = nil if @title == ""
  @title
end
2918
-
2919
- # Sets the feed item title
2920
def title=(new_title)
  # A non-nil value stored here is returned by title without consulting
  # the xml.
  @title = new_title
end
2923
-
2924
- # Returns the feed item description
2925
def description
  # Lazily extracts, cleans and memoizes the item's description.  Returns
  # nil when no content element is found or the cleaned text is empty.
  return @description unless @description.nil?

  repair_entities = false
  description_node = nil
  unless root_node.nil?
    # Candidate elements, in order of preference; the first match wins.
    content_queries = [
      "content:encoded", "content", "fullitem", "xhtml:body", "body",
      "description", "tagline", "subtitle", "summary", "abstract",
      "ABSTRACT"
    ]
    content_queries.each do |query|
      description_node = XPath.first(root_node, query)
      break unless description_node.nil?
    end
    if description_node.nil?
      # Last resort; finding content here flags the item as bozo.
      description_node = XPath.first(root_node, "info")
      @bozo = true unless description_node.nil?
    end
  end
  return nil if description_node.nil?

  content_type = XPath.first(description_node, "@type").to_s
  content_mode = XPath.first(description_node, "@mode").to_s
  if XPath.first(description_node, "@encoding").to_s != ""
    @description =
      "[Embedded data objects are not currently supported.]"
  elsif content_type == "xhtml" || content_mode == "xhtml" ||
      content_type == "xml" || content_mode == "xml" ||
      content_type == "application/xhtml+xml"
    @description = description_node.inner_xml
  elsif content_type == "escaped" || content_mode == "escaped"
    @description = FeedTools.unescape_entities(
      description_node.inner_xml)
  else
    # Undeclared content: entities are unescaped after sanitizing.
    @description = description_node.inner_xml
    repair_entities = true
  end

  # An empty element falls back to the itunes summary, then the subtitle.
  if @description == ""
    @description = self.itunes_summary
    @description = "" if @description.nil?
  end
  if @description == ""
    @description = self.itunes_subtitle
    @description = "" if @description.nil?
  end

  unless @description.nil?
    @description = FeedTools.sanitize_html(@description, :strip)
    @description = FeedTools.unescape_entities(@description) if repair_entities
    @description = FeedTools.tidy_html(@description)
  end

  @description = @description.strip unless @description.nil?
  @description = nil if @description == ""
  @description
end
3008
-
3009
- # Sets the feed item description
3010
def description=(new_description)
  # A non-nil value stored here is returned by description without
  # consulting the xml.
  @description = new_description
end
3013
-
3014
- # Returns the contents of the itunes:summary element
3015
def itunes_summary
  # Memoized: unescaped, sanitized text of itunes:summary, or nil when the
  # element is missing or empty.
  return @itunes_summary unless @itunes_summary.nil?
  raw_summary = XPath.first(root_node, "itunes:summary/text()").to_s
  @itunes_summary = FeedTools.unescape_entities(raw_summary)
  @itunes_summary = nil if @itunes_summary == ""
  unless @itunes_summary.nil?
    @itunes_summary = FeedTools.sanitize_html(@itunes_summary)
  end
  @itunes_summary
end
3028
-
3029
- # Sets the contents of the itunes:summary element
3030
def itunes_summary=(new_itunes_summary)
  # A non-nil value stored here is returned by itunes_summary without
  # consulting the xml.
  @itunes_summary = new_itunes_summary
end
3033
-
3034
- # Returns the contents of the itunes:subtitle element
3035
def itunes_subtitle
  # Memoized: unescaped, sanitized text of itunes:subtitle, or nil when the
  # element is missing or empty.
  return @itunes_subtitle unless @itunes_subtitle.nil?
  raw_subtitle = XPath.first(root_node, "itunes:subtitle/text()").to_s
  @itunes_subtitle = FeedTools.unescape_entities(raw_subtitle)
  @itunes_subtitle = nil if @itunes_subtitle == ""
  unless @itunes_subtitle.nil?
    @itunes_subtitle = FeedTools.sanitize_html(@itunes_subtitle)
  end
  @itunes_subtitle
end
3048
-
3049
- # Sets the contents of the itunes:subtitle element
3050
def itunes_subtitle=(new_itunes_subtitle)
  # A non-nil value stored here is returned by itunes_subtitle without
  # consulting the xml.
  @itunes_subtitle = new_itunes_subtitle
end
3053
-
3054
- # Returns the contents of the media:text element
3055
def media_text
  # Memoized: unescaped, sanitized text of the item's media:text element,
  # or nil when the element is missing or empty.
  #
  # The query targets media:text itself; it previously read
  # itunes:subtitle, so this method returned the subtitle instead.
  if @media_text.nil?
    @media_text = FeedTools.unescape_entities(XPath.first(root_node,
      "media:text/text()").to_s)
    if @media_text == ""
      @media_text = nil
    end
    unless @media_text.nil?
      @media_text = FeedTools.sanitize_html(@media_text)
    end
  end
  return @media_text
end
3068
-
3069
- # Sets the contents of the media:text element
3070
def media_text=(new_media_text)
  # A non-nil value stored here is returned by media_text without
  # consulting the xml.
  @media_text = new_media_text
end
3073
-
3074
- # Returns the feed item link
3075
def link
  # Lazily resolves and memoizes the item's link.  The xml is searched
  # with the queries below (first non-empty result wins); failing that, the
  # guid is used when it is a URI.  The value is then unescaped and run
  # through FeedTools.normalize_url.
  return @link unless @link.nil?

  unless root_node.nil?
    link_queries = [
      "link[@rel='alternate']/@href",
      "link/@href",
      "link/text()",
      "@rdf:about",
      "guid[@isPermaLink='true']/text()",
      "@href",
      "a/@href",
      "@HREF",
      "A/@HREF"
    ]
    link_queries.each do |query|
      @link = XPath.first(root_node, query).to_s
      break unless @link == ""
    end
  end
  if @link == "" || @link.nil?
    @link = self.guid if FeedTools.is_uri?(self.guid)
  end
  @link = FeedTools.unescape_entities(@link) if @link != ""
  # TODO: Actually implement proper relative url resolving instead of this crap
  # ===========================================================================
  #
  # if @link != "" && (@link =~ /http:\/\//) != 0 && (@link =~ /https:\/\//) != 0
  #   if (feed.base[-1..-1] == "/" && @link[0..0] == "/")
  #     @link = @link[1..-1]
  #   end
  #   # prepend the base to the link since they seem to have used a relative path
  #   @link = feed.base + @link
  # end
  @link = FeedTools.normalize_url(@link)
  @link
end
3126
-
3127
- # Sets the feed item link
3128
def link=(new_link)
  # Stored as given (no normalization); a non-nil value is returned by
  # link without consulting the xml.
  @link = new_link
end
3131
-
3132
- # Returns a list of the feed item's categories
3133
- def categories
3134
- if @categories.nil?
3135
- @categories = []
3136
- category_nodes = XPath.match(root_node, "category")
3137
- if category_nodes.nil? || category_nodes.empty?
3138
- category_nodes = XPath.match(root_node, "dc:subject")
3139
- end
3140
- unless category_nodes.nil?
3141
- for category_node in category_nodes
3142
- category = FeedTools::Feed::Category.new
3143
- category.term = XPath.first(category_node, "@term").to_s
3144
- if category.term == ""
3145
- category.term = XPath.first(category_node, "text()").to_s
3146
- end
3147
- category.term.strip! unless category.term.nil?
3148
- category.term = nil if category.term == ""
3149
- category.label = XPath.first(category_node, "@label").to_s
3150
- category.label.strip! unless category.label.nil?
3151
- category.label = nil if category.label == ""
3152
- category.scheme = XPath.first(category_node, "@scheme").to_s
3153
- if category.scheme == ""
3154
- category.scheme = XPath.first(category_node, "@domain").to_s
3155
- end
3156
- category.scheme.strip! unless category.scheme.nil?
3157
- category.scheme = nil if category.scheme == ""
3158
- @categories << category
3159
- end
3160
- end
3161
- end
3162
- return @categories
3163
- end
3164
-
3165
-     # Returns a list of the feed item's images
3166
- def images
3167
- if @images.nil?
3168
- @images = []
3169
- image_nodes = XPath.match(root_node, "link")
3170
- if image_nodes.nil? || image_nodes.empty?
3171
- image_nodes = XPath.match(root_node, "logo")
3172
- end
3173
- if image_nodes.nil? || image_nodes.empty?
3174
- image_nodes = XPath.match(root_node, "LOGO")
3175
- end
3176
- if image_nodes.nil? || image_nodes.empty?
3177
- image_nodes = XPath.match(root_node, "image")
3178
- end
3179
- unless image_nodes.nil?
3180
- for image_node in image_nodes
3181
- image = FeedTools::Feed::Image.new
3182
- image.url = XPath.first(image_node, "url/text()").to_s
3183
- if image.url != ""
3184
- self.feed.bozo = true
3185
- end
3186
- if image.url == ""
3187
- image.url = XPath.first(image_node, "@rdf:resource").to_s
3188
- end
3189
- if image.url == "" && (image_node.name == "logo" ||
3190
- (image_node.attributes['type'] =~ /^image/) == 0)
3191
- image.url = XPath.first(image_node, "@href").to_s
3192
- end
3193
- if image.url == "" && image_node.name == "LOGO"
3194
- image.url = XPath.first(image_node, "@HREF").to_s
3195
- end
3196
- image.url.strip! unless image.url.nil?
3197
- image.url = nil if image.url == ""
3198
- image.title = XPath.first(image_node, "title/text()").to_s
3199
- image.title.strip! unless image.title.nil?
3200
- image.title = nil if image.title == ""
3201
- image.description =
3202
- XPath.first(image_node, "description/text()").to_s
3203
- image.description.strip! unless image.description.nil?
3204
- image.description = nil if image.description == ""
3205
- image.link = XPath.first(image_node, "link/text()").to_s
3206
- image.link.strip! unless image.link.nil?
3207
- image.link = nil if image.link == ""
3208
- image.height = XPath.first(image_node, "height/text()").to_s.to_i
3209
- image.height = nil if image.height <= 0
3210
- image.width = XPath.first(image_node, "width/text()").to_s.to_i
3211
- image.width = nil if image.width <= 0
3212
- image.style = XPath.first(image_node, "@style").to_s.downcase
3213
- if image.style == ""
3214
- image.style = XPath.first(image_node, "@STYLE").to_s.downcase
3215
- end
3216
- image.style.strip! unless image.style.nil?
3217
- image.style = nil if image.style == ""
3218
- @images << image
3219
- end
3220
- end
3221
- end
3222
- return @images
3223
- end
3224
-
3225
- # Returns the feed item itunes image link
3226
- #
3227
- # If it's not present, falls back to the normal image link.
3228
- # Technically, the itunes spec says that the image needs to be
3229
- # square and larger than 300x300, but hey, if there's an image
3230
- # to be had, it's better than none at all.
3231
def itunes_image_link
  # Memoized: the href of itunes:image, or else of an itunes:link with
  # rel='image', passed through FeedTools.normalize_url.
  if @itunes_image_link.nil?
    href = XPath.first(root_node, "itunes:image/@href").to_s
    if href == ""
      href = XPath.first(root_node, "itunes:link[@rel='image']/@href").to_s
    end
    @itunes_image_link = FeedTools.normalize_url(href)
  end
  @itunes_image_link
end
3242
-
3243
- # Sets the feed item itunes image link
3244
def itunes_image_link=(new_itunes_image_link)
  # Stored as given; a non-nil value is returned by itunes_image_link
  # without consulting the xml.
  @itunes_image_link = new_itunes_image_link
end
3247
-
3248
- # Returns the feed item media thumbnail link
3249
- #
3250
-     # Only the media:thumbnail url is consulted; no fallback to the normal image link is implemented.
3251
def media_thumbnail_link
  # Memoized: the url attribute of media:thumbnail, passed through
  # FeedTools.normalize_url.
  if @media_thumbnail_link.nil?
    thumbnail_url = XPath.first(root_node, "media:thumbnail/@url").to_s
    @media_thumbnail_link = FeedTools.normalize_url(thumbnail_url)
  end
  @media_thumbnail_link
end
3259
-
3260
- # Sets the feed item media thumbnail url
3261
def media_thumbnail_link=(new_media_thumbnail_link)
  # Stored as given; a non-nil value is returned by media_thumbnail_link
  # without consulting the xml.
  @media_thumbnail_link = new_media_thumbnail_link
end
3264
-
3265
- # Returns the feed item's copyright information
3266
def copyright
  # Memoized: the sanitized text of the first non-empty element among
  # dc:rights, rights, copyright and copyrights; nil when none has text.
  return @copyright unless @copyright.nil?
  unless root_node.nil?
    rights_queries = [
      "dc:rights/text()", "rights/text()",
      "copyright/text()", "copyrights/text()"
    ]
    rights_queries.each do |query|
      @copyright = XPath.first(root_node, query).to_s
      break unless @copyright == ""
    end
    @copyright = FeedTools.sanitize_html(@copyright, :strip)
    @copyright = nil if @copyright == ""
  end
  @copyright
end
3285
-
3286
- # Sets the feed item's copyright information
3287
def copyright=(new_copyright)
  # A non-nil value stored here is returned by copyright without
  # consulting the xml.
  @copyright = new_copyright
end
3290
-
3291
# Returns all feed item enclosures.
#
# On first use the list is built from four sources on the item node: RSS
# <enclosure> elements, Atom <link rel='enclosure'> elements, standalone
# media:content elements, and media:content elements nested in
# media:group. Enclosures sharing a url with one already collected are
# merged into it (RSS enclosures themselves are never de-duplicated).
#
# Enclosures that belong to a media:group are collapsed: only one
# representative per group is returned, and every member gets its
# +versions+ (the other members) and +default_version+ set.
#
# On every call, if exactly one enclosure is present and it has no
# duration, the item's itunes_duration is copied onto it.
#
# Fixes relative to the previous implementation:
# * the text fallback compared with "=" instead of "==", so every
#   enclosure's text was unconditionally replaced by itunes_summary;
# * an empty-string duration (media:content stores the attribute as a
#   string) is now cleared to nil like the other blank attributes;
# * a media:group without a default member no longer appends nil to the
#   list; the group's first member is returned instead.
def enclosures
  if @enclosures.nil?
    @enclosures = []

    # First, load up all the different possible sources of enclosures
    rss_enclosures = XPath.match(root_node, "enclosure")
    atom_enclosures = XPath.match(root_node, "link[@rel='enclosure']")
    media_content_enclosures = XPath.match(root_node, "media:content")
    media_group_enclosures = XPath.match(root_node, "media:group")

    # Builds a thumbnail from the media:thumbnail child of the given node.
    build_thumbnail = lambda do |node|
      thumbnail = EnclosureThumbnail.new(
        FeedTools.unescape_entities(
          XPath.first(node, "media:thumbnail/@url").to_s),
        FeedTools.unescape_entities(
          XPath.first(node, "media:thumbnail/@height").to_s),
        FeedTools.unescape_entities(
          XPath.first(node, "media:thumbnail/@width").to_s)
      )
      thumbnail.height = nil if thumbnail.height == ""
      thumbnail.width = nil if thumbnail.width == ""
      thumbnail
    end

    # Builds a player from the media:player child of the given node.
    build_player = lambda do |node|
      player = EnclosurePlayer.new(
        FeedTools.unescape_entities(
          XPath.first(node, "media:player/@url").to_s),
        FeedTools.unescape_entities(
          XPath.first(node, "media:player/@height").to_s),
        FeedTools.unescape_entities(
          XPath.first(node, "media:player/@width").to_s)
      )
      player.height = nil if player.height == ""
      player.width = nil if player.width == ""
      player
    end

    # Builds one category per media:category child of the given node.
    build_categories = lambda do |node|
      XPath.match(node, "media:category").map do |category_node|
        category = FeedTools::Feed::Category.new
        category.term = FeedTools.unescape_entities(category_node.text)
        category.scheme = FeedTools.unescape_entities(
          category_node.attributes["scheme"].to_s)
        category.label = FeedTools.unescape_entities(
          category_node.attributes["label"].to_s)
        category.scheme = nil if category.scheme == ""
        category.label = nil if category.label == ""
        category
      end
    end

    # Builds one credit per media:credit child of the given node.
    build_credits = lambda do |node|
      XPath.match(node, "media:credit").map do |credit_node|
        credit = EnclosureCredit.new(
          FeedTools.unescape_entities(credit_node.text),
          FeedTools.unescape_entities(
            credit_node.attributes["role"].to_s.downcase)
        )
        credit.role = nil if credit.role == ""
        credit
      end
    end

    # Parse RSS-type enclosures. Thanks to a few buggy enclosures
    # implementations, sometimes these also manage to show up in atom files.
    rss_enclosures.each do |enclosure_node|
      enclosure = Enclosure.new
      enclosure.url = FeedTools.unescape_entities(
        enclosure_node.attributes["url"].to_s)
      enclosure.type = enclosure_node.attributes["type"].to_s
      enclosure.file_size = enclosure_node.attributes["length"].to_i
      enclosure.credits = []
      enclosure.explicit = false
      @enclosures << enclosure
    end

    # Parse atom-type enclosures. If there are repeats of the same
    # enclosure object, we merge the two together.
    atom_enclosures.each do |enclosure_node|
      enclosure_url = FeedTools.unescape_entities(
        enclosure_node.attributes["href"].to_s)
      enclosure = @enclosures.find do |existing|
        existing.url == enclosure_url
      end
      new_enclosure = enclosure.nil?
      enclosure = Enclosure.new if new_enclosure
      enclosure.url = enclosure_url
      enclosure.type = enclosure_node.attributes["type"].to_s
      enclosure.file_size = enclosure_node.attributes["length"].to_i
      enclosure.credits = []
      enclosure.explicit = false
      @enclosures << enclosure if new_enclosure
    end

    # Parses content objects from the media module. Kept as a lambda
    # because identical processing is needed for content objects inside
    # group objects. Returns the enclosures it created or updated.
    parse_media_content = lambda do |media_content_nodes|
      affected_enclosures = []
      media_content_nodes.each do |content_node|
        content_url = FeedTools.unescape_entities(
          content_node.attributes["url"].to_s)
        enclosure = @enclosures.find do |existing|
          existing.url == content_url
        end
        new_enclosure = enclosure.nil?
        enclosure = Enclosure.new if new_enclosure

        enclosure.url = content_url
        enclosure.type = content_node.attributes["type"].to_s
        enclosure.file_size = content_node.attributes["fileSize"].to_i
        enclosure.duration = content_node.attributes["duration"].to_s
        enclosure.height = content_node.attributes["height"].to_i
        enclosure.width = content_node.attributes["width"].to_i
        enclosure.bitrate = content_node.attributes["bitrate"].to_i
        enclosure.framerate = content_node.attributes["framerate"].to_i
        enclosure.expression = content_node.attributes["expression"].to_s
        enclosure.is_default =
          (content_node.attributes["isDefault"].to_s.downcase == "true")

        if XPath.first(content_node, "media:thumbnail/@url").to_s != ""
          enclosure.thumbnail = build_thumbnail.call(content_node)
        end
        enclosure.categories = build_categories.call(content_node)
        if XPath.first(content_node, "media:hash/text()").to_s != ""
          enclosure.hash = EnclosureHash.new(
            FeedTools.sanitize_html(FeedTools.unescape_entities(XPath.first(
              content_node, "media:hash/text()").to_s), :strip),
            "md5"
          )
        end
        if XPath.first(content_node, "media:player/@url").to_s != ""
          enclosure.player = build_player.call(content_node)
        end
        enclosure.credits = build_credits.call(content_node)
        enclosure.explicit = (XPath.first(content_node,
          "media:adult/text()").to_s.downcase == "true")
        if XPath.first(content_node, "media:text/text()").to_s != ""
          enclosure.text = FeedTools.unescape_entities(
            XPath.first(content_node, "media:text/text()").to_s)
        end

        affected_enclosures << enclosure
        @enclosures << enclosure if new_enclosure
      end
      affected_enclosures
    end

    # Parse the independent content objects.
    parse_media_content.call(media_content_enclosures)

    media_groups = []

    # Parse the group objects.
    media_group_enclosures.each do |media_group|
      # Parse the content objects within the group objects.
      group_members = parse_media_content.call(
        XPath.match(media_group, "media:content"))

      # Now make sure that content objects inherit certain properties
      # from the group objects when they did not supply their own.
      group_members.each do |enclosure|
        if enclosure.thumbnail.nil? &&
            XPath.first(media_group, "media:thumbnail/@url").to_s != ""
          enclosure.thumbnail = build_thumbnail.call(media_group)
        end
        if enclosure.categories.nil? || enclosure.categories.size == 0
          enclosure.categories = build_categories.call(media_group)
        end
        if enclosure.hash.nil? &&
            XPath.first(media_group, "media:hash/text()").to_s != ""
          enclosure.hash = EnclosureHash.new(
            FeedTools.unescape_entities(
              XPath.first(media_group, "media:hash/text()").to_s),
            "md5"
          )
        end
        if enclosure.player.nil? &&
            XPath.first(media_group, "media:player/@url").to_s != ""
          enclosure.player = build_player.call(media_group)
        end
        if enclosure.credits.nil? || enclosure.credits.size == 0
          enclosure.credits = build_credits.call(media_group)
        end
        if enclosure.explicit?.nil?
          enclosure.explicit = (XPath.first(media_group,
            "media:adult/text()").to_s.downcase == "true") ? true : false
        end
        if enclosure.text.nil? &&
            XPath.first(media_group, "media:text/text()").to_s != ""
          enclosure.text = FeedTools.sanitize_html(
            FeedTools.unescape_entities(
              XPath.first(media_group, "media:text/text()").to_s), :strip)
        end
      end

      # Keep track of the media groups
      media_groups << group_members
    end

    # Now we need to inherit any relevant item level information.
    if self.explicit?
      @enclosures.each do |enclosure|
        enclosure.explicit = true
      end
    end

    # Add all the itunes categories
    XPath.match(root_node, "itunes:category").each do |itunes_category|
      category = itunes_category.attributes["text"].to_s
      subcategory =
        XPath.first(itunes_category, "itunes:category/@text").to_s
      path_parts = ["Podcasts", category, subcategory].reject do |part|
        part == ""
      end
      category_path = path_parts.join("/")
      @enclosures.each do |enclosure|
        enclosure.categories = [] if enclosure.categories.nil?
        enclosure.categories << EnclosureCategory.new(
          FeedTools.unescape_entities(category_path),
          FeedTools.unescape_entities("http://www.apple.com/itunes/store/"),
          FeedTools.unescape_entities("iTunes Music Store Categories")
        )
      end
    end

    @enclosures.each do |enclosure|
      # Clean up any of those attributes that incorrectly have ""
      # or 0 as their values
      enclosure.type = nil if enclosure.type == ""
      enclosure.file_size = nil if enclosure.file_size == 0
      # media:content durations are stored as strings, so a missing
      # attribute shows up as "" rather than 0.
      if enclosure.duration == 0 || enclosure.duration == ""
        enclosure.duration = nil
      end
      enclosure.height = nil if enclosure.height == 0
      enclosure.width = nil if enclosure.width == 0
      enclosure.bitrate = nil if enclosure.bitrate == 0
      enclosure.framerate = nil if enclosure.framerate == 0
      if enclosure.expression == "" || enclosure.expression.nil?
        enclosure.expression = "full"
      end

      # If an enclosure is missing the text field, fall back on the
      # itunes:summary field
      if enclosure.text.nil? || enclosure.text == ""
        enclosure.text = self.itunes_summary
      end

      # Make sure we don't have duplicate categories
      enclosure.categories.uniq! unless enclosure.categories.nil?
    end

    # And finally, make sure that the enclosures method only returns
    # either default enclosures or enclosures with only one version. Any
    # enclosures that are wrapped in a media:group are placed in the
    # appropriate versions field.
    affected_enclosure_urls = []
    media_groups.each do |group_members|
      affected_enclosure_urls =
        affected_enclosure_urls | group_members.map { |member| member.url }
    end
    @enclosures.delete_if do |enclosure|
      affected_enclosure_urls.include?(enclosure.url)
    end
    media_groups.each do |group_members|
      default_enclosure = nil
      group_members.each do |member|
        default_enclosure = member if member.is_default?
      end
      group_members.each do |member|
        member.default_version = default_enclosure
        member.versions = group_members.clone
        member.versions.delete(member)
      end
      # A group is not required to flag a default; never append nil.
      representative = default_enclosure || group_members.first
      @enclosures << representative unless representative.nil?
    end
  end

  # If we have a single enclosure, it's safe to inherit the
  # itunes:duration field if it's missing.
  if @enclosures.size == 1
    if @enclosures.first.duration.nil? || @enclosures.first.duration == 0
      @enclosures.first.duration = self.itunes_duration
    end
  end

  return @enclosures
end
3654
-
3655
# Replaces the cached list of enclosures for this feed item.
def enclosures=(new_enclosures)
  @enclosures = new_enclosures
end
3658
-
3659
# Returns the feed item author.
#
# On first use an Author object is built from the first of these child
# elements that exists: author, managingEditor, dc:author, dc:creator,
# atom:author. The element's own text is stored as +raw+ and, when it
# looks like "Name (email)" or "email (Name)", split into name and email.
# Anything still missing is then read from name/email/url child elements
# or attributes. If no name was found, itunes_author is used; any error
# raised while fetching it leaves the name nil.
#
# Fix: previously only the name was normalized from nil to "" before the
# child-element fallbacks, so the email and url fallbacks (which compare
# against "") never ran and structured authors lost their email and url.
def author
  if @author.nil?
    @author = FeedTools::Feed::Author.new
    author_node = nil
    unless root_node.nil?
      author_paths =
        ["author", "managingEditor", "dc:author", "dc:creator", "atom:author"]
      author_paths.each do |xpath|
        author_node = XPath.first(root_node, xpath)
        break unless author_node.nil?
      end
    end
    unless author_node.nil?
      @author.raw = FeedTools.unescape_entities(
        XPath.first(author_node, "text()").to_s)
      @author.raw = nil if @author.raw == ""
      unless @author.raw.nil?
        author_raw_pair = nil
        # "Name (email)"
        raw_scan = @author.raw.scan(
          /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
        if raw_scan.nil? || raw_scan.size == 0
          # "email (Name)"
          raw_scan = @author.raw.scan(
            /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
          author_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0
        else
          author_raw_pair = raw_scan.first
        end
        if raw_scan.nil? || raw_scan.size == 0
          # Neither paired form matched; look for a bare email address.
          email_scan = @author.raw.scan(
            /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
          if !email_scan.nil? && email_scan.size > 0
            @author.email = email_scan.first.strip
          end
        end
        if author_raw_pair.nil? || author_raw_pair.size == 0
          unless @author.raw.include?("@")
            # We can be reasonably sure we are looking at something
            # that the creator didn't intend to contain an email address
            # if it got through the preceding regexes and it doesn't
            # contain the tell-tale '@' symbol.
            @author.name = @author.raw
          end
        else
          @author.name = author_raw_pair.first.strip
          @author.email = author_raw_pair.last.strip
        end
      end

      # Normalize every field to "" so the fallbacks below all trigger
      # for values that were never set.
      @author.name = "" if @author.name.nil?
      @author.email = "" if @author.email.nil?
      @author.url = "" if @author.url.nil?

      if @author.name == ""
        @author.name = FeedTools.unescape_entities(
          XPath.first(author_node, "name/text()").to_s)
      end
      if @author.name == ""
        @author.name = FeedTools.unescape_entities(
          XPath.first(author_node, "@name").to_s)
      end
      if @author.email == ""
        @author.email = FeedTools.unescape_entities(
          XPath.first(author_node, "email/text()").to_s)
      end
      if @author.email == ""
        @author.email = FeedTools.unescape_entities(
          XPath.first(author_node, "@email").to_s)
      end
      if @author.url == ""
        @author.url = FeedTools.unescape_entities(
          XPath.first(author_node, "url/text()").to_s)
      end
      if @author.url == ""
        @author.url = FeedTools.unescape_entities(
          XPath.first(author_node, "@url").to_s)
      end
      @author.name = nil if @author.name == ""
      @author.raw = nil if @author.raw == ""
      @author.email = nil if @author.email == ""
      @author.url = nil if @author.url == ""
    end
    # Fallback on the itunes module if we didn't find an author name
    begin
      @author.name = self.itunes_author if @author.name.nil?
    rescue
      @author.name = nil
    end
  end
  return @author
end
3751
-
3752
# Sets the feed item author.
#
# A value that responds to name, email and url is treated as a complete
# author object and stored as-is. Anything else (most likely a string) is
# assigned as the name of the current author object, which is created
# first when none exists yet.
def author=(new_author)
  author_like = [:name, :email, :url].all? do |field|
    new_author.respond_to?(field)
  end
  if author_like
    @author = new_author
  else
    @author = FeedTools::Feed::Author.new if @author.nil?
    @author.name = new_author
  end
end
3768
-
3769
# Returns the feed publisher.
#
# On first use an Author object is built from the dc:publisher text, or
# from the webMaster text when dc:publisher is empty. The text is kept as
# +raw+ and, when it looks like "Name (email)" or "email (Name)", split
# into name and email; a bare email address is stored as the email alone,
# and text without any '@' is used as the name. Blank fields end up nil.
def publisher
  return @publisher unless @publisher.nil?

  @publisher = FeedTools::Feed::Author.new

  # Set the author name
  @publisher.raw = FeedTools.unescape_entities(
    XPath.first(root_node, "dc:publisher/text()").to_s)
  if @publisher.raw == ""
    @publisher.raw = FeedTools.unescape_entities(
      XPath.first(root_node, "webMaster/text()").to_s)
  end

  if @publisher.raw != ""
    name_email_pair = nil
    # "Name (email)"
    matches = @publisher.raw.scan(
      /(.*)\((\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\)/i)
    if matches.nil? || matches.size == 0
      # "email (Name)"
      matches = @publisher.raw.scan(
        /(\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b)\s*\((.*)\)/i)
      name_email_pair = matches.first.reverse if matches.size != 0
    else
      name_email_pair = matches.first
    end

    if matches.nil? || matches.size == 0
      # Neither paired form matched; look for a bare email address.
      bare_emails = @publisher.raw.scan(
        /\b[A-Z0-9._%-\+]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b/i)
      if !bare_emails.nil? && bare_emails.size > 0
        @publisher.email = bare_emails.first.strip
      end
    end

    if name_email_pair.nil? || name_email_pair.size == 0
      # Text that slipped past the regexes above and has no '@' was
      # almost certainly never meant to contain an email address.
      @publisher.name = @publisher.raw unless @publisher.raw.include?("@")
    else
      @publisher.name = name_email_pair.first.strip
      @publisher.email = name_email_pair.last.strip
    end
  end

  @publisher.name = nil if @publisher.name == ""
  @publisher.raw = nil if @publisher.raw == ""
  @publisher.email = nil if @publisher.email == ""
  @publisher.url = nil if @publisher.url == ""
  @publisher
end
3821
-
3822
# Sets the feed publisher.
#
# A value that responds to name, email and url is treated as a complete
# Author object and stored as-is. Anything else (most likely a string) is
# assigned as the name of the current publisher object, which is created
# first when none exists yet.
def publisher=(new_publisher)
  author_like = [:name, :email, :url].all? do |field|
    new_publisher.respond_to?(field)
  end
  if author_like
    @publisher = new_publisher
  else
    @publisher = FeedTools::Feed::Author.new if @publisher.nil?
    @publisher.name = new_publisher
  end
end
3838
-
3839
# Returns the contents of the itunes:author element.
#
# When the item has no itunes:author text of its own, the value is taken
# from the owning feed's itunes_author instead (channel-level placement
# of this element is a common authoring mistake). An empty result is
# returned as nil. The value is cached after the first lookup.
def itunes_author
  return @itunes_author unless @itunes_author.nil?

  item_level_text = XPath.first(root_node, "itunes:author/text()").to_s
  @itunes_author = FeedTools.unescape_entities(item_level_text)
  if @itunes_author == ""
    @itunes_author = feed.itunes_author
  end
  @itunes_author = nil if @itunes_author == ""
  @itunes_author
end
3852
-
3853
# Stores the given value as the contents of the itunes:author element.
def itunes_author=(new_itunes_author)
  @itunes_author = new_itunes_author
end
3857
-
3858
# Returns the number of seconds that the associated media runs for.
#
# The itunes:duration text is split on ':' and read as "H:M:S", "M:S" or
# plain seconds. The result is cached. Returns nil when the element is
# missing or empty, or when it has a number of fields other than 1-3.
#
# The conversion uses plain integer arithmetic rather than the
# Integer#hour / Integer#minute extension methods, so it does not depend
# on those extensions being loaded and always yields an Integer.
def itunes_duration
  if @itunes_duration.nil?
    raw_duration = FeedTools.unescape_entities(XPath.first(root_node,
      "itunes:duration/text()").to_s)
    if raw_duration != ""
      fields = raw_duration.split(":").map { |field| field.to_i }
      case fields.size
      when 3
        @itunes_duration = fields[0] * 3600 + fields[1] * 60 + fields[2]
      when 2
        @itunes_duration = fields[0] * 60 + fields[1]
      when 1
        @itunes_duration = fields[0]
      end
    end
  end
  return @itunes_duration
end
3876
-
3877
# Stores the number of seconds that the associated media runs for.
def itunes_duration=(new_itunes_duration)
  @itunes_duration = new_itunes_duration
end
3881
-
3882
# Returns the feed item time.
#
# The first non-empty text among pubDate, dc:date, issued, updated and
# time (checked in that order) is parsed with Time.parse and cached. The
# current time is used instead when parsing fails or when there is no
# root node at all. When a root node exists but none of the elements has
# text, nothing is cached and nil is returned.
def time
  return @time unless @time.nil?

  time_string = nil
  unless root_node.nil?
    candidate_paths = [
      "pubDate/text()",
      "dc:date/text()",
      "issued/text()",
      "updated/text()",
      "time/text()"
    ]
    candidate_paths.each do |xpath|
      time_string = XPath.first(root_node, xpath).to_s
      break unless time_string == ""
    end
  end

  if time_string.nil?
    @time = Time.now
  elsif time_string != ""
    begin
      @time = Time.parse(time_string)
    rescue
      @time = Time.now
    end
  end
  @time
end
3908
-
3909
# Stores the given value as the feed item time.
def time=(new_time)
  @time = new_time
end
3913
-
3914
# Returns the feed item updated time.
#
# The updated text, or the modified text when updated is empty, is parsed
# with Time.parse and cached. Returns nil when there is no root node, no
# text, or the text cannot be parsed.
def updated
  return @updated unless @updated.nil?

  updated_string = nil
  unless root_node.nil?
    ["updated/text()", "modified/text()"].each do |xpath|
      updated_string = XPath.first(root_node, xpath).to_s
      break unless updated_string == ""
    end
  end

  if updated_string.nil? || updated_string == ""
    @updated = nil
  else
    begin
      @updated = Time.parse(updated_string)
    rescue
      @updated = nil
    end
  end
  @updated
end
3931
-
3932
# Stores the given value as the feed item updated time.
def updated=(new_updated)
  @updated = new_updated
end
3936
-
3937
# Returns the feed item issued time.
#
# The first non-empty text among issued, published, pubDate and dc:date
# (checked in that order) is parsed with Time.parse and cached. Returns
# nil when there is no root node, no text, or the text cannot be parsed.
def issued
  return @issued unless @issued.nil?

  issued_string = nil
  unless root_node.nil?
    candidate_paths = [
      "issued/text()",
      "published/text()",
      "pubDate/text()",
      "dc:date/text()"
    ]
    candidate_paths.each do |xpath|
      issued_string = XPath.first(root_node, xpath).to_s
      break unless issued_string == ""
    end
  end

  if issued_string.nil? || issued_string == ""
    @issued = nil
  else
    begin
      @issued = Time.parse(issued_string)
    rescue
      @issued = nil
    end
  end
  @issued
end
3960
-
3961
# Stores the given value as the feed item issued time.
def issued=(new_issued)
  @issued = new_issued
end
3965
-
3966
# Returns the url for posting comments.
#
# The comments element text is passed through FeedTools.normalize_url and
# cached; an empty result is returned as nil.
def comments
  return @comments unless @comments.nil?

  comments_text = XPath.first(root_node, "comments/text()").to_s
  @comments = FeedTools.normalize_url(comments_text)
  @comments = nil if @comments == ""
  @comments
end
3975
-
3976
# Stores the given value as the url for posting comments.
def comments=(new_comments)
  @comments = new_comments
end
3980
-
3981
# The source that this post was based on.
#
# Returns a Link object whose url comes from the source element's url
# attribute and whose value comes from the source element's text; either
# is nil when empty. The object is cached after the first call.
def source
  return @source unless @source.nil?

  @source = FeedTools::Feed::Link.new

  source_url = XPath.first(root_node, "source/@url").to_s
  @source.url = source_url
  @source.url = nil if @source.url == ""

  source_text = XPath.first(root_node, "source/text()").to_s
  @source.value = source_text
  @source.value = nil if @source.value == ""

  @source
end
3992
-
3993
# Returns the feed item tags.
#
# Sources are tried in order until one yields at least one tag:
# 1. dc:subject/rdf:Bag/rdf:li text
# 2. taxo:topics/rdf:Bag/rdf:li resource urls containing /tag/ or /tags/
#    (mainly for del.icio.us)
# 3. category text
# 4. dc:subject text
# 5. itunes:keywords text, split on whitespace
# Tags are lower-cased, de-duplicated and cached.
#
# Fixes relative to the previous implementation:
# * the taxo:topics branch read @resource from the item's root node
#   instead of from each rdf:li node, so it never produced any tags;
# * a dc:subject rdf:Bag holding a single item was ignored because the
#   list had to contain more than one entry.
#
# TODO: support the rel="tag" microformat
def tags
  if @tags.nil?
    @tags = []

    bag_items = XPath.match(root_node, "dc:subject/rdf:Bag/rdf:li/text()")
    bag_items.each do |tag|
      @tags << tag.to_s.downcase.strip
    end

    if @tags.size == 0
      # messy effort to find ourselves some tags, mainly for del.icio.us
      topic_nodes = XPath.match(root_node, "taxo:topics/rdf:Bag/rdf:li")
      unless topic_nodes.nil?
        topic_nodes.each do |tag_node|
          begin
            tag_url = XPath.first(tag_node, "@resource").to_s
            tag_match = tag_url.scan(/\/(tag|tags)\/(\w+)/)
            if tag_match.size > 0
              @tags << tag_match.first.last.downcase.strip
            end
          rescue
            # Best effort: a topic that cannot be read is skipped.
          end
        end
      end
    end

    if @tags.size == 0
      XPath.match(root_node, "category/text()").each do |tag|
        @tags << tag.to_s.downcase.strip
      end
    end

    if @tags.size == 0
      XPath.match(root_node, "dc:subject/text()").each do |tag|
        @tags << tag.to_s.downcase.strip
      end
    end

    if @tags.size == 0
      begin
        @tags = XPath.first(root_node,
          "itunes:keywords/text()").to_s.downcase.split(" ")
      rescue
        @tags = []
      end
    end

    @tags = [] if @tags.nil?
    @tags.uniq!
  end
  return @tags
end
4053
-
4054
# Replaces the cached list of tags for this feed item.
def tags=(new_tags)
  @tags = new_tags
end
4058
-
4059
# Returns true if this feed item contains explicit material.
#
# The item counts as explicit when its media:adult text is "true", when
# its itunes:explicit text is "yes" or "true" (all compared
# case-insensitively), or when the owning feed reports itself as
# explicit. The answer is cached after the first call.
def explicit?
  return @explicit unless @explicit.nil?

  @explicit =
    if XPath.first(root_node, "media:adult/text()").to_s.downcase == "true"
      true
    elsif ["yes", "true"].include?(
        XPath.first(root_node, "itunes:explicit/text()").to_s.downcase)
      true
    elsif feed.explicit?
      true
    else
      false
    end
  @explicit
end
4078
-
4079
# Sets whether or not the feed item contains explicit material. The value
# is coerced to a strict true/false before being stored.
def explicit=(new_explicit)
  @explicit = !!new_explicit
end
4083
-
4084
# A hook method that is called during the feed generation process.
# Overriding this method will enable additional content to be inserted
# into the feed. This default implementation adds nothing and returns nil.
def build_xml_hook(feed_type, version, xml_builder)
  nil
end
4089
-
4090
- # Generates xml based on the content of the feed item
4091
- def build_xml(feed_type=(self.feed.feed_type or "rss"), version=nil,
4092
- xml_builder=Builder::XmlMarkup.new(:indent => 2))
4093
- if feed_type == "rss" && (version == nil || version == 0.0)
4094
- version = 1.0
4095
- elsif feed_type == "atom" && (version == nil || version == 0.0)
4096
- version = 0.3
4097
- end
4098
- if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
4099
- # RDF-based rss format
4100
- if link.nil?
4101
- raise "Cannot generate an rdf-based feed item with a nil link field."
4102
- end
4103
- return xml_builder.item("rdf:about" => CGI.escapeHTML(link)) do
4104
- unless title.nil? || title == ""
4105
- xml_builder.title(title)
4106
- else
4107
- xml_builder.title
4108
- end
4109
- unless link.nil? || link == ""
4110
- xml_builder.link(link)
4111
- else
4112
- xml_builder.link
4113
- end
4114
- unless description.nil? || description == ""
4115
- xml_builder.description(description)
4116
- else
4117
- xml_builder.description
4118
- end
4119
- unless time.nil?
4120
- xml_builder.tag!("dc:date", time.iso8601)
4121
- end
4122
- unless tags.nil? || tags.size == 0
4123
- xml_builder.tag!("taxo:topics") do
4124
- xml_builder.tag!("rdf:Bag") do
4125
- for tag in tags
4126
- xml_builder.tag!("rdf:li", tag)
4127
- end
4128
- end
4129
- end
4130
- xml_builder.tag!("itunes:keywords", tags.join(" "))
4131
- end
4132
- build_xml_hook(feed_type, version, xml_builder)
4133
- end
4134
- elsif feed_type == "rss"
4135
- # normal rss format
4136
- return xml_builder.item do
4137
- unless title.nil? || title == ""
4138
- xml_builder.title(title)
4139
- end
4140
- unless link.nil? || link == ""
4141
- xml_builder.link(link)
4142
- end
4143
- unless description.nil? || description == ""
4144
- xml_builder.description(description)
4145
- end
4146
- unless time.nil?
4147
- xml_builder.pubDate(time.rfc822)
4148
- end
4149
- unless tags.nil? || tags.size == 0
4150
- xml_builder.tag!("taxo:topics") do
4151
- xml_builder.tag!("rdf:Bag") do
4152
- for tag in tags
4153
- xml_builder.tag!("rdf:li", tag)
4154
- end
4155
- end
4156
- end
4157
- xml_builder.tag!("itunes:keywords", tags.join(" "))
4158
- end
4159
- build_xml_hook(feed_type, version, xml_builder)
4160
- end
4161
- elsif feed_type == "atom" && version == 0.3
4162
- # normal atom format
4163
- return xml_builder.entry("xmlns" => "http://purl.org/atom/ns#") do
4164
- unless title.nil? || title == ""
4165
- xml_builder.title(title,
4166
- "mode" => "escaped",
4167
- "type" => "text/html")
4168
- end
4169
- xml_builder.author do
4170
- unless self.author.nil? || self.author.name.nil?
4171
- xml_builder.name(self.author.name)
4172
- else
4173
- xml_builder.name("n/a")
4174
- end
4175
- unless self.author.nil? || self.author.email.nil?
4176
- xml_builder.email(self.author.email)
4177
- end
4178
- unless self.author.nil? || self.author.url.nil?
4179
- xml_builder.url(self.author.url)
4180
- end
4181
- end
4182
- unless link.nil? || link == ""
4183
- xml_builder.link("href" => link,
4184
- "rel" => "alternate",
4185
- "type" => "text/html",
4186
- "title" => title)
4187
- end
4188
- unless description.nil? || description == ""
4189
- xml_builder.content(description,
4190
- "mode" => "escaped",
4191
- "type" => "text/html")
4192
- end
4193
- unless time.nil?
4194
- xml_builder.issued(time.iso8601)
4195
- end
4196
- unless tags.nil? || tags.size == 0
4197
- for tag in tags
4198
- xml_builder.category(tag)
4199
- end
4200
- end
4201
- build_xml_hook(feed_type, version, xml_builder)
4202
- end
4203
- elsif feed_type == "atom" && version == 1.0
4204
- # normal atom format
4205
- return xml_builder.entry("xmlns" => "http://www.w3.org/2005/Atom") do
4206
- unless title.nil? || title == ""
4207
- xml_builder.title(title,
4208
- "type" => "html")
4209
- end
4210
- xml_builder.author do
4211
- unless self.author.nil? || self.author.name.nil?
4212
- xml_builder.name(self.author.name)
4213
- else
4214
- xml_builder.name("n/a")
4215
- end
4216
- unless self.author.nil? || self.author.email.nil?
4217
- xml_builder.email(self.author.email)
4218
- end
4219
- unless self.author.nil? || self.author.url.nil?
4220
- xml_builder.url(self.author.url)
4221
- end
4222
- end
4223
- unless link.nil? || link == ""
4224
- xml_builder.link("href" => link,
4225
- "rel" => "alternate",
4226
- "type" => "text/html",
4227
- "title" => title)
4228
- end
4229
- unless description.nil? || description == ""
4230
- xml_builder.content(description,
4231
- "type" => "html")
4232
- else
4233
- xml_builder.content(FeedTools.no_content_string,
4234
- "type" => "html")
4235
- end
4236
- if self.updated != nil
4237
- xml_builder.updated(self.updated.iso8601)
4238
- elsif self.time != nil
4239
- # Not technically correct, but a heck of a lot better
4240
- # than the Time.now fall-back.
4241
- xml_builder.updated(self.time.iso8601)
4242
- else
4243
- xml_builder.updated(Time.now.iso8601)
4244
- end
4245
- unless self.published.nil?
4246
- xml_builder.published(self.published.iso8601)
4247
- end
4248
- if self.id != nil
4249
- unless FeedTools.is_uri? self.id
4250
- if self.time != nil && self.link != nil
4251
- xml_builder.id(FeedTools.build_tag_uri(self.link, self.time))
4252
- elsif self.link != nil
4253
- xml_builder.id(FeedTools.build_urn_uuid_uri(self.link))
4254
- else
4255
- raise "The unique id must be a URI. " +
4256
- "(Attempted to generate id, but failed.)"
4257
- end
4258
- else
4259
- xml_builder.id(self.id)
4260
- end
4261
- elsif self.time != nil && self.link != nil
4262
- xml_builder.id(FeedTools.build_tag_uri(self.link, self.time))
4263
- else
4264
- raise "Cannot build feed, missing feed unique id."
4265
- end
4266
- unless self.tags.nil? || self.tags.size == 0
4267
- for tag in self.tags
4268
- xml_builder.category("term" => tag)
4269
- end
4270
- end
4271
- build_xml_hook(feed_type, version, xml_builder)
4272
- end
4273
- end
4274
- end
4275
-
4276
- alias_method :tagline, :description
4277
- alias_method :tagline=, :description=
4278
- alias_method :subtitle, :description
4279
- alias_method :subtitle=, :description=
4280
- alias_method :summary, :description
4281
- alias_method :summary=, :description=
4282
- alias_method :abstract, :description
4283
- alias_method :abstract=, :description=
4284
- alias_method :content, :description
4285
- alias_method :content=, :description=
4286
- alias_method :guid, :id
4287
- alias_method :guid=, :id=
4288
- alias_method :published, :issued
4289
- alias_method :published=, :issued=
4290
-
4291
- # Returns a simple representation of the feed item object's state.
4292
- def inspect
4293
- return "#<FeedTools::FeedItem:0x#{self.object_id.to_s(16)} " +
4294
- "LINK:#{self.link}>"
4295
- end
4296
- end
4297
556
  end
4298
557
 
4299
558
  module REXML # :nodoc: