feedtools 0.2.10 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,11 @@
1
+ == FeedTools 0.2.11
2
+ * ruby -w shouldn't produce nearly as many warnings for feed_tools.rb anymore
3
+ * you can now force the open method to only pull from the cache
4
+ * the global cache_only option has been removed
5
+ * now queries the cache after each redirection
6
+ * attribute dictionary stuff removed for being unnecessary and pythonic
7
+ * better file structure -- no more 5000 line files
8
+ * schema files are formatted better to keep them from looking weird in rdoc
1
9
  == FeedTools 0.2.10
2
10
  * http error messages now sport a list of redirections to simplify debugging
3
11
  * automatic table creation removed
@@ -1,10 +1,11 @@
1
- CREATE TABLE `feeds` (
2
- `id` int(10) unsigned NOT NULL auto_increment,
3
- `url` varchar(255) default NULL,
4
- `title` varchar(255) default NULL,
5
- `link` varchar(255) default NULL,
6
- `xml_data` longtext default NULL,
7
- `http_headers` text default NULL,
8
- `last_retrieved` datetime default NULL,
9
- PRIMARY KEY (`id`)
10
- )
1
+ -- Example MySQL schema
2
+ CREATE TABLE `feeds` (
3
+ `id` int(10) unsigned NOT NULL auto_increment,
4
+ `url` varchar(255) default NULL,
5
+ `title` varchar(255) default NULL,
6
+ `link` varchar(255) default NULL,
7
+ `xml_data` longtext default NULL,
8
+ `http_headers` text default NULL,
9
+ `last_retrieved` datetime default NULL,
10
+ PRIMARY KEY (`id`)
11
+ )
@@ -1,9 +1,10 @@
1
- CREATE TABLE feeds (
2
- id SERIAL PRIMARY KEY NOT NULL,
3
- url varchar(255) default NULL,
4
- title varchar(255) default NULL,
5
- link varchar(255) default NULL,
6
- xml_data text default NULL,
7
- http_headers text default NULL,
8
- last_retrieved timestamp default NULL
9
- );
1
+ -- Example PostgreSQL schema
2
+ CREATE TABLE feeds (
3
+ id SERIAL PRIMARY KEY NOT NULL,
4
+ url varchar(255) default NULL,
5
+ title varchar(255) default NULL,
6
+ link varchar(255) default NULL,
7
+ xml_data text default NULL,
8
+ http_headers text default NULL,
9
+ last_retrieved timestamp default NULL
10
+ );
@@ -1,9 +1,10 @@
1
- CREATE TABLE 'feeds' (
2
- 'id' INTEGER PRIMARY KEY NOT NULL,
3
- 'url' VARCHAR(255) DEFAULT NULL,
4
- 'title' VARCHAR(255) DEFAULT NULL,
5
- 'link' VARCHAR(255) DEFAULT NULL,
6
- 'xml_data' TEXT DEFAULT NULL,
7
- 'http_headers' TEXT DEFAULT NULL,
8
- 'last_retrieved' DATETIME DEFAULT NULL
9
- );
1
+ -- Example Sqlite schema
2
+ CREATE TABLE feeds (
3
+ id INTEGER PRIMARY KEY NOT NULL,
4
+ url VARCHAR(255) DEFAULT NULL,
5
+ title VARCHAR(255) DEFAULT NULL,
6
+ link VARCHAR(255) DEFAULT NULL,
7
+ xml_data TEXT DEFAULT NULL,
8
+ http_headers TEXT DEFAULT NULL,
9
+ last_retrieved DATETIME DEFAULT NULL
10
+ );
@@ -32,7 +32,7 @@ FEED_TOOLS_ENV = ENV['FEED_TOOLS_ENV'] ||
32
32
  ENV['RAILS_ENV'] ||
33
33
  'production' # :nodoc:
34
34
 
35
- FEED_TOOLS_VERSION = "0.2.10"
35
+ FEED_TOOLS_VERSION = "0.2.11"
36
36
 
37
37
  $:.unshift(File.dirname(__FILE__))
38
38
  $:.unshift(File.dirname(__FILE__) + "/feed_tools/vendor")
@@ -77,7 +77,9 @@ require 'yaml'
77
77
  require_gem('activerecord', '>= 1.10.1')
78
78
  require_gem('uuidtools', '>= 0.1.2')
79
79
 
80
- require 'database_feed_cache'
80
+ require 'feed_tools/feed'
81
+ require 'feed_tools/feed_item'
82
+ require 'feed_tools/database_feed_cache'
81
83
 
82
84
  #= feed_tools.rb
83
85
  #
@@ -96,99 +98,17 @@ require 'database_feed_cache'
96
98
  # => "43,37,28,23,11,3,1"
97
99
  module FeedTools
98
100
 
99
- # Error raised when a feed cannot be retrieved
100
- class FeedAccessError < StandardError
101
- end
102
-
103
- # Quick method of enabling small classes to have their attributes
104
- # accessible as a dictionary. These methods should not be used whenever
105
- # performance is going to be an issue. They exist almost entirely for the
106
- # purposes of aesthetics and/or debugging.
107
- module AttributeDictionary
108
- # Access the attributes as a dictionary.
109
- def [](key)
110
- return nil unless self.keys.include? key
111
- return self.send(key)
112
- end
113
-
114
- # Access the attributes as a dictionary.
115
- def []=(key, value)
116
- pseudo_key = key
117
- pseudo_key = key[0..-2] if key[-1..-1] == "?"
118
- return nil unless self.method(pseudo_key + "=").arity == 1
119
- local_keys = self.keys
120
- unless local_keys.include?(key) || local_keys.include?(pseudo_key)
121
- return nil
122
- end
123
- return self.send(pseudo_key + "=", value)
124
- end
125
-
126
- # Access the attributes as a dictionary.
127
- def keys
128
- key_methods = []
129
- for key in self.methods
130
- # Quick-n-dirty hack to speed things up and keep the list clean
131
- if self.method(key).arity == 0 && key[-1..-1] != "=" &&
132
- key[-1..-1] != "!" && key[0..1] != "__" &&
133
- key[0..2] != "to_" && key[-5..-1] != "_node" &&
134
- key != "cache_object" && key != "save" && key != "xml" &&
135
- key != "xml_data" && key != "expired?" && key != "live?" &&
136
- key != "feed"
137
- superfluous_ancestors = self.class.ancestors
138
- superfluous_ancestors = superfluous_ancestors[1..-1]
139
- superfluous = false
140
- for ancestor in superfluous_ancestors
141
- if ancestor.instance_methods.include? key
142
- superfluous = true
143
- break
144
- end
145
- end
146
- next if superfluous
147
- key_methods << key
148
- end
149
- end
150
- return key_methods.sort
151
- end
152
-
153
- # Access the attributes as a dictionary.
154
- # Please note that this method may cause a nearly complete parse of a
155
- # feed. This will be very slow.
156
- def values
157
- return self.keys.map { |key| self[key] }
158
- end
159
-
160
- # Access the attributes as a dictionary.
161
- # Please note that this method may cause a complete parse of a feed.
162
- # This will be very slow.
163
- def to_hash
164
- attribute_hash = {}
165
- for key in keys
166
- value = self[key]
167
- if value.respond_to? :to_hash
168
- value = value.to_hash
169
- end
170
- if value.respond_to? :to_ary
171
- new_value = []
172
- for item in value.to_ary
173
- if item.respond_to? :to_hash
174
- new_value << item.to_hash
175
- else
176
- new_value << item
177
- end
178
- end
179
- value = new_value
180
- end
181
- attribute_hash[key] = value
182
- end
183
- return attribute_hash
184
- end
185
- end
186
-
101
+ @force_tidy_enabled = true
102
+ @tidy_enabled = false
187
103
  @feed_cache = DatabaseFeedCache
188
104
  @user_agent = "FeedTools/#{FEED_TOOLS_VERSION} " +
189
105
  "+http://www.sporkmonger.com/projects/feedtools/"
190
106
  @no_content_string = "[no description]"
191
107
 
108
+ # Error raised when a feed cannot be retrieved
109
+ class FeedAccessError < StandardError
110
+ end
111
+
192
112
  # Returns the current caching mechanism.
193
113
  def FeedTools.feed_cache
194
114
  return @feed_cache
@@ -222,23 +142,7 @@ module FeedTools
222
142
  # ==================================================================
223
143
  @feed_cache = new_feed_cache
224
144
  end
225
-
226
- # Returns true if FeedTools should only retrieve from the cache and avoid
227
- # pulling feeds from their remote location.
228
- def FeedTools.cache_only?
229
- @cache_only = false if @cache_only.nil?
230
- return @cache_only
231
- end
232
-
233
- # Sets whether or not FeedTools should retrieve feeds from remote locations
234
- # or if it should rely on the cache only.
235
- def FeedTools.cache_only=(new_cache_only)
236
- if new_cache_only != true && new_cache_only != false
237
- raise ArgumentError, "Must be either true or false."
238
- end
239
- @cache_only = new_cache_only
240
- end
241
-
145
+
242
146
  # Returns true if FeedTools.feed_cache is not nil and a connection with
243
147
  # the cache has been successfully established. Also returns false if an
244
148
  # error is raised while trying to determine the status of the cache.
@@ -649,3651 +553,6 @@ module FeedTools
649
553
  end
650
554
  return merged_feed
651
555
  end
652
-
653
- class Feed
654
- include REXML # :nodoc:
655
- include AttributeDictionary
656
-
657
- # Represents a feed/feed item's category
658
- class Category
659
- include AttributeDictionary
660
-
661
- # The category term value
662
- attr_accessor :term
663
- # The categorization scheme
664
- attr_accessor :scheme
665
- # A human-readable description of the category
666
- attr_accessor :label
667
-
668
- alias_method :value, :term
669
- alias_method :category, :term
670
- alias_method :domain, :scheme
671
- end
672
-
673
- # Represents a feed/feed item's author
674
- class Author
675
- include AttributeDictionary
676
-
677
- # The author's real name
678
- attr_accessor :name
679
- # The author's email address
680
- attr_accessor :email
681
- # The url of the author's homepage
682
- attr_accessor :url
683
- # The raw value of the author tag if present
684
- attr_accessor :raw
685
- end
686
-
687
- # Represents a feed's image
688
- class Image
689
- include AttributeDictionary
690
-
691
- # The image's title
692
- attr_accessor :title
693
- # The image's description
694
- attr_accessor :description
695
- # The image's url
696
- attr_accessor :url
697
- # The url to link the image to
698
- attr_accessor :link
699
- # The width of the image
700
- attr_accessor :width
701
- # The height of the image
702
- attr_accessor :height
703
- # The style of the image
704
- # Possible values are "icon", "image", or "image-wide"
705
- attr_accessor :style
706
- end
707
-
708
- # Represents a feed's text input element.
709
- # Be aware that this will be ignored for feed generation. It's a
710
- # pointless element that aggregators usually ignore and it doesn't have an
711
- # equivalent in all feeds types.
712
- class TextInput
713
- include AttributeDictionary
714
-
715
- # The label of the Submit button in the text input area.
716
- attr_accessor :title
717
- # The description explains the text input area.
718
- attr_accessor :description
719
- # The URL of the CGI script that processes text input requests.
720
- attr_accessor :link
721
- # The name of the text object in the text input area.
722
- attr_accessor :name
723
- end
724
-
725
- # Represents a feed's cloud.
726
- # Be aware that this will be ignored for feed generation.
727
- class Cloud
728
- include AttributeDictionary
729
-
730
- # The domain of the cloud.
731
- attr_accessor :domain
732
- # The path for the cloud.
733
- attr_accessor :path
734
- # The port the cloud is listening on.
735
- attr_accessor :port
736
- # The web services protocol the cloud uses.
737
- # Possible values are either "xml-rpc" or "soap".
738
- attr_accessor :protocol
739
- # The procedure to use to request notification.
740
- attr_accessor :register_procedure
741
- end
742
-
743
- # Represents a simple hyperlink
744
- class Link
745
- include AttributeDictionary
746
-
747
- # The url that is being linked to
748
- attr_accessor :url
749
- # The content of the hyperlink
750
- attr_accessor :value
751
-
752
- alias_method :href, :url
753
- end
754
-
755
- # Loads the feed specified by the url, pulling the data from the cache if it hasn't expired.
756
- def Feed.open(url)
757
- # clean up the url
758
- url = FeedTools.normalize_url(url)
759
-
760
- # create and load the new feed
761
- feed = Feed.new
762
- feed.url = url
763
- feed.update!
764
- return feed
765
- end
766
-
767
- # Loads the feed from the remote url if the feed has expired from the cache or cannot be
768
- # retrieved from the cache for some reason.
769
- def update!
770
- if self.http_headers.nil? && !(self.cache_object.nil?) &&
771
- !(self.cache_object.http_headers.nil?)
772
- @http_headers = YAML.load(self.cache_object.http_headers)
773
- @http_headers = {} unless @http_headers.kind_of? Hash
774
- end
775
- if FeedTools.cache_only? || self.expired? == false
776
- @live = false
777
- else
778
- load_remote_feed!
779
- end
780
- end
781
-
782
- # Attempts to load the feed from the remote location. Requires the url
783
- # field to be set. If an etag or the last_modified date has been set,
784
- # attempts to use them to prevent unnecessary reloading of identical
785
- # content.
786
- def load_remote_feed!
787
- @live = true
788
- if self.http_headers.nil? && !(self.cache_object.nil?) &&
789
- !(self.cache_object.http_headers.nil?)
790
- @http_headers = YAML.load(self.cache_object.http_headers)
791
- end
792
-
793
- if (self.url =~ /^feed:/) == 0
794
- # Woah, Nelly, how'd that happen? You should've already been
795
- # corrected. So let's fix that url. And please,
796
- # just use less crappy browsers instead of badly defined
797
- # pseudo-protocol hacks.
798
- self.url = FeedTools.normalize_url(self.url)
799
- end
800
-
801
- # Find out what method we're going to be using to obtain this feed.
802
- uri = URI.parse(self.url)
803
- retrieval_method = "http"
804
- case uri.scheme
805
- when "http"
806
- retrieval_method = "http"
807
- when "ftp"
808
- retrieval_method = "ftp"
809
- when "file"
810
- retrieval_method = "file"
811
- when nil
812
- raise FeedAccessError,
813
- "No protocol was specified in the url."
814
- else
815
- raise FeedAccessError,
816
- "Cannot retrieve feed using unrecognized protocol: " + uri.scheme
817
- end
818
-
819
- # No need for http headers unless we're actually doing http
820
- if retrieval_method == "http"
821
- # Set up the appropriate http headers
822
- headers = {}
823
- unless self.http_headers.nil?
824
- headers["If-None-Match"] =
825
- self.http_headers['etag'] unless self.http_headers['etag'].nil?
826
- headers["If-Modified-Since"] =
827
- self.http_headers['last-modified'] unless
828
- self.http_headers['last-modified'].nil?
829
- end
830
- headers["User-Agent"] =
831
- FeedTools.user_agent unless FeedTools.user_agent.nil?
832
-
833
- # The http feed access method
834
- http_fetch = lambda do |feed_url, http_headers, redirect_limit,
835
- response_chain, no_headers|
836
- raise FeedAccessError, 'Redirect too deep' if redirect_limit == 0
837
- feed_uri = nil
838
- begin
839
- feed_uri = URI.parse(feed_url)
840
- rescue URI::InvalidURIError
841
- # Uh, maybe try to fix it?
842
- feed_uri = URI.parse(FeedTools.normalize_url(feed_url))
843
- end
844
-
845
- # Borrowed from open-uri:
846
- # According to RFC2616 14.23, Host: request-header field should be
847
- # set to an origin server.
848
- # But net/http wrongly set a proxy server if an absolute URI is
849
- # specified as a request URI.
850
- # So override it here explicitly.
851
- http_headers['Host'] = feed_uri.host
852
- http_headers['Host'] += ":#{feed_uri.port}" if feed_uri.port
853
-
854
- Net::HTTP.start(feed_uri.host, (feed_uri.port or 80)) do |http|
855
- final_uri = feed_uri.path
856
- final_uri += ('?' + feed_uri.query) if feed_uri.query
857
- http_headers = {} if no_headers
858
- response = http.request_get(final_uri, http_headers)
859
-
860
- case response
861
- when Net::HTTPSuccess
862
- # We've reached the final destination, process all previous
863
- # redirections, and see if we need to update the url.
864
- for redirected_response in response_chain
865
- if redirected_response.last.code.to_i == 301
866
- # Reset the cache object or we may get duplicate entries
867
- self.cache_object = nil
868
- self.url = redirected_response.last['location']
869
- else
870
- # Jump out as soon as we hit anything that isn't a
871
- # permanently moved redirection.
872
- break
873
- end
874
- end
875
- response
876
- when Net::HTTPRedirection
877
- if response.code.to_i == 304
878
- response.error!
879
- else
880
- if response['location'].nil?
881
- raise FeedAccessError,
882
- "No location to redirect to supplied: " + response.code
883
- end
884
- response_chain << [feed_url, response]
885
- new_location = response['location']
886
- if response_chain.assoc(new_location) != nil
887
- raise FeedAccessError, "Redirection loop detected."
888
- end
889
- # TODO: deal with stupid people using relative urls
890
- # in Location header
891
- # =================================================
892
- http_fetch.call(new_location, http_headers,
893
- redirect_limit - 1, response_chain, no_headers)
894
- end
895
- else
896
- class << response
897
- def response_chain
898
- return @response_chain
899
- end
900
- end
901
- response.instance_variable_set("@response_chain",
902
- response_chain)
903
- response.error!
904
- end
905
- end
906
- end
907
-
908
- begin
909
- begin
910
- @http_response = http_fetch.call(self.url, headers, 10, [], false)
911
- rescue => error
912
- if error.respond_to?(:response)
913
- # You might not believe this, but...
914
- #
915
- # Under certain circumstances, web servers will try to block
916
- # based on the User-Agent header. This is *retarded*. But
917
- # we won't let their stupid error stop us!
918
- #
919
- # This is, of course, a quick-n-dirty hack. But at least
920
- # we get to blame other people's bad software and/or bad
921
- # configuration files.
922
- if error.response.code.to_i == 404 &&
923
- FeedTools.user_agent != nil
924
- @http_response = http_fetch.call(self.url, {}, 10, [], true)
925
- if @http_response != nil && @http_response.code.to_i == 200
926
- warn("The server appears to be blocking based on the " +
927
- "User-Agent header. This is stupid, and you should " +
928
- "inform the webmaster of this.")
929
- end
930
- else
931
- raise error
932
- end
933
- else
934
- raise error
935
- end
936
- end
937
- @http_headers = {}
938
- self.http_response.each_header do |header|
939
- self.http_headers[header.first.downcase] = header.last
940
- end
941
- self.last_retrieved = Time.now
942
- self.xml_data = self.http_response.body
943
- rescue FeedAccessError
944
- @live = false
945
- if self.xml_data.nil?
946
- raise
947
- end
948
- rescue Timeout::Error
949
- # if we time out, do nothing, it should fall back to the xml_data
950
- # stored in the cache.
951
- @live = false
952
- if self.xml_data.nil?
953
- raise
954
- end
955
- rescue Errno::ECONNRESET
956
- # if the connection gets reset by peer, oh well, fall back to the
957
- # xml_data stored in the cache
958
- @live = false
959
- if self.xml_data.nil?
960
- raise
961
- end
962
- rescue => error
963
- # heck, if anything at all bad happens, fall back to the xml_data
964
- # stored in the cache.
965
-
966
- # If we can, get the HTTPResponse...
967
- @http_response = nil
968
- if error.respond_to?(:each_header)
969
- @http_response = error
970
- end
971
- if error.respond_to?(:response) &&
972
- error.response.respond_to?(:each_header)
973
- @http_response = error.response
974
- end
975
- if @http_response != nil
976
- @http_headers = {}
977
- self.http_response.each_header do |header|
978
- self.http_headers[header.first] = header.last
979
- end
980
- if self.http_response.code.to_i == 304
981
- self.last_retrieved = Time.now
982
- end
983
- end
984
- @live = false
985
- if self.xml_data.nil?
986
- if error.respond_to?(:response) &&
987
- error.response.respond_to?(:response_chain)
988
- redirects = error.response.response_chain.map do |pair|
989
- pair.first
990
- end
991
- error.message << (" - Redirects: " + redirects.inspect)
992
- end
993
- raise error
994
- end
995
- end
996
- elsif retrieval_method == "https"
997
- # Not supported... yet
998
- elsif retrieval_method == "ftp"
999
- # Not supported... yet
1000
- # Technically, CDF feeds are supposed to be able to be accessed directly
1001
- # from an ftp server. This is silly, but we'll humor Microsoft.
1002
- #
1003
- # Eventually.
1004
- elsif retrieval_method == "file"
1005
- # Now that we've gone to all that trouble to ensure the url begins
1006
- # with 'file://', strip the 'file://' off the front of the url.
1007
- file_name = self.url.gsub(/^file:\/\//, "")
1008
- begin
1009
- open(file_name) do |file|
1010
- @http_response = nil
1011
- @http_headers = {}
1012
- self.last_retrieved = Time.now
1013
- self.xml_data = file.read
1014
- end
1015
- rescue
1016
- @live = false
1017
- # In this case, pulling from the cache is probably not going
1018
- # to help at all, and the user should probably be immediately
1019
- # apprised of the problem. Raise the exception.
1020
- raise
1021
- end
1022
- end
1023
- unless self.cache_object.nil?
1024
- begin
1025
- self.save
1026
- rescue
1027
- end
1028
- end
1029
- end
1030
-
1031
- # Returns the relevant information from an http request.
1032
- def http_response
1033
- return @http_response
1034
- end
1035
-
1036
- # Returns a hash of the http headers from the response.
1037
- def http_headers
1038
- return @http_headers
1039
- end
1040
-
1041
- # Returns the feed's raw xml data.
1042
- def xml_data
1043
- if @xml_data.nil?
1044
- unless self.cache_object.nil?
1045
- @xml_data = self.cache_object.xml_data
1046
- end
1047
- end
1048
- return @xml_data
1049
- end
1050
-
1051
- # Sets the feed's xml data.
1052
- def xml_data=(new_xml_data)
1053
- @xml_data = new_xml_data
1054
- unless self.cache_object.nil?
1055
- self.cache_object.xml_data = new_xml_data
1056
- end
1057
- end
1058
-
1059
- # Returns a REXML Document of the xml_data
1060
- def xml
1061
- if @xml_doc.nil?
1062
- begin
1063
- # TODO: :ignore_whitespace_nodes => :all
1064
- # Add that?
1065
- # ======================================
1066
- @xml_doc = Document.new(xml_data)
1067
- rescue
1068
- # Something failed, attempt to repair the xml with htree.
1069
- @xml_doc = HTree.parse(xml_data).to_rexml
1070
- end
1071
- end
1072
- return @xml_doc
1073
- end
1074
-
1075
- # Returns the first node within the channel_node that matches the xpath query.
1076
- def find_node(xpath)
1077
- return XPath.first(channel_node, xpath)
1078
- end
1079
-
1080
- # Returns all nodes within the channel_node that match the xpath query.
1081
- def find_all_nodes(xpath)
1082
- return XPath.match(channel_node, xpath)
1083
- end
1084
-
1085
- # Returns the root node of the feed.
1086
- def root_node
1087
- if @root_node.nil?
1088
- # TODO: Fix this so that added content at the end of the file doesn't
1089
- # break this stuff.
1090
- # E.g.: http://smogzer.tripod.com/smog.rdf
1091
- # ===================================================================
1092
- @root_node = xml.root
1093
- end
1094
- return @root_node
1095
- end
1096
-
1097
- # Returns the channel node of the feed.
1098
- def channel_node
1099
- if @channel_node.nil? && root_node != nil
1100
- @channel_node = XPath.first(root_node, "channel")
1101
- if @channel_node == nil
1102
- @channel_node = XPath.first(root_node, "CHANNEL")
1103
- end
1104
- if @channel_node == nil
1105
- @channel_node = XPath.first(root_node, "feedinfo")
1106
- end
1107
- if @channel_node == nil
1108
- @channel_node = root_node
1109
- end
1110
- end
1111
- return @channel_node
1112
- end
1113
-
1114
- # The cache object that handles the feed persistence.
1115
- def cache_object
1116
- unless FeedTools.feed_cache.nil?
1117
- if @cache_object.nil?
1118
- begin
1119
- if @id != nil
1120
- @cache_object = FeedTools.feed_cache.find_by_id(@id)
1121
- elsif @url != nil
1122
- @cache_object = FeedTools.feed_cache.find_by_url(@url)
1123
- end
1124
- if @cache_object.nil?
1125
- @cache_object = FeedTools.feed_cache.new
1126
- end
1127
- rescue
1128
- end
1129
- end
1130
- end
1131
- return @cache_object
1132
- end
1133
-
1134
- # Sets the cache object for this feed.
1135
- #
1136
- # This can be any object, but it must accept the following messages:
1137
- # url
1138
- # url=
1139
- # title
1140
- # title=
1141
- # link
1142
- # link=
1143
- # xml_data
1144
- # xml_data=
1145
- # etag
1146
- # etag=
1147
- # last_modified
1148
- # last_modified=
1149
- # save
1150
- def cache_object=(new_cache_object)
1151
- @cache_object = new_cache_object
1152
- end
1153
-
1154
- # Returns the type of feed
1155
- # Possible values:
1156
- # "rss", "atom", "cdf", "!okay/news"
1157
- def feed_type
1158
- if @feed_type.nil?
1159
- case self.root_node.name.downcase
1160
- when "feed"
1161
- @feed_type = "atom"
1162
- when "rdf:rdf"
1163
- @feed_type = "rss"
1164
- when "rdf"
1165
- @feed_type = "rss"
1166
- when "rss"
1167
- @feed_type = "rss"
1168
- when "channel"
1169
- @feed_type = "cdf"
1170
- end
1171
- end
1172
- return @feed_type
1173
- end
1174
-
1175
- # Sets the default feed type
1176
- def feed_type=(new_feed_type)
1177
- @feed_type = new_feed_type
1178
- end
1179
-
1180
- # Returns the version number of the feed type.
1181
- # Intentionally does not differentiate between the Netscape and Userland
1182
- # versions of RSS 0.91.
1183
- def feed_version
1184
- if @feed_version.nil?
1185
- version = nil
1186
- begin
1187
- version = XPath.first(root_node, "@version").to_s.strip.to_f
1188
- rescue
1189
- end
1190
- version = nil if version == 0.0
1191
- default_namespace = XPath.first(root_node, "@xmlns").to_s.strip
1192
- case self.feed_type
1193
- when "atom"
1194
- if default_namespace == "http://www.w3.org/2005/Atom"
1195
- @feed_version = 1.0
1196
- elsif version != nil
1197
- @feed_version = version
1198
- elsif default_namespace == "http://purl.org/atom/ns#"
1199
- @feed_version = 0.3
1200
- end
1201
- when "rss"
1202
- if default_namespace == "http://my.netscape.com/rdf/simple/0.9/"
1203
- @feed_version = 0.9
1204
- elsif default_namespace == "http://purl.org/rss/1.0/"
1205
- @feed_version = 1.0
1206
- elsif default_namespace == "http://purl.org/net/rss1.1#"
1207
- @feed_version = 1.1
1208
- elsif version != nil
1209
- case version
1210
- when 2.1
1211
- @feed_version = 2.0
1212
- when 2.01
1213
- @feed_version = 2.0
1214
- else
1215
- @feed_version = version
1216
- end
1217
- end
1218
- when "cdf"
1219
- @feed_version = 0.4
1220
- when "!okay/news"
1221
- @feed_version = nil
1222
- end
1223
- end
1224
- return @feed_version
1225
- end
1226
-
1227
- # Sets the default feed version
1228
- def feed_version=(new_feed_version)
1229
- @feed_version = new_feed_version
1230
- end
1231
-
1232
- # Returns the feed's unique id
1233
- def id
1234
- if @id.nil?
1235
- unless channel_node.nil?
1236
- @id = XPath.first(channel_node, "id/text()").to_s
1237
- if @id == ""
1238
- @id = XPath.first(channel_node, "guid/text()").to_s
1239
- end
1240
- end
1241
- unless root_node.nil?
1242
- if @id == "" || @id.nil?
1243
- @id = XPath.first(root_node, "id/text()").to_s
1244
- end
1245
- if @id == ""
1246
- @id = XPath.first(root_node, "guid/text()").to_s
1247
- end
1248
- end
1249
- @id = nil if @id == ""
1250
- end
1251
- return @id
1252
- end
1253
-
1254
- # Sets the feed's unique id
1255
- def id=(new_id)
1256
- @id = new_id
1257
- end
1258
-
1259
- # Returns the feed url.
1260
- def url
1261
- if @url.nil? && self.xml_data != nil
1262
- @url = XPath.first(channel_node, "link[@rel='self']/@href").to_s
1263
- @url = nil if @url == ""
1264
- end
1265
- return @url
1266
- end
1267
-
1268
- # Sets the feed url and prepares the cache_object if necessary.
1269
- def url=(new_url)
1270
- @url = FeedTools.normalize_url(new_url)
1271
- self.cache_object.url = new_url unless self.cache_object.nil?
1272
- end
1273
-
1274
- # Returns the feed title
1275
- def title
1276
- if @title.nil?
1277
- unless channel_node.nil?
1278
- repair_entities = false
1279
- title_node = XPath.first(channel_node, "title")
1280
- if title_node.nil?
1281
- title_node = XPath.first(channel_node, "dc:title")
1282
- end
1283
- if title_node.nil?
1284
- title_node = XPath.first(channel_node, "TITLE")
1285
- end
1286
- end
1287
- if title_node.nil?
1288
- return nil
1289
- end
1290
- if XPath.first(title_node, "@type").to_s == "xhtml" ||
1291
- XPath.first(title_node, "@mode").to_s == "xhtml" ||
1292
- XPath.first(title_node, "@type").to_s == "xml" ||
1293
- XPath.first(title_node, "@mode").to_s == "xml" ||
1294
- XPath.first(title_node, "@type").to_s == "application/xhtml+xml"
1295
- @title = title_node.inner_xml
1296
- elsif XPath.first(title_node, "@type").to_s == "escaped" ||
1297
- XPath.first(title_node, "@mode").to_s == "escaped"
1298
- @title = FeedTools.unescape_entities(
1299
- XPath.first(title_node, "text()").to_s)
1300
- else
1301
- @title = title_node.inner_xml
1302
- repair_entities = true
1303
- end
1304
- unless @title.nil?
1305
- @title = FeedTools.sanitize_html(@title, :strip)
1306
- @title = FeedTools.unescape_entities(@title) if repair_entities
1307
- @title = FeedTools.tidy_html(@title)
1308
- end
1309
- @title.gsub!(/\n/, " ")
1310
- @title.strip!
1311
- @title = nil if @title == ""
1312
- self.cache_object.title = @title unless self.cache_object.nil?
1313
- end
1314
- return @title
1315
- end
1316
-
1317
- # Sets the feed title
1318
- def title=(new_title)
1319
- @title = new_title
1320
- self.cache_object.title = new_title unless self.cache_object.nil?
1321
- end
1322
-
1323
- # Returns the feed description
1324
- def description
1325
- if @description.nil?
1326
- unless channel_node.nil?
1327
- repair_entities = false
1328
- description_node = XPath.first(channel_node, "description")
1329
- if description_node.nil?
1330
- description_node = XPath.first(channel_node, "tagline")
1331
- end
1332
- if description_node.nil?
1333
- description_node = XPath.first(channel_node, "subtitle")
1334
- end
1335
- if description_node.nil?
1336
- description_node = XPath.first(channel_node, "summary")
1337
- end
1338
- if description_node.nil?
1339
- description_node = XPath.first(channel_node, "abstract")
1340
- end
1341
- if description_node.nil?
1342
- description_node = XPath.first(channel_node, "ABSTRACT")
1343
- end
1344
- if description_node.nil?
1345
- description_node = XPath.first(channel_node, "info")
1346
- end
1347
- if description_node.nil?
1348
- description_node = XPath.first(channel_node, "content:encoded")
1349
- @bozo = true unless description_node.nil?
1350
- end
1351
- if description_node.nil?
1352
- description_node = XPath.first(channel_node, "content")
1353
- @bozo = true unless description_node.nil?
1354
- end
1355
- if description_node.nil?
1356
- description_node = XPath.first(channel_node, "xhtml:body")
1357
- @bozo = true unless description_node.nil?
1358
- end
1359
- if description_node.nil?
1360
- description_node = XPath.first(channel_node, "body")
1361
- @bozo = true unless description_node.nil?
1362
- end
1363
- end
1364
- if description_node.nil?
1365
- return nil
1366
- end
1367
- unless description_node.nil?
1368
- if XPath.first(description_node, "@encoding").to_s != ""
1369
- @description =
1370
- "[Embedded data objects are not currently supported.]"
1371
- elsif XPath.first(description_node, "@type").to_s == "xhtml" ||
1372
- XPath.first(description_node, "@mode").to_s == "xhtml" ||
1373
- XPath.first(description_node, "@type").to_s == "xml" ||
1374
- XPath.first(description_node, "@mode").to_s == "xml" ||
1375
- XPath.first(description_node, "@type").to_s ==
1376
- "application/xhtml+xml"
1377
- @description = description_node.inner_xml
1378
- elsif XPath.first(description_node, "@type").to_s == "escaped" ||
1379
- XPath.first(description_node, "@mode").to_s == "escaped"
1380
- @description = FeedTools.unescape_entities(
1381
- description_node.inner_xml)
1382
- else
1383
- @description = description_node.inner_xml
1384
- repair_entities = true
1385
- end
1386
- end
1387
- if @description == ""
1388
- @description = self.itunes_summary
1389
- @description = "" if @description.nil?
1390
- end
1391
- if @description == ""
1392
- @description = self.itunes_subtitle
1393
- @description = "" if @description.nil?
1394
- end
1395
-
1396
- unless @description.nil?
1397
- @description = FeedTools.sanitize_html(@description, :strip)
1398
- @description = FeedTools.unescape_entities(@description) if repair_entities
1399
- @description = FeedTools.tidy_html(@description)
1400
- end
1401
-
1402
- @description = @description.strip unless @description.nil?
1403
- @description = nil if @description == ""
1404
- end
1405
- return @description
1406
- end
1407
-
1408
- # Sets the feed description
1409
def description=(new_description)
  # Stores the given value in @description exactly as passed in.
  @description = new_description
end
1412
-
1413
- # Returns the contents of the itunes:summary element
1414
def itunes_summary
  # Reads itunes:summary on first use and memoizes it.  The channel node is
  # consulted first and the root node second; an empty result becomes nil,
  # anything else is passed through FeedTools.sanitize_html.
  return @itunes_summary unless @itunes_summary.nil?

  summary_query = "itunes:summary/text()"
  unless channel_node.nil?
    @itunes_summary = FeedTools.unescape_entities(
      XPath.first(channel_node, summary_query).to_s)
  end
  if !root_node.nil? && (@itunes_summary.nil? || @itunes_summary == "")
    @itunes_summary = FeedTools.unescape_entities(
      XPath.first(root_node, summary_query).to_s)
  end
  @itunes_summary = nil if @itunes_summary == ""
  unless @itunes_summary.nil?
    @itunes_summary = FeedTools.sanitize_html(@itunes_summary)
  end
  @itunes_summary
end
1434
-
1435
- # Sets the contents of the itunes:summary element
1436
def itunes_summary=(new_itunes_summary)
  # Stores the given value in @itunes_summary exactly as passed in.
  @itunes_summary = new_itunes_summary
end
1439
-
1440
- # Returns the contents of the itunes:subtitle element
1441
def itunes_subtitle
  # Reads itunes:subtitle on first use and memoizes it.  The channel node is
  # consulted first and the root node second; an empty result becomes nil,
  # anything else is passed through FeedTools.sanitize_html.
  return @itunes_subtitle unless @itunes_subtitle.nil?

  subtitle_query = "itunes:subtitle/text()"
  unless channel_node.nil?
    @itunes_subtitle = FeedTools.unescape_entities(
      XPath.first(channel_node, subtitle_query).to_s)
  end
  if !root_node.nil? && (@itunes_subtitle.nil? || @itunes_subtitle == "")
    @itunes_subtitle = FeedTools.unescape_entities(
      XPath.first(root_node, subtitle_query).to_s)
  end
  @itunes_subtitle = nil if @itunes_subtitle == ""
  unless @itunes_subtitle.nil?
    @itunes_subtitle = FeedTools.sanitize_html(@itunes_subtitle)
  end
  @itunes_subtitle
end
1462
-
1463
- # Sets the contents of the itunes:subtitle element
1464
def itunes_subtitle=(new_itunes_subtitle)
  # Stores the given value in @itunes_subtitle exactly as passed in.
  @itunes_subtitle = new_itunes_subtitle
end
1467
-
1468
- # Returns the feed link
1469
def link
  # Returns the feed link, parsing and memoizing it on first use.
  #
  # Within the channel node the queries below are tried in order until one
  # yields a non-empty string.  If none does, the guid is used when
  # FeedTools.is_uri? accepts it, and after that the channel's base
  # attribute.  The result is passed through FeedTools.normalize_url and
  # copied to the cache object when one is present.
  if @link.nil?
    unless channel_node.nil?
      # The first query used to read "[@rel='alternate' @type='text/html']",
      # which is not a valid XPath predicate; the two attribute tests have
      # to be joined with "and".
      link_queries = [
        "link[@rel='alternate' and @type='text/html']/@href",
        "link[@rel='alternate']/@href",
        "link/@href",
        "link/text()",
        "@href",
        "@HREF",
        "a/@href",
        "A/@HREF"
      ]
      link_queries.each do |query|
        @link = XPath.first(channel_node, query).to_s
        break unless @link == ""
      end
    end
    if @link == "" || @link.nil?
      if FeedTools.is_uri? self.guid
        @link = self.guid
      end
    end
    if @link == "" && channel_node != nil
      # Technically, we shouldn't use the base attribute for this, but if the
      # href attribute is missing, it's already a given that we're looking at
      # a messed up CDF file.  We can always pray it's correct.
      @link = XPath.first(channel_node, "@base").to_s
    end
    @link = FeedTools.normalize_url(@link)
    unless self.cache_object.nil?
      self.cache_object.link = @link
    end
  end
  return @link
end
1514
-
1515
- # Sets the feed link
1516
def link=(new_link)
  # Stores the link and, when a cache object is present, copies it there too.
  @link = new_link
  self.cache_object.link = new_link unless self.cache_object.nil?
end
1522
-
1523
- # Returns the url to the icon file for this feed.
1524
- #
1525
- # This method uses the url from the link field in order to avoid grabbing
1526
- # the favicon for services like feedburner.
1527
def icon
  # Locates the feed icon on first use and memoizes it.  The first node
  # matched by the queries below supplies the url (href attribute, then text
  # content if that is a URI).  When that node yields nothing and the feed
  # has a link, "/favicon.ico" on the link's host is used.  Returns nil when
  # no icon node exists at all.
  return @icon unless @icon.nil?

  icon_queries = [
    "link[@rel='icon']",
    "link[@rel='shortcut icon']",
    "link[@type='image/x-icon']",
    "icon",
    "logo[@style='icon']",
    "LOGO[@STYLE='ICON']"
  ]
  icon_node = nil
  icon_queries.each do |query|
    icon_node = XPath.first(channel_node, query)
    break unless icon_node.nil?
  end
  return @icon if icon_node.nil?

  @icon = FeedTools.unescape_entities(XPath.first(icon_node, "@href").to_s)
  if @icon == ""
    @icon = FeedTools.unescape_entities(XPath.first(icon_node, "text()").to_s)
    @icon = "" unless FeedTools.is_uri? @icon
  end
  if @icon == "" && self.link != nil && self.link != ""
    link_uri = URI.parse(FeedTools.normalize_url(self.link))
    @icon = link_uri.scheme + "://" + link_uri.host + "/favicon.ico"
  end
  @icon = nil if @icon == ""
  @icon
end
1565
-
1566
- # Returns the feed author
1567
def author
  # Returns the feed author as a FeedTools::Feed::Author, parsed and
  # memoized on first use.
  #
  # The author node is the first of author, managingEditor, dc:author,
  # dc:creator and atom:author found under the channel node.  Its text is
  # kept in #raw and split into name and email when it looks like
  # "Name (user@host)" or "user@host (Name)".  Missing fields then fall
  # back to name/email/url child elements or attributes, and finally the
  # name falls back to itunes_author.  Empty strings are turned into nil.
  if @author.nil?
    @author = FeedTools::Feed::Author.new
    author_node = nil
    unless channel_node.nil?
      ["author", "managingEditor", "dc:author", "dc:creator",
       "atom:author"].each do |query|
        author_node = XPath.first(channel_node, query)
        break unless author_node.nil?
      end
    end
    unless author_node.nil?
      @author.raw = FeedTools.unescape_entities(
        XPath.first(author_node, "text()").to_s)
      @author.raw = nil if @author.raw == ""
      unless @author.raw.nil?
        # The local part used to be matched with [A-Z0-9._%-\+], in which
        # "%-\+" is a character range that excludes "-" and admits "(" and
        # ")".  The hyphen now sits last so it is taken literally.
        email_pattern = '\b[A-Z0-9._%+-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b'
        author_raw_pair = nil
        raw_scan = @author.raw.scan(/(.*)\((#{email_pattern})\)/i)
        if raw_scan.empty?
          raw_scan = @author.raw.scan(/(#{email_pattern})\s*\((.*)\)/i)
          author_raw_pair = raw_scan.first.reverse unless raw_scan.empty?
        else
          author_raw_pair = raw_scan.first
        end
        if raw_scan.empty?
          email_scan = @author.raw.scan(/#{email_pattern}/i)
          @author.email = email_scan.first.strip unless email_scan.empty?
        end
        if author_raw_pair.nil? || author_raw_pair.empty?
          # We can be reasonably sure we are looking at something that the
          # creator didn't intend to contain an email address if it got
          # through the preceding regexes and it doesn't contain the
          # tell-tale '@' symbol.
          @author.name = @author.raw unless @author.raw.include?("@")
        else
          @author.name = author_raw_pair.first.strip
          @author.email = author_raw_pair.last.strip
        end
      end
      # Unset fields are nil, so they are normalized to "" first; otherwise
      # the == "" fallbacks below would never run for email and url.
      @author.name = "" if @author.name.nil?
      @author.email = "" if @author.email.nil?
      @author.url = "" if @author.url.nil?
      if @author.name == ""
        @author.name = FeedTools.unescape_entities(
          XPath.first(author_node, "name/text()").to_s)
      end
      if @author.name == ""
        @author.name = FeedTools.unescape_entities(
          XPath.first(author_node, "@name").to_s)
      end
      if @author.email == ""
        @author.email = FeedTools.unescape_entities(
          XPath.first(author_node, "email/text()").to_s)
      end
      if @author.email == ""
        @author.email = FeedTools.unescape_entities(
          XPath.first(author_node, "@email").to_s)
      end
      if @author.url == ""
        @author.url = FeedTools.unescape_entities(
          XPath.first(author_node, "url/text()").to_s)
      end
      if @author.url == ""
        @author.url = FeedTools.unescape_entities(
          XPath.first(author_node, "@url").to_s)
      end
      @author.name = nil if @author.name == ""
      @author.raw = nil if @author.raw == ""
      @author.email = nil if @author.email == ""
      @author.url = nil if @author.url == ""
    end
    # Fallback on the itunes module if we didn't find an author name
    begin
      @author.name = self.itunes_author if @author.name.nil?
    rescue
      @author.name = nil
    end
  end
  return @author
end
1658
-
1659
- # Sets the feed author
1660
def author=(new_author)
  # An object that answers to name, email and url is stored as the author
  # itself.  Anything else (probably a string) becomes the name of the
  # current author, which is created first when none exists yet.
  author_like = [:name, :email, :url].all? do |reader|
    new_author.respond_to?(reader)
  end
  if author_like
    @author = new_author
  else
    @author = FeedTools::Feed::Author.new if @author.nil?
    @author.name = new_author
  end
end
1675
-
1676
- # Returns the feed publisher
1677
def publisher
  # Returns the feed publisher as a FeedTools::Feed::Author, parsed and
  # memoized on first use.
  #
  # The raw text comes from dc:publisher, or webMaster when that is empty.
  # It is split into name and email when it looks like "Name (user@host)"
  # or "user@host (Name)"; a lone address only fills in the email, and text
  # without an "@" is taken as the name.  Empty strings are turned into nil.
  if @publisher.nil?
    @publisher = FeedTools::Feed::Author.new

    @publisher.raw = FeedTools.unescape_entities(
      XPath.first(channel_node, "dc:publisher/text()").to_s)
    if @publisher.raw == ""
      @publisher.raw = FeedTools.unescape_entities(
        XPath.first(channel_node, "webMaster/text()").to_s)
    end
    unless @publisher.raw == ""
      # The local part used to be matched with [A-Z0-9._%-\+], in which
      # "%-\+" is a character range that excludes "-" and admits "(" and
      # ")".  The hyphen now sits last so it is taken literally.
      email_pattern = '\b[A-Z0-9._%+-]+@[A-Z0-9._%-]+\.[A-Z]{2,4}\b'
      publisher_raw_pair = nil
      raw_scan = @publisher.raw.scan(/(.*)\((#{email_pattern})\)/i)
      if raw_scan.empty?
        raw_scan = @publisher.raw.scan(/(#{email_pattern})\s*\((.*)\)/i)
        publisher_raw_pair = raw_scan.first.reverse unless raw_scan.empty?
      else
        publisher_raw_pair = raw_scan.first
      end
      if raw_scan.empty?
        email_scan = @publisher.raw.scan(/#{email_pattern}/i)
        @publisher.email = email_scan.first.strip unless email_scan.empty?
      end
      if publisher_raw_pair.nil? || publisher_raw_pair.empty?
        # We can be reasonably sure we are looking at something that the
        # creator didn't intend to contain an email address if it got
        # through the preceding regexes and it doesn't contain the
        # tell-tale '@' symbol.
        @publisher.name = @publisher.raw unless @publisher.raw.include?("@")
      else
        @publisher.name = publisher_raw_pair.first.strip
        @publisher.email = publisher_raw_pair.last.strip
      end
    end

    @publisher.name = nil if @publisher.name == ""
    @publisher.raw = nil if @publisher.raw == ""
    @publisher.email = nil if @publisher.email == ""
    @publisher.url = nil if @publisher.url == ""
  end
  return @publisher
end
1728
-
1729
- # Sets the feed publisher
1730
def publisher=(new_publisher)
  # An object that answers to name, email and url is stored as the publisher
  # itself.  Anything else (probably a string) becomes the name of the
  # current publisher, which is created first when none exists yet.
  author_like = [:name, :email, :url].all? do |reader|
    new_publisher.respond_to?(reader)
  end
  if author_like
    @publisher = new_publisher
  else
    @publisher = FeedTools::Feed::Author.new if @publisher.nil?
    @publisher.name = new_publisher
  end
end
1745
-
1746
- # Returns the contents of the itunes:author element
1747
- #
1748
- # Returns any incorrectly placed channel-level itunes:author
1749
- # elements. They're actually amazingly common. People don't read specs.
1750
- # There is no setter for this, since this is a "bozo" attribute.
1751
def itunes_author
  # Reads the channel-level itunes:author text on first use.  An empty
  # result is stored as nil; any other result also marks the feed as bozo.
  return @itunes_author unless @itunes_author.nil?

  @itunes_author = FeedTools.unescape_entities(
    XPath.first(channel_node, "itunes:author/text()").to_s)
  @itunes_author = nil if @itunes_author == ""
  @bozo = true unless @itunes_author.nil?
  @itunes_author
end
1760
-
1761
- # Returns the feed item time
1762
def time
  # Determines the time on first use and memoizes it.  The first non-empty
  # text among pubDate, dc:date, issued, updated and time is parsed; if
  # parsing fails, or there is no channel node, succ_time is used instead.
  # Whenever that still leaves nothing (or anything raises), Time.now is used.
  return @time unless @time.nil?

  time_string = nil
  unless channel_node.nil?
    ["pubDate/text()", "dc:date/text()", "issued/text()",
     "updated/text()", "time/text()"].each do |query|
      time_string = XPath.first(channel_node, query).to_s
      break unless time_string == ""
    end
  end
  begin
    if time_string == nil
      @time = self.succ_time
    elsif time_string != ""
      @time = Time.parse(time_string) rescue self.succ_time
    end
    @time = Time.now if @time == nil
  rescue
    @time = Time.now
  end
  @time
end
1794
-
1795
- # Sets the feed item time
1796
def time=(new_time)
  # Stores the given value in @time exactly as passed in.
  @time = new_time
end
1799
-
1800
- # Returns 1 second after the previous item's time.
1801
def succ_time #:nodoc:
  # Returns one second after the time of the item that precedes this object
  # in the feed's unsorted @items array.  Returns Time.now for the first
  # item, and nil when there is no feed, when this object is not in the
  # array, or when anything raises along the way.
  begin
    return nil if feed.nil?
    # Called for its side effect only; presumably this makes the feed
    # populate the @items array that is read directly below.
    feed.items
    unsorted_items = feed.instance_variable_get("@items")
    item_index = unsorted_items.index(self)
    return nil if item_index.nil?
    return Time.now if item_index <= 0
    previous_item = unsorted_items[item_index - 1]
    # Time#succ is obsolete; adding one second is its documented replacement.
    return previous_item.time + 1
  rescue
    return nil
  end
end
private :succ_time
1822
-
1823
- # Returns the feed item updated time
1824
def updated
  # Parses the updated time on first use from the updated element, or from
  # modified when that is empty.  Missing or unparsable text gives nil.
  return @updated unless @updated.nil?

  updated_string = nil
  unless channel_node.nil?
    ["updated/text()", "modified/text()"].each do |query|
      updated_string = XPath.first(channel_node, query).to_s
      break unless updated_string == ""
    end
  end
  if updated_string == nil || updated_string == ""
    @updated = nil
  else
    @updated = Time.parse(updated_string) rescue nil
  end
  @updated
end
1840
-
1841
- # Sets the feed item updated time
1842
def updated=(new_updated)
  # Stores the given value in @updated exactly as passed in.
  @updated = new_updated
end
1845
-
1846
- # Returns the feed item issued time
1847
def issued
  # Parses the issued time on first use from the first non-empty text among
  # issued, pubDate, dc:date and published.  Missing or unparsable text
  # gives nil.
  return @issued unless @issued.nil?

  issued_string = nil
  unless channel_node.nil?
    ["issued/text()", "pubDate/text()", "dc:date/text()",
     "published/text()"].each do |query|
      issued_string = XPath.first(channel_node, query).to_s
      break unless issued_string == ""
    end
  end
  if issued_string == nil || issued_string == ""
    @issued = nil
  else
    @issued = Time.parse(issued_string) rescue nil
  end
  @issued
end
1869
-
1870
- # Sets the feed item issued time
1871
def issued=(new_issued)
  # Stores the given value in @issued exactly as passed in.
  @issued = new_issued
end
1874
-
1875
- # Returns the feed item published time
1876
def published
  # Parses the published time on first use from the first non-empty text
  # among published, pubDate, dc:date and issued.  Missing or unparsable
  # text gives nil.
  return @published unless @published.nil?

  published_string = nil
  unless channel_node.nil?
    ["published/text()", "pubDate/text()", "dc:date/text()",
     "issued/text()"].each do |query|
      published_string = XPath.first(channel_node, query).to_s
      break unless published_string == ""
    end
  end
  if published_string == nil || published_string == ""
    @published = nil
  else
    @published = Time.parse(published_string) rescue nil
  end
  @published
end
1898
-
1899
- # Sets the feed item published time
1900
def published=(new_published)
  # Stores the given value in @published exactly as passed in.
  @published = new_published
end
1903
-
1904
- # Returns a list of the feed's categories
1905
def categories
  # Builds the category list on first use from the channel's category
  # elements, or dc:subject when there are none.  Each category gets a term
  # (term attribute, else text), a label and a scheme (scheme attribute,
  # else domain); values are stripped and empty ones become nil.
  return @categories unless @categories.nil?

  @categories = []
  blank_to_nil = lambda do |text|
    stripped = text.strip
    stripped == "" ? nil : stripped
  end
  category_nodes = XPath.match(channel_node, "category")
  if category_nodes.nil? || category_nodes.empty?
    category_nodes = XPath.match(channel_node, "dc:subject")
  end
  (category_nodes || []).each do |category_node|
    category = FeedTools::Feed::Category.new

    term = XPath.first(category_node, "@term").to_s
    term = XPath.first(category_node, "text()").to_s if term == ""
    category.term = blank_to_nil.call(term)

    category.label =
      blank_to_nil.call(XPath.first(category_node, "@label").to_s)

    scheme = XPath.first(category_node, "@scheme").to_s
    scheme = XPath.first(category_node, "@domain").to_s if scheme == ""
    category.scheme = blank_to_nil.call(scheme)

    @categories << category
  end
  @categories
end
1936
-
1937
- # Returns a list of the feed's images
1938
def images
  # Builds the image list on first use.  Nodes come from the first of
  # image, link, logo and LOGO that matches anything under the channel
  # node.  For each node the url, title, description, link, height, width
  # and style are read; blank strings and non-positive sizes become nil.
  return @images unless @images.nil?

  @images = []
  return @images if channel_node.nil?

  image_nodes = nil
  ["image", "link", "logo", "LOGO"].each do |query|
    image_nodes = XPath.match(channel_node, query)
    break unless image_nodes.nil? || image_nodes.empty?
  end
  return @images if image_nodes.nil?

  blank_to_nil = lambda do |text|
    stripped = text.strip
    stripped == "" ? nil : stripped
  end
  positive_or_nil = lambda { |number| number <= 0 ? nil : number }

  image_nodes.each do |image_node|
    image = FeedTools::Feed::Image.new

    url = XPath.first(image_node, "url/text()").to_s
    url = XPath.first(image_node, "@rdf:resource").to_s if url == ""
    # href only counts for logo nodes or nodes whose type starts with "image"
    if url == "" && (image_node.name == "logo" ||
        (image_node.attributes['type'] =~ /^image/) == 0)
      url = XPath.first(image_node, "@href").to_s
    end
    if url == "" && image_node.name == "LOGO"
      url = XPath.first(image_node, "@HREF").to_s
    end
    image.url = blank_to_nil.call(url)

    image.title =
      blank_to_nil.call(XPath.first(image_node, "title/text()").to_s)
    image.description =
      blank_to_nil.call(XPath.first(image_node, "description/text()").to_s)
    image.link =
      blank_to_nil.call(XPath.first(image_node, "link/text()").to_s)
    image.height =
      positive_or_nil.call(XPath.first(image_node, "height/text()").to_s.to_i)
    image.width =
      positive_or_nil.call(XPath.first(image_node, "width/text()").to_s.to_i)

    style = XPath.first(image_node, "@style").to_s.downcase
    style = XPath.first(image_node, "@STYLE").to_s.downcase if style == ""
    image.style = blank_to_nil.call(style)

    @images << image
  end
  @images
end
1995
-
1996
- # Returns the feed's text input field
1997
def text_input
  # Builds the TextInput object on first use from the channel's textInput
  # element.  title, description, link and name are read from the child
  # elements of the same names; empty strings become nil.  When there is no
  # textInput element the object is returned unpopulated.
  return @text_input unless @text_input.nil?

  @text_input = FeedTools::Feed::TextInput.new
  text_input_node = XPath.first(channel_node, "textInput")
  unless text_input_node.nil?
    [["title", :title=], ["description", :description=],
     ["link", :link=], ["name", :name=]].each do |element, writer|
      text = XPath.first(text_input_node, "#{element}/text()").to_s
      @text_input.send(writer, text == "" ? nil : text)
    end
  end
  @text_input
end
2018
-
2019
- # Returns the feed's copyright information
2020
def copyright
  # Reads the copyright notice on first use from the first non-empty text
  # among copyright, rights, dc:rights and copyrights, passes it through
  # FeedTools.sanitize_html with :strip, and stores nil when it ends up empty.
  if @copyright.nil? && !channel_node.nil?
    ["copyright/text()", "rights/text()", "dc:rights/text()",
     "copyrights/text()"].each do |query|
      @copyright = XPath.first(channel_node, query).to_s
      break unless @copyright == ""
    end
    @copyright = FeedTools.sanitize_html(@copyright, :strip)
    @copyright = nil if @copyright == ""
  end
  @copyright
end
2039
-
2040
- # Sets the feed's copyright information
2041
def copyright=(new_copyright)
  # Stores the given value in @copyright exactly as passed in.
  @copyright = new_copyright
end
2044
-
2045
- # Returns the number of seconds before the feed should expire
2046
def time_to_live
  # Returns the number of seconds before the feed should expire, working it
  # out on first use from, in order:
  #
  # 1. syn:updatePeriod / syn:updateFrequency.  The frequency is the number
  #    of updates per period, so the interval is the period length divided
  #    by the frequency (this used to be multiplied, which turned "twice
  #    daily" into two days).  A frequency below one is treated as one.
  # 2. ttl, read in the unit named by its span attribute, or minutes when
  #    there is none (values >= 3000 are taken to be seconds).
  # 3. schedule/intervaltime days, hour, min and sec attributes, summed.
  #
  # One hour is used when none of these gives a non-zero value.
  if @time_to_live.nil?
    unless channel_node.nil?
      # get the feed time to live from the xml document
      update_frequency =
        XPath.first(channel_node, "syn:updateFrequency/text()").to_s
      if update_frequency != ""
        update_period =
          XPath.first(channel_node, "syn:updatePeriod/text()").to_s
        period_length =
          case update_period
          when "daily" then 1.day
          when "weekly" then 1.week
          when "monthly" then 1.month
          when "yearly" then 1.year
          else 1.hour # hourly
          end
        updates_per_period = update_frequency.to_i
        updates_per_period = 1 if updates_per_period < 1
        @time_to_live = period_length / updates_per_period
      end
      if @time_to_live.nil?
        # usually expressed in minutes
        update_frequency = XPath.first(channel_node, "ttl/text()").to_s
        if update_frequency != ""
          update_span = XPath.first(channel_node, "ttl/@span").to_s
          ttl_value = update_frequency.to_i
          @time_to_live =
            case update_span
            when "seconds" then ttl_value
            when "minutes" then ttl_value.minute
            when "hours" then ttl_value.hour
            when "days" then ttl_value.day
            when "weeks" then ttl_value.week
            when "months" then ttl_value.month
            when "years" then ttl_value.year
            else
              # Normally, this should default to minutes, but realistically,
              # if they meant minutes, you're rarely going to see a value
              # higher than 120.  If we see >= 3000, we're either dealing
              # with a pseudo-spec that decided to use seconds, or we're
              # looking at someone who only has weekly updated content.
              # Worst case, we misreport the time, and we update too often.
              # Best case, we avoid accidentally updating the feed only once
              # a year.
              ttl_value >= 3000 ? ttl_value : ttl_value.minute
            end
        end
      end
      if @time_to_live.nil?
        @time_to_live = 0
        update_frequency_days =
          XPath.first(channel_node, "schedule/intervaltime/@days").to_s
        update_frequency_hours =
          XPath.first(channel_node, "schedule/intervaltime/@hour").to_s
        update_frequency_minutes =
          XPath.first(channel_node, "schedule/intervaltime/@min").to_s
        update_frequency_seconds =
          XPath.first(channel_node, "schedule/intervaltime/@sec").to_s
        if update_frequency_days != ""
          @time_to_live = @time_to_live + update_frequency_days.to_i.day
        end
        if update_frequency_hours != ""
          @time_to_live = @time_to_live + update_frequency_hours.to_i.hour
        end
        if update_frequency_minutes != ""
          @time_to_live = @time_to_live + update_frequency_minutes.to_i.minute
        end
        if update_frequency_seconds != ""
          @time_to_live = @time_to_live + update_frequency_seconds.to_i
        end
        if @time_to_live == 0
          @time_to_live = 1.hour
        end
      end
    end
  end
  if @time_to_live.nil? || @time_to_live == 0
    # Default to one hour
    @time_to_live = 1.hour
  end
  @time_to_live = @time_to_live.round
  return @time_to_live
end
2137
-
2138
- # Sets the feed time to live
2139
def time_to_live=(new_time_to_live)
  # Stores the rounded value, raised to one hour when it is smaller than that.
  rounded = new_time_to_live.round
  @time_to_live = rounded < 1.hour ? 1.hour : rounded
end
2143
-
2144
- # Returns the feed's cloud
2145
def cloud
  # Builds the Cloud object on first use from the attributes of the
  # channel's cloud element.  Empty attributes become nil, the port is
  # converted to an integer (nil when that gives 0), and the protocol is
  # downcased.
  return @cloud unless @cloud.nil?

  blank_to_nil = lambda { |text| text == "" ? nil : text }

  @cloud = FeedTools::Feed::Cloud.new
  domain = XPath.first(channel_node, "cloud/@domain").to_s
  port = XPath.first(channel_node, "cloud/@port").to_s
  path = XPath.first(channel_node, "cloud/@path").to_s
  register_procedure =
    XPath.first(channel_node, "cloud/@registerProcedure").to_s
  protocol = XPath.first(channel_node, "cloud/@protocol").to_s.downcase

  port = port == "" ? nil : port.to_i
  port = nil if port == 0

  @cloud.domain = blank_to_nil.call(domain)
  @cloud.port = port
  @cloud.path = blank_to_nil.call(path)
  @cloud.register_procedure = blank_to_nil.call(register_procedure)
  @cloud.protocol = blank_to_nil.call(protocol)
  @cloud
end
2165
-
2166
- # Sets the feed's cloud
2167
def cloud=(new_cloud)
  # Stores the given value in @cloud exactly as passed in.
  @cloud = new_cloud
end
2170
-
2171
- # Returns the feed generator
2172
def generator
  # Reads the generator element's text on first use, passes it through
  # FeedTools.strip_html, and stores nil when the result is empty.
  return @generator unless @generator.nil?

  @generator = XPath.first(channel_node, "generator/text()").to_s
  @generator = FeedTools.strip_html(@generator)
  @generator = nil if @generator == ""
  @generator
end
2180
-
2181
- # Sets the feed generator
2182
def generator=(new_generator)
  # Stores the given value in @generator exactly as passed in.
  @generator = new_generator
end
2185
-
2186
- # Returns the feed docs
2187
def docs
  # Reads the docs element's text on first use, passes it through
  # FeedTools.strip_html, and stores nil when the result is empty.
  return @docs unless @docs.nil?

  @docs = XPath.first(channel_node, "docs/text()").to_s
  @docs = FeedTools.strip_html(@docs)
  @docs = nil if @docs == ""
  @docs
end
2195
-
2196
- # Sets the feed docs
2197
def docs=(new_docs)
  # Stores the given value in @docs exactly as passed in.
  @docs = new_docs
end
2200
-
2201
- # Returns the feed language
2202
def language
  # Returns the downcased feed language, working it out on first use from
  # the language element, the dc:language element, the channel node's
  # xml:lang attribute and finally the root node's xml:lang attribute.
  # Falls back to "en-us" when none of them gives a value.
  if @language.nil?
    unless channel_node.nil?
      @language = XPath.first(channel_node, "language/text()").to_s
      if @language == ""
        @language = XPath.first(channel_node, "dc:language/text()").to_s
      end
      # xml:lang is an attribute, so it has to be addressed with "@"; the
      # previous "xml:lang/text()" query looked for a child element instead.
      if @language == ""
        @language = XPath.first(channel_node, "@xml:lang").to_s
      end
      if @language == ""
        @language = XPath.first(root_node, "@xml:lang").to_s
      end
    end
    if @language == "" || @language.nil?
      @language = "en-us"
    end
    @language = @language.downcase
  end
  return @language
end
2224
-
2225
- # Sets the feed language
2226
def language=(new_language)
  # Stores the given value in @language exactly as passed in.
  @language = new_language
end
2229
-
2230
- # Returns true if this feed contains explicit material.
2231
def explicit?
  # Decides on first use whether the feed is explicit: media:adult must read
  # "true", or itunes:explicit must read "yes" or "true" (case-insensitive).
  # The answer is memoized as true or false.
  if @explicit.nil?
    @explicit =
      (XPath.first(channel_node,
        "media:adult/text()").to_s.downcase == "true") ||
      ["yes", "true"].include?(XPath.first(channel_node,
        "itunes:explicit/text()").to_s.downcase)
  end
  @explicit
end
2246
-
2247
- # Sets whether or not the feed contains explicit material
2248
def explicit=(new_explicit)
  # Stores the truthiness of the argument as a strict true or false.
  @explicit = !!new_explicit
end
2251
-
2252
- # Returns the feed items
2253
def items
  # Returns the feed items, newest first.  On first use the item nodes are
  # looked up (item, ITEM, then entry, alternating between the root and
  # channel nodes as listed below) and wrapped in FeedItem objects.  The
  # list is re-sorted by time on every call; items without a time sort as
  # if dated 1970.
  if @items.nil?
    raw_items = nil
    unless root_node.nil?
      # [search the root node?, query] in lookup order
      item_lookups = [
        [true, "item"], [false, "item"],
        [false, "ITEM"], [true, "ITEM"],
        [false, "entry"], [true, "entry"]
      ]
      item_lookups.each do |use_root, query|
        raw_items = XPath.match(use_root ? root_node : channel_node, query)
        break unless raw_items == nil || raw_items == []
      end
    end

    # create the individual feed items
    @items = []
    unless raw_items == nil
      raw_items.each do |item_node|
        new_item = FeedItem.new
        new_item.xml_data = item_node.to_s
        new_item.feed = self
        @items << new_item
      end
    end
  end

  # Sort the items
  @items = @items.sort do |first, second|
    (second.time or Time.mktime(1970)) <=> (first.time or Time.mktime(1970))
  end
  @items
end
2292
-
2293
- # The time that the feed was last requested from the remote server. Nil if it has
2294
- # never been pulled, or if it was created from scratch.
2295
def last_retrieved
  # Refreshes @last_retrieved from the cache object when there is one, then
  # returns it.
  @last_retrieved = self.cache_object.last_retrieved unless self.cache_object.nil?
  @last_retrieved
end
2301
-
2302
- # Sets the time that the feed was last updated.
2303
def last_retrieved=(new_last_retrieved)
  # Stores the time and, when a cache object is present, copies it there too.
  @last_retrieved = new_last_retrieved
  self.cache_object.last_retrieved = new_last_retrieved unless self.cache_object.nil?
end
2309
-
2310
- # True if this feed contains audio content enclosures
2311
def podcast?
  # True if any enclosure of any item reports audio content.  The search
  # stops at the first such enclosure instead of scanning every item.
  self.items.any? do |item|
    item.enclosures.any? { |enclosure| enclosure.audio? }
  end
end
2320
-
2321
- # True if this feed contains video content enclosures
2322
def vidlog?
  # True if any enclosure of any item reports video content.  The search
  # stops at the first such enclosure instead of scanning every item.
  self.items.any? do |item|
    item.enclosures.any? { |enclosure| enclosure.video? }
  end
end
2331
-
2332
- # True if this feed is malformed somehow
2333
def bozo?
  # Returns the bozo flag, initializing it to false when it was never set.
  @bozo = false if @bozo.nil?
  @bozo
end
2339
-
2340
- # True if the feed was not last retrieved from the cache.
2341
def live?
  # Returns the @live flag as stored (nil when it was never set).
  @live
end
2344
-
2345
- # True if the feed has expired and must be reacquired from the remote server.
2346
def expired?
  # True if the feed was never retrieved, or if more than time_to_live
  # seconds have passed since it was.  time_to_live is already expressed in
  # seconds, so it is added as-is; it used to be multiplied by 3600 through
  # a stray ".hour", which kept feeds from expiring for months.
  retrieved_at = self.last_retrieved
  return true if retrieved_at.nil?
  return (retrieved_at + self.time_to_live) < Time.now
end
2349
-
2350
- # Forces this feed to expire.
2351
def expire!
  # Moves last_retrieved back to the start of 1970 and saves the feed.
  long_ago = Time.mktime(1970)
  self.last_retrieved = long_ago
  self.save
end
2355
-
2356
- # A hook method that is called during the feed generation process. Overriding this method
2357
- # will enable additional content to be inserted into the feed.
2358
def build_xml_hook(feed_type, version, xml_builder)
  # Default implementation adds nothing; the arguments are accepted only
  # so that overriding methods receive them.
  nil
end
2361
-
2362
- # Generates xml based on the content of the feed
2363
def build_xml(feed_type=(self.feed_type or "rss"), version=nil,
    xml_builder=Builder::XmlMarkup.new(:indent => 2))
  # Serializes the feed with the given builder.
  #
  # feed_type:: "rss" or "atom"; defaults to the feed's own type, or "rss".
  # version::   nil or 0.0 selects the default (1.0 for rss, 0.3 for atom).
  # Returns the result of the outermost builder call, or nil when the
  # feed_type/version combination matches none of the supported formats.
  #
  # Each format is generated by its own private helper; this method only
  # resolves the default version and dispatches.
  if feed_type == "rss" && (version == nil || version == 0.0)
    version = 1.0
  elsif feed_type == "atom" && (version == nil || version == 0.0)
    version = 0.3
  end
  if feed_type == "rss" && (version == 0.9 || version == 1.0 ||
      version == 1.1)
    return build_xml_rdf(feed_type, version, xml_builder)
  elsif feed_type == "rss"
    return build_xml_rss2(feed_type, version, xml_builder)
  elsif feed_type == "atom" && version == 0.3
    return build_xml_atom03(feed_type, version, xml_builder)
  elsif feed_type == "atom" && version == 1.0
    return build_xml_atom10(feed_type, version, xml_builder)
  end
  return nil
end

# Emits the RDF-based rss formats (versions 0.9, 1.0 and 1.1).
def build_xml_rdf(feed_type, version, xml_builder)
  xml_builder.tag!("rdf:RDF",
      "xmlns" => "http://purl.org/rss/1.0/",
      "xmlns:rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
      "xmlns:dc" => "http://purl.org/dc/elements/1.1/",
      "xmlns:syn" => "http://purl.org/rss/1.0/modules/syndication/",
      "xmlns:taxo" => "http://purl.org/rss/1.0/modules/taxonomy/",
      "xmlns:itunes" => "http://www.itunes.com/DTDs/Podcast-1.0.dtd",
      "xmlns:media" => "http://search.yahoo.com/mrss") do
    channel_attributes = {}
    unless self.link.nil?
      channel_attributes["rdf:about"] = CGI.escapeHTML(self.link)
    end
    xml_builder.channel(channel_attributes) do
      # title, link and description are always emitted here, as empty
      # elements when the feed has no value for them.
      if title.nil? || title == ""
        xml_builder.title
      else
        xml_builder.title(title)
      end
      if link.nil? || link == ""
        xml_builder.link
      else
        xml_builder.link(link)
      end
      unless images.nil? || images.empty?
        xml_builder.image("rdf:resource" => CGI.escapeHTML(
          images.first.url))
      end
      if description.nil? || description == ""
        xml_builder.description
      else
        xml_builder.description(description)
      end
      unless language.nil? || language == ""
        xml_builder.tag!("dc:language", language)
      end
      xml_builder.tag!("syn:updatePeriod", "hourly")
      xml_builder.tag!("syn:updateFrequency", (time_to_live / 1.hour).to_s)
      xml_builder.tag!("syn:updateBase", Time.mktime(1970).iso8601)
      xml_builder.items do
        xml_builder.tag!("rdf:Seq") do
          unless items.nil?
            items.each do |item|
              if item.link.nil?
                raise "Cannot generate an rdf-based feed with a nil item link field."
              end
              xml_builder.tag!("rdf:li", "rdf:resource" => CGI.escapeHTML(item.link))
            end
          end
        end
      end
      build_xml_hook(feed_type, version, xml_builder)
    end
    build_xml_rdf_image(xml_builder)
    build_xml_items(feed_type, version, xml_builder)
  end
end

# Emits the top-level rdf image element, if the feed has any images.
def build_xml_rdf_image(xml_builder)
  return nil if images.nil? || images.empty?
  # Prefer the first image that carries a link; otherwise use the first.
  best_image = self.images.detect { |image| image.link != nil }
  best_image = images.first if best_image.nil?
  xml_builder.image("rdf:about" => CGI.escapeHTML(best_image.url)) do
    if best_image.title != nil && best_image.title != ""
      xml_builder.title(best_image.title)
    elsif self.title != nil && self.title != ""
      xml_builder.title(self.title)
    else
      xml_builder.title
    end
    unless best_image.url.nil? || best_image.url == ""
      xml_builder.url(best_image.url)
    end
    if best_image.link != nil && best_image.link != ""
      xml_builder.link(best_image.link)
    elsif self.link != nil && self.link != ""
      xml_builder.link(self.link)
    else
      xml_builder.link
    end
  end
end

# Emits the non-RDF rss format (always labelled version 2.0).
def build_xml_rss2(feed_type, version, xml_builder)
  xml_builder.rss("version" => "2.0",
      "xmlns:rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
      "xmlns:dc" => "http://purl.org/dc/elements/1.1/",
      "xmlns:taxo" => "http://purl.org/rss/1.0/modules/taxonomy/",
      "xmlns:trackback" =>
        "http://madskills.com/public/xml/rss/module/trackback/",
      "xmlns:itunes" => "http://www.itunes.com/DTDs/Podcast-1.0.dtd",
      "xmlns:media" => "http://search.yahoo.com/mrss") do
    xml_builder.channel do
      unless title.nil? || title == ""
        xml_builder.title(title)
      end
      unless link.nil? || link == ""
        xml_builder.link(link)
      end
      unless description.nil? || description == ""
        xml_builder.description(description)
      end
      xml_builder.ttl((time_to_live / 1.minute).to_s)
      xml_builder.generator(
        "http://www.sporkmonger.com/projects/feedtools")
      build_xml_hook(feed_type, version, xml_builder)
      build_xml_items(feed_type, version, xml_builder)
    end
  end
end

# Emits the atom 0.3 format.
def build_xml_atom03(feed_type, version, xml_builder)
  xml_builder.feed("xmlns" => "http://purl.org/atom/ns#",
      "version" => version,
      "xml:lang" => language) do
    unless title.nil? || title == ""
      xml_builder.title(title,
          "mode" => "escaped",
          "type" => "text/html")
    end
    build_xml_atom_author(xml_builder)
    unless link.nil? || link == ""
      xml_builder.link("href" => link,
          "rel" => "alternate",
          "type" => "text/html",
          "title" => title)
    end
    unless description.nil? || description == ""
      xml_builder.tagline(description,
          "mode" => "escaped",
          "type" => "text/html")
    end
    xml_builder.generator("FeedTools",
        "url" => "http://www.sporkmonger.com/projects/feedtools")
    build_xml_hook(feed_type, version, xml_builder)
    build_xml_items(feed_type, version, xml_builder)
  end
end

# Emits the atom 1.0 format. Raises a RuntimeError when no usable unique
# id can be produced for the feed.
def build_xml_atom10(feed_type, version, xml_builder)
  xml_builder.feed("xmlns" => "http://www.w3.org/2005/Atom",
      "xml:lang" => language) do
    unless title.nil? || title == ""
      xml_builder.title(title,
          "type" => "html")
    end
    build_xml_atom_author(xml_builder)
    unless self.url.nil? || self.url == ""
      xml_builder.link("href" => self.url,
          "rel" => "self",
          "type" => "application/atom+xml")
    end
    unless self.link.nil? || self.link == ""
      xml_builder.link("href" => self.link,
          "rel" => "alternate",
          "type" => "text/html",
          "title" => self.title)
    end
    if description.nil? || description == ""
      xml_builder.subtitle(FeedTools.no_content_string,
          "type" => "html")
    else
      xml_builder.subtitle(description,
          "type" => "html")
    end
    if self.updated != nil
      xml_builder.updated(self.updated.iso8601)
    elsif self.time != nil
      # Not technically correct, but a heck of a lot better
      # than the Time.now fall-back.
      xml_builder.updated(self.time.iso8601)
    else
      xml_builder.updated(Time.now.iso8601)
    end
    unless self.published.nil?
      xml_builder.published(self.published.iso8601)
    end
    xml_builder.generator("FeedTools - " +
      "http://www.sporkmonger.com/projects/feedtools")
    if self.id != nil
      if FeedTools.is_uri? self.id
        xml_builder.id(self.id)
      elsif self.link != nil
        # The id is not a URI, so derive one from the link instead.
        xml_builder.id(FeedTools.build_urn_uri(self.link))
      else
        raise "The unique id must be a valid URI."
      end
    elsif self.link != nil
      xml_builder.id(FeedTools.build_urn_uri(self.link))
    else
      raise "Cannot build feed, missing feed unique id."
    end
    build_xml_hook(feed_type, version, xml_builder)
    build_xml_items(feed_type, version, xml_builder)
  end
end

# Emits the author element shared by both atom formats. The name falls
# back to "n/a"; email and url are written only when present.
def build_xml_atom_author(xml_builder)
  feed_author = self.author
  xml_builder.author do
    if feed_author.nil? || feed_author.name.nil?
      xml_builder.name("n/a")
    else
      xml_builder.name(feed_author.name)
    end
    unless feed_author.nil? || feed_author.email.nil?
      xml_builder.email(feed_author.email)
    end
    unless feed_author.nil? || feed_author.url.nil?
      xml_builder.url(feed_author.url)
    end
  end
end

# Asks every feed item to append itself to the builder.
def build_xml_items(feed_type, version, xml_builder)
  return nil if items.nil?
  items.each do |item|
    item.build_xml(feed_type, version, xml_builder)
  end
end

private :build_xml_rdf, :build_xml_rdf_image, :build_xml_rss2,
  :build_xml_atom03, :build_xml_atom10, :build_xml_atom_author,
  :build_xml_items
2611
-
2612
- # Persists the current feed state to the cache.
2613
def save
  # Preconditions are checked in order; the first one that fails raises a
  # RuntimeError describing what is missing.
  if FeedTools.feed_cache.nil?
    raise "Caching is currently disabled. Cannot save to cache."
  end
  raise "The url field must be set to save to the cache." if self.url.nil?
  if self.xml_data.nil?
    raise "The xml_data field must be set to save to the cache."
  end
  if self.cache_object.nil?
    raise "The cache_object is currently nil. Cannot save to cache."
  end

  # Copy the current state onto the cache record and persist it.
  self.cache_object.url = self.url
  self.cache_object.title = self.title
  self.cache_object.link = self.link
  self.cache_object.xml_data = self.xml_data
  # Headers are only written when an http response is available.
  if !self.http_response.nil?
    self.cache_object.http_headers = self.http_headers.to_yaml
  end
  self.cache_object.last_retrieved = self.last_retrieved
  self.cache_object.save
end
2634
-
2635
# Alternate names for the description reader and writer.
[:tagline, :subtitle, :abstract, :content].each do |synonym|
  alias_method synonym, :description
  alias_method "#{synonym}=".to_sym, :description=
end

# Alternate names for time_to_live, id and items.
[[:ttl, :time_to_live], [:ttl=, :time_to_live=],
 [:guid, :id], [:guid=, :id=],
 [:entries, :items]].each do |synonym, existing|
  alias_method synonym, existing
end
2648
-
2649
- # passes missing methods to the cache_object
2650
def method_missing(msg, *params, &block)
  # Forwards unknown messages to the cache object.
  #
  # Raises NoMethodError when there is no cache object to forward to.
  # The collected arguments are splatted back out (and any block is
  # passed along) so the cache object receives the call as it was made,
  # rather than receiving one array argument.
  if self.cache_object.nil?
    raise NoMethodError, "Invalid method #{msg.to_s}"
  end
  return self.cache_object.send(msg, *params, &block)
end
2656
-
2657
- # passes missing methods to the FeedTools.feed_cache
2658
def Feed.method_missing(msg, *params, &block)
  # Forwards unknown class-level messages to FeedTools.feed_cache.
  #
  # Raises NoMethodError when caching is disabled. The collected
  # arguments are splatted back out (and any block is passed along) so
  # the cache receives the call as it was made, rather than receiving
  # one array argument.
  if FeedTools.feed_cache.nil?
    raise NoMethodError, "Invalid method Feed.#{msg.to_s}"
  end
  result = FeedTools.feed_cache.send(msg, *params, &block)
  # A single cache record is turned into a Feed opened from its url.
  if result.kind_of? FeedTools.feed_cache
    result = Feed.open(result.url)
  end
  return result
end
2668
-
2669
- # Returns a simple representation of the feed object's state.
2670
- def inspect
2671
- return "#<FeedTools::Feed:0x#{self.object_id.to_s(16)} URL:#{self.url}>"
2672
- end
2673
- end
2674
-
2675
- class FeedItem
2676
- include REXML
2677
- include AttributeDictionary
2678
-
2679
- # This class stores information about a feed item's file enclosures.
2680
class Enclosure
  include AttributeDictionary

  # File extensions treated as audio when the MIME type does not decide.
  # TODO: create a more complete list
  AUDIO_EXTENSIONS = ['mp3', 'm4a', 'm4p', 'wav', 'ogg', 'wma'].freeze
  # File extensions treated as video when the MIME type does not decide.
  # TODO: create a more complete list
  VIDEO_EXTENSIONS = ['mov', 'mp4', 'avi', 'wmv', 'asf'].freeze
  # Values accepted by expression=.
  EXPRESSIONS = ['sample', 'full', 'nonstop'].freeze

  # The url for the enclosure
  attr_accessor :url
  # The MIME type of the file referenced by the enclosure
  attr_accessor :type
  # The size of the file referenced by the enclosure
  attr_accessor :file_size
  # The total play time of the file referenced by the enclosure
  attr_accessor :duration
  # The height in pixels of the enclosed media
  attr_accessor :height
  # The width in pixels of the enclosed media
  attr_accessor :width
  # The bitrate of the enclosed media
  attr_accessor :bitrate
  # The framerate of the enclosed media
  attr_accessor :framerate
  # The thumbnail for this enclosure
  attr_accessor :thumbnail
  # The categories for this enclosure
  attr_accessor :categories
  # A hash of the enclosed file.
  # NOTE: this accessor replaces Object#hash for Enclosure instances.
  attr_accessor :hash
  # A website containing some kind of media player instead of a direct
  # link to the media file.
  attr_accessor :player
  # A list of credits for the enclosed media
  attr_accessor :credits
  # A text rendition of the enclosed media
  attr_accessor :text
  # A list of alternate version of the enclosed media file
  attr_accessor :versions
  # The default version of the enclosed media file
  attr_accessor :default_version

  # Returns true if this is the default enclosure
  def is_default?
    return @is_default
  end

  # Sets whether this is the default enclosure for the media group
  def is_default=(new_is_default)
    @is_default = new_is_default
  end

  # Returns true if the enclosure contains explicit material
  def explicit?
    return @explicit
  end

  # Sets the explicit attribute on the enclosure
  def explicit=(new_explicit)
    @explicit = new_explicit
  end

  # Determines if the object is a sample, or the full version of the
  # object, or if it is a stream.
  # Possible values are 'sample', 'full', 'nonstop'.
  def expression
    return @expression
  end

  # Sets the expression attribute on the enclosure.
  # Allowed values are 'sample', 'full', 'nonstop' (case-insensitive);
  # the value is stored downcased. Anything else, including nil, raises
  # ArgumentError.
  def expression=(new_expression)
    normalized = new_expression.to_s.downcase
    unless EXPRESSIONS.include? normalized
      raise ArgumentError,
        "Permitted values are 'sample', 'full', 'nonstop'."
    end
    @expression = normalized
  end

  # Returns true if this enclosure contains audio content: either the
  # MIME type starts with "audio", or the url ends in a known audio
  # file extension.
  def audio?
    unless self.type.nil?
      return true if (self.type =~ /^audio/) != nil
    end
    return url_extension_in?(AUDIO_EXTENSIONS)
  end

  # Returns true if this enclosure contains video content: either the
  # MIME type starts with "video" or is "image/mov", or the url ends in
  # a known video file extension.
  def video?
    unless self.type.nil?
      return true if (self.type =~ /^video/) != nil
      return true if self.type == "image/mov"
    end
    return url_extension_in?(VIDEO_EXTENSIONS)
  end

  # True when the url ends in a dot followed by one of the given
  # extensions, ignoring case. The dot is required so that a url which
  # merely ends in the same letters (e.g. ".../remove" for "mov") does
  # not count as a match.
  def url_extension_in?(extensions)
    return false if self.url.nil?
    pattern = /\.(?:#{extensions.join('|')})\z/i
    return (self.url.to_s =~ pattern) != nil
  end
  private :url_extension_in?

  alias_method :link, :url
  alias_method :link=, :url=
end
2790
-
2791
- # TODO: Make these actual classes instead of structs
2792
- # ==================================================
2793
# Digest of an enclosed file together with the kind of digest.
EnclosureHash = Struct.new("EnclosureHash", :hash, :type)
# Location and dimensions of a media player page.
EnclosurePlayer = Struct.new("EnclosurePlayer", :url, :height, :width)
# A credited name and the role it is credited with.
EnclosureCredit = Struct.new("EnclosureCredit", :name, :role)
# Location and dimensions of a thumbnail.
EnclosureThumbnail = Struct.new("EnclosureThumbnail",
  :url, :height, :width)
2798
-
2799
- # Returns the parent feed of this feed item
2800
def feed
  # Plain reader for the feed this item belongs to.
  @feed
end
2803
-
2804
- # Sets the parent feed of this feed item
2805
def feed=(new_feed)
  # Plain writer for the feed this item belongs to.
  @feed = new_feed
end
2808
-
2809
- # Returns the feed item's raw xml data.
2810
def xml_data
  # Plain reader for the item's raw xml.
  @xml_data
end
2813
-
2814
- # Sets the feed item's xml data.
2815
def xml_data=(new_xml_data)
  # Plain writer for the item's raw xml.
  @xml_data = new_xml_data
end
2818
-
2819
- # Returns a REXML Document of the xml_data
2820
def xml
  # The document is parsed from xml_data on first use and then reused.
  # TODO: :ignore_whitespace_nodes => :all
  # Add that?
  @xml_doc = Document.new(xml_data) if @xml_doc.nil?
  @xml_doc
end
2828
- end
2829
-
2830
- # Returns the first node within the root_node that matches the xpath query.
2831
def find_node(xpath)
  # The query is evaluated relative to root_node; only the first match
  # is returned.
  XPath.first(root_node, xpath)
end
2834
-
2835
- # Returns all nodes within the root_node that match the xpath query.
2836
def find_all_nodes(xpath)
  # The query is evaluated relative to root_node; every match is returned.
  XPath.match(root_node, xpath)
end
2839
-
2840
- # Returns the root node of the feed item.
2841
def root_node
  # Taken from the parsed document's root on first use and then reused.
  @root_node = xml.root if @root_node.nil?
  @root_node
end
2847
-
2848
- # Returns the feed items's unique id
2849
def id
  return @id unless @id.nil?
  unless root_node.nil?
    # Look for an id element first, then fall back to guid.
    found = XPath.first(root_node, "id/text()").to_s
    found = XPath.first(root_node, "guid/text()").to_s if found == ""
    @id = found
  end
  # An empty result is reported as nil.
  @id = nil if @id == ""
  @id
end
2861
-
2862
- # Sets the feed item's unique id
2863
def id=(new_id)
  # Plain writer; a non-nil value bypasses the xml lookup in the reader.
  @id = new_id
end
2866
-
2867
- # Returns the feed item title
2868
def title
  # Returns the item title, extracted from the xml on first use and
  # cached in @title. Returns nil when no title element is found or the
  # cleaned-up title is empty.
  if @title.nil?
    repair_entities = false
    title_node = nil
    unless root_node.nil?
      title_node = XPath.first(root_node, "title")
      title_node = XPath.first(root_node, "dc:title") if title_node.nil?
      title_node = XPath.first(root_node, "TITLE") if title_node.nil?
    end
    return nil if title_node.nil?

    # The type/mode attributes decide how the content is decoded.
    type_attr = XPath.first(title_node, "@type").to_s
    mode_attr = XPath.first(title_node, "@mode").to_s
    if type_attr == "xhtml" || mode_attr == "xhtml" ||
        type_attr == "xml" || mode_attr == "xml" ||
        type_attr == "application/xhtml+xml"
      @title = title_node.inner_xml
    elsif type_attr == "escaped" || mode_attr == "escaped"
      @title = FeedTools.unescape_entities(
        XPath.first(title_node, "text()").to_s)
    else
      @title = title_node.inner_xml
      repair_entities = true
    end
    unless @title.nil?
      @title = FeedTools.sanitize_html(@title, :strip)
      @title = FeedTools.unescape_entities(@title) if repair_entities
      @title = FeedTools.tidy_html(@title)
    end
    # The clean-up steps above may leave nil behind; only post-process
    # an actual string so that case yields nil instead of raising.
    unless @title.nil?
      if @title != ""
        # Some blogging tools include the number of comments in a post
        # in the title... this is supremely ugly, and breaks any
        # applications which expect the title to be static, so we're
        # gonna strip them out.
        #
        # If for some incredibly weird reason you need the actual
        # unstripped title, just use find_node("title/text()").to_s
        @title = @title.strip.gsub(/\[\d*\]$/, "").strip
      end
      @title = @title.gsub(/\n/, " ").strip
      @title = nil if @title == ""
    end
  end
  return @title
end
2918
-
2919
- # Sets the feed item title
2920
def title=(new_title)
  # Plain writer; a non-nil value bypasses the xml lookup in the reader.
  @title = new_title
end
2923
-
2924
- # Returns the feed item description
2925
def description
  if @description.nil?
    repair_entities = false
    description_node = nil
    unless root_node.nil?
      # Candidate elements, tried in order until one is found.
      element_names = ["content:encoded", "content", "fullitem",
        "xhtml:body", "body", "description", "tagline", "subtitle",
        "summary", "abstract", "ABSTRACT"]
      element_names.each do |element_name|
        description_node = XPath.first(root_node, element_name)
        break unless description_node.nil?
      end
      if description_node.nil?
        # Last resort; finding content here sets the @bozo flag.
        description_node = XPath.first(root_node, "info")
        @bozo = true unless description_node.nil?
      end
    end
    return nil if description_node.nil?

    if XPath.first(description_node, "@encoding").to_s != ""
      @description =
        "[Embedded data objects are not currently supported.]"
    else
      type_attr = XPath.first(description_node, "@type").to_s
      mode_attr = XPath.first(description_node, "@mode").to_s
      if ["xhtml", "xml", "application/xhtml+xml"].include?(type_attr) ||
          ["xhtml", "xml"].include?(mode_attr)
        @description = description_node.inner_xml
      elsif type_attr == "escaped" || mode_attr == "escaped"
        @description = FeedTools.unescape_entities(
          description_node.inner_xml)
      else
        @description = description_node.inner_xml
        repair_entities = true
      end
    end

    # An empty description falls back to the itunes summary, then to
    # the itunes subtitle.
    if @description == ""
      summary = self.itunes_summary
      @description = summary.nil? ? "" : summary
    end
    if @description == ""
      subtitle = self.itunes_subtitle
      @description = subtitle.nil? ? "" : subtitle
    end

    unless @description.nil?
      @description = FeedTools.sanitize_html(@description, :strip)
      @description = FeedTools.unescape_entities(@description) if repair_entities
      @description = FeedTools.tidy_html(@description)
    end

    @description = @description.strip unless @description.nil?
    @description = nil if @description == ""
  end
  return @description
end
3008
-
3009
- # Sets the feed item description
3010
def description=(new_description)
  # Plain writer; a non-nil value bypasses the xml lookup in the reader.
  @description = new_description
end
3013
-
3014
- # Returns the contents of the itunes:summary element
3015
def itunes_summary
  return @itunes_summary unless @itunes_summary.nil?
  raw = XPath.first(root_node, "itunes:summary/text()").to_s
  summary = FeedTools.unescape_entities(raw)
  # An empty summary is reported as nil and is not sanitized.
  summary = nil if summary == ""
  summary = FeedTools.sanitize_html(summary) unless summary.nil?
  @itunes_summary = summary
  @itunes_summary
end
3028
-
3029
- # Sets the contents of the itunes:summary element
3030
def itunes_summary=(new_itunes_summary)
  # Plain writer; a non-nil value bypasses the xml lookup in the reader.
  @itunes_summary = new_itunes_summary
end
3033
-
3034
- # Returns the contents of the itunes:subtitle element
3035
def itunes_subtitle
  return @itunes_subtitle unless @itunes_subtitle.nil?
  raw = XPath.first(root_node, "itunes:subtitle/text()").to_s
  subtitle = FeedTools.unescape_entities(raw)
  # An empty subtitle is reported as nil and is not sanitized.
  subtitle = nil if subtitle == ""
  subtitle = FeedTools.sanitize_html(subtitle) unless subtitle.nil?
  @itunes_subtitle = subtitle
  @itunes_subtitle
end
3048
-
3049
- # Sets the contents of the itunes:subtitle element
3050
def itunes_subtitle=(new_itunes_subtitle)
  # Plain writer; a non-nil value bypasses the xml lookup in the reader.
  @itunes_subtitle = new_itunes_subtitle
end
3053
-
3054
- # Returns the contents of the media:text element
3055
def media_text
  # Returns the text of the media:text element, unescaped and sanitized,
  # or nil when the element is missing or empty. The value is cached in
  # @media_text after the first lookup.
  if @media_text.nil?
    # Query media:text itself rather than itunes:subtitle.
    @media_text = FeedTools.unescape_entities(XPath.first(root_node,
      "media:text/text()").to_s)
    if @media_text == ""
      @media_text = nil
    end
    unless @media_text.nil?
      @media_text = FeedTools.sanitize_html(@media_text)
    end
  end
  return @media_text
end
3068
-
3069
- # Sets the contents of the media:text element
3070
def media_text=(new_media_text)
  # Plain writer; a non-nil value bypasses the xml lookup in the reader.
  @media_text = new_media_text
end
3073
-
3074
- # Returns the feed item link
3075
def link
  if @link.nil?
    unless root_node.nil?
      # Candidate locations, tried in order until one yields a value.
      queries = [
        "link[@rel='alternate']/@href",
        "link/@href",
        "link/text()",
        "@rdf:about",
        "guid[@isPermaLink='true']/text()",
        "@href",
        "a/@href",
        "@HREF",
        "A/@HREF"
      ]
      queries.each do |query|
        @link = XPath.first(root_node, query).to_s
        break unless @link == ""
      end
    end
    # With nothing found, the guid is used if it is itself a uri.
    if @link == "" || @link.nil?
      @link = self.guid if FeedTools.is_uri? self.guid
    end
    @link = FeedTools.unescape_entities(@link) if @link != ""
    # TODO: Actually implement proper relative url resolving instead of this crap
    # ===========================================================================
    #
    # if @link != "" && (@link =~ /http:\/\//) != 0 && (@link =~ /https:\/\//) != 0
    #   if (feed.base[-1..-1] == "/" && @link[0..0] == "/")
    #     @link = @link[1..-1]
    #   end
    #   # prepend the base to the link since they seem to have used a relative path
    #   @link = feed.base + @link
    # end
    @link = FeedTools.normalize_url(@link)
  end
  return @link
end
3126
-
3127
- # Sets the feed item link
3128
def link=(new_link)
  # Plain writer; a non-nil value bypasses the xml lookup in the reader.
  @link = new_link
end
3131
-
3132
- # Returns a list of the feed item's categories
3133
def categories
  return @categories unless @categories.nil?
  @categories = []
  # category elements are preferred; dc:subject is the fallback.
  nodes = XPath.match(root_node, "category")
  nodes = XPath.match(root_node, "dc:subject") if nodes.nil? || nodes.empty?
  unless nodes.nil?
    # Trims a value and turns an empty result into nil.
    blank_to_nil = lambda do |raw|
      trimmed = raw.strip
      trimmed == "" ? nil : trimmed
    end
    nodes.each do |node|
      category = FeedTools::Feed::Category.new

      # The term comes from the attribute, else from the element text.
      term = XPath.first(node, "@term").to_s
      term = XPath.first(node, "text()").to_s if term == ""
      category.term = blank_to_nil.call(term)

      category.label = blank_to_nil.call(XPath.first(node, "@label").to_s)

      # The scheme comes from @scheme, else from @domain.
      scheme = XPath.first(node, "@scheme").to_s
      scheme = XPath.first(node, "@domain").to_s if scheme == ""
      category.scheme = blank_to_nil.call(scheme)

      @categories << category
    end
  end
  @categories
end
3164
-
3165
- # Returns a list of the feed items's images
3166
def images
  return @images unless @images.nil?
  @images = []
  # Candidate element names, tried in order until one yields nodes.
  image_nodes = XPath.match(root_node, "link")
  ["logo", "LOGO", "image"].each do |element_name|
    break unless image_nodes.nil? || image_nodes.empty?
    image_nodes = XPath.match(root_node, element_name)
  end
  unless image_nodes.nil?
    # Trims a value and turns an empty result into nil.
    blank_to_nil = lambda do |raw|
      trimmed = raw.strip
      trimmed == "" ? nil : trimmed
    end
    image_nodes.each do |image_node|
      image = FeedTools::Feed::Image.new

      url = XPath.first(image_node, "url/text()").to_s
      if url != ""
        # A url child element marks the parent feed as bozo.
        self.feed.bozo = true
      else
        url = XPath.first(image_node, "@rdf:resource").to_s
      end
      if url == "" && (image_node.name == "logo" ||
          (image_node.attributes['type'] =~ /^image/) == 0)
        url = XPath.first(image_node, "@href").to_s
      end
      if url == "" && image_node.name == "LOGO"
        url = XPath.first(image_node, "@HREF").to_s
      end
      image.url = blank_to_nil.call(url)

      image.title = blank_to_nil.call(
        XPath.first(image_node, "title/text()").to_s)
      image.description = blank_to_nil.call(
        XPath.first(image_node, "description/text()").to_s)
      image.link = blank_to_nil.call(
        XPath.first(image_node, "link/text()").to_s)

      # Non-positive dimensions are reported as nil.
      height = XPath.first(image_node, "height/text()").to_s.to_i
      image.height = (height <= 0) ? nil : height
      width = XPath.first(image_node, "width/text()").to_s.to_i
      image.width = (width <= 0) ? nil : width

      style = XPath.first(image_node, "@style").to_s.downcase
      if style == ""
        style = XPath.first(image_node, "@STYLE").to_s.downcase
      end
      image.style = blank_to_nil.call(style)

      @images << image
    end
  end
  @images
end
3224
-
3225
- # Returns the feed item itunes image link
3226
- #
3227
- # If it's not present, falls back to the normal image link.
3228
- # Technically, the itunes spec says that the image needs to be
3229
- # square and larger than 300x300, but hey, if there's an image
3230
- # to be had, it's better than none at all.
3231
def itunes_image_link
  return @itunes_image_link unless @itunes_image_link.nil?
  # itunes:image is preferred; itunes:link with rel='image' is the
  # fallback. The result is passed through FeedTools.normalize_url.
  href = XPath.first(root_node, "itunes:image/@href").to_s
  if href == ""
    href = XPath.first(root_node, "itunes:link[@rel='image']/@href").to_s
  end
  @itunes_image_link = FeedTools.normalize_url(href)
  @itunes_image_link
end
3242
-
3243
- # Sets the feed item itunes image link
3244
def itunes_image_link=(new_itunes_image_link)
  # Plain writer; a non-nil value bypasses the xml lookup in the reader.
  @itunes_image_link = new_itunes_image_link
end
3247
-
3248
- # Returns the feed item media thumbnail link
3249
- #
3250
- # NOTE: no fallback to the normal image link is implemented here.
3251
def media_thumbnail_link
  return @media_thumbnail_link unless @media_thumbnail_link.nil?
  # Read the url attribute of media:thumbnail and pass it through
  # FeedTools.normalize_url.
  thumbnail_url = XPath.first(root_node, "media:thumbnail/@url").to_s
  @media_thumbnail_link = FeedTools.normalize_url(thumbnail_url)
  @media_thumbnail_link
end
3259
-
3260
- # Sets the feed item media thumbnail url
3261
def media_thumbnail_link=(new_media_thumbnail_link)
  # Plain writer; a non-nil value bypasses the xml lookup in the reader.
  @media_thumbnail_link = new_media_thumbnail_link
end
3264
-
3265
- # Returns the feed item's copyright information
3266
def copyright
  return @copyright unless @copyright.nil?
  unless root_node.nil?
    # Candidate elements, tried in order until one yields text.
    queries = ["dc:rights/text()", "rights/text()",
      "copyright/text()", "copyrights/text()"]
    queries.each do |query|
      @copyright = XPath.first(root_node, query).to_s
      break unless @copyright == ""
    end
    @copyright = FeedTools.sanitize_html(@copyright, :strip)
    # An empty result is reported as nil.
    @copyright = nil if @copyright == ""
  end
  @copyright
end
3285
-
3286
- # Sets the feed item's copyright information
3287
def copyright=(new_copyright)
  # Plain writer; a non-nil value bypasses the xml lookup in the reader.
  @copyright = new_copyright
end
3290
-
3291
# Returns all feed item enclosures.
#
# On first use the enclosures are collected from four sources below the
# item's root node: RSS <enclosure> elements, Atom link[@rel='enclosure']
# elements, stand-alone media:content elements and media:content elements
# wrapped in a media:group. Enclosures with the same url are merged into one
# object. Afterwards:
#
# * if the item itself is explicit, every enclosure is marked explicit;
# * every itunes:category is added to each enclosure's categories;
# * empty/zero attribute values are replaced with nil (expression defaults
#   to "full") and a missing text falls back on itunes_summary;
# * enclosures belonging to a media:group are collapsed into a single entry
#   (the one flagged isDefault, or the first of the group when none is
#   flagged); the others are reachable through its versions list.
#
# If exactly one enclosure remains and it has no duration, the item's
# itunes_duration is used. The result is cached in @enclosures.
def enclosures
  if @enclosures.nil?
    @enclosures = []

    # Looks up an already-parsed enclosure by url so repeats can be merged.
    find_enclosure = lambda do |url|
      @enclosures.find { |existing| existing.url == url }
    end

    # Builds a thumbnail/player object (url, height, width) from the named
    # child element of +node+, or returns nil when the element has no url.
    build_sized_media = lambda do |klass, node, element_name|
      url = XPath.first(node, "#{element_name}/@url").to_s
      if url == ""
        nil
      else
        media = klass.new(
          FeedTools.unescape_entities(url),
          FeedTools.unescape_entities(
            XPath.first(node, "#{element_name}/@height").to_s),
          FeedTools.unescape_entities(
            XPath.first(node, "#{element_name}/@width").to_s)
        )
        media.height = nil if media.height == ""
        media.width = nil if media.width == ""
        media
      end
    end

    # Builds the category list from the media:category children of +node+.
    build_categories = lambda do |node|
      XPath.match(node, "media:category").map do |category_node|
        category = FeedTools::Feed::Category.new
        category.term = FeedTools.unescape_entities(category_node.text)
        category.scheme = FeedTools.unescape_entities(
          category_node.attributes["scheme"].to_s)
        category.label = FeedTools.unescape_entities(
          category_node.attributes["label"].to_s)
        category.scheme = nil if category.scheme == ""
        category.label = nil if category.label == ""
        category
      end
    end

    # Builds the credit list from the media:credit children of +node+.
    build_credits = lambda do |node|
      XPath.match(node, "media:credit").map do |credit_node|
        credit = EnclosureCredit.new(
          FeedTools.unescape_entities(credit_node.text),
          FeedTools.unescape_entities(
            credit_node.attributes["role"].to_s.downcase)
        )
        credit.role = nil if credit.role == ""
        credit
      end
    end

    # First, load up all the different possible sources of enclosures
    rss_enclosures = XPath.match(root_node, "enclosure")
    atom_enclosures = XPath.match(root_node, "link[@rel='enclosure']")
    media_content_enclosures = XPath.match(root_node, "media:content")
    media_group_enclosures = XPath.match(root_node, "media:group")

    # Parse RSS-type enclosures. Thanks to a few buggy enclosures
    # implementations, sometimes these also manage to show up in atom files.
    rss_enclosures.each do |enclosure_node|
      enclosure = Enclosure.new
      enclosure.url = FeedTools.unescape_entities(
        enclosure_node.attributes["url"].to_s)
      enclosure.type = enclosure_node.attributes["type"].to_s
      enclosure.file_size = enclosure_node.attributes["length"].to_i
      enclosure.credits = []
      enclosure.explicit = false
      @enclosures << enclosure
    end

    # Parse atom-type enclosures. If there are repeats of the same enclosure
    # object, we merge the two together.
    atom_enclosures.each do |enclosure_node|
      enclosure_url = FeedTools.unescape_entities(
        enclosure_node.attributes["href"].to_s)
      enclosure = find_enclosure.call(enclosure_url)
      is_new = enclosure.nil?
      enclosure = Enclosure.new if is_new
      enclosure.url = enclosure_url
      enclosure.type = enclosure_node.attributes["type"].to_s
      enclosure.file_size = enclosure_node.attributes["length"].to_i
      enclosure.credits = []
      enclosure.explicit = false
      @enclosures << enclosure if is_new
    end

    # Parses content objects from the media module. This is a lambda because
    # identical processing is needed for stand-alone content objects and for
    # content objects within group objects. Returns the enclosures touched.
    parse_media_content = lambda do |media_content_nodes|
      affected_enclosures = []
      media_content_nodes.each do |enclosure_node|
        enclosure_url = FeedTools.unescape_entities(
          enclosure_node.attributes["url"].to_s)
        enclosure = find_enclosure.call(enclosure_url)
        is_new = enclosure.nil?
        enclosure = Enclosure.new if is_new

        enclosure.url = enclosure_url
        enclosure.type = enclosure_node.attributes["type"].to_s
        enclosure.file_size = enclosure_node.attributes["fileSize"].to_i
        enclosure.duration = enclosure_node.attributes["duration"].to_s
        enclosure.height = enclosure_node.attributes["height"].to_i
        enclosure.width = enclosure_node.attributes["width"].to_i
        enclosure.bitrate = enclosure_node.attributes["bitrate"].to_i
        enclosure.framerate = enclosure_node.attributes["framerate"].to_i
        enclosure.expression = enclosure_node.attributes["expression"].to_s
        enclosure.is_default =
          (enclosure_node.attributes["isDefault"].to_s.downcase == "true")

        thumbnail = build_sized_media.call(
          EnclosureThumbnail, enclosure_node, "media:thumbnail")
        enclosure.thumbnail = thumbnail unless thumbnail.nil?

        enclosure.categories = build_categories.call(enclosure_node)

        hash_text = XPath.first(enclosure_node, "media:hash/text()").to_s
        if hash_text != ""
          enclosure.hash = EnclosureHash.new(
            FeedTools.sanitize_html(
              FeedTools.unescape_entities(hash_text), :strip),
            "md5"
          )
        end

        player = build_sized_media.call(
          EnclosurePlayer, enclosure_node, "media:player")
        enclosure.player = player unless player.nil?

        enclosure.credits = build_credits.call(enclosure_node)
        enclosure.explicit = (XPath.first(enclosure_node,
          "media:adult/text()").to_s.downcase == "true")

        media_text = XPath.first(enclosure_node, "media:text/text()").to_s
        if media_text != ""
          enclosure.text = FeedTools.unescape_entities(media_text)
        end

        affected_enclosures << enclosure
        @enclosures << enclosure if is_new
      end
      affected_enclosures
    end

    # Parse the independent content objects.
    parse_media_content.call(media_content_enclosures)

    media_groups = []

    # Parse the group objects.
    media_group_enclosures.each do |media_group|
      # Parse the content objects within the group objects.
      affected_enclosures =
        parse_media_content.call(XPath.match(media_group, "media:content"))

      # Now make sure that content objects inherit certain properties from
      # the group objects when they do not define them themselves.
      affected_enclosures.each do |enclosure|
        if enclosure.thumbnail.nil?
          thumbnail = build_sized_media.call(
            EnclosureThumbnail, media_group, "media:thumbnail")
          enclosure.thumbnail = thumbnail unless thumbnail.nil?
        end
        if enclosure.categories.nil? || enclosure.categories.size == 0
          enclosure.categories = build_categories.call(media_group)
        end
        if enclosure.hash.nil?
          group_hash = XPath.first(media_group, "media:hash/text()").to_s
          if group_hash != ""
            enclosure.hash = EnclosureHash.new(
              FeedTools.unescape_entities(group_hash), "md5")
          end
        end
        if enclosure.player.nil?
          player = build_sized_media.call(
            EnclosurePlayer, media_group, "media:player")
          enclosure.player = player unless player.nil?
        end
        if enclosure.credits.nil? || enclosure.credits.size == 0
          enclosure.credits = build_credits.call(media_group)
        end
        if enclosure.explicit?.nil?
          enclosure.explicit = (XPath.first(media_group,
            "media:adult/text()").to_s.downcase == "true")
        end
        if enclosure.text.nil?
          group_text = XPath.first(media_group, "media:text/text()").to_s
          if group_text != ""
            enclosure.text = FeedTools.sanitize_html(
              FeedTools.unescape_entities(group_text), :strip)
          end
        end
      end

      # Keep track of the media groups
      media_groups << affected_enclosures
    end

    # Now we need to inherit any relevant item level information.
    if self.explicit?
      @enclosures.each { |enclosure| enclosure.explicit = true }
    end

    # Add all the itunes categories
    XPath.match(root_node, "itunes:category").each do |itunes_category|
      category = itunes_category.attributes["text"].to_s
      subcategory = XPath.first(itunes_category, "itunes:category/@text").to_s
      category_path = "Podcasts"
      category_path += "/" + category if category != ""
      category_path += "/" + subcategory if subcategory != ""
      @enclosures.each do |enclosure|
        enclosure.categories = [] if enclosure.categories.nil?
        enclosure.categories << EnclosureCategory.new(
          FeedTools.unescape_entities(category_path),
          FeedTools.unescape_entities("http://www.apple.com/itunes/store/"),
          FeedTools.unescape_entities("iTunes Music Store Categories")
        )
      end
    end

    @enclosures.each do |enclosure|
      # Clean up any of those attributes that incorrectly have ""
      # or 0 as their values
      enclosure.type = nil if enclosure.type == ""
      enclosure.file_size = nil if enclosure.file_size == 0
      # The duration attribute is read as a string above, so a missing
      # value shows up as "" rather than 0.
      if enclosure.duration == 0 || enclosure.duration == ""
        enclosure.duration = nil
      end
      enclosure.height = nil if enclosure.height == 0
      enclosure.width = nil if enclosure.width == 0
      enclosure.bitrate = nil if enclosure.bitrate == 0
      enclosure.framerate = nil if enclosure.framerate == 0
      if enclosure.expression == "" || enclosure.expression.nil?
        enclosure.expression = "full"
      end

      # If an enclosure is missing the text field, fall back on the
      # itunes:summary field. (This must be a comparison: an assignment here
      # would discard every enclosure's own text.)
      if enclosure.text.nil? || enclosure.text == ""
        enclosure.text = self.itunes_summary
      end

      # Make sure we don't have duplicate categories
      enclosure.categories.uniq! unless enclosure.categories.nil?
    end

    # And finally, now things get complicated. This is where we make
    # sure that the enclosures method only returns either default
    # enclosures or enclosures with only one version. Any enclosures
    # that are wrapped in a media:group will be placed in the appropriate
    # versions field.
    affected_enclosure_urls = []
    media_groups.each do |media_group|
      affected_enclosure_urls =
        affected_enclosure_urls | media_group.map { |enclosure| enclosure.url }
    end
    @enclosures.delete_if do |enclosure|
      affected_enclosure_urls.include?(enclosure.url)
    end
    media_groups.each do |media_group|
      default_enclosure = nil
      media_group.each do |enclosure|
        default_enclosure = enclosure if enclosure.is_default?
      end
      # A group is not required to flag a default; use its first member so
      # the group is still represented and nil never ends up in the list.
      default_enclosure = media_group.first if default_enclosure.nil?
      media_group.each do |enclosure|
        enclosure.default_version = default_enclosure
        enclosure.versions = media_group.clone
        enclosure.versions.delete(enclosure)
      end
      @enclosures << default_enclosure unless default_enclosure.nil?
    end
  end

  # If we have a single enclosure, it's safe to inherit the itunes:duration
  # field if it's missing.
  if @enclosures.size == 1
    only_enclosure = @enclosures.first
    if only_enclosure.duration.nil? || only_enclosure.duration == 0 ||
        only_enclosure.duration == ""
      only_enclosure.duration = self.itunes_duration
    end
  end

  return @enclosures
end
3654
-
3655
# Sets the feed item enclosures.
#
# The given value is stored in @enclosures as-is.
def enclosures=(new_enclosures)
  @enclosures = new_enclosures
end
3658
-
3659
# Returns the feed item author.
#
# On first use an Author object is built from the first of these elements
# found below the root node: author, managingEditor, dc:author, dc:creator,
# atom:author. The element's text is stored in +raw+ and then split into a
# name and an email address when it looks like "Name (email)" or
# "email (Name)"; a bare email address only fills +email+, and text without
# an '@' is taken as the name. Missing name, email and url values are then
# looked up in child elements and attributes of the same names. If no name
# was found, itunes_author is used. The result is cached in @author.
def author
  if @author.nil?
    @author = FeedTools::Feed::Author.new
    author_node = nil
    unless root_node.nil?
      ["author", "managingEditor", "dc:author",
       "dc:creator", "atom:author"].each do |path|
        author_node = XPath.first(root_node, path)
        break unless author_node.nil?
      end
    end
    unless author_node.nil?
      @author.raw = FeedTools.unescape_entities(
        XPath.first(author_node, "text()").to_s)
      @author.raw = nil if @author.raw == ""
      unless @author.raw.nil?
        # '-' goes last in the character class so it is a literal hyphen;
        # written as "%-\+" it formed a range that excluded '-' itself.
        email_pattern = "\\b[A-Z0-9._%+-]+@[A-Z0-9._%-]+\\.[A-Z]{2,4}\\b"
        author_raw_pair = nil
        raw_scan = @author.raw.scan(/(.*)\((#{email_pattern})\)/i)
        if raw_scan.size == 0
          raw_scan = @author.raw.scan(/(#{email_pattern})\s*\((.*)\)/i)
          author_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0
        else
          author_raw_pair = raw_scan.first
        end
        if raw_scan.size == 0
          email_scan = @author.raw.scan(/#{email_pattern}/i)
          @author.email = email_scan.first.strip if email_scan.size > 0
        end
        if author_raw_pair.nil? || author_raw_pair.size == 0
          unless @author.raw.include?("@")
            # We can be reasonably sure we are looking at something
            # that the creator didn't intend to contain an email address if
            # it got through the preceding regexes and it doesn't
            # contain the tell-tale '@' symbol.
            @author.name = @author.raw
          end
        else
          @author.name = author_raw_pair.first.strip
          @author.email = author_raw_pair.last.strip
        end
      end

      # Fill in whatever is still missing from child elements, then from
      # attributes. Unset fields are nil, so nil counts as missing too.
      if @author.name.nil? || @author.name == ""
        @author.name = FeedTools.unescape_entities(
          XPath.first(author_node, "name/text()").to_s)
      end
      if @author.name == ""
        @author.name = FeedTools.unescape_entities(
          XPath.first(author_node, "@name").to_s)
      end
      if @author.email.nil? || @author.email == ""
        @author.email = FeedTools.unescape_entities(
          XPath.first(author_node, "email/text()").to_s)
      end
      if @author.email == ""
        @author.email = FeedTools.unescape_entities(
          XPath.first(author_node, "@email").to_s)
      end
      if @author.url.nil? || @author.url == ""
        @author.url = FeedTools.unescape_entities(
          XPath.first(author_node, "url/text()").to_s)
      end
      if @author.url == ""
        @author.url = FeedTools.unescape_entities(
          XPath.first(author_node, "@url").to_s)
      end
      @author.name = nil if @author.name == ""
      @author.raw = nil if @author.raw == ""
      @author.email = nil if @author.email == ""
      @author.url = nil if @author.url == ""
    end
    # Fallback on the itunes module if we didn't find an author name.
    # Best effort: any failure there leaves the name unset.
    begin
      @author.name = self.itunes_author if @author.name.nil?
    rescue
      @author.name = nil
    end
  end
  return @author
end
3751
-
3752
# Sets the feed item author.
#
# An object responding to name, email and url is treated as a complete
# author object and stored directly. Anything else (probably a string) is
# used as the name of the current author object, which is created first if
# there is none yet.
def author=(new_author)
  is_author_object = [:name, :email, :url].all? do |accessor|
    new_author.respond_to?(accessor)
  end
  if is_author_object
    @author = new_author
  else
    @author = FeedTools::Feed::Author.new if @author.nil?
    @author.name = new_author
  end
end
3768
-
3769
# Returns the feed publisher.
#
# On first use an Author object is built from the text of dc:publisher or,
# when that is empty, webMaster. The text is stored in +raw+ and split into
# a name and an email address when it looks like "Name (email)" or
# "email (Name)"; a bare email address only fills +email+, and text without
# an '@' is taken as the name. Empty fields are set to nil. The result is
# cached in @publisher.
def publisher
  if @publisher.nil?
    @publisher = FeedTools::Feed::Author.new

    # Set the publisher's raw text
    @publisher.raw = FeedTools.unescape_entities(
      XPath.first(root_node, "dc:publisher/text()").to_s)
    if @publisher.raw == ""
      @publisher.raw = FeedTools.unescape_entities(
        XPath.first(root_node, "webMaster/text()").to_s)
    end
    unless @publisher.raw == ""
      # '-' goes last in the character class so it is a literal hyphen;
      # written as "%-\+" it formed a range that excluded '-' itself.
      email_pattern = "\\b[A-Z0-9._%+-]+@[A-Z0-9._%-]+\\.[A-Z]{2,4}\\b"
      publisher_raw_pair = nil
      raw_scan = @publisher.raw.scan(/(.*)\((#{email_pattern})\)/i)
      if raw_scan.size == 0
        raw_scan = @publisher.raw.scan(/(#{email_pattern})\s*\((.*)\)/i)
        publisher_raw_pair = raw_scan.first.reverse unless raw_scan.size == 0
      else
        publisher_raw_pair = raw_scan.first
      end
      if raw_scan.size == 0
        email_scan = @publisher.raw.scan(/#{email_pattern}/i)
        @publisher.email = email_scan.first.strip if email_scan.size > 0
      end
      if publisher_raw_pair.nil? || publisher_raw_pair.size == 0
        unless @publisher.raw.include?("@")
          # We can be reasonably sure we are looking at something
          # that the creator didn't intend to contain an email address if
          # it got through the preceding regexes and it doesn't
          # contain the tell-tale '@' symbol.
          @publisher.name = @publisher.raw
        end
      else
        @publisher.name = publisher_raw_pair.first.strip
        @publisher.email = publisher_raw_pair.last.strip
      end
    end

    @publisher.name = nil if @publisher.name == ""
    @publisher.raw = nil if @publisher.raw == ""
    @publisher.email = nil if @publisher.email == ""
    @publisher.url = nil if @publisher.url == ""
  end
  return @publisher
end
3821
-
3822
# Sets the feed publisher.
#
# An object responding to name, email and url is treated as a complete
# Author object and stored directly. Anything else (probably a string) is
# used as the name of the current publisher object, which is created first
# if there is none yet.
def publisher=(new_publisher)
  is_author_object = [:name, :email, :url].all? do |accessor|
    new_publisher.respond_to?(accessor)
  end
  if is_author_object
    @publisher = new_publisher
  else
    @publisher = FeedTools::Feed::Author.new if @publisher.nil?
    @publisher.name = new_publisher
  end
end
3838
-
3839
# Returns the contents of the itunes:author element.
#
# When the item has no itunes:author text of its own, the value of the
# feed's itunes_author is used instead (channel-level itunes:author
# elements are common in practice). An empty result is returned as nil.
# The value is cached in @itunes_author.
def itunes_author
  return @itunes_author unless @itunes_author.nil?
  found = FeedTools.unescape_entities(
    XPath.first(root_node, "itunes:author/text()").to_s)
  found = feed.itunes_author if found == ""
  @itunes_author = (found == "" ? nil : found)
end
3852
-
3853
# Sets the contents of the itunes:author element.
#
# The given value is stored in @itunes_author as-is.
def itunes_author=(new_itunes_author)
  @itunes_author = new_itunes_author
end
3857
-
3858
# Returns the number of seconds that the associated media runs for.
#
# The itunes:duration text is split on ":" and read as "H:M:S", "M:S" or
# plain seconds. Returns nil when the element is missing or empty, or when
# it does not have one to three ":"-separated parts. The value is cached in
# @itunes_duration.
def itunes_duration
  return @itunes_duration unless @itunes_duration.nil?

  raw_duration = FeedTools.unescape_entities(
    XPath.first(root_node, "itunes:duration/text()").to_s)
  return @itunes_duration if raw_duration == ""

  parts = raw_duration.split(":").map { |part| part.to_i }
  if parts.size >= 1 && parts.size <= 3
    # Plain integer arithmetic: each step to the right is worth 60 times
    # less, so fold left to right. This avoids depending on Integer#hour /
    # Integer#minute, which are not part of core Ruby.
    @itunes_duration = parts.inject(0) { |total, part| total * 60 + part }
  end
  return @itunes_duration
end
3876
-
3877
# Sets the number of seconds that the associated media runs for.
#
# The given value is stored in @itunes_duration as-is.
def itunes_duration=(new_itunes_duration)
  @itunes_duration = new_itunes_duration
end
3881
-
3882
# Returns the feed item time.
#
# The first non-empty text among pubDate, dc:date, issued, updated and time
# is parsed with Time.parse; if parsing fails the current time is used.
# When there is no root node at all the current time is used as well. When
# a root node exists but none of the elements has text, nil is returned and
# nothing is cached.
def time
  return @time unless @time.nil?

  time_string = nil
  unless root_node.nil?
    ["pubDate/text()", "dc:date/text()", "issued/text()",
     "updated/text()", "time/text()"].each do |path|
      time_string = XPath.first(root_node, path).to_s
      break if time_string != ""
    end
  end

  if time_string.nil?
    @time = Time.now
  elsif time_string != ""
    @time = begin
      Time.parse(time_string)
    rescue StandardError
      Time.now
    end
  end
  @time
end
3908
-
3909
# Sets the feed item time.
#
# The given value is stored in @time as-is.
def time=(new_time)
  @time = new_time
end
3913
-
3914
# Returns the feed item updated time.
#
# The text of the updated element (or, when that is empty, the modified
# element) is parsed with Time.parse. Returns nil when there is no root
# node, no text, or the text cannot be parsed.
def updated
  return @updated unless @updated.nil?

  updated_string = nil
  unless root_node.nil?
    ["updated/text()", "modified/text()"].each do |path|
      updated_string = XPath.first(root_node, path).to_s
      break if updated_string != ""
    end
  end

  @updated =
    if updated_string.nil? || updated_string == ""
      nil
    else
      begin
        Time.parse(updated_string)
      rescue StandardError
        nil
      end
    end
end
3931
-
3932
# Sets the feed item updated time.
#
# The given value is stored in @updated as-is.
def updated=(new_updated)
  @updated = new_updated
end
3936
-
3937
# Returns the feed item issued time.
#
# The first non-empty text among issued, published, pubDate and dc:date is
# parsed with Time.parse. Returns nil when there is no root node, no text,
# or the text cannot be parsed.
def issued
  return @issued unless @issued.nil?

  issued_string = nil
  unless root_node.nil?
    ["issued/text()", "published/text()",
     "pubDate/text()", "dc:date/text()"].each do |path|
      issued_string = XPath.first(root_node, path).to_s
      break if issued_string != ""
    end
  end

  @issued =
    if issued_string.nil? || issued_string == ""
      nil
    else
      begin
        Time.parse(issued_string)
      rescue StandardError
        nil
      end
    end
end
3960
-
3961
# Sets the feed item issued time.
#
# The given value is stored in @issued as-is.
def issued=(new_issued)
  @issued = new_issued
end
3965
-
3966
# Returns the url for posting comments.
#
# The text of the comments element is passed through
# FeedTools.normalize_url; an empty result is returned as nil. A non-nil
# value is cached in @comments.
def comments
  return @comments unless @comments.nil?
  comments_url = FeedTools.normalize_url(
    XPath.first(root_node, "comments/text()").to_s)
  @comments = (comments_url == "" ? nil : comments_url)
end
3975
-
3976
# Sets the url for posting comments.
#
# The given value is stored in @comments as-is.
def comments=(new_comments)
  @comments = new_comments
end
3980
-
3981
# The source that this post was based on.
#
# Returns a Link object whose url comes from the source element's url
# attribute and whose value comes from the source element's text; either
# is nil when empty. The object is cached in @source.
def source
  return @source unless @source.nil?

  blank_to_nil = lambda { |text| text == "" ? nil : text }
  @source = FeedTools::Feed::Link.new
  @source.url = blank_to_nil.call(XPath.first(root_node, "source/@url").to_s)
  @source.value =
    blank_to_nil.call(XPath.first(root_node, "source/text()").to_s)
  @source
end
3992
-
3993
# Returns the feed item tags.
#
# Tags are taken from the first of these sources that yields any:
#
# 1. dc:subject/rdf:Bag/rdf:li text nodes
# 2. taxo:topics/rdf:Bag/rdf:li resource urls containing "/tag/<word>" or
#    "/tags/<word>" (mainly for del.icio.us)
# 3. category text nodes
# 4. dc:subject text nodes
# 5. itunes:keywords text, split on spaces
#
# Tags are downcased, duplicates are removed, and the list is cached in
# @tags. Returns an empty array when no tags are found.
def tags
  # TODO: support the rel="tag" microformat
  # =======================================
  return @tags unless @tags.nil?
  @tags = []

  # NOTE(review): a bag holding exactly one rdf:li is ignored by the
  # "> 1" test below; confirm whether "> 0" was intended.
  tag_list = XPath.match(root_node, "dc:subject/rdf:Bag/rdf:li/text()")
  if tag_list.size > 1
    tag_list.each { |tag| @tags << tag.to_s.downcase.strip }
  end

  if @tags.size == 0
    # messy effort to find ourselves some tags, mainly for del.icio.us
    rdf_bag = XPath.match(root_node, "taxo:topics/rdf:Bag/rdf:li")
    unless rdf_bag.nil?
      rdf_bag.each do |tag_node|
        begin
          # The resource url lives on each rdf:li node, not on the item.
          tag_url = XPath.first(tag_node, "@resource").to_s
          tag_match = tag_url.scan(/\/(tag|tags)\/(\w+)/)
          if tag_match.size > 0
            @tags << tag_match.first.last.downcase.strip
          end
        rescue
          # Best effort: a node that cannot be read is skipped.
        end
      end
    end
  end

  ["category/text()", "dc:subject/text()"].each do |path|
    next unless @tags.size == 0
    XPath.match(root_node, path).each do |tag|
      @tags << tag.to_s.downcase.strip
    end
  end

  if @tags.size == 0
    begin
      @tags = XPath.first(root_node,
        "itunes:keywords/text()").to_s.downcase.split(" ")
    rescue
      @tags = []
    end
  end

  # uniq! returns nil when nothing was removed, so return @tags explicitly.
  @tags.uniq!
  return @tags
end
4053
-
4054
# Sets the feed item tags.
#
# The given value is stored in @tags as-is.
def tags=(new_tags)
  @tags = new_tags
end
4058
-
4059
# Returns true if this feed item contains explicit material.
#
# The item counts as explicit when its media:adult text is "true", when its
# itunes:explicit text is "yes" or "true" (case-insensitive), or when the
# whole feed is explicit -- so this may return true even if the item itself
# carries no explicit marker. The answer is cached in @explicit.
def explicit?
  return @explicit unless @explicit.nil?

  flag = lambda { |path| XPath.first(root_node, path).to_s.downcase }
  marked =
    flag.call("media:adult/text()") == "true" ||
    ["yes", "true"].include?(flag.call("itunes:explicit/text()")) ||
    feed.explicit?
  @explicit = marked ? true : false
end
4078
-
4079
# Sets whether or not the feed item contains explicit material.
#
# Any truthy value is stored as true; nil and false are stored as false.
def explicit=(new_explicit)
  @explicit = !!new_explicit
end
4083
-
4084
# A hook method that is called during the feed generation process.
# Overriding this method will enable additional content to be inserted into
# the feed. The default implementation does nothing and returns nil.
def build_xml_hook(feed_type, version, xml_builder)
  nil
end
4089
-
4090
- # Generates xml based on the content of the feed item
4091
- def build_xml(feed_type=(self.feed.feed_type or "rss"), version=nil,
4092
- xml_builder=Builder::XmlMarkup.new(:indent => 2))
4093
- if feed_type == "rss" && (version == nil || version == 0.0)
4094
- version = 1.0
4095
- elsif feed_type == "atom" && (version == nil || version == 0.0)
4096
- version = 0.3
4097
- end
4098
- if feed_type == "rss" && (version == 0.9 || version == 1.0 || version == 1.1)
4099
- # RDF-based rss format
4100
- if link.nil?
4101
- raise "Cannot generate an rdf-based feed item with a nil link field."
4102
- end
4103
- return xml_builder.item("rdf:about" => CGI.escapeHTML(link)) do
4104
- unless title.nil? || title == ""
4105
- xml_builder.title(title)
4106
- else
4107
- xml_builder.title
4108
- end
4109
- unless link.nil? || link == ""
4110
- xml_builder.link(link)
4111
- else
4112
- xml_builder.link
4113
- end
4114
- unless description.nil? || description == ""
4115
- xml_builder.description(description)
4116
- else
4117
- xml_builder.description
4118
- end
4119
- unless time.nil?
4120
- xml_builder.tag!("dc:date", time.iso8601)
4121
- end
4122
- unless tags.nil? || tags.size == 0
4123
- xml_builder.tag!("taxo:topics") do
4124
- xml_builder.tag!("rdf:Bag") do
4125
- for tag in tags
4126
- xml_builder.tag!("rdf:li", tag)
4127
- end
4128
- end
4129
- end
4130
- xml_builder.tag!("itunes:keywords", tags.join(" "))
4131
- end
4132
- build_xml_hook(feed_type, version, xml_builder)
4133
- end
4134
- elsif feed_type == "rss"
4135
- # normal rss format
4136
- return xml_builder.item do
4137
- unless title.nil? || title == ""
4138
- xml_builder.title(title)
4139
- end
4140
- unless link.nil? || link == ""
4141
- xml_builder.link(link)
4142
- end
4143
- unless description.nil? || description == ""
4144
- xml_builder.description(description)
4145
- end
4146
- unless time.nil?
4147
- xml_builder.pubDate(time.rfc822)
4148
- end
4149
- unless tags.nil? || tags.size == 0
4150
- xml_builder.tag!("taxo:topics") do
4151
- xml_builder.tag!("rdf:Bag") do
4152
- for tag in tags
4153
- xml_builder.tag!("rdf:li", tag)
4154
- end
4155
- end
4156
- end
4157
- xml_builder.tag!("itunes:keywords", tags.join(" "))
4158
- end
4159
- build_xml_hook(feed_type, version, xml_builder)
4160
- end
4161
- elsif feed_type == "atom" && version == 0.3
4162
- # normal atom format
4163
- return xml_builder.entry("xmlns" => "http://purl.org/atom/ns#") do
4164
- unless title.nil? || title == ""
4165
- xml_builder.title(title,
4166
- "mode" => "escaped",
4167
- "type" => "text/html")
4168
- end
4169
- xml_builder.author do
4170
- unless self.author.nil? || self.author.name.nil?
4171
- xml_builder.name(self.author.name)
4172
- else
4173
- xml_builder.name("n/a")
4174
- end
4175
- unless self.author.nil? || self.author.email.nil?
4176
- xml_builder.email(self.author.email)
4177
- end
4178
- unless self.author.nil? || self.author.url.nil?
4179
- xml_builder.url(self.author.url)
4180
- end
4181
- end
4182
- unless link.nil? || link == ""
4183
- xml_builder.link("href" => link,
4184
- "rel" => "alternate",
4185
- "type" => "text/html",
4186
- "title" => title)
4187
- end
4188
- unless description.nil? || description == ""
4189
- xml_builder.content(description,
4190
- "mode" => "escaped",
4191
- "type" => "text/html")
4192
- end
4193
- unless time.nil?
4194
- xml_builder.issued(time.iso8601)
4195
- end
4196
- unless tags.nil? || tags.size == 0
4197
- for tag in tags
4198
- xml_builder.category(tag)
4199
- end
4200
- end
4201
- build_xml_hook(feed_type, version, xml_builder)
4202
- end
4203
- elsif feed_type == "atom" && version == 1.0
4204
- # normal atom format
4205
- return xml_builder.entry("xmlns" => "http://www.w3.org/2005/Atom") do
4206
- unless title.nil? || title == ""
4207
- xml_builder.title(title,
4208
- "type" => "html")
4209
- end
4210
- xml_builder.author do
4211
- unless self.author.nil? || self.author.name.nil?
4212
- xml_builder.name(self.author.name)
4213
- else
4214
- xml_builder.name("n/a")
4215
- end
4216
- unless self.author.nil? || self.author.email.nil?
4217
- xml_builder.email(self.author.email)
4218
- end
4219
- unless self.author.nil? || self.author.url.nil?
4220
- xml_builder.url(self.author.url)
4221
- end
4222
- end
4223
- unless link.nil? || link == ""
4224
- xml_builder.link("href" => link,
4225
- "rel" => "alternate",
4226
- "type" => "text/html",
4227
- "title" => title)
4228
- end
4229
- unless description.nil? || description == ""
4230
- xml_builder.content(description,
4231
- "type" => "html")
4232
- else
4233
- xml_builder.content(FeedTools.no_content_string,
4234
- "type" => "html")
4235
- end
4236
- if self.updated != nil
4237
- xml_builder.updated(self.updated.iso8601)
4238
- elsif self.time != nil
4239
- # Not technically correct, but a heck of a lot better
4240
- # than the Time.now fall-back.
4241
- xml_builder.updated(self.time.iso8601)
4242
- else
4243
- xml_builder.updated(Time.now.iso8601)
4244
- end
4245
- unless self.published.nil?
4246
- xml_builder.published(self.published.iso8601)
4247
- end
4248
- if self.id != nil
4249
- unless FeedTools.is_uri? self.id
4250
- if self.time != nil && self.link != nil
4251
- xml_builder.id(FeedTools.build_tag_uri(self.link, self.time))
4252
- elsif self.link != nil
4253
- xml_builder.id(FeedTools.build_urn_uuid_uri(self.link))
4254
- else
4255
- raise "The unique id must be a URI. " +
4256
- "(Attempted to generate id, but failed.)"
4257
- end
4258
- else
4259
- xml_builder.id(self.id)
4260
- end
4261
- elsif self.time != nil && self.link != nil
4262
- xml_builder.id(FeedTools.build_tag_uri(self.link, self.time))
4263
- else
4264
- raise "Cannot build feed, missing feed unique id."
4265
- end
4266
- unless self.tags.nil? || self.tags.size == 0
4267
- for tag in self.tags
4268
- xml_builder.category("term" => tag)
4269
- end
4270
- end
4271
- build_xml_hook(feed_type, version, xml_builder)
4272
- end
4273
- end
4274
- end
4275
-
4276
- alias_method :tagline, :description
4277
- alias_method :tagline=, :description=
4278
- alias_method :subtitle, :description
4279
- alias_method :subtitle=, :description=
4280
- alias_method :summary, :description
4281
- alias_method :summary=, :description=
4282
- alias_method :abstract, :description
4283
- alias_method :abstract=, :description=
4284
- alias_method :content, :description
4285
- alias_method :content=, :description=
4286
- alias_method :guid, :id
4287
- alias_method :guid=, :id=
4288
- alias_method :published, :issued
4289
- alias_method :published=, :issued=
4290
-
4291
- # Returns a simple representation of the feed item object's state.
4292
- def inspect
4293
- return "#<FeedTools::FeedItem:0x#{self.object_id.to_s(16)} " +
4294
- "LINK:#{self.link}>"
4295
- end
4296
- end
4297
556
  end
4298
557
 
4299
558
  module REXML # :nodoc: