rfeedparser 0.9.92 → 0.9.93

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/ruby
2
- gem 'hpricot', ">=0.5"
2
+ gem 'hpricot', "=0.5"
3
3
  require 'hpricot'
4
4
  # This used to be based on Michael Moen's Hpricot#scrub, but that seems to
5
5
  # have only been part of its evolution. Hpricot#scrub is cool code, though.
@@ -20,35 +20,33 @@ module FeedParserMixin
20
20
  # FIXME The century regexp may not work ('\d\d$' says "two numbers at
21
21
  # end of line", but we then attach more of a regexp).
22
22
  iso8601_regexps = [ '^(\d{4})-?([01]\d)-([0123]\d)',
23
- '^(\d{4})-([01]\d)',
24
- '^(\d{4})-?([0123]\d\d)',
25
- '^(\d\d)-?([01]\d)-?([0123]\d)',
26
- '^(\d\d)-?([0123]\d\d)',
27
- '^(\d{4})',
28
- '-(\d\d)-?([01]\d)',
29
- '-([0123]\d\d)',
30
- '-(\d\d)',
31
- '--([01]\d)-?([0123]\d)',
32
- '--([01]\d)',
33
- '---([0123]\d)',
34
- '(\d\d$)',
35
- ''
36
- ]
23
+ '^(\d{4})-([01]\d)',
24
+ '^(\d{4})-?([0123]\d\d)',
25
+ '^(\d\d)-?([01]\d)-?([0123]\d)',
26
+ '^(\d\d)-?([0123]\d\d)',
27
+ '^(\d{4})',
28
+ '-(\d\d)-?([01]\d)',
29
+ '-([0123]\d\d)',
30
+ '-(\d\d)',
31
+ '--([01]\d)-?([0123]\d)',
32
+ '--([01]\d)',
33
+ '---([0123]\d)',
34
+ '(\d\d$)',
35
+ '' ]
37
36
  iso8601_values = { '^(\d{4})-?([01]\d)-([0123]\d)' => ['year', 'month', 'day'],
38
- '^(\d{4})-([01]\d)' => ['year','month'],
39
- '^(\d{4})-?([0123]\d\d)' => ['year', 'ordinal'],
40
- '^(\d\d)-?([01]\d)-?([0123]\d)' => ['year','month','day'],
41
- '^(\d\d)-?([0123]\d\d)' => ['year','ordinal'],
42
- '^(\d{4})' => ['year'],
43
- '-(\d\d)-?([01]\d)' => ['year','month'],
44
- '-([0123]\d\d)' => ['ordinal'],
45
- '-(\d\d)' => ['year'],
46
- '--([01]\d)-?([0123]\d)' => ['month','day'],
47
- '--([01]\d)' => ['month'],
48
- '---([0123]\d)' => ['day'],
49
- '(\d\d$)' => ['century'],
50
- '' => []
51
- }
37
+ '^(\d{4})-([01]\d)' => ['year','month'],
38
+ '^(\d{4})-?([0123]\d\d)' => ['year', 'ordinal'],
39
+ '^(\d\d)-?([01]\d)-?([0123]\d)' => ['year','month','day'],
40
+ '^(\d\d)-?([0123]\d\d)' => ['year','ordinal'],
41
+ '^(\d{4})' => ['year'],
42
+ '-(\d\d)-?([01]\d)' => ['year','month'],
43
+ '-([0123]\d\d)' => ['ordinal'],
44
+ '-(\d\d)' => ['year'],
45
+ '--([01]\d)-?([0123]\d)' => ['month','day'],
46
+ '--([01]\d)' => ['month'],
47
+ '---([0123]\d)' => ['day'],
48
+ '(\d\d$)' => ['century'],
49
+ '' => [] }
52
50
  add_to_all = '(T?(\d\d):(\d\d)(?::(\d\d))?([+-](\d\d)(?::(\d\d))?|Z)?)?'
53
51
  add_to_all_fields = ['hour', 'minute', 'second', 'tz', 'tzhour', 'tzmin']
54
52
  # NOTE We use '(?:' to prevent grouping of optional matches (ones trailed
@@ -86,9 +84,9 @@ module FeedParserMixin
86
84
  # ordinals are NOT normalized by mktime, we simulate them
87
85
  # by setting month=1, day=ordinal
88
86
  if ordinal
89
- month = DateTime.ordinal(year,ordinal).month
87
+ month = DateTime.ordinal(year,ordinal).month
90
88
  else
91
- month = Time.now.utc.month
89
+ month = Time.now.utc.month
92
90
  end
93
91
  end
94
92
  month = month.to_i unless month.nil?
@@ -96,11 +94,11 @@ module FeedParserMixin
96
94
  if day.nil? or day.empty?
97
95
  # see above
98
96
  if ordinal
99
- day = DateTime.ordinal(year,ordinal).day
97
+ day = DateTime.ordinal(year,ordinal).day
100
98
  elsif params['century'] or params['year'] or params['month']
101
- day = 1
99
+ day = 1
102
100
  else
103
- day = Time.now.utc.day
101
+ day = Time.now.utc.day
104
102
  end
105
103
  else
106
104
  day = day.to_i
@@ -124,13 +122,13 @@ module FeedParserMixin
124
122
  if tz and not tz.empty? and tz != 'Z'
125
123
  # FIXME does this cross over days?
126
124
  if tz[0] == '-'
127
- tm[3] += params['tzhour'].to_i
128
- tm[4] += params['tzmin'].to_i
125
+ tm[3] += params['tzhour'].to_i
126
+ tm[4] += params['tzmin'].to_i
129
127
  elsif tz[0] == '+'
130
- tm[3] -= params['tzhour'].to_i
131
- tm[4] -= params['tzmin'].to_i
128
+ tm[3] -= params['tzhour'].to_i
129
+ tm[4] -= params['tzmin'].to_i
132
130
  else
133
- return nil
131
+ return nil
134
132
  end
135
133
  end
136
134
  return Time.utc(*tm) # Magic!
@@ -148,7 +146,7 @@ module FeedParserMixin
148
146
  korean_onblog_date_re = /(\d{4})#{korean_year}\s+(\d{2})#{korean_month}\s+(\d{2})#{korean_day}\s+(\d{2}):(\d{2}):(\d{2})/
149
147
 
150
148
 
151
- m = korean_onblog_date_re.match(dateString)
149
+ m = korean_onblog_date_re.match(dateString)
152
150
  return unless m
153
151
  w3dtfdate = "#{m[1]}-#{m[2]}-#{m[3]}T#{m[4]}:#{m[5]}:#{m[6]}+09:00"
154
152
 
@@ -163,7 +161,7 @@ module FeedParserMixin
163
161
  korean_pm = u("오후") # bfc0 c8c4 in euc-kr
164
162
 
165
163
  korean_nate_date_re = /(\d{4})-(\d{2})-(\d{2})\s+(#{korean_am}|#{korean_pm})\s+(\d{0,2}):(\d{0,2}):(\d{0,2})/
166
- m = korean_nate_date_re.match(dateString)
164
+ m = korean_nate_date_re.match(dateString)
167
165
  return unless m
168
166
  hour = m[5].to_i
169
167
  ampm = m[4]
@@ -179,7 +177,7 @@ module FeedParserMixin
179
177
  def _parse_date_mssql(dateString)
180
178
  mssql_date_re = /(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})(\.\d+)?/
181
179
 
182
- m = mssql_date_re.match(dateString)
180
+ m = mssql_date_re.match(dateString)
183
181
  return unless m
184
182
  w3dtfdate = "#{m[1]}-#{m[2]}-#{m[3]}T#{m[4]}:#{m[5]}:#{m[6]}+09:00"
185
183
  $stderr << "MS SQL date parsed as: %s\n" % w3dtfdate if $debug
@@ -223,7 +221,7 @@ module FeedParserMixin
223
221
 
224
222
  greek_date_format = /([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)/
225
223
 
226
- m = greek_date_format.match(dateString)
224
+ m = greek_date_format.match(dateString)
227
225
  return unless m
228
226
  begin
229
227
  wday = greek_wdays[m[1]]
@@ -239,7 +237,7 @@ module FeedParserMixin
239
237
  def _parse_date_hungarian(dateString)
240
238
  # Parse a string according to a Hungarian 8-bit date format.
241
239
  hungarian_date_format_re = /(\d{4})-([^-]+)-(\d{0,2})T(\d{0,2}):(\d{2})((\+|-)(\d{0,2}:\d{2}))/
242
- m = hungarian_date_format_re.match(dateString)
240
+ m = hungarian_date_format_re.match(dateString)
243
241
  return unless m
244
242
 
245
243
  # Unicode strings for Hungarian date strings
@@ -314,8 +312,8 @@ module FeedParserMixin
314
312
  w3[2] -= num_days
315
313
  w3[1] += 1
316
314
  if w3[1] > 12
317
- w3[0] += 1
318
- w3[1] = set_self(w3[1], 12)
315
+ w3[0] += 1
316
+ w3[1] = set_self(w3[1], 12)
319
317
  end
320
318
  num_days = Time.days_in_month(w3[1], w3[0])
321
319
  end
@@ -323,9 +321,9 @@ module FeedParserMixin
323
321
 
324
322
  unless w3[6].class != String
325
323
  if /^-/ =~ w3[6] # Zone offset goes backwards
326
- w3[6][0] = '+'
324
+ w3[6][0] = '+'
327
325
  elsif /^\+/ =~ w3[6]
328
- w3[6][0] = '-'
326
+ w3[6][0] = '-'
329
327
  end
330
328
  end
331
329
  return Time.utc(w3[0], w3[1], w3[2] , w3[3], w3[4], w3[5])+Time.zone_offset(w3[6] || "UTC")
@@ -335,8 +333,8 @@ module FeedParserMixin
335
333
  # Parse an RFC822, RFC1123, RFC2822 or asctime-style date
336
334
  # These first few lines are to fix up the stupid proprietary format from Disney
337
335
  unknown_timezones = { 'AT' => 'EDT', 'ET' => 'EST',
338
- 'CT' => 'CST', 'MT' => 'MST',
339
- 'PT' => 'PST'
336
+ 'CT' => 'CST', 'MT' => 'MST',
337
+ 'PT' => 'PST'
340
338
  }
341
339
 
342
340
  mon = dateString.split[2]
@@ -390,11 +388,11 @@ module FeedParserMixin
390
388
  def parse_date(dateString)
391
389
  @date_handlers.each do |handler|
392
390
  begin
393
- $stderr << "Trying date_handler #{handler}\n" if $debug
394
- datething = extract_tuple(send(handler,dateString))
395
- return datething
391
+ $stderr << "Trying date_handler #{handler}\n" if $debug
392
+ datething = extract_tuple(send(handler,dateString))
393
+ return datething
396
394
  rescue Exception => e
397
- $stderr << "#{handler} raised #{e}\n" if $debug
395
+ $stderr << "#{handler} raised #{e}\n" if $debug
398
396
  end
399
397
  end
400
398
  return nil
@@ -403,6 +401,6 @@ end
403
401
 
404
402
  module FeedParserUtilities
405
403
  def py2rtime(pytuple)
406
- Time.utc(pytuple[0..5])
404
+ return Time.utc(*pytuple[0..5]) unless pytuple.blank?
407
405
  end
408
406
  end
@@ -0,0 +1,20 @@
1
+ <!--
2
+ Description: interprets media:content
3
+ Expect: not bozo and entries[0]['enclosures'][0]['href'] == u'http://www.webmonkey.com/monkeyrock.mpg'
4
+ -->
5
+ <rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
6
+ <channel>
7
+ <title>Some Bands I Like</title>
8
+ <link>http://www.andyvolk.com/webmonkey/bands/</link>
9
+ <description>A list of some bands I like (or have been a member of).</description>
10
+
11
+ <item>
12
+ <title>Rocking Webmonkey Garage Band</title>
13
+ <link>http://www.webmonkey.com/ourband.html</link>
14
+ <description>The best ever garage band on the Internet.</description>
15
+ <guid isPermaLink="false"> http://www.webmonkey.com/ourband.html</guid>
16
+ <media:content url="http://www.webmonkey.com/monkeyrock.mpg" fileSize="2471632" type="video/mpeg" height="240" width="320" duration="147" medium="video" isDefault="true">
17
+ </media:content>
18
+ </item>
19
+ </channel>
20
+ </rss>
@@ -0,0 +1,21 @@
1
+ <!--
2
+ Description: interprets media:content
3
+ Expect: not bozo and entries[0]['enclosures'][1]['href'] == u'http://www.webmonkey.com/images/monkeyrock-thumb.jpg'
4
+ -->
5
+ <rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
6
+ <channel>
7
+ <title>Some Bands I Like</title>
8
+ <link>http://www.andyvolk.com/webmonkey/bands/</link>
9
+ <description>A list of some bands I like (or have been a member of).</description>
10
+
11
+ <item>
12
+ <title>Rocking Webmonkey Garage Band</title>
13
+ <link>http://www.webmonkey.com/ourband.html</link>
14
+ <description>The best ever garage band on the Internet.</description>
15
+ <guid isPermaLink="false"> http://www.webmonkey.com/ourband.html</guid>
16
+ <media:content url="http://www.webmonkey.com/monkeyrock.mpg" fileSize="2471632" type="video/mpeg" height="240" width="320" duration="147" medium="video" isDefault="true">
17
+ <media:thumbnail url="http://www.webmonkey.com/images/monkeyrock-thumb.jpg" height="98" width="145"/>
18
+ </media:content>
19
+ </item>
20
+ </channel>
21
+ </rss>
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.2
2
+ rubygems_version: 0.9.4
3
3
  specification_version: 1
4
4
  name: rfeedparser
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.9.92
7
- date: 2007-06-07 00:00:00 -07:00
6
+ version: 0.9.93
7
+ date: 2007-07-21 00:00:00 -07:00
8
8
  summary: Parse RSS and Atom feeds in Ruby
9
9
  require_paths:
10
10
  - lib
@@ -1822,6 +1822,11 @@ files:
1822
1822
  - tests/illformed/sanitize/item_xhtml_body_style.xml
1823
1823
  - tests/rfeedparserserver.rb
1824
1824
  - tests/rfeedparsertest.rb
1825
+ - tests/rfponly
1826
+ - tests/rfponly/wellformed
1827
+ - tests/rfponly/wellformed/mrss
1828
+ - tests/rfponly/wellformed/mrss/mrss_media_content.xml
1829
+ - tests/rfponly/wellformed/mrss/mrss_thumbnail.xml
1825
1830
  - tests/wellformed
1826
1831
  - tests/wellformed/amp
1827
1832
  - tests/wellformed/amp/amp01.xml
@@ -3426,7 +3431,7 @@ dependencies:
3426
3431
  requirements:
3427
3432
  - - ">="
3428
3433
  - !ruby/object:Gem::Version
3429
- version: "1.0"
3434
+ version: "1.1"
3430
3435
  version:
3431
3436
  - !ruby/object:Gem::Dependency
3432
3437
  name: activesupport
@@ -3442,7 +3447,7 @@ dependencies:
3442
3447
  version_requirement:
3443
3448
  version_requirements: !ruby/object:Gem::Version::Requirement
3444
3449
  requirements:
3445
- - - ">="
3450
+ - - "="
3446
3451
  - !ruby/object:Gem::Version
3447
3452
  version: "0.5"
3448
3453
  version: