rfeedparser 0.9.92 → 0.9.93

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/ruby
2
- gem 'hpricot', ">=0.5"
2
+ gem 'hpricot', "=0.5"
3
3
  require 'hpricot'
4
4
  # This used to be based on Michael Moen's Hpricot#scrub, but that seems to
5
5
  # have only been part of its evolution. Hpricot#scrub is cool code, though.
@@ -20,35 +20,33 @@ module FeedParserMixin
20
20
  # FIXME The century regexp maybe not work ('\d\d$' says "two numbers at
21
21
  # end of line" but we then attach more of a regexp.
22
22
  iso8601_regexps = [ '^(\d{4})-?([01]\d)-([0123]\d)',
23
- '^(\d{4})-([01]\d)',
24
- '^(\d{4})-?([0123]\d\d)',
25
- '^(\d\d)-?([01]\d)-?([0123]\d)',
26
- '^(\d\d)-?([0123]\d\d)',
27
- '^(\d{4})',
28
- '-(\d\d)-?([01]\d)',
29
- '-([0123]\d\d)',
30
- '-(\d\d)',
31
- '--([01]\d)-?([0123]\d)',
32
- '--([01]\d)',
33
- '---([0123]\d)',
34
- '(\d\d$)',
35
- ''
36
- ]
23
+ '^(\d{4})-([01]\d)',
24
+ '^(\d{4})-?([0123]\d\d)',
25
+ '^(\d\d)-?([01]\d)-?([0123]\d)',
26
+ '^(\d\d)-?([0123]\d\d)',
27
+ '^(\d{4})',
28
+ '-(\d\d)-?([01]\d)',
29
+ '-([0123]\d\d)',
30
+ '-(\d\d)',
31
+ '--([01]\d)-?([0123]\d)',
32
+ '--([01]\d)',
33
+ '---([0123]\d)',
34
+ '(\d\d$)',
35
+ '' ]
37
36
  iso8601_values = { '^(\d{4})-?([01]\d)-([0123]\d)' => ['year', 'month', 'day'],
38
- '^(\d{4})-([01]\d)' => ['year','month'],
39
- '^(\d{4})-?([0123]\d\d)' => ['year', 'ordinal'],
40
- '^(\d\d)-?([01]\d)-?([0123]\d)' => ['year','month','day'],
41
- '^(\d\d)-?([0123]\d\d)' => ['year','ordinal'],
42
- '^(\d{4})' => ['year'],
43
- '-(\d\d)-?([01]\d)' => ['year','month'],
44
- '-([0123]\d\d)' => ['ordinal'],
45
- '-(\d\d)' => ['year'],
46
- '--([01]\d)-?([0123]\d)' => ['month','day'],
47
- '--([01]\d)' => ['month'],
48
- '---([0123]\d)' => ['day'],
49
- '(\d\d$)' => ['century'],
50
- '' => []
51
- }
37
+ '^(\d{4})-([01]\d)' => ['year','month'],
38
+ '^(\d{4})-?([0123]\d\d)' => ['year', 'ordinal'],
39
+ '^(\d\d)-?([01]\d)-?([0123]\d)' => ['year','month','day'],
40
+ '^(\d\d)-?([0123]\d\d)' => ['year','ordinal'],
41
+ '^(\d{4})' => ['year'],
42
+ '-(\d\d)-?([01]\d)' => ['year','month'],
43
+ '-([0123]\d\d)' => ['ordinal'],
44
+ '-(\d\d)' => ['year'],
45
+ '--([01]\d)-?([0123]\d)' => ['month','day'],
46
+ '--([01]\d)' => ['month'],
47
+ '---([0123]\d)' => ['day'],
48
+ '(\d\d$)' => ['century'],
49
+ '' => [] }
52
50
  add_to_all = '(T?(\d\d):(\d\d)(?::(\d\d))?([+-](\d\d)(?::(\d\d))?|Z)?)?'
53
51
  add_to_all_fields = ['hour', 'minute', 'second', 'tz', 'tzhour', 'tzmin']
54
52
  # NOTE We use '(?:' to prevent grouping of optional matches (ones trailed
@@ -86,9 +84,9 @@ module FeedParserMixin
86
84
  # ordinals are NOT normalized by mktime, we simulate them
87
85
  # by setting month=1, day=ordinal
88
86
  if ordinal
89
- month = DateTime.ordinal(year,ordinal).month
87
+ month = DateTime.ordinal(year,ordinal).month
90
88
  else
91
- month = Time.now.utc.month
89
+ month = Time.now.utc.month
92
90
  end
93
91
  end
94
92
  month = month.to_i unless month.nil?
@@ -96,11 +94,11 @@ module FeedParserMixin
96
94
  if day.nil? or day.empty?
97
95
  # see above
98
96
  if ordinal
99
- day = DateTime.ordinal(year,ordinal).day
97
+ day = DateTime.ordinal(year,ordinal).day
100
98
  elsif params['century'] or params['year'] or params['month']
101
- day = 1
99
+ day = 1
102
100
  else
103
- day = Time.now.utc.day
101
+ day = Time.now.utc.day
104
102
  end
105
103
  else
106
104
  day = day.to_i
@@ -124,13 +122,13 @@ module FeedParserMixin
124
122
  if tz and not tz.empty? and tz != 'Z'
125
123
  # FIXME does this cross over days?
126
124
  if tz[0] == '-'
127
- tm[3] += params['tzhour'].to_i
128
- tm[4] += params['tzmin'].to_i
125
+ tm[3] += params['tzhour'].to_i
126
+ tm[4] += params['tzmin'].to_i
129
127
  elsif tz[0] == '+'
130
- tm[3] -= params['tzhour'].to_i
131
- tm[4] -= params['tzmin'].to_i
128
+ tm[3] -= params['tzhour'].to_i
129
+ tm[4] -= params['tzmin'].to_i
132
130
  else
133
- return nil
131
+ return nil
134
132
  end
135
133
  end
136
134
  return Time.utc(*tm) # Magic!
@@ -148,7 +146,7 @@ module FeedParserMixin
148
146
  korean_onblog_date_re = /(\d{4})#{korean_year}\s+(\d{2})#{korean_month}\s+(\d{2})#{korean_day}\s+(\d{2}):(\d{2}):(\d{2})/
149
147
 
150
148
 
151
- m = korean_onblog_date_re.match(dateString)
149
+ m = korean_onblog_date_re.match(dateString)
152
150
  return unless m
153
151
  w3dtfdate = "#{m[1]}-#{m[2]}-#{m[3]}T#{m[4]}:#{m[5]}:#{m[6]}+09:00"
154
152
 
@@ -163,7 +161,7 @@ module FeedParserMixin
163
161
  korean_pm = u("오후") # bfc0 c8c4 in euc-kr
164
162
 
165
163
  korean_nate_date_re = /(\d{4})-(\d{2})-(\d{2})\s+(#{korean_am}|#{korean_pm})\s+(\d{0,2}):(\d{0,2}):(\d{0,2})/
166
- m = korean_nate_date_re.match(dateString)
164
+ m = korean_nate_date_re.match(dateString)
167
165
  return unless m
168
166
  hour = m[5].to_i
169
167
  ampm = m[4]
@@ -179,7 +177,7 @@ module FeedParserMixin
179
177
  def _parse_date_mssql(dateString)
180
178
  mssql_date_re = /(\d{4})-(\d{2})-(\d{2})\s+(\d{2}):(\d{2}):(\d{2})(\.\d+)?/
181
179
 
182
- m = mssql_date_re.match(dateString)
180
+ m = mssql_date_re.match(dateString)
183
181
  return unless m
184
182
  w3dtfdate = "#{m[1]}-#{m[2]}-#{m[3]}T#{m[4]}:#{m[5]}:#{m[6]}+09:00"
185
183
  $stderr << "MS SQL date parsed as: %s\n" % w3dtfdate if $debug
@@ -223,7 +221,7 @@ module FeedParserMixin
223
221
 
224
222
  greek_date_format = /([^,]+),\s+(\d{2})\s+([^\s]+)\s+(\d{4})\s+(\d{2}):(\d{2}):(\d{2})\s+([^\s]+)/
225
223
 
226
- m = greek_date_format.match(dateString)
224
+ m = greek_date_format.match(dateString)
227
225
  return unless m
228
226
  begin
229
227
  wday = greek_wdays[m[1]]
@@ -239,7 +237,7 @@ module FeedParserMixin
239
237
  def _parse_date_hungarian(dateString)
240
238
  # Parse a string according to a Hungarian 8-bit date format.
241
239
  hungarian_date_format_re = /(\d{4})-([^-]+)-(\d{0,2})T(\d{0,2}):(\d{2})((\+|-)(\d{0,2}:\d{2}))/
242
- m = hungarian_date_format_re.match(dateString)
240
+ m = hungarian_date_format_re.match(dateString)
243
241
  return unless m
244
242
 
245
243
  # Unicode strings for Hungarian date strings
@@ -314,8 +312,8 @@ module FeedParserMixin
314
312
  w3[2] -= num_days
315
313
  w3[1] += 1
316
314
  if w3[1] > 12
317
- w3[0] += 1
318
- w3[1] = set_self(w3[1], 12)
315
+ w3[0] += 1
316
+ w3[1] = set_self(w3[1], 12)
319
317
  end
320
318
  num_days = Time.days_in_month(w3[1], w3[0])
321
319
  end
@@ -323,9 +321,9 @@ module FeedParserMixin
323
321
 
324
322
  unless w3[6].class != String
325
323
  if /^-/ =~ w3[6] # Zone offset goes backwards
326
- w3[6][0] = '+'
324
+ w3[6][0] = '+'
327
325
  elsif /^\+/ =~ w3[6]
328
- w3[6][0] = '-'
326
+ w3[6][0] = '-'
329
327
  end
330
328
  end
331
329
  return Time.utc(w3[0], w3[1], w3[2] , w3[3], w3[4], w3[5])+Time.zone_offset(w3[6] || "UTC")
@@ -335,8 +333,8 @@ module FeedParserMixin
335
333
  # Parse an RFC822, RFC1123, RFC2822 or asctime-style date
336
334
  # These first few lines are to fix up the stupid proprietary format from Disney
337
335
  unknown_timezones = { 'AT' => 'EDT', 'ET' => 'EST',
338
- 'CT' => 'CST', 'MT' => 'MST',
339
- 'PT' => 'PST'
336
+ 'CT' => 'CST', 'MT' => 'MST',
337
+ 'PT' => 'PST'
340
338
  }
341
339
 
342
340
  mon = dateString.split[2]
@@ -390,11 +388,11 @@ module FeedParserMixin
390
388
  def parse_date(dateString)
391
389
  @date_handlers.each do |handler|
392
390
  begin
393
- $stderr << "Trying date_handler #{handler}\n" if $debug
394
- datething = extract_tuple(send(handler,dateString))
395
- return datething
391
+ $stderr << "Trying date_handler #{handler}\n" if $debug
392
+ datething = extract_tuple(send(handler,dateString))
393
+ return datething
396
394
  rescue Exception => e
397
- $stderr << "#{handler} raised #{e}\n" if $debug
395
+ $stderr << "#{handler} raised #{e}\n" if $debug
398
396
  end
399
397
  end
400
398
  return nil
@@ -403,6 +401,6 @@ end
403
401
 
404
402
  module FeedParserUtilities
405
403
  def py2rtime(pytuple)
406
- Time.utc(pytuple[0..5])
404
+ return Time.utc(*pytuple[0..5]) unless pytuple.blank?
407
405
  end
408
406
  end
@@ -0,0 +1,20 @@
1
+ <!--
2
+ Description: interprets media:content
3
+ Expect: not bozo and entries[0]['enclosures'][0]['href'] == u'http://www.webmonkey.com/monkeyrock.mpg'
4
+ -->
5
+ <rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
6
+ <channel>
7
+ <title>Some Bands I Like</title>
8
+ <link>http://www.andyvolk.com/webmonkey/bands/</link>
9
+ <description>A list of some bands I like (or have been a member of).</description>
10
+
11
+ <item>
12
+ <title>Rocking Webmonkey Garage Band</title>
13
+ <link>http://www.webmonkey.com/ourband.html</link>
14
+ <description>The best ever garage band on the Internet.</description>
15
+ <guid isPermaLink="false"> http://www.webmonkey.com/ourband.html</guid>
16
+ <media:content url="http://www.webmonkey.com/monkeyrock.mpg" fileSize="2471632" type="video/mpeg" height="240" width="320" duration="147" medium="video" isDefault="true">
17
+ </media:content>
18
+ </item>
19
+ </channel>
20
+ </rss>
@@ -0,0 +1,21 @@
1
+ <!--
2
+ Description: interprets media:content
3
+ Expect: not bozo and entries[0]['enclosures'][1]['href'] == u'http://www.webmonkey.com/images/monkeyrock-thumb.jpg'
4
+ -->
5
+ <rss version="2.0" xmlns:media="http://search.yahoo.com/mrss/">
6
+ <channel>
7
+ <title>Some Bands I Like</title>
8
+ <link>http://www.andyvolk.com/webmonkey/bands/</link>
9
+ <description>A list of some bands I like (or have been a member of).</description>
10
+
11
+ <item>
12
+ <title>Rocking Webmonkey Garage Band</title>
13
+ <link>http://www.webmonkey.com/ourband.html</link>
14
+ <description>The best ever garage band on the Internet.</description>
15
+ <guid isPermaLink="false"> http://www.webmonkey.com/ourband.html</guid>
16
+ <media:content url="http://www.webmonkey.com/monkeyrock.mpg" fileSize="2471632" type="video/mpeg" height="240" width="320" duration="147" medium="video" isDefault="true">
17
+ <media:thumbnail url="http://www.webmonkey.com/images/monkeyrock-thumb.jpg" height="98" width="145"/>
18
+ </media:content>
19
+ </item>
20
+ </channel>
21
+ </rss>
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.9.2
2
+ rubygems_version: 0.9.4
3
3
  specification_version: 1
4
4
  name: rfeedparser
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.9.92
7
- date: 2007-06-07 00:00:00 -07:00
6
+ version: 0.9.93
7
+ date: 2007-07-21 00:00:00 -07:00
8
8
  summary: Parse RSS and Atom feeds in Ruby
9
9
  require_paths:
10
10
  - lib
@@ -1822,6 +1822,11 @@ files:
1822
1822
  - tests/illformed/sanitize/item_xhtml_body_style.xml
1823
1823
  - tests/rfeedparserserver.rb
1824
1824
  - tests/rfeedparsertest.rb
1825
+ - tests/rfponly
1826
+ - tests/rfponly/wellformed
1827
+ - tests/rfponly/wellformed/mrss
1828
+ - tests/rfponly/wellformed/mrss/mrss_media_content.xml
1829
+ - tests/rfponly/wellformed/mrss/mrss_thumbnail.xml
1825
1830
  - tests/wellformed
1826
1831
  - tests/wellformed/amp
1827
1832
  - tests/wellformed/amp/amp01.xml
@@ -3426,7 +3431,7 @@ dependencies:
3426
3431
  requirements:
3427
3432
  - - ">="
3428
3433
  - !ruby/object:Gem::Version
3429
- version: "1.0"
3434
+ version: "1.1"
3430
3435
  version:
3431
3436
  - !ruby/object:Gem::Dependency
3432
3437
  name: activesupport
@@ -3442,7 +3447,7 @@ dependencies:
3442
3447
  version_requirement:
3443
3448
  version_requirements: !ruby/object:Gem::Version::Requirement
3444
3449
  requirements:
3445
- - - ">="
3450
+ - - "="
3446
3451
  - !ruby/object:Gem::Version
3447
3452
  version: "0.5"
3448
3453
  version: