feedme 0.8.4 → 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. data/History.txt +4 -0
  2. data/lib/feedme.rb +140 -130
  3. metadata +2 -2
@@ -1,3 +1,7 @@
1
+ === 0.8.5 / 2010-06-15
2
+
3
+ * Added default value selector
4
+
1
5
  === 0.8.4 / 2010-06-02
2
6
 
3
7
  * Add :apply transformation
@@ -52,6 +52,8 @@ module FeedMe
52
52
  # A hash of functions for selecting the correct value to return when a tag
53
53
  # has multiple values and the singular accessor is called
54
54
  attr_accessor :value_selectors
55
+ # Value selector to use if there is no value selector defined for a tag
56
+ attr_accessor :default_value_selector
55
57
  # A hash of attribute/tag name aliases.
56
58
  attr_accessor :aliases
57
59
  # An array of the transformation functions applied when the !
@@ -75,16 +77,16 @@ module FeedMe
75
77
  @options = options
76
78
 
77
79
  # rss tags
78
- @rss_tags = [
79
- {
80
- :image => nil,
80
+ @rss_tags = [
81
+ {
82
+ :image => nil,
81
83
  :textinput => nil,
82
84
  :skiphours => nil,
83
85
  :skipdays => nil,
84
86
  :items => [{ :rdf_seq => nil }],
85
87
  #:item => @rss_item_tags
86
- }
87
- ]
88
+ }
89
+ ]
88
90
  @rss_item_tags = [ {} ]
89
91
 
90
92
  #atom tags
@@ -124,45 +126,51 @@ module FeedMe
124
126
  links.first
125
127
  end
126
128
  }
129
+ @default_value_selector = proc do |x|
130
+ x = x.sort do |a,b|
131
+ a.is_a?(String) ? -1 : (b.is_a?(String) ? 1 : 0)
132
+ end
133
+ x.first
134
+ end
127
135
 
128
136
  # tag/attribute aliases
129
- @aliases = {
130
- :items => :item_array,
131
- :item_array => :entry_array,
132
- :entries => :entry_array,
133
- :entry_array => :item_array,
134
- :link => :'link+self'
135
- }
136
-
137
- # transformations
138
- @html_helper_lib = HPRICOT_HELPER
139
- @default_transformation = [ :cleanHtml ]
140
- @transformations = {}
141
- @transformation_fns = {
142
- # remove all HTML tags
143
- :stripHtml => proc do |str|
144
- require @html_helper_lib
145
- FeedMe.html_helper.strip_html(str)
146
- end,
147
-
148
- # clean HTML content using FeedNormalizer's HtmlCleaner class
149
- :cleanHtml => proc do |str|
150
- require @html_helper_lib
151
- FeedMe.html_helper.clean_html(str)
152
- end,
153
-
154
- # wrap text at a certain number of characters (respecting word boundaries)
155
- :wrap => proc do |str, col|
156
- str.gsub(/(.{1,#{col}})( +|$\n?)|(.{1,#{col}})/, "\\1\\3\n").strip
157
- end,
158
-
159
- # truncate text, respecting word boundaries
160
- :trunc => proc {|str, wordcount| str.trunc(wordcount.to_i) },
137
+ @aliases = {
138
+ :items => :item_array,
139
+ :item_array => :entry_array,
140
+ :entries => :entry_array,
141
+ :entry_array => :item_array,
142
+ :link => :'link+self'
143
+ }
144
+
145
+ # transformations
146
+ @html_helper_lib = HPRICOT_HELPER
147
+ @default_transformation = [ :cleanHtml ]
148
+ @transformations = {}
149
+ @transformation_fns = {
150
+ # remove all HTML tags
151
+ :stripHtml => proc do |str|
152
+ require @html_helper_lib
153
+ FeedMe.html_helper.strip_html(str)
154
+ end,
155
+
156
+ # clean HTML content using FeedNormalizer's HtmlCleaner class
157
+ :cleanHtml => proc do |str|
158
+ require @html_helper_lib
159
+ FeedMe.html_helper.clean_html(str)
160
+ end,
161
+
162
+ # wrap text at a certain number of characters (respecting word boundaries)
163
+ :wrap => proc do |str, col|
164
+ str.gsub(/(.{1,#{col}})( +|$\n?)|(.{1,#{col}})/, "\\1\\3\n").strip
165
+ end,
166
+
167
+ # truncate text, respecting word boundaries
168
+ :trunc => proc {|str, wordcount| str.trunc(wordcount.to_i) },
161
169
 
162
170
  # truncate HTML and leave enclosing HTML tags
163
171
  :truncHtml => proc do |str, wordcount|
164
172
  require @html_helper_lib
165
- FeedMe.html_helper.truncate_html(str, wordcount.to_i)
173
+ FeedMe.html_helper.truncate_html(str, wordcount.to_i)
166
174
  end,
167
175
 
168
176
  :regexp => proc do |str, regexp|
@@ -176,7 +184,7 @@ module FeedMe
176
184
 
177
185
  # apply an arbitrary function
178
186
  :apply => proc {|str, fn, *args| fn.call(str, *args) }
179
- }
187
+ }
180
188
  end
181
189
 
182
190
  # Prepare tag list for an RSS feed.
@@ -227,8 +235,8 @@ module FeedMe
227
235
 
228
236
  # Parse +source+ using a +Parser+ created from this +ParserBuilder+.
229
237
  def parse(source)
230
- Parser.new(self, source, options)
231
- end
238
+ Parser.new(self, source, options)
239
+ end
232
240
  end
233
241
 
234
242
  # This class is used to create strict parsers
@@ -239,9 +247,9 @@ module FeedMe
239
247
  super(options)
240
248
 
241
249
  # rss tags
242
- @rss_tags = [
243
- {
244
- :image => [ :url, :title, :link, :width, :height, :description ],
250
+ @rss_tags = [
251
+ {
252
+ :image => [ :url, :title, :link, :width, :height, :description ],
245
253
  :textinput => [ :title, :description, :name, :link ],
246
254
  :skiphours => [ :hour ],
247
255
  :skipdays => [ :day ],
@@ -252,20 +260,20 @@ module FeedMe
252
260
  :rdf_seq
253
261
  ],
254
262
  #:item => @item_tags
255
- },
256
- :title, :link, :description, # required
257
- :language, :copyright, :managingeditor, :webmaster, # optional
258
- :pubdate, :lastbuilddate, :category, :generator,
259
- :docs, :cloud, :ttl, :rating,
260
- :image, :textinput, :skiphours, :skipdays, :item, # have subtags
261
- :items
262
- ]
263
+ },
264
+ :title, :link, :description, # required
265
+ :language, :copyright, :managingeditor, :webmaster, # optional
266
+ :pubdate, :lastbuilddate, :category, :generator,
267
+ :docs, :cloud, :ttl, :rating,
268
+ :image, :textinput, :skiphours, :skipdays, :item, # have subtags
269
+ :items
270
+ ]
263
271
  @rss_item_tags = [
264
272
  {},
265
273
  :title, :description, # required
266
274
  :link, :author, :category, :comments, :enclosure, # optional
267
275
  :guid, :pubdate, :source, :expirationdate
268
- ]
276
+ ]
269
277
 
270
278
  #atom tags
271
279
  person_tags = [ :name, :uri, :email ]
@@ -318,7 +326,7 @@ module FeedMe
318
326
  all_tags[0][:entry] = atom_entry_tags + (item_ext_tags or [])
319
327
  return all_tags
320
328
  end
321
- end
329
+ end
322
330
 
323
331
  class FeedData
324
332
  attr_reader :fm_tag_name, :fm_parent, :fm_builder
@@ -429,8 +437,10 @@ module FeedMe
429
437
  elt = if array.size > 1
430
438
  if (!args.empty? && args.first.is_a?(Proc))
431
439
  args.first.call(array)
432
- elsif (fm_builder.value_sorters.key?(name))
433
- value_selectors[name].call(array)
440
+ elsif (fm_builder.value_selectors.key?(name))
441
+ fm_builder.value_selectors[name].call(array)
442
+ elsif !fm_builder.default_value_selector.nil?
443
+ fm_builder.default_value_selector.call(array)
434
444
  end
435
445
  end
436
446
  elt || array.first
@@ -462,16 +472,16 @@ module FeedMe
462
472
  end
463
473
  value
464
474
  elsif name_str.include?('+')
465
- name_data = name_str.split('+')
466
- rel = name_data[1]
467
- value = nil
468
- call_virtual_method(arrayize(name_data[0]), args, history).each do |elt|
469
- next unless elt.is_a?(FeedData) and elt.rel?
470
- value = elt if elt.rel.casecmp(rel) == 0
471
- break unless value.nil?
472
- end
473
- value
474
- elsif fm_builder.aliases.key? name
475
+ name_data = name_str.split('+')
476
+ rel = name_data[1]
477
+ value = nil
478
+ call_virtual_method(arrayize(name_data[0]), args, history).each do |elt|
479
+ next unless elt.is_a?(FeedData) and elt.rel?
480
+ value = elt if elt.rel.casecmp(rel) == 0
481
+ break unless value.nil?
482
+ end
483
+ value
484
+ elsif fm_builder.aliases.key? name
475
485
  names = fm_builder.aliases[name]
476
486
  names = [names] unless names.is_a? Array
477
487
  value = nil
@@ -534,8 +544,8 @@ module FeedMe
534
544
  protected
535
545
 
536
546
  def clean_tag(tag)
537
- tag.to_s.downcase.gsub(':','_').intern
538
- end
547
+ tag.to_s.downcase.gsub(':','_').intern
548
+ end
539
549
 
540
550
  # generate a name for the array variable corresponding to a single-value variable
541
551
  def arrayize(key)
@@ -569,12 +579,12 @@ module FeedMe
569
579
 
570
580
  def initialize(builder, source, options={})
571
581
  super(nil, nil, builder)
572
- @fm_source = source.respond_to?(:read) ? source.read : source.to_s
582
+ @fm_source = source.respond_to?(:read) ? source.read : source.to_s
573
583
  @fm_options = Hash.new.update(options)
574
584
  @fm_parsed = []
575
585
  @fm_unparsed = []
576
- parse
577
- end
586
+ parse
587
+ end
578
588
 
579
589
  def channel() self end
580
590
  alias :feed :channel
@@ -616,7 +626,7 @@ module FeedMe
616
626
  else
617
627
  raise FeedMeError, "Poorly formatted feed"
618
628
  end
619
- end
629
+ end
620
630
 
621
631
  # References within the <channel> element are replaced by the actual
622
632
  def dereference_rdf_tags(rdf_tag, rss_tag, refs)
@@ -640,61 +650,61 @@ module FeedMe
640
650
  end
641
651
  end
642
652
 
643
- def parse_content(parent, attrs, content, tags)
644
- # add attributes to parent
645
- attrs.each_pair {|key, value| parent[key] = unescape(value) }
653
+ def parse_content(parent, attrs, content, tags)
654
+ # add attributes to parent
655
+ attrs.each_pair {|key, value| parent[key] = unescape(value) }
646
656
  return if content.nil?
647
657
 
648
- # split the content into elements
649
- elements = {}
650
- # TODO: this will break if a namespace is used that is not rss: or atom:
651
- content.scan( %r{(<([\w:]+)(.*?)(?:/>|>(.*?)</\2>))}mi ) do |match|
652
- # \1 = full content (from start to end tag), \2 = tag name
653
- # \3 = attributes, and \4 = content between tags
654
- key = clean_tag(match[1])
655
- value = [parse_attributes(match[2]), match[3]]
656
- if elements.key? key
657
- elements[key] << value
658
- else
659
- elements[key] = [value]
660
- end
661
- end
658
+ # split the content into elements
659
+ elements = {}
660
+ # TODO: this will break if a namespace is used that is not rss: or atom:
661
+ content.scan( %r{(<([\w:]+)(.*?)(?:/>|>(.*?)</\2>))}mi ) do |match|
662
+ # \1 = full content (from start to end tag), \2 = tag name
663
+ # \3 = attributes, and \4 = content between tags
664
+ key = clean_tag(match[1])
665
+ value = [parse_attributes(match[2]), match[3]]
666
+ if elements.key? key
667
+ elements[key] << value
668
+ else
669
+ elements[key] = [value]
670
+ end
671
+ end
662
672
 
663
673
  # the first item in a tag array may be a hash that defines tags that have subtags
664
- sub_tags = tags[0] if !nil_or_empty?(tags) && tags[0].is_a?(Hash)
665
- first_tag = sub_tags.nil? || tags.size == 1 ? 0 : 1
666
- # if this is a promiscuous parser, tag names will depend on the elements found in the feed
667
- tags = elements.keys if (sub_tags.nil? ? nil_or_empty?(tags) : first_tag == 0)
668
-
669
- # iterate over all tags (some or all of which may not be present)
670
- tags[first_tag..-1].each do |tag|
671
- key = clean_tag(tag)
672
- element_array = elements.delete(tag) or next
673
- @fm_parsed << key
674
+ sub_tags = tags[0] if !nil_or_empty?(tags) && tags[0].is_a?(Hash)
675
+ first_tag = sub_tags.nil? || tags.size == 1 ? 0 : 1
676
+ # if this is a promiscuous parser, tag names will depend on the elements found in the feed
677
+ tags = elements.keys if (sub_tags.nil? ? nil_or_empty?(tags) : first_tag == 0)
678
+
679
+ # iterate over all tags (some or all of which may not be present)
680
+ tags[first_tag..-1].each do |tag|
681
+ key = clean_tag(tag)
682
+ element_array = elements.delete(tag) or next
683
+ @fm_parsed << key
674
684
 
675
- element_array.each do |elt|
676
- elt_attrs = elt[0]
677
- elt_content = elt[1]
678
- rels = fm_builder.rels[key] if fm_builder.respond_to?(:rels)
679
-
680
- # if a list of accepted rels is specified, only parse this tag
681
- # if its rel attribute is inlcuded in the list
682
- next unless rels.nil? || elt_attrs.nil? || !elt_attrs.rel? || rels.include?(elt_attrs.rel)
683
-
684
- if !sub_tags.nil? && sub_tags.key?(key)
685
- new_parent = FeedData.new(key, parent, fm_builder)
686
- add_tag(parent, key, new_parent)
687
- parse_content(new_parent, elt_attrs, elt_content, sub_tags[key])
688
- else
689
- add_tag(parent, key, clean_content(key, elt_attrs, elt_content, parent))
690
- end
691
- end
692
- end
685
+ element_array.each do |elt|
686
+ elt_attrs = elt[0]
687
+ elt_content = elt[1]
688
+ rels = fm_builder.rels[key] if fm_builder.respond_to?(:rels)
689
+
690
+ # if a list of accepted rels is specified, only parse this tag
691
+ # if its rel attribute is inlcuded in the list
692
+ next unless rels.nil? || elt_attrs.nil? || !elt_attrs.rel? || rels.include?(elt_attrs.rel)
693
+
694
+ if !sub_tags.nil? && sub_tags.key?(key)
695
+ new_parent = FeedData.new(key, parent, fm_builder)
696
+ add_tag(parent, key, new_parent)
697
+ parse_content(new_parent, elt_attrs, elt_content, sub_tags[key])
698
+ else
699
+ add_tag(parent, key, clean_content(key, elt_attrs, elt_content, parent))
700
+ end
701
+ end
702
+ end
693
703
 
694
- @fm_unparsed += elements.keys
695
- @fm_parsed.uniq!
696
- @fm_unparsed.uniq!
697
- end
704
+ @fm_unparsed += elements.keys
705
+ @fm_parsed.uniq!
706
+ @fm_unparsed.uniq!
707
+ end
698
708
 
699
709
  def add_tag(hash, key, value)
700
710
  array_var = arrayize(key)
@@ -710,13 +720,13 @@ module FeedMe
710
720
  camelize(underscore(tag).downcase, false)
711
721
  end
712
722
 
713
- def clean_content(tag, attrs, content, parent)
714
- content = content.to_s
715
- if fm_builder.date_tags.include? tag
716
- content = Time.parse(content) rescue unescape(content)
717
- else
718
- content = unescape(content)
719
- end
723
+ def clean_content(tag, attrs, content, parent)
724
+ content = content.to_s
725
+ if fm_builder.date_tags.include? tag
726
+ content = Time.parse(content) rescue unescape(content)
727
+ else
728
+ content = unescape(content)
729
+ end
720
730
 
721
731
  unless attrs.empty?
722
732
  hash = FeedData.new(tag, parent, fm_builder)
@@ -728,7 +738,7 @@ module FeedMe
728
738
  end
729
739
 
730
740
  return content
731
- end
741
+ end
732
742
 
733
743
  def parse_attributes(*attrs)
734
744
  hash = {}
@@ -763,7 +773,7 @@ module FeedMe
763
773
  obj.nil? || obj.empty? || (obj.is_a?(String) && obj.strip.empty?)
764
774
  end
765
775
  end
766
-
776
+
767
777
  class FeedMeError < StandardError
768
778
  end
769
779
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedme
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.4
4
+ version: 0.8.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Didion
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-06-02 00:00:00 -04:00
12
+ date: 2010-06-15 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies: []
15
15