feedme 0.8.4 → 0.8.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/History.txt +4 -0
  2. data/lib/feedme.rb +140 -130
  3. metadata +2 -2
@@ -1,3 +1,7 @@
1
+ === 0.8.5 / 2010-06-15
2
+
3
+ * Added default value selector
4
+
1
5
  === 0.8.4 / 2010-06-02
2
6
 
3
7
  * Add :apply transformation
@@ -52,6 +52,8 @@ module FeedMe
52
52
  # A hash of functions for selecting the correct value to return when a tag
53
53
  # has multiple values and the singular accessor is called
54
54
  attr_accessor :value_selectors
55
+ # Value selector to use if there is no value selector defined for a tag
56
+ attr_accessor :default_value_selector
55
57
  # A hash of attribute/tag name aliases.
56
58
  attr_accessor :aliases
57
59
  # An array of the transformation functions applied when the !
@@ -75,16 +77,16 @@ module FeedMe
75
77
  @options = options
76
78
 
77
79
  # rss tags
78
- @rss_tags = [
79
- {
80
- :image => nil,
80
+ @rss_tags = [
81
+ {
82
+ :image => nil,
81
83
  :textinput => nil,
82
84
  :skiphours => nil,
83
85
  :skipdays => nil,
84
86
  :items => [{ :rdf_seq => nil }],
85
87
  #:item => @rss_item_tags
86
- }
87
- ]
88
+ }
89
+ ]
88
90
  @rss_item_tags = [ {} ]
89
91
 
90
92
  #atom tags
@@ -124,45 +126,51 @@ module FeedMe
124
126
  links.first
125
127
  end
126
128
  }
129
+ @default_value_selector = proc do |x|
130
+ x = x.sort do |a,b|
131
+ a.is_a?(String) ? -1 : (b.is_a?(String) ? 1 : 0)
132
+ end
133
+ x.first
134
+ end
127
135
 
128
136
  # tag/attribute aliases
129
- @aliases = {
130
- :items => :item_array,
131
- :item_array => :entry_array,
132
- :entries => :entry_array,
133
- :entry_array => :item_array,
134
- :link => :'link+self'
135
- }
136
-
137
- # transformations
138
- @html_helper_lib = HPRICOT_HELPER
139
- @default_transformation = [ :cleanHtml ]
140
- @transformations = {}
141
- @transformation_fns = {
142
- # remove all HTML tags
143
- :stripHtml => proc do |str|
144
- require @html_helper_lib
145
- FeedMe.html_helper.strip_html(str)
146
- end,
147
-
148
- # clean HTML content using FeedNormalizer's HtmlCleaner class
149
- :cleanHtml => proc do |str|
150
- require @html_helper_lib
151
- FeedMe.html_helper.clean_html(str)
152
- end,
153
-
154
- # wrap text at a certain number of characters (respecting word boundaries)
155
- :wrap => proc do |str, col|
156
- str.gsub(/(.{1,#{col}})( +|$\n?)|(.{1,#{col}})/, "\\1\\3\n").strip
157
- end,
158
-
159
- # truncate text, respecting word boundaries
160
- :trunc => proc {|str, wordcount| str.trunc(wordcount.to_i) },
137
+ @aliases = {
138
+ :items => :item_array,
139
+ :item_array => :entry_array,
140
+ :entries => :entry_array,
141
+ :entry_array => :item_array,
142
+ :link => :'link+self'
143
+ }
144
+
145
+ # transformations
146
+ @html_helper_lib = HPRICOT_HELPER
147
+ @default_transformation = [ :cleanHtml ]
148
+ @transformations = {}
149
+ @transformation_fns = {
150
+ # remove all HTML tags
151
+ :stripHtml => proc do |str|
152
+ require @html_helper_lib
153
+ FeedMe.html_helper.strip_html(str)
154
+ end,
155
+
156
+ # clean HTML content using FeedNormalizer's HtmlCleaner class
157
+ :cleanHtml => proc do |str|
158
+ require @html_helper_lib
159
+ FeedMe.html_helper.clean_html(str)
160
+ end,
161
+
162
+ # wrap text at a certain number of characters (respecting word boundaries)
163
+ :wrap => proc do |str, col|
164
+ str.gsub(/(.{1,#{col}})( +|$\n?)|(.{1,#{col}})/, "\\1\\3\n").strip
165
+ end,
166
+
167
+ # truncate text, respecting word boundaries
168
+ :trunc => proc {|str, wordcount| str.trunc(wordcount.to_i) },
161
169
 
162
170
  # truncate HTML and leave enclosing HTML tags
163
171
  :truncHtml => proc do |str, wordcount|
164
172
  require @html_helper_lib
165
- FeedMe.html_helper.truncate_html(str, wordcount.to_i)
173
+ FeedMe.html_helper.truncate_html(str, wordcount.to_i)
166
174
  end,
167
175
 
168
176
  :regexp => proc do |str, regexp|
@@ -176,7 +184,7 @@ module FeedMe
176
184
 
177
185
  # apply an arbitrary function
178
186
  :apply => proc {|str, fn, *args| fn.call(str, *args) }
179
- }
187
+ }
180
188
  end
181
189
 
182
190
  # Prepare tag list for an RSS feed.
@@ -227,8 +235,8 @@ module FeedMe
227
235
 
228
236
  # Parse +source+ using a +Parser+ created from this +ParserBuilder+.
229
237
  def parse(source)
230
- Parser.new(self, source, options)
231
- end
238
+ Parser.new(self, source, options)
239
+ end
232
240
  end
233
241
 
234
242
  # This class is used to create strict parsers
@@ -239,9 +247,9 @@ module FeedMe
239
247
  super(options)
240
248
 
241
249
  # rss tags
242
- @rss_tags = [
243
- {
244
- :image => [ :url, :title, :link, :width, :height, :description ],
250
+ @rss_tags = [
251
+ {
252
+ :image => [ :url, :title, :link, :width, :height, :description ],
245
253
  :textinput => [ :title, :description, :name, :link ],
246
254
  :skiphours => [ :hour ],
247
255
  :skipdays => [ :day ],
@@ -252,20 +260,20 @@ module FeedMe
252
260
  :rdf_seq
253
261
  ],
254
262
  #:item => @item_tags
255
- },
256
- :title, :link, :description, # required
257
- :language, :copyright, :managingeditor, :webmaster, # optional
258
- :pubdate, :lastbuilddate, :category, :generator,
259
- :docs, :cloud, :ttl, :rating,
260
- :image, :textinput, :skiphours, :skipdays, :item, # have subtags
261
- :items
262
- ]
263
+ },
264
+ :title, :link, :description, # required
265
+ :language, :copyright, :managingeditor, :webmaster, # optional
266
+ :pubdate, :lastbuilddate, :category, :generator,
267
+ :docs, :cloud, :ttl, :rating,
268
+ :image, :textinput, :skiphours, :skipdays, :item, # have subtags
269
+ :items
270
+ ]
263
271
  @rss_item_tags = [
264
272
  {},
265
273
  :title, :description, # required
266
274
  :link, :author, :category, :comments, :enclosure, # optional
267
275
  :guid, :pubdate, :source, :expirationdate
268
- ]
276
+ ]
269
277
 
270
278
  #atom tags
271
279
  person_tags = [ :name, :uri, :email ]
@@ -318,7 +326,7 @@ module FeedMe
318
326
  all_tags[0][:entry] = atom_entry_tags + (item_ext_tags or [])
319
327
  return all_tags
320
328
  end
321
- end
329
+ end
322
330
 
323
331
  class FeedData
324
332
  attr_reader :fm_tag_name, :fm_parent, :fm_builder
@@ -429,8 +437,10 @@ module FeedMe
429
437
  elt = if array.size > 1
430
438
  if (!args.empty? && args.first.is_a?(Proc))
431
439
  args.first.call(array)
432
- elsif (fm_builder.value_sorters.key?(name))
433
- value_selectors[name].call(array)
440
+ elsif (fm_builder.value_selectors.key?(name))
441
+ fm_builder.value_selectors[name].call(array)
442
+ elsif !fm_builder.default_value_selector.nil?
443
+ fm_builder.default_value_selector.call(array)
434
444
  end
435
445
  end
436
446
  elt || array.first
@@ -462,16 +472,16 @@ module FeedMe
462
472
  end
463
473
  value
464
474
  elsif name_str.include?('+')
465
- name_data = name_str.split('+')
466
- rel = name_data[1]
467
- value = nil
468
- call_virtual_method(arrayize(name_data[0]), args, history).each do |elt|
469
- next unless elt.is_a?(FeedData) and elt.rel?
470
- value = elt if elt.rel.casecmp(rel) == 0
471
- break unless value.nil?
472
- end
473
- value
474
- elsif fm_builder.aliases.key? name
475
+ name_data = name_str.split('+')
476
+ rel = name_data[1]
477
+ value = nil
478
+ call_virtual_method(arrayize(name_data[0]), args, history).each do |elt|
479
+ next unless elt.is_a?(FeedData) and elt.rel?
480
+ value = elt if elt.rel.casecmp(rel) == 0
481
+ break unless value.nil?
482
+ end
483
+ value
484
+ elsif fm_builder.aliases.key? name
475
485
  names = fm_builder.aliases[name]
476
486
  names = [names] unless names.is_a? Array
477
487
  value = nil
@@ -534,8 +544,8 @@ module FeedMe
534
544
  protected
535
545
 
536
546
  def clean_tag(tag)
537
- tag.to_s.downcase.gsub(':','_').intern
538
- end
547
+ tag.to_s.downcase.gsub(':','_').intern
548
+ end
539
549
 
540
550
  # generate a name for the array variable corresponding to a single-value variable
541
551
  def arrayize(key)
@@ -569,12 +579,12 @@ module FeedMe
569
579
 
570
580
  def initialize(builder, source, options={})
571
581
  super(nil, nil, builder)
572
- @fm_source = source.respond_to?(:read) ? source.read : source.to_s
582
+ @fm_source = source.respond_to?(:read) ? source.read : source.to_s
573
583
  @fm_options = Hash.new.update(options)
574
584
  @fm_parsed = []
575
585
  @fm_unparsed = []
576
- parse
577
- end
586
+ parse
587
+ end
578
588
 
579
589
  def channel() self end
580
590
  alias :feed :channel
@@ -616,7 +626,7 @@ module FeedMe
616
626
  else
617
627
  raise FeedMeError, "Poorly formatted feed"
618
628
  end
619
- end
629
+ end
620
630
 
621
631
  # References within the <channel> element are replaced by the actual
622
632
  def dereference_rdf_tags(rdf_tag, rss_tag, refs)
@@ -640,61 +650,61 @@ module FeedMe
640
650
  end
641
651
  end
642
652
 
643
- def parse_content(parent, attrs, content, tags)
644
- # add attributes to parent
645
- attrs.each_pair {|key, value| parent[key] = unescape(value) }
653
+ def parse_content(parent, attrs, content, tags)
654
+ # add attributes to parent
655
+ attrs.each_pair {|key, value| parent[key] = unescape(value) }
646
656
  return if content.nil?
647
657
 
648
- # split the content into elements
649
- elements = {}
650
- # TODO: this will break if a namespace is used that is not rss: or atom:
651
- content.scan( %r{(<([\w:]+)(.*?)(?:/>|>(.*?)</\2>))}mi ) do |match|
652
- # \1 = full content (from start to end tag), \2 = tag name
653
- # \3 = attributes, and \4 = content between tags
654
- key = clean_tag(match[1])
655
- value = [parse_attributes(match[2]), match[3]]
656
- if elements.key? key
657
- elements[key] << value
658
- else
659
- elements[key] = [value]
660
- end
661
- end
658
+ # split the content into elements
659
+ elements = {}
660
+ # TODO: this will break if a namespace is used that is not rss: or atom:
661
+ content.scan( %r{(<([\w:]+)(.*?)(?:/>|>(.*?)</\2>))}mi ) do |match|
662
+ # \1 = full content (from start to end tag), \2 = tag name
663
+ # \3 = attributes, and \4 = content between tags
664
+ key = clean_tag(match[1])
665
+ value = [parse_attributes(match[2]), match[3]]
666
+ if elements.key? key
667
+ elements[key] << value
668
+ else
669
+ elements[key] = [value]
670
+ end
671
+ end
662
672
 
663
673
  # the first item in a tag array may be a hash that defines tags that have subtags
664
- sub_tags = tags[0] if !nil_or_empty?(tags) && tags[0].is_a?(Hash)
665
- first_tag = sub_tags.nil? || tags.size == 1 ? 0 : 1
666
- # if this is a promiscuous parser, tag names will depend on the elements found in the feed
667
- tags = elements.keys if (sub_tags.nil? ? nil_or_empty?(tags) : first_tag == 0)
668
-
669
- # iterate over all tags (some or all of which may not be present)
670
- tags[first_tag..-1].each do |tag|
671
- key = clean_tag(tag)
672
- element_array = elements.delete(tag) or next
673
- @fm_parsed << key
674
+ sub_tags = tags[0] if !nil_or_empty?(tags) && tags[0].is_a?(Hash)
675
+ first_tag = sub_tags.nil? || tags.size == 1 ? 0 : 1
676
+ # if this is a promiscuous parser, tag names will depend on the elements found in the feed
677
+ tags = elements.keys if (sub_tags.nil? ? nil_or_empty?(tags) : first_tag == 0)
678
+
679
+ # iterate over all tags (some or all of which may not be present)
680
+ tags[first_tag..-1].each do |tag|
681
+ key = clean_tag(tag)
682
+ element_array = elements.delete(tag) or next
683
+ @fm_parsed << key
674
684
 
675
- element_array.each do |elt|
676
- elt_attrs = elt[0]
677
- elt_content = elt[1]
678
- rels = fm_builder.rels[key] if fm_builder.respond_to?(:rels)
679
-
680
- # if a list of accepted rels is specified, only parse this tag
681
- # if its rel attribute is included in the list
682
- next unless rels.nil? || elt_attrs.nil? || !elt_attrs.rel? || rels.include?(elt_attrs.rel)
683
-
684
- if !sub_tags.nil? && sub_tags.key?(key)
685
- new_parent = FeedData.new(key, parent, fm_builder)
686
- add_tag(parent, key, new_parent)
687
- parse_content(new_parent, elt_attrs, elt_content, sub_tags[key])
688
- else
689
- add_tag(parent, key, clean_content(key, elt_attrs, elt_content, parent))
690
- end
691
- end
692
- end
685
+ element_array.each do |elt|
686
+ elt_attrs = elt[0]
687
+ elt_content = elt[1]
688
+ rels = fm_builder.rels[key] if fm_builder.respond_to?(:rels)
689
+
690
+ # if a list of accepted rels is specified, only parse this tag
691
+ # if its rel attribute is included in the list
692
+ next unless rels.nil? || elt_attrs.nil? || !elt_attrs.rel? || rels.include?(elt_attrs.rel)
693
+
694
+ if !sub_tags.nil? && sub_tags.key?(key)
695
+ new_parent = FeedData.new(key, parent, fm_builder)
696
+ add_tag(parent, key, new_parent)
697
+ parse_content(new_parent, elt_attrs, elt_content, sub_tags[key])
698
+ else
699
+ add_tag(parent, key, clean_content(key, elt_attrs, elt_content, parent))
700
+ end
701
+ end
702
+ end
693
703
 
694
- @fm_unparsed += elements.keys
695
- @fm_parsed.uniq!
696
- @fm_unparsed.uniq!
697
- end
704
+ @fm_unparsed += elements.keys
705
+ @fm_parsed.uniq!
706
+ @fm_unparsed.uniq!
707
+ end
698
708
 
699
709
  def add_tag(hash, key, value)
700
710
  array_var = arrayize(key)
@@ -710,13 +720,13 @@ module FeedMe
710
720
  camelize(underscore(tag).downcase, false)
711
721
  end
712
722
 
713
- def clean_content(tag, attrs, content, parent)
714
- content = content.to_s
715
- if fm_builder.date_tags.include? tag
716
- content = Time.parse(content) rescue unescape(content)
717
- else
718
- content = unescape(content)
719
- end
723
+ def clean_content(tag, attrs, content, parent)
724
+ content = content.to_s
725
+ if fm_builder.date_tags.include? tag
726
+ content = Time.parse(content) rescue unescape(content)
727
+ else
728
+ content = unescape(content)
729
+ end
720
730
 
721
731
  unless attrs.empty?
722
732
  hash = FeedData.new(tag, parent, fm_builder)
@@ -728,7 +738,7 @@ module FeedMe
728
738
  end
729
739
 
730
740
  return content
731
- end
741
+ end
732
742
 
733
743
  def parse_attributes(*attrs)
734
744
  hash = {}
@@ -763,7 +773,7 @@ module FeedMe
763
773
  obj.nil? || obj.empty? || (obj.is_a?(String) && obj.strip.empty?)
764
774
  end
765
775
  end
766
-
776
+
767
777
  class FeedMeError < StandardError
768
778
  end
769
779
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: feedme
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.4
4
+ version: 0.8.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - John Didion
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-06-02 00:00:00 -04:00
12
+ date: 2010-06-15 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies: []
15
15