feedme 0.8.4 → 0.8.5
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/lib/feedme.rb +140 -130
- metadata +2 -2
data/History.txt
CHANGED
data/lib/feedme.rb
CHANGED
@@ -52,6 +52,8 @@ module FeedMe
|
|
52
52
|
# A hash of functions for selecting the correct value to return when a tag
|
53
53
|
# has multiple values and the singular accessor is called
|
54
54
|
attr_accessor :value_selectors
|
55
|
+
# Value selector to use if there is no value selector defined for a tag
|
56
|
+
attr_accessor :default_value_selector
|
55
57
|
# A hash of attribute/tag name aliases.
|
56
58
|
attr_accessor :aliases
|
57
59
|
# An array of the transformation functions applied when the !
|
@@ -75,16 +77,16 @@ module FeedMe
|
|
75
77
|
@options = options
|
76
78
|
|
77
79
|
# rss tags
|
78
|
-
|
79
|
-
|
80
|
-
|
80
|
+
@rss_tags = [
|
81
|
+
{
|
82
|
+
:image => nil,
|
81
83
|
:textinput => nil,
|
82
84
|
:skiphours => nil,
|
83
85
|
:skipdays => nil,
|
84
86
|
:items => [{ :rdf_seq => nil }],
|
85
87
|
#:item => @rss_item_tags
|
86
|
-
|
87
|
-
|
88
|
+
}
|
89
|
+
]
|
88
90
|
@rss_item_tags = [ {} ]
|
89
91
|
|
90
92
|
#atom tags
|
@@ -124,45 +126,51 @@ module FeedMe
|
|
124
126
|
links.first
|
125
127
|
end
|
126
128
|
}
|
129
|
+
@default_value_selector = proc do |x|
|
130
|
+
x = x.sort do |a,b|
|
131
|
+
a.is_a?(String) ? -1 : (b.is_a?(String) ? 1 : 0)
|
132
|
+
end
|
133
|
+
x.first
|
134
|
+
end
|
127
135
|
|
128
136
|
# tag/attribute aliases
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
137
|
+
@aliases = {
|
138
|
+
:items => :item_array,
|
139
|
+
:item_array => :entry_array,
|
140
|
+
:entries => :entry_array,
|
141
|
+
:entry_array => :item_array,
|
142
|
+
:link => :'link+self'
|
143
|
+
}
|
144
|
+
|
145
|
+
# transformations
|
146
|
+
@html_helper_lib = HPRICOT_HELPER
|
147
|
+
@default_transformation = [ :cleanHtml ]
|
148
|
+
@transformations = {}
|
149
|
+
@transformation_fns = {
|
150
|
+
# remove all HTML tags
|
151
|
+
:stripHtml => proc do |str|
|
152
|
+
require @html_helper_lib
|
153
|
+
FeedMe.html_helper.strip_html(str)
|
154
|
+
end,
|
155
|
+
|
156
|
+
# clean HTML content using FeedNormalizer's HtmlCleaner class
|
157
|
+
:cleanHtml => proc do |str|
|
158
|
+
require @html_helper_lib
|
159
|
+
FeedMe.html_helper.clean_html(str)
|
160
|
+
end,
|
161
|
+
|
162
|
+
# wrap text at a certain number of characters (respecting word boundaries)
|
163
|
+
:wrap => proc do |str, col|
|
164
|
+
str.gsub(/(.{1,#{col}})( +|$\n?)|(.{1,#{col}})/, "\\1\\3\n").strip
|
165
|
+
end,
|
166
|
+
|
167
|
+
# truncate text, respecting word boundaries
|
168
|
+
:trunc => proc {|str, wordcount| str.trunc(wordcount.to_i) },
|
161
169
|
|
162
170
|
# truncate HTML and leave enclosing HTML tags
|
163
171
|
:truncHtml => proc do |str, wordcount|
|
164
172
|
require @html_helper_lib
|
165
|
-
|
173
|
+
FeedMe.html_helper.truncate_html(str, wordcount.to_i)
|
166
174
|
end,
|
167
175
|
|
168
176
|
:regexp => proc do |str, regexp|
|
@@ -176,7 +184,7 @@ module FeedMe
|
|
176
184
|
|
177
185
|
# apply an arbitrary function
|
178
186
|
:apply => proc {|str, fn, *args| fn.call(str, *args) }
|
179
|
-
|
187
|
+
}
|
180
188
|
end
|
181
189
|
|
182
190
|
# Prepare tag list for an RSS feed.
|
@@ -227,8 +235,8 @@ module FeedMe
|
|
227
235
|
|
228
236
|
# Parse +source+ using a +Parser+ created from this +ParserBuilder+.
|
229
237
|
def parse(source)
|
230
|
-
|
231
|
-
|
238
|
+
Parser.new(self, source, options)
|
239
|
+
end
|
232
240
|
end
|
233
241
|
|
234
242
|
# This class is used to create strict parsers
|
@@ -239,9 +247,9 @@ module FeedMe
|
|
239
247
|
super(options)
|
240
248
|
|
241
249
|
# rss tags
|
242
|
-
|
243
|
-
|
244
|
-
|
250
|
+
@rss_tags = [
|
251
|
+
{
|
252
|
+
:image => [ :url, :title, :link, :width, :height, :description ],
|
245
253
|
:textinput => [ :title, :description, :name, :link ],
|
246
254
|
:skiphours => [ :hour ],
|
247
255
|
:skipdays => [ :day ],
|
@@ -252,20 +260,20 @@ module FeedMe
|
|
252
260
|
:rdf_seq
|
253
261
|
],
|
254
262
|
#:item => @item_tags
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
+
},
|
264
|
+
:title, :link, :description, # required
|
265
|
+
:language, :copyright, :managingeditor, :webmaster, # optional
|
266
|
+
:pubdate, :lastbuilddate, :category, :generator,
|
267
|
+
:docs, :cloud, :ttl, :rating,
|
268
|
+
:image, :textinput, :skiphours, :skipdays, :item, # have subtags
|
269
|
+
:items
|
270
|
+
]
|
263
271
|
@rss_item_tags = [
|
264
272
|
{},
|
265
273
|
:title, :description, # required
|
266
274
|
:link, :author, :category, :comments, :enclosure, # optional
|
267
275
|
:guid, :pubdate, :source, :expirationdate
|
268
|
-
|
276
|
+
]
|
269
277
|
|
270
278
|
#atom tags
|
271
279
|
person_tags = [ :name, :uri, :email ]
|
@@ -318,7 +326,7 @@ module FeedMe
|
|
318
326
|
all_tags[0][:entry] = atom_entry_tags + (item_ext_tags or [])
|
319
327
|
return all_tags
|
320
328
|
end
|
321
|
-
|
329
|
+
end
|
322
330
|
|
323
331
|
class FeedData
|
324
332
|
attr_reader :fm_tag_name, :fm_parent, :fm_builder
|
@@ -429,8 +437,10 @@ module FeedMe
|
|
429
437
|
elt = if array.size > 1
|
430
438
|
if (!args.empty? && args.first.is_a?(Proc))
|
431
439
|
args.first.call(array)
|
432
|
-
elsif (fm_builder.
|
433
|
-
value_selectors[name].call(array)
|
440
|
+
elsif (fm_builder.value_selectors.key?(name))
|
441
|
+
fm_builder.value_selectors[name].call(array)
|
442
|
+
elsif !fm_builder.default_value_selector.nil?
|
443
|
+
fm_builder.default_value_selector.call(array)
|
434
444
|
end
|
435
445
|
end
|
436
446
|
elt || array.first
|
@@ -462,16 +472,16 @@ module FeedMe
|
|
462
472
|
end
|
463
473
|
value
|
464
474
|
elsif name_str.include?('+')
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
+
name_data = name_str.split('+')
|
476
|
+
rel = name_data[1]
|
477
|
+
value = nil
|
478
|
+
call_virtual_method(arrayize(name_data[0]), args, history).each do |elt|
|
479
|
+
next unless elt.is_a?(FeedData) and elt.rel?
|
480
|
+
value = elt if elt.rel.casecmp(rel) == 0
|
481
|
+
break unless value.nil?
|
482
|
+
end
|
483
|
+
value
|
484
|
+
elsif fm_builder.aliases.key? name
|
475
485
|
names = fm_builder.aliases[name]
|
476
486
|
names = [names] unless names.is_a? Array
|
477
487
|
value = nil
|
@@ -534,8 +544,8 @@ module FeedMe
|
|
534
544
|
protected
|
535
545
|
|
536
546
|
def clean_tag(tag)
|
537
|
-
|
538
|
-
|
547
|
+
tag.to_s.downcase.gsub(':','_').intern
|
548
|
+
end
|
539
549
|
|
540
550
|
# generate a name for the array variable corresponding to a single-value variable
|
541
551
|
def arrayize(key)
|
@@ -569,12 +579,12 @@ module FeedMe
|
|
569
579
|
|
570
580
|
def initialize(builder, source, options={})
|
571
581
|
super(nil, nil, builder)
|
572
|
-
|
582
|
+
@fm_source = source.respond_to?(:read) ? source.read : source.to_s
|
573
583
|
@fm_options = Hash.new.update(options)
|
574
584
|
@fm_parsed = []
|
575
585
|
@fm_unparsed = []
|
576
|
-
|
577
|
-
|
586
|
+
parse
|
587
|
+
end
|
578
588
|
|
579
589
|
def channel() self end
|
580
590
|
alias :feed :channel
|
@@ -616,7 +626,7 @@ module FeedMe
|
|
616
626
|
else
|
617
627
|
raise FeedMeError, "Poorly formatted feed"
|
618
628
|
end
|
619
|
-
|
629
|
+
end
|
620
630
|
|
621
631
|
# References within the <channel> element are replaced by the actual
|
622
632
|
def dereference_rdf_tags(rdf_tag, rss_tag, refs)
|
@@ -640,61 +650,61 @@ module FeedMe
|
|
640
650
|
end
|
641
651
|
end
|
642
652
|
|
643
|
-
|
644
|
-
|
645
|
-
|
653
|
+
def parse_content(parent, attrs, content, tags)
|
654
|
+
# add attributes to parent
|
655
|
+
attrs.each_pair {|key, value| parent[key] = unescape(value) }
|
646
656
|
return if content.nil?
|
647
657
|
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
658
|
+
# split the content into elements
|
659
|
+
elements = {}
|
660
|
+
# TODO: this will break if a namespace is used that is not rss: or atom:
|
661
|
+
content.scan( %r{(<([\w:]+)(.*?)(?:/>|>(.*?)</\2>))}mi ) do |match|
|
662
|
+
# \1 = full content (from start to end tag), \2 = tag name
|
663
|
+
# \3 = attributes, and \4 = content between tags
|
664
|
+
key = clean_tag(match[1])
|
665
|
+
value = [parse_attributes(match[2]), match[3]]
|
666
|
+
if elements.key? key
|
667
|
+
elements[key] << value
|
668
|
+
else
|
669
|
+
elements[key] = [value]
|
670
|
+
end
|
671
|
+
end
|
662
672
|
|
663
673
|
# the first item in a tag array may be a hash that defines tags that have subtags
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
+
sub_tags = tags[0] if !nil_or_empty?(tags) && tags[0].is_a?(Hash)
|
675
|
+
first_tag = sub_tags.nil? || tags.size == 1 ? 0 : 1
|
676
|
+
# if this is a promiscuous parser, tag names will depend on the elements found in the feed
|
677
|
+
tags = elements.keys if (sub_tags.nil? ? nil_or_empty?(tags) : first_tag == 0)
|
678
|
+
|
679
|
+
# iterate over all tags (some or all of which may not be present)
|
680
|
+
tags[first_tag..-1].each do |tag|
|
681
|
+
key = clean_tag(tag)
|
682
|
+
element_array = elements.delete(tag) or next
|
683
|
+
@fm_parsed << key
|
674
684
|
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
685
|
+
element_array.each do |elt|
|
686
|
+
elt_attrs = elt[0]
|
687
|
+
elt_content = elt[1]
|
688
|
+
rels = fm_builder.rels[key] if fm_builder.respond_to?(:rels)
|
689
|
+
|
690
|
+
# if a list of accepted rels is specified, only parse this tag
|
691
|
+
# if its rel attribute is included in the list
|
692
|
+
next unless rels.nil? || elt_attrs.nil? || !elt_attrs.rel? || rels.include?(elt_attrs.rel)
|
693
|
+
|
694
|
+
if !sub_tags.nil? && sub_tags.key?(key)
|
695
|
+
new_parent = FeedData.new(key, parent, fm_builder)
|
696
|
+
add_tag(parent, key, new_parent)
|
697
|
+
parse_content(new_parent, elt_attrs, elt_content, sub_tags[key])
|
698
|
+
else
|
699
|
+
add_tag(parent, key, clean_content(key, elt_attrs, elt_content, parent))
|
700
|
+
end
|
701
|
+
end
|
702
|
+
end
|
693
703
|
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
704
|
+
@fm_unparsed += elements.keys
|
705
|
+
@fm_parsed.uniq!
|
706
|
+
@fm_unparsed.uniq!
|
707
|
+
end
|
698
708
|
|
699
709
|
def add_tag(hash, key, value)
|
700
710
|
array_var = arrayize(key)
|
@@ -710,13 +720,13 @@ module FeedMe
|
|
710
720
|
camelize(underscore(tag).downcase, false)
|
711
721
|
end
|
712
722
|
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
723
|
+
def clean_content(tag, attrs, content, parent)
|
724
|
+
content = content.to_s
|
725
|
+
if fm_builder.date_tags.include? tag
|
726
|
+
content = Time.parse(content) rescue unescape(content)
|
727
|
+
else
|
728
|
+
content = unescape(content)
|
729
|
+
end
|
720
730
|
|
721
731
|
unless attrs.empty?
|
722
732
|
hash = FeedData.new(tag, parent, fm_builder)
|
@@ -728,7 +738,7 @@ module FeedMe
|
|
728
738
|
end
|
729
739
|
|
730
740
|
return content
|
731
|
-
|
741
|
+
end
|
732
742
|
|
733
743
|
def parse_attributes(*attrs)
|
734
744
|
hash = {}
|
@@ -763,7 +773,7 @@ module FeedMe
|
|
763
773
|
obj.nil? || obj.empty? || (obj.is_a?(String) && obj.strip.empty?)
|
764
774
|
end
|
765
775
|
end
|
766
|
-
|
776
|
+
|
767
777
|
class FeedMeError < StandardError
|
768
778
|
end
|
769
779
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feedme
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.
|
4
|
+
version: 0.8.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Didion
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-06-
|
12
|
+
date: 2010-06-15 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|