feedme 0.8.4 → 0.8.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/lib/feedme.rb +140 -130
- metadata +2 -2
data/History.txt
CHANGED
data/lib/feedme.rb
CHANGED
@@ -52,6 +52,8 @@ module FeedMe
|
|
52
52
|
# A hash of functions for selecting the correct value to return when a tag
|
53
53
|
# has multiple values and the singular accessor is called
|
54
54
|
attr_accessor :value_selectors
|
55
|
+
# Value selector to use if there is no value selector defined for a tag
|
56
|
+
attr_accessor :default_value_selector
|
55
57
|
# A hash of attribute/tag name aliases.
|
56
58
|
attr_accessor :aliases
|
57
59
|
# An array of the transformation functions applied when the !
|
@@ -75,16 +77,16 @@ module FeedMe
|
|
75
77
|
@options = options
|
76
78
|
|
77
79
|
# rss tags
|
78
|
-
|
79
|
-
|
80
|
-
|
80
|
+
@rss_tags = [
|
81
|
+
{
|
82
|
+
:image => nil,
|
81
83
|
:textinput => nil,
|
82
84
|
:skiphours => nil,
|
83
85
|
:skipdays => nil,
|
84
86
|
:items => [{ :rdf_seq => nil }],
|
85
87
|
#:item => @rss_item_tags
|
86
|
-
|
87
|
-
|
88
|
+
}
|
89
|
+
]
|
88
90
|
@rss_item_tags = [ {} ]
|
89
91
|
|
90
92
|
#atom tags
|
@@ -124,45 +126,51 @@ module FeedMe
|
|
124
126
|
links.first
|
125
127
|
end
|
126
128
|
}
|
129
|
+
@default_value_selector = proc do |x|
|
130
|
+
x = x.sort do |a,b|
|
131
|
+
a.is_a?(String) ? -1 : (b.is_a?(String) ? 1 : 0)
|
132
|
+
end
|
133
|
+
x.first
|
134
|
+
end
|
127
135
|
|
128
136
|
# tag/attribute aliases
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
137
|
+
@aliases = {
|
138
|
+
:items => :item_array,
|
139
|
+
:item_array => :entry_array,
|
140
|
+
:entries => :entry_array,
|
141
|
+
:entry_array => :item_array,
|
142
|
+
:link => :'link+self'
|
143
|
+
}
|
144
|
+
|
145
|
+
# transformations
|
146
|
+
@html_helper_lib = HPRICOT_HELPER
|
147
|
+
@default_transformation = [ :cleanHtml ]
|
148
|
+
@transformations = {}
|
149
|
+
@transformation_fns = {
|
150
|
+
# remove all HTML tags
|
151
|
+
:stripHtml => proc do |str|
|
152
|
+
require @html_helper_lib
|
153
|
+
FeedMe.html_helper.strip_html(str)
|
154
|
+
end,
|
155
|
+
|
156
|
+
# clean HTML content using FeedNormalizer's HtmlCleaner class
|
157
|
+
:cleanHtml => proc do |str|
|
158
|
+
require @html_helper_lib
|
159
|
+
FeedMe.html_helper.clean_html(str)
|
160
|
+
end,
|
161
|
+
|
162
|
+
# wrap text at a certain number of characters (respecting word boundaries)
|
163
|
+
:wrap => proc do |str, col|
|
164
|
+
str.gsub(/(.{1,#{col}})( +|$\n?)|(.{1,#{col}})/, "\\1\\3\n").strip
|
165
|
+
end,
|
166
|
+
|
167
|
+
# truncate text, respecting word boundaries
|
168
|
+
:trunc => proc {|str, wordcount| str.trunc(wordcount.to_i) },
|
161
169
|
|
162
170
|
# truncate HTML and leave enclosing HTML tags
|
163
171
|
:truncHtml => proc do |str, wordcount|
|
164
172
|
require @html_helper_lib
|
165
|
-
|
173
|
+
FeedMe.html_helper.truncate_html(str, wordcount.to_i)
|
166
174
|
end,
|
167
175
|
|
168
176
|
:regexp => proc do |str, regexp|
|
@@ -176,7 +184,7 @@ module FeedMe
|
|
176
184
|
|
177
185
|
# apply an arbitrary function
|
178
186
|
:apply => proc {|str, fn, *args| fn.call(str, *args) }
|
179
|
-
|
187
|
+
}
|
180
188
|
end
|
181
189
|
|
182
190
|
# Prepare tag list for an RSS feed.
|
@@ -227,8 +235,8 @@ module FeedMe
|
|
227
235
|
|
228
236
|
# Parse +source+ using a +Parser+ created from this +ParserBuilder+.
|
229
237
|
def parse(source)
|
230
|
-
|
231
|
-
|
238
|
+
Parser.new(self, source, options)
|
239
|
+
end
|
232
240
|
end
|
233
241
|
|
234
242
|
# This class is used to create strict parsers
|
@@ -239,9 +247,9 @@ module FeedMe
|
|
239
247
|
super(options)
|
240
248
|
|
241
249
|
# rss tags
|
242
|
-
|
243
|
-
|
244
|
-
|
250
|
+
@rss_tags = [
|
251
|
+
{
|
252
|
+
:image => [ :url, :title, :link, :width, :height, :description ],
|
245
253
|
:textinput => [ :title, :description, :name, :link ],
|
246
254
|
:skiphours => [ :hour ],
|
247
255
|
:skipdays => [ :day ],
|
@@ -252,20 +260,20 @@ module FeedMe
|
|
252
260
|
:rdf_seq
|
253
261
|
],
|
254
262
|
#:item => @item_tags
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
+
},
|
264
|
+
:title, :link, :description, # required
|
265
|
+
:language, :copyright, :managingeditor, :webmaster, # optional
|
266
|
+
:pubdate, :lastbuilddate, :category, :generator,
|
267
|
+
:docs, :cloud, :ttl, :rating,
|
268
|
+
:image, :textinput, :skiphours, :skipdays, :item, # have subtags
|
269
|
+
:items
|
270
|
+
]
|
263
271
|
@rss_item_tags = [
|
264
272
|
{},
|
265
273
|
:title, :description, # required
|
266
274
|
:link, :author, :category, :comments, :enclosure, # optional
|
267
275
|
:guid, :pubdate, :source, :expirationdate
|
268
|
-
|
276
|
+
]
|
269
277
|
|
270
278
|
#atom tags
|
271
279
|
person_tags = [ :name, :uri, :email ]
|
@@ -318,7 +326,7 @@ module FeedMe
|
|
318
326
|
all_tags[0][:entry] = atom_entry_tags + (item_ext_tags or [])
|
319
327
|
return all_tags
|
320
328
|
end
|
321
|
-
|
329
|
+
end
|
322
330
|
|
323
331
|
class FeedData
|
324
332
|
attr_reader :fm_tag_name, :fm_parent, :fm_builder
|
@@ -429,8 +437,10 @@ module FeedMe
|
|
429
437
|
elt = if array.size > 1
|
430
438
|
if (!args.empty? && args.first.is_a?(Proc))
|
431
439
|
args.first.call(array)
|
432
|
-
elsif (fm_builder.
|
433
|
-
value_selectors[name].call(array)
|
440
|
+
elsif (fm_builder.value_selectors.key?(name))
|
441
|
+
fm_builder.value_selectors[name].call(array)
|
442
|
+
elsif !fm_builder.default_value_selector.nil?
|
443
|
+
fm_builder.default_value_selector.call(array)
|
434
444
|
end
|
435
445
|
end
|
436
446
|
elt || array.first
|
@@ -462,16 +472,16 @@ module FeedMe
|
|
462
472
|
end
|
463
473
|
value
|
464
474
|
elsif name_str.include?('+')
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
+
name_data = name_str.split('+')
|
476
|
+
rel = name_data[1]
|
477
|
+
value = nil
|
478
|
+
call_virtual_method(arrayize(name_data[0]), args, history).each do |elt|
|
479
|
+
next unless elt.is_a?(FeedData) and elt.rel?
|
480
|
+
value = elt if elt.rel.casecmp(rel) == 0
|
481
|
+
break unless value.nil?
|
482
|
+
end
|
483
|
+
value
|
484
|
+
elsif fm_builder.aliases.key? name
|
475
485
|
names = fm_builder.aliases[name]
|
476
486
|
names = [names] unless names.is_a? Array
|
477
487
|
value = nil
|
@@ -534,8 +544,8 @@ module FeedMe
|
|
534
544
|
protected
|
535
545
|
|
536
546
|
def clean_tag(tag)
|
537
|
-
|
538
|
-
|
547
|
+
tag.to_s.downcase.gsub(':','_').intern
|
548
|
+
end
|
539
549
|
|
540
550
|
# generate a name for the array variable corresponding to a single-value variable
|
541
551
|
def arrayize(key)
|
@@ -569,12 +579,12 @@ module FeedMe
|
|
569
579
|
|
570
580
|
def initialize(builder, source, options={})
|
571
581
|
super(nil, nil, builder)
|
572
|
-
|
582
|
+
@fm_source = source.respond_to?(:read) ? source.read : source.to_s
|
573
583
|
@fm_options = Hash.new.update(options)
|
574
584
|
@fm_parsed = []
|
575
585
|
@fm_unparsed = []
|
576
|
-
|
577
|
-
|
586
|
+
parse
|
587
|
+
end
|
578
588
|
|
579
589
|
def channel() self end
|
580
590
|
alias :feed :channel
|
@@ -616,7 +626,7 @@ module FeedMe
|
|
616
626
|
else
|
617
627
|
raise FeedMeError, "Poorly formatted feed"
|
618
628
|
end
|
619
|
-
|
629
|
+
end
|
620
630
|
|
621
631
|
# References within the <channel> element are replaced by the actual
|
622
632
|
def dereference_rdf_tags(rdf_tag, rss_tag, refs)
|
@@ -640,61 +650,61 @@ module FeedMe
|
|
640
650
|
end
|
641
651
|
end
|
642
652
|
|
643
|
-
|
644
|
-
|
645
|
-
|
653
|
+
def parse_content(parent, attrs, content, tags)
|
654
|
+
# add attributes to parent
|
655
|
+
attrs.each_pair {|key, value| parent[key] = unescape(value) }
|
646
656
|
return if content.nil?
|
647
657
|
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
652
|
-
|
653
|
-
|
654
|
-
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
|
660
|
-
|
661
|
-
|
658
|
+
# split the content into elements
|
659
|
+
elements = {}
|
660
|
+
# TODO: this will break if a namespace is used that is not rss: or atom:
|
661
|
+
content.scan( %r{(<([\w:]+)(.*?)(?:/>|>(.*?)</\2>))}mi ) do |match|
|
662
|
+
# \1 = full content (from start to end tag), \2 = tag name
|
663
|
+
# \3 = attributes, and \4 = content between tags
|
664
|
+
key = clean_tag(match[1])
|
665
|
+
value = [parse_attributes(match[2]), match[3]]
|
666
|
+
if elements.key? key
|
667
|
+
elements[key] << value
|
668
|
+
else
|
669
|
+
elements[key] = [value]
|
670
|
+
end
|
671
|
+
end
|
662
672
|
|
663
673
|
# the first item in a tag array may be a hash that defines tags that have subtags
|
664
|
-
|
665
|
-
|
666
|
-
|
667
|
-
|
668
|
-
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
+
sub_tags = tags[0] if !nil_or_empty?(tags) && tags[0].is_a?(Hash)
|
675
|
+
first_tag = sub_tags.nil? || tags.size == 1 ? 0 : 1
|
676
|
+
# if this is a promiscuous parser, tag names will depend on the elements found in the feed
|
677
|
+
tags = elements.keys if (sub_tags.nil? ? nil_or_empty?(tags) : first_tag == 0)
|
678
|
+
|
679
|
+
# iterate over all tags (some or all of which may not be present)
|
680
|
+
tags[first_tag..-1].each do |tag|
|
681
|
+
key = clean_tag(tag)
|
682
|
+
element_array = elements.delete(tag) or next
|
683
|
+
@fm_parsed << key
|
674
684
|
|
675
|
-
|
676
|
-
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
685
|
+
element_array.each do |elt|
|
686
|
+
elt_attrs = elt[0]
|
687
|
+
elt_content = elt[1]
|
688
|
+
rels = fm_builder.rels[key] if fm_builder.respond_to?(:rels)
|
689
|
+
|
690
|
+
# if a list of accepted rels is specified, only parse this tag
|
691
|
+
# if its rel attribute is included in the list
|
692
|
+
next unless rels.nil? || elt_attrs.nil? || !elt_attrs.rel? || rels.include?(elt_attrs.rel)
|
693
|
+
|
694
|
+
if !sub_tags.nil? && sub_tags.key?(key)
|
695
|
+
new_parent = FeedData.new(key, parent, fm_builder)
|
696
|
+
add_tag(parent, key, new_parent)
|
697
|
+
parse_content(new_parent, elt_attrs, elt_content, sub_tags[key])
|
698
|
+
else
|
699
|
+
add_tag(parent, key, clean_content(key, elt_attrs, elt_content, parent))
|
700
|
+
end
|
701
|
+
end
|
702
|
+
end
|
693
703
|
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
704
|
+
@fm_unparsed += elements.keys
|
705
|
+
@fm_parsed.uniq!
|
706
|
+
@fm_unparsed.uniq!
|
707
|
+
end
|
698
708
|
|
699
709
|
def add_tag(hash, key, value)
|
700
710
|
array_var = arrayize(key)
|
@@ -710,13 +720,13 @@ module FeedMe
|
|
710
720
|
camelize(underscore(tag).downcase, false)
|
711
721
|
end
|
712
722
|
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
719
|
-
|
723
|
+
def clean_content(tag, attrs, content, parent)
|
724
|
+
content = content.to_s
|
725
|
+
if fm_builder.date_tags.include? tag
|
726
|
+
content = Time.parse(content) rescue unescape(content)
|
727
|
+
else
|
728
|
+
content = unescape(content)
|
729
|
+
end
|
720
730
|
|
721
731
|
unless attrs.empty?
|
722
732
|
hash = FeedData.new(tag, parent, fm_builder)
|
@@ -728,7 +738,7 @@ module FeedMe
|
|
728
738
|
end
|
729
739
|
|
730
740
|
return content
|
731
|
-
|
741
|
+
end
|
732
742
|
|
733
743
|
def parse_attributes(*attrs)
|
734
744
|
hash = {}
|
@@ -763,7 +773,7 @@ module FeedMe
|
|
763
773
|
obj.nil? || obj.empty? || (obj.is_a?(String) && obj.strip.empty?)
|
764
774
|
end
|
765
775
|
end
|
766
|
-
|
776
|
+
|
767
777
|
class FeedMeError < StandardError
|
768
778
|
end
|
769
779
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: feedme
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.
|
4
|
+
version: 0.8.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- John Didion
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-06-
|
12
|
+
date: 2010-06-15 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|