feedparser 2.2.0 → 2.2.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,264 +1,264 @@
1
- # encoding: utf-8
2
-
3
- module FeedParser
4
-
5
-
6
- class HyFeedBuilder
7
-
8
- include LogUtils::Logging
9
-
10
-
11
- def self.build( hash )
12
- feed = self.new( hash )
13
- feed.to_feed
14
- end
15
-
16
- def initialize( hash )
17
- @feed = build_feed( hash )
18
- end
19
-
20
- def to_feed
21
- @feed
22
- end
23
-
24
-
25
- def build_feed( h )
26
-
27
- b = HyBuilder.new( h ) ## convert hash to structs
28
-
29
- ## use first feed - more really possible?
30
- ## fix/todo: handle no feed too!!!
31
- hy = b.feeds[0]
32
-
33
- ## pp hy
34
-
35
- feed = Feed.new
36
- feed.format = 'html'
37
-
38
- ### todo: add
39
- ## - feed.title
40
- ## - feed.url
41
- ## - feed.feed_url
42
- ## - feed.summary
43
- ## - feed.authors
44
- ## etc.
45
-
46
- hy.entries.each do |entry|
47
- feed.items << build_item( entry )
48
- end
49
-
50
- feed # return new feed
51
- end # method build_feed
52
-
53
-
54
- def build_author( hy )
55
- author = Author.new
56
-
57
- author.name = hy.name
58
-
59
- ## todo - add:
60
- ## author.url
61
-
62
- author
63
- end
64
-
65
-
66
-
67
- def build_item( hy )
68
- item = Item.new # Item.new
69
-
70
- item.title = hy.name
71
- item.url = hy.url
72
- item.published_local = hy.published_local
73
- item.published = hy.published
74
-
75
- item.content_html = hy.content_html
76
- item.content_text = hy.content_text
77
- item.summary = hy.summary
78
-
79
- ## check: how to add an id - auto-generate - why? why not??
80
- ## item.id = h['id']
81
-
82
- hy.authors.each do |author|
83
- item.authors << build_author( author )
84
- end
85
-
86
- item
87
- end # method build_item
88
-
89
- end # class HyFeedBuilder
90
-
91
-
92
-
93
- class HyFeed
94
- attr_accessor :entries
95
-
96
- def initialize
97
- @entries = []
98
- end
99
- end # class HyFeed
100
-
101
-
102
- class HyEntry
103
- attr_accessor :name
104
- attr_accessor :content
105
- attr_accessor :content_text
106
- attr_accessor :summary
107
-
108
- attr_accessor :published # utc time
109
- attr_accessor :published_local # local time (with timezone/offset)
110
- attr_accessor :url
111
-
112
- attr_accessor :authors # note: allow multiple authors
113
-
114
- # note: title is an alias for name
115
- alias :title :name
116
- alias :title= :name=
117
-
118
- # note: content_html is an alias for content
119
- alias :content_html :content
120
- alias :content_html= :content=
121
-
122
- def initialize
123
- @authors = []
124
- end
125
-
126
- end ## class HyEntry
127
-
128
-
129
- class HyAuthor
130
- attr_accessor :name
131
- attr_accessor :url
132
- end ## class HyAuthor
133
-
134
-
135
-
136
-
137
- class HyBuilder
138
-
139
- attr_reader :feeds
140
-
141
- def initialize( hash )
142
- @h = hash
143
- @feeds = []
144
- build
145
-
146
- pp @feeds
147
- end
148
-
149
- def build
150
-
151
- entries = []
152
- @h['items'].each_with_index do |item_hash,i|
153
- puts "item #{i+1}:"
154
- pp item_hash
155
-
156
- types = item_hash['type']
157
- pp types
158
- if types.include?( 'h-feed' )
159
- @feeds << build_feed( item_hash )
160
- elsif types.include?( 'h-entry' )
161
- entries << build_entry( item_hash )
162
- else
163
- ## unknown type; skip for now
164
- end
165
- end
166
-
167
- ## wrap all "loose" entries in a "dummy" h-entry feed
168
- if entries.any?
169
- feed = HyFeed.new
170
- feed.entries = entries
171
- @feeds << feed
172
- end
173
-
174
- end # method build
175
-
176
- def build_feed( h )
177
- puts " build_feed"
178
-
179
- feed = HyFeed.new
180
-
181
- h['children'].each_with_index do |item_hash,i|
182
- puts "item #{i+1}:"
183
- pp item_hash
184
-
185
- types = item_hash['type']
186
- pp types
187
- if types.include?( 'h-entry' )
188
- feed.entries << build_entry( item_hash )
189
- else
190
- ## unknown type; skip for now
191
- end
192
- end
193
-
194
- feed
195
- end ## method build_feed
196
-
197
-
198
- def build_entry( h )
199
- puts " build_entry"
200
-
201
- entry = HyEntry.new
202
-
203
- props = h['properties']
204
- pp props
205
-
206
- entry.name = props['name'].join( ' ') # check an example with more entries (how to join??)
207
-
208
- if props['summary']
209
- entry.summary = props['summary'].join( ' ' )
210
- end
211
-
212
- if props['content']
213
- ## add up all value attribs in content
214
- entry.content_text = props['content'].map { |h| h[:value] }.join( ' ' ).strip
215
- ## add up all html attribs in content; plus strip leading n trailing whitespaces
216
- entry.content = props['content'].map { |h| h[:html] }.join( ' ' ).strip
217
- end
218
-
219
-
220
- # get first url in array -- check if more than one is really ever possible? what would it mean (many urls)???
221
- ## todo: check if datetime is always utc (or local possible?)
222
- url_str = props.fetch( 'url', [] )[0]
223
- if url_str
224
- entry.url = url_str
225
- end
226
-
227
- # get first field in array -- check if really ever possible more than one? what does it mean (many dates)???
228
- ## todo: check if datetime is always utc (or local possible?)
229
- published_str = props.fetch( 'published', [] )[0]
230
- pp published_str
231
- if published_str
232
- ## entry.published = DateTime.iso8601( published_str )
233
- entry.published_local = DateTime.parse( published_str )
234
- entry.published = entry.published_local.utc
235
- end
236
-
237
- ## check for authors
238
- if props['author']
239
- props['author'].each do |author_hash|
240
- pp author_hash
241
- entry.authors << build_author( author_hash )
242
- end
243
- end
244
-
245
- entry
246
- end # method build_entry
247
-
248
- def build_author( h )
249
- puts " build_author"
250
-
251
- author = HyAuthor.new
252
-
253
- author.name = h['value']
254
-
255
- ## todo/fix: -- note: for now skip possible embedded h-card
256
- author
257
- end # method build_author
258
-
259
-
260
- end # class HyBuilder
261
-
262
-
263
-
264
- end # module FeedParser
1
+ # encoding: utf-8
2
+
3
+ module FeedParser
4
+
5
+
6
+ class HyFeedBuilder
7
+
8
+ include LogUtils::Logging
9
+
10
+
11
+ def self.build( hash )
12
+ feed = self.new( hash )
13
+ feed.to_feed
14
+ end
15
+
16
+ def initialize( hash )
17
+ @feed = build_feed( hash )
18
+ end
19
+
20
+ def to_feed
21
+ @feed
22
+ end
23
+
24
+
25
+ def build_feed( h )
26
+
27
+ b = HyBuilder.new( h ) ## convert hash to structs
28
+
29
+ ## use first feed - more really possible?
30
+ ## fix/todo: handle no feed too!!!
31
+ hy = b.feeds[0]
32
+
33
+ ## pp hy
34
+
35
+ feed = Feed.new
36
+ feed.format = 'html'
37
+
38
+ ### todo: add
39
+ ## - feed.title
40
+ ## - feed.url
41
+ ## - feed.feed_url
42
+ ## - feed.summary
43
+ ## - feed.authors
44
+ ## etc.
45
+
46
+ hy.entries.each do |entry|
47
+ feed.items << build_item( entry )
48
+ end
49
+
50
+ feed # return new feed
51
+ end # method build_feed
52
+
53
+
54
+ def build_author( hy )
55
+ author = Author.new
56
+
57
+ author.name = hy.name
58
+
59
+ ## todo - add:
60
+ ## author.url
61
+
62
+ author
63
+ end
64
+
65
+
66
+
67
+ def build_item( hy )
68
+ item = Item.new # Item.new
69
+
70
+ item.title = hy.name
71
+ item.url = hy.url
72
+ item.published_local = hy.published_local
73
+ item.published = hy.published
74
+
75
+ item.content_html = hy.content_html
76
+ item.content_text = hy.content_text
77
+ item.summary = hy.summary
78
+
79
+ ## check: how to add an id - auto-generate - why? why not??
80
+ ## item.id = h['id']
81
+
82
+ hy.authors.each do |author|
83
+ item.authors << build_author( author )
84
+ end
85
+
86
+ item
87
+ end # method build_item
88
+
89
+ end # class HyFeedBuilder
90
+
91
+
92
+
93
+ class HyFeed
94
+ attr_accessor :entries
95
+
96
+ def initialize
97
+ @entries = []
98
+ end
99
+ end # class HyFeed
100
+
101
+
102
+ class HyEntry
103
+ attr_accessor :name
104
+ attr_accessor :content
105
+ attr_accessor :content_text
106
+ attr_accessor :summary
107
+
108
+ attr_accessor :published # utc time
109
+ attr_accessor :published_local # local time (with timezone/offset)
110
+ attr_accessor :url
111
+
112
+ attr_accessor :authors # note: allow multiple authors
113
+
114
+ # note: title is an alias for name
115
+ alias :title :name
116
+ alias :title= :name=
117
+
118
+ # note: content_html is an alias for content
119
+ alias :content_html :content
120
+ alias :content_html= :content=
121
+
122
+ def initialize
123
+ @authors = []
124
+ end
125
+
126
+ end ## class HyEntry
127
+
128
+
129
+ class HyAuthor
130
+ attr_accessor :name
131
+ attr_accessor :url
132
+ end ## class HyAuthor
133
+
134
+
135
+
136
+
137
+ class HyBuilder
138
+
139
+ attr_reader :feeds
140
+
141
+ def initialize( hash )
142
+ @h = hash
143
+ @feeds = []
144
+ build
145
+
146
+ pp @feeds
147
+ end
148
+
149
+ def build
150
+
151
+ entries = []
152
+ @h['items'].each_with_index do |item_hash,i|
153
+ puts "item #{i+1}:"
154
+ pp item_hash
155
+
156
+ types = item_hash['type']
157
+ pp types
158
+ if types.include?( 'h-feed' )
159
+ @feeds << build_feed( item_hash )
160
+ elsif types.include?( 'h-entry' )
161
+ entries << build_entry( item_hash )
162
+ else
163
+ ## unknown type; skip for now
164
+ end
165
+ end
166
+
167
+ ## wrap all "loose" entries in a "dummy" h-entry feed
168
+ if entries.any?
169
+ feed = HyFeed.new
170
+ feed.entries = entries
171
+ @feeds << feed
172
+ end
173
+
174
+ end # method build
175
+
176
+ def build_feed( h )
177
+ puts " build_feed"
178
+
179
+ feed = HyFeed.new
180
+
181
+ h['children'].each_with_index do |item_hash,i|
182
+ puts "item #{i+1}:"
183
+ pp item_hash
184
+
185
+ types = item_hash['type']
186
+ pp types
187
+ if types.include?( 'h-entry' )
188
+ feed.entries << build_entry( item_hash )
189
+ else
190
+ ## unknown type; skip for now
191
+ end
192
+ end
193
+
194
+ feed
195
+ end ## method build_feed
196
+
197
+
198
+ def build_entry( h )
199
+ puts " build_entry"
200
+
201
+ entry = HyEntry.new
202
+
203
+ props = h['properties']
204
+ pp props
205
+
206
+ entry.name = props['name'].join( ' ') # check an example with more entries (how to join??)
207
+
208
+ if props['summary']
209
+ entry.summary = props['summary'].join( ' ' )
210
+ end
211
+
212
+ if props['content']
213
+ ## add up all value attribs in content
214
+ entry.content_text = props['content'].map { |h| h[:value] }.join( ' ' ).strip
215
+ ## add up all html attribs in content; plus strip leading n trailing whitespaces
216
+ entry.content = props['content'].map { |h| h[:html] }.join( ' ' ).strip
217
+ end
218
+
219
+
220
+ # get first url in array -- check if more than one is really ever possible? what would it mean (many urls)???
221
+ ## todo: check if datetime is always utc (or local possible?)
222
+ url_str = props.fetch( 'url', [] )[0]
223
+ if url_str
224
+ entry.url = url_str
225
+ end
226
+
227
+ # get first field in array -- check if really ever possible more than one? what does it mean (many dates)???
228
+ ## todo: check if datetime is always utc (or local possible?)
229
+ published_str = props.fetch( 'published', [] )[0]
230
+ pp published_str
231
+ if published_str
232
+ ## entry.published = DateTime.iso8601( published_str )
233
+ entry.published_local = DateTime.parse( published_str )
234
+ entry.published = entry.published_local.utc
235
+ end
236
+
237
+ ## check for authors
238
+ if props['author']
239
+ props['author'].each do |author_hash|
240
+ pp author_hash
241
+ entry.authors << build_author( author_hash )
242
+ end
243
+ end
244
+
245
+ entry
246
+ end # method build_entry
247
+
248
+ def build_author( h )
249
+ puts " build_author"
250
+
251
+ author = HyAuthor.new
252
+
253
+ author.name = h['value']
254
+
255
+ ## todo/fix: -- note: for now skip possible embedded h-card
256
+ author
257
+ end # method build_author
258
+
259
+
260
+ end # class HyBuilder
261
+
262
+
263
+
264
+ end # module FeedParser
@@ -1,36 +1,36 @@
1
- # encoding: utf-8
2
-
3
- module FeedParser
4
-
5
- class Generator
6
-
7
- attr_accessor :name
8
- ## note: title is an alias for name
9
- alias :title :name
10
- alias :title= :name=
11
-
12
- attr_accessor :version
13
-
14
- attr_accessor :url
15
- ## note: uri is an alias for url
16
- alias :uri :url ## add atom alias for uri - why? why not?
17
- alias :uri= :url=
18
-
19
-
20
- attr_accessor :text # note: holds "unparsed" text (content) line from rss:generator
21
- alias :line :text # line|text (add str?? too)
22
-
23
-
24
- def to_s
25
- ## note: to_s - allows to use just generator in templates
26
- ## will by default return name if present or as fallback "unparsed" text line
27
- if @name ## not blank
28
- @name
29
- else
30
- @text
31
- end
32
- end
33
-
34
- end # class Generator
35
-
36
- end # module FeedParser
1
+ # encoding: utf-8
2
+
3
+ module FeedParser
4
+
5
+ class Generator
6
+
7
+ attr_accessor :name
8
+ ## note: title is an alias for name
9
+ alias :title :name
10
+ alias :title= :name=
11
+
12
+ attr_accessor :version
13
+
14
+ attr_accessor :url
15
+ ## note: uri is an alias for url
16
+ alias :uri :url ## add atom alias for uri - why? why not?
17
+ alias :uri= :url=
18
+
19
+
20
+ attr_accessor :text # note: holds "unparsed" text (content) line from rss:generator
21
+ alias :line :text # line|text (add str?? too)
22
+
23
+
24
+ def to_s
25
+ ## note: to_s - allows to use just generator in templates
26
+ ## will by default return name if present or as fallback "unparsed" text line
27
+ if @name ## not blank
28
+ @name
29
+ else
30
+ @text
31
+ end
32
+ end
33
+
34
+ end # class Generator
35
+
36
+ end # module FeedParser
@@ -28,7 +28,8 @@ class Parser
28
28
  ## check if starts with known xml prologs
29
29
  @head.start_with?( '<?xml' ) ||
30
30
  @head.start_with?( '<feed' ) ||
31
- @head.start_with?( '<rss' )
31
+ @head.start_with?( '<rss' ) ||
32
+ @head.start_with?( '<rdf' ) # note - add support for rss 1.0 (aka rdf)
32
33
  end
33
34
  alias_method :xml?, :is_xml?
34
35
 
@@ -1,23 +1,23 @@
1
- # encoding: utf-8
2
-
3
- module FeedParser
4
-
5
- class Tag
6
-
7
- attr_accessor :name
8
- ## note: title n term are aliases for name
9
- alias :title :name
10
- alias :title= :name=
11
-
12
- alias :term :name
13
- alias :term= :name=
14
-
15
-
16
- attr_accessor :scheme ## use scheme_url -why? why not? is it always a url/uri??
17
- ## note: domain (rss) is an alias for scheme (atom)
18
- alias :domain :scheme
19
- alias :domain= :scheme=
20
-
21
- end # class Tag
22
-
23
- end # module FeedParser
1
+ # encoding: utf-8
2
+
3
+ module FeedParser
4
+
5
+ class Tag
6
+
7
+ attr_accessor :name
8
+ ## note: title n term are aliases for name
9
+ alias :title :name
10
+ alias :title= :name=
11
+
12
+ alias :term :name
13
+ alias :term= :name=
14
+
15
+
16
+ attr_accessor :scheme ## use scheme_url -why? why not? is it always a url/uri??
17
+ ## note: domain (rss) is an alias for scheme (atom)
18
+ alias :domain :scheme
19
+ alias :domain= :scheme=
20
+
21
+ end # class Tag
22
+
23
+ end # module FeedParser