feedparser 2.2.0 → 2.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,264 +1,264 @@
1
- # encoding: utf-8
2
-
3
- module FeedParser
4
-
5
-
6
- class HyFeedBuilder
7
-
8
- include LogUtils::Logging
9
-
10
-
11
- def self.build( hash )
12
- feed = self.new( hash )
13
- feed.to_feed
14
- end
15
-
16
- def initialize( hash )
17
- @feed = build_feed( hash )
18
- end
19
-
20
- def to_feed
21
- @feed
22
- end
23
-
24
-
25
- def build_feed( h )
26
-
27
- b = HyBuilder.new( h ) ## convert hash to structs
28
-
29
- ## use first feed - more really possible?
30
- ## fix/todo: handle no feed too!!!
31
- hy = b.feeds[0]
32
-
33
- ## pp hy
34
-
35
- feed = Feed.new
36
- feed.format = 'html'
37
-
38
- ### todo: add
39
- ## - feed.title
40
- ## - feed.url
41
- ## - feed.feed_url
42
- ## - feed.summary
43
- ## - feed.authors
44
- ## etc.
45
-
46
- hy.entries.each do |entry|
47
- feed.items << build_item( entry )
48
- end
49
-
50
- feed # return new feed
51
- end # method build_feed
52
-
53
-
54
- def build_author( hy )
55
- author = Author.new
56
-
57
- author.name = hy.name
58
-
59
- ## todo - add:
60
- ## author.url
61
-
62
- author
63
- end
64
-
65
-
66
-
67
- def build_item( hy )
68
- item = Item.new # Item.new
69
-
70
- item.title = hy.name
71
- item.url = hy.url
72
- item.published_local = hy.published_local
73
- item.published = hy.published
74
-
75
- item.content_html = hy.content_html
76
- item.content_text = hy.content_text
77
- item.summary = hy.summary
78
-
79
- ## check: how to add an id - auto-generate - why? why not??
80
- ## item.id = h['id']
81
-
82
- hy.authors.each do |author|
83
- item.authors << build_author( author )
84
- end
85
-
86
- item
87
- end # method build_item
88
-
89
- end # class HyFeedBuilder
90
-
91
-
92
-
93
- class HyFeed
94
- attr_accessor :entries
95
-
96
- def initialize
97
- @entries = []
98
- end
99
- end # class HyFeed
100
-
101
-
102
- class HyEntry
103
- attr_accessor :name
104
- attr_accessor :content
105
- attr_accessor :content_text
106
- attr_accessor :summary
107
-
108
- attr_accessor :published # utc time
109
- attr_accessor :published_local # local time (with timezone/offset)
110
- attr_accessor :url
111
-
112
- attr_accessor :authors # note: allow multiple authors
113
-
114
- # note: title is an alias for name
115
- alias :title :name
116
- alias :title= :name=
117
-
118
- # note: content_html is an alias for name
119
- alias :content_html :content
120
- alias :content_html= :content=
121
-
122
- def initialize
123
- @authors = []
124
- end
125
-
126
- end ## class HyEntry
127
-
128
-
129
- class HyAuthor
130
- attr_accessor :name
131
- attr_accessor :url
132
- end ## class HyAuthor
133
-
134
-
135
-
136
-
137
- class HyBuilder
138
-
139
- attr_reader :feeds
140
-
141
- def initialize( hash )
142
- @h = hash
143
- @feeds = []
144
- build
145
-
146
- pp @feeds
147
- end
148
-
149
- def build
150
-
151
- entries = []
152
- @h['items'].each_with_index do |item_hash,i|
153
- puts "item #{i+1}:"
154
- pp item_hash
155
-
156
- types = item_hash['type']
157
- pp types
158
- if types.include?( 'h-feed' )
159
- @feeds << build_feed( item_hash )
160
- elsif types.include?( 'h-entry' )
161
- entries << build_entry( item_hash )
162
- else
163
- ## unknown type; skip for now
164
- end
165
- end
166
-
167
- ## wrap all "loose" entries in a "dummy" h-entry feed
168
- if entries.any?
169
- feed = HyFeed.new
170
- feed.entries = entries
171
- @feeds << feed
172
- end
173
-
174
- end # method build
175
-
176
- def build_feed( h )
177
- puts " build_feed"
178
-
179
- feed = HyFeed.new
180
-
181
- h['children'].each_with_index do |item_hash,i|
182
- puts "item #{i+1}:"
183
- pp item_hash
184
-
185
- types = item_hash['type']
186
- pp types
187
- if types.include?( 'h-entry' )
188
- feed.entries << build_entry( item_hash )
189
- else
190
- ## unknown type; skip for now
191
- end
192
- end
193
-
194
- feed
195
- end ## method build_feed
196
-
197
-
198
- def build_entry( h )
199
- puts " build_entry"
200
-
201
- entry = HyEntry.new
202
-
203
- props = h['properties']
204
- pp props
205
-
206
- entry.name = props['name'].join( ' ') # check an example with more entries (how to join??)
207
-
208
- if props['summary']
209
- entry.summary = props['summary'].join( ' ' )
210
- end
211
-
212
- if props['content']
213
- ## add up all value attribs in content
214
- entry.content_text = props['content'].map { |h| h[:value] }.join( ' ' ).strip
215
- ## add up all html attribs in content; plus strip leading n trailing whitespaces
216
- entry.content = props['content'].map { |h| h[:html] }.join( ' ' ).strip
217
- end
218
-
219
-
220
- # get first field in array -- check if really ever possible more than one? what does it mean (many dates)???
221
- ## todo: check if datetime is always utc (or local possible?)
222
- url_str = props.fetch( 'url', [] )[0]
223
- if url_str
224
- entry.url = url_str
225
- end
226
-
227
- # get first field in array -- check if really ever possible more than one? what does it mean (many dates)???
228
- ## todo: check if datetime is always utc (or local possible?)
229
- published_str = props.fetch( 'published', [] )[0]
230
- pp published_str
231
- if published_str
232
- ## entry.published = DateTime.iso8601( published_str )
233
- entry.published_local = DateTime.parse( published_str )
234
- entry.published = entry.published_local.utc
235
- end
236
-
237
- ## check for authors
238
- if props['author']
239
- props['author'].each do |author_hash|
240
- pp author_hash
241
- entry.authors << build_author( author_hash )
242
- end
243
- end
244
-
245
- entry
246
- end # method build_entry
247
-
248
- def build_author( h )
249
- puts " build_author"
250
-
251
- author = HyAuthor.new
252
-
253
- author.name = h['value']
254
-
255
- ## todo/fix: -- note: for now skip possible embedded h-card
256
- author
257
- end # method build_author
258
-
259
-
260
- end # class HyBuilder
261
-
262
-
263
-
264
- end # module FeedParser
1
+ # encoding: utf-8
2
+
3
+ module FeedParser
4
+
5
+
6
+ class HyFeedBuilder
7
+
8
+ include LogUtils::Logging
9
+
10
+
11
+ def self.build( hash )
12
+ feed = self.new( hash )
13
+ feed.to_feed
14
+ end
15
+
16
+ def initialize( hash )
17
+ @feed = build_feed( hash )
18
+ end
19
+
20
+ def to_feed
21
+ @feed
22
+ end
23
+
24
+
25
+ def build_feed( h )
26
+
27
+ b = HyBuilder.new( h ) ## convert hash to structs
28
+
29
+ ## use first feed - more really possible?
30
+ ## fix/todo: handle no feed too!!!
31
+ hy = b.feeds[0]
32
+
33
+ ## pp hy
34
+
35
+ feed = Feed.new
36
+ feed.format = 'html'
37
+
38
+ ### todo: add
39
+ ## - feed.title
40
+ ## - feed.url
41
+ ## - feed.feed_url
42
+ ## - feed.summary
43
+ ## - feed.authors
44
+ ## etc.
45
+
46
+ hy.entries.each do |entry|
47
+ feed.items << build_item( entry )
48
+ end
49
+
50
+ feed # return new feed
51
+ end # method build_feed
52
+
53
+
54
+ def build_author( hy )
55
+ author = Author.new
56
+
57
+ author.name = hy.name
58
+
59
+ ## todo - add:
60
+ ## author.url
61
+
62
+ author
63
+ end
64
+
65
+
66
+
67
+ def build_item( hy )
68
+ item = Item.new # Item.new
69
+
70
+ item.title = hy.name
71
+ item.url = hy.url
72
+ item.published_local = hy.published_local
73
+ item.published = hy.published
74
+
75
+ item.content_html = hy.content_html
76
+ item.content_text = hy.content_text
77
+ item.summary = hy.summary
78
+
79
+ ## check: how to add an id - auto-generate - why? why not??
80
+ ## item.id = h['id']
81
+
82
+ hy.authors.each do |author|
83
+ item.authors << build_author( author )
84
+ end
85
+
86
+ item
87
+ end # method build_item
88
+
89
+ end # class HyFeedBuilder
90
+
91
+
92
+
93
+ class HyFeed
94
+ attr_accessor :entries
95
+
96
+ def initialize
97
+ @entries = []
98
+ end
99
+ end # class HyFeed
100
+
101
+
102
+ class HyEntry
103
+ attr_accessor :name
104
+ attr_accessor :content
105
+ attr_accessor :content_text
106
+ attr_accessor :summary
107
+
108
+ attr_accessor :published # utc time
109
+ attr_accessor :published_local # local time (with timezone/offset)
110
+ attr_accessor :url
111
+
112
+ attr_accessor :authors # note: allow multiple authors
113
+
114
+ # note: title is an alias for name
115
+ alias :title :name
116
+ alias :title= :name=
117
+
118
+ # note: content_html is an alias for name
119
+ alias :content_html :content
120
+ alias :content_html= :content=
121
+
122
+ def initialize
123
+ @authors = []
124
+ end
125
+
126
+ end ## class HyEntry
127
+
128
+
129
+ class HyAuthor
130
+ attr_accessor :name
131
+ attr_accessor :url
132
+ end ## class HyAuthor
133
+
134
+
135
+
136
+
137
+ class HyBuilder
138
+
139
+ attr_reader :feeds
140
+
141
+ def initialize( hash )
142
+ @h = hash
143
+ @feeds = []
144
+ build
145
+
146
+ pp @feeds
147
+ end
148
+
149
+ def build
150
+
151
+ entries = []
152
+ @h['items'].each_with_index do |item_hash,i|
153
+ puts "item #{i+1}:"
154
+ pp item_hash
155
+
156
+ types = item_hash['type']
157
+ pp types
158
+ if types.include?( 'h-feed' )
159
+ @feeds << build_feed( item_hash )
160
+ elsif types.include?( 'h-entry' )
161
+ entries << build_entry( item_hash )
162
+ else
163
+ ## unknown type; skip for now
164
+ end
165
+ end
166
+
167
+ ## wrap all "loose" entries in a "dummy" h-entry feed
168
+ if entries.any?
169
+ feed = HyFeed.new
170
+ feed.entries = entries
171
+ @feeds << feed
172
+ end
173
+
174
+ end # method build
175
+
176
+ def build_feed( h )
177
+ puts " build_feed"
178
+
179
+ feed = HyFeed.new
180
+
181
+ h['children'].each_with_index do |item_hash,i|
182
+ puts "item #{i+1}:"
183
+ pp item_hash
184
+
185
+ types = item_hash['type']
186
+ pp types
187
+ if types.include?( 'h-entry' )
188
+ feed.entries << build_entry( item_hash )
189
+ else
190
+ ## unknown type; skip for now
191
+ end
192
+ end
193
+
194
+ feed
195
+ end ## method build_feed
196
+
197
+
198
+ def build_entry( h )
199
+ puts " build_entry"
200
+
201
+ entry = HyEntry.new
202
+
203
+ props = h['properties']
204
+ pp props
205
+
206
+ entry.name = props['name'].join( ' ') # check an example with more entries (how to join??)
207
+
208
+ if props['summary']
209
+ entry.summary = props['summary'].join( ' ' )
210
+ end
211
+
212
+ if props['content']
213
+ ## add up all value attribs in content
214
+ entry.content_text = props['content'].map { |h| h[:value] }.join( ' ' ).strip
215
+ ## add up all html attribs in content; plus strip leading n trailing whitespaces
216
+ entry.content = props['content'].map { |h| h[:html] }.join( ' ' ).strip
217
+ end
218
+
219
+
220
+ # get first field in array -- check if really ever possible more than one? what does it mean (many dates)???
221
+ ## todo: check if datetime is always utc (or local possible?)
222
+ url_str = props.fetch( 'url', [] )[0]
223
+ if url_str
224
+ entry.url = url_str
225
+ end
226
+
227
+ # get first field in array -- check if really ever possible more than one? what does it mean (many dates)???
228
+ ## todo: check if datetime is always utc (or local possible?)
229
+ published_str = props.fetch( 'published', [] )[0]
230
+ pp published_str
231
+ if published_str
232
+ ## entry.published = DateTime.iso8601( published_str )
233
+ entry.published_local = DateTime.parse( published_str )
234
+ entry.published = entry.published_local.utc
235
+ end
236
+
237
+ ## check for authors
238
+ if props['author']
239
+ props['author'].each do |author_hash|
240
+ pp author_hash
241
+ entry.authors << build_author( author_hash )
242
+ end
243
+ end
244
+
245
+ entry
246
+ end # method build_entry
247
+
248
+ def build_author( h )
249
+ puts " build_author"
250
+
251
+ author = HyAuthor.new
252
+
253
+ author.name = h['value']
254
+
255
+ ## todo/fix: -- note: for now skip possible embedded h-card
256
+ author
257
+ end # method build_author
258
+
259
+
260
+ end # class HyBuilder
261
+
262
+
263
+
264
+ end # module FeedParser
@@ -1,36 +1,36 @@
1
- # encoding: utf-8
2
-
3
- module FeedParser
4
-
5
- class Generator
6
-
7
- attr_accessor :name
8
- ## note: title is an alias for name
9
- alias :title :name
10
- alias :title= :name=
11
-
12
- attr_accessor :version
13
-
14
- attr_accessor :url
15
- ## note: uri is an alias for url
16
- alias :uri :url ## add atom alias for uri - why? why not?
17
- alias :uri= :url=
18
-
19
-
20
- attr_accessor :text # note: holds "unparsed" text (content) line form rss:generator
21
- alias :line :text # line|text (add str?? too)
22
-
23
-
24
- def to_s
25
- ## note: to_s - allows to use just generator in templates
26
- ## will by default return name if present or as fallback "unparsed" text line
27
- if @name ## not blank
28
- @name
29
- else
30
- @text
31
- end
32
- end
33
-
34
- end # class Generator
35
-
36
- end # module FeedParser
1
+ # encoding: utf-8
2
+
3
+ module FeedParser
4
+
5
+ class Generator
6
+
7
+ attr_accessor :name
8
+ ## note: title is an alias for name
9
+ alias :title :name
10
+ alias :title= :name=
11
+
12
+ attr_accessor :version
13
+
14
+ attr_accessor :url
15
+ ## note: uri is an alias for url
16
+ alias :uri :url ## add atom alias for uri - why? why not?
17
+ alias :uri= :url=
18
+
19
+
20
+ attr_accessor :text # note: holds "unparsed" text (content) line form rss:generator
21
+ alias :line :text # line|text (add str?? too)
22
+
23
+
24
+ def to_s
25
+ ## note: to_s - allows to use just generator in templates
26
+ ## will by default return name if present or as fallback "unparsed" text line
27
+ if @name ## not blank
28
+ @name
29
+ else
30
+ @text
31
+ end
32
+ end
33
+
34
+ end # class Generator
35
+
36
+ end # module FeedParser
@@ -28,7 +28,8 @@ class Parser
28
28
  ## check if starts with knownn xml prologs
29
29
  @head.start_with?( '<?xml' ) ||
30
30
  @head.start_with?( '<feed' ) ||
31
- @head.start_with?( '<rss' )
31
+ @head.start_with?( '<rss' ) ||
32
+ @head.start_with?( '<rdf' ) # note - add support for rss 1.0 (aka rdf)
32
33
  end
33
34
  alias_method :xml?, :is_xml?
34
35
 
@@ -1,23 +1,23 @@
1
- # encoding: utf-8
2
-
3
- module FeedParser
4
-
5
- class Tag
6
-
7
- attr_accessor :name
8
- ## note: title n term are aliases for name
9
- alias :title :name
10
- alias :title= :name=
11
-
12
- alias :term :name
13
- alias :term= :name=
14
-
15
-
16
- attr_accessor :scheme ## use scheme_url -why? why not? is it always a url/uri??
17
- ## note: domain (rss) is an alias for scheme (atom)
18
- alias :domain :scheme
19
- alias :domain= :scheme=
20
-
21
- end # class Tag
22
-
23
- end # module FeedParser
1
+ # encoding: utf-8
2
+
3
+ module FeedParser
4
+
5
+ class Tag
6
+
7
+ attr_accessor :name
8
+ ## note: title n term are aliases for name
9
+ alias :title :name
10
+ alias :title= :name=
11
+
12
+ alias :term :name
13
+ alias :term= :name=
14
+
15
+
16
+ attr_accessor :scheme ## use scheme_url -why? why not? is it always a url/uri??
17
+ ## note: domain (rss) is an alias for scheme (atom)
18
+ alias :domain :scheme
19
+ alias :domain= :scheme=
20
+
21
+ end # class Tag
22
+
23
+ end # module FeedParser