polyrex-feed-reader 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/polyrex-feed-reader.rb +156 -0
- metadata +84 -0
@@ -0,0 +1,156 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
# file: polyrex-feed-reader.rb
|
4
|
+
|
5
|
+
require 'polyrex'
|
6
|
+
require 'open-uri'
|
7
|
+
require 'rexml/document'
|
8
|
+
require 'builder'
|
9
|
+
require 'time'
|
10
|
+
require 'date'
|
11
|
+
require 'chronic'
|
12
|
+
|
13
|
+
|
14
|
+
# Duration helpers: each method converts the receiver, read as a count of the
# named unit, into a number of seconds (e.g. 5.minutes => 300).
#
# These are defined on Integer rather than Fixnum. Fixnum and Bignum were
# unified into Integer in Ruby 2.4 and the Fixnum constant was removed in
# Ruby 3.2, so reopening Fixnum there would create an unrelated class and
# leave integer literals without these methods. On older Rubies Fixnum
# inherits from Integer, so existing callers keep working.
class Integer
  # @return [Integer] the receiver unchanged (already a number of seconds)
  def seconds
    self
  end

  # @return [Integer] the receiver in minutes, expressed as seconds
  def minutes
    self * 60
  end

  # @return [Integer] the receiver in hours, expressed as seconds
  def hours
    self * 60 * 60
  end

  # @return [Integer] the receiver in days, expressed as seconds
  def days
    self * 3600 * 24
  end

  # @return [Integer] the receiver in weeks, expressed as seconds
  def weeks
    self * 3600 * 24 * 7
  end

  # A month is approximated as 30 days.
  # @return [Integer] the receiver in 30-day months, expressed as seconds
  def months
    self * 86400 * 30
  end

  # Singular spellings so that `1.hour` reads naturally.
  alias second seconds
  alias minute minutes
  alias hour hours
  alias day days
  alias week weeks
  alias month months
end
|
24
|
+
|
25
|
+
|
26
|
+
# Polls the RSS feeds listed in a Polyrex document (columns of feeds, each
# feed holding up to three item records), stores the latest headlines back
# into that document, and renders a flat XML summary of all feeds.
class PolyrexFeedReader
  include REXML

  # Polyrex schema used when the feeds file does not exist yet.
  SCHEMA = 'feeds/column[id]/feed[rss_url,title,important,occurrence,recent,url, xhtml, xhtml_mobile, last_modified]/item[title,link,description]'

  # Number of headline slots kept per feed.
  ITEMS_PER_FEED = 3

  # Durations in seconds. Kept local to the class so the age arithmetic does
  # not depend on any core-class extension being loaded.
  MINUTE = 60
  HOUR   = 60 * MINUTE
  DAY    = 24 * HOUR
  WEEK   = 7 * DAY
  MONTH  = 30 * DAY

  # Upper age bound (inclusive, in seconds) paired with its recency label,
  # in ascending order. Anything older than the last bound is 'coldx6months'.
  RECENCY_BANDS = [
    [5 * MINUTE, 'hot'],
    [4 * HOUR,   'warm'],
    [WEEK,       'cold'],
    [MONTH,      'coldx1week'],
    [6 * MONTH,  'coldx1month']
  ].freeze

  # Opens the Polyrex document at +file_path+, or creates and saves an empty
  # one with SCHEMA when no such file exists.
  #
  # @param file_path [String] location of the Polyrex feeds file
  def initialize(file_path)
    @file_path = file_path

    # File.exist? is used because File.exists? was removed in Ruby 3.2.
    if File.exist?(@file_path)
      @feeds = Polyrex.new @file_path
    else
      @feeds = Polyrex.new SCHEMA
      @feeds.save @file_path
    end
  end

  # Hands +lines+ to the underlying Polyrex document's parser.
  #
  # @param lines [String] raw text understood by Polyrex#parse
  # @return whatever Polyrex#parse returns
  def parse(lines)
    @feeds.parse(lines)
  end

  # Refreshes every feed that is scheduled for the current time, re-sorts each
  # column by last-modified time (newest first), saves the document, and
  # returns an XML summary of all feeds that have at least one item.
  #
  # Network and XML parsing errors are not rescued and propagate to the caller.
  #
  # @return [String] the rendered XML document
  def read
    @feeds.records.each do |col|
      col.records.each do |feed|
        refresh(feed) if scheduled?(feed.occurrence)
      end

      col.records.sort_by! { |x| -modified_epoch(x.text('summary/last_modified')) }
    end

    @feeds.save @file_path

    columns = @feeds.records.map do |column|
      column.records.select { |feed| feed.records.length > 0 }
    end

    render(interleave(columns))
  end

  private

  # Downloads one feed and updates its item records, or only its recency
  # label when the newest headline has not changed.
  def refresh(feed)
    # URI#open (from open-uri) replaces Kernel#open, which stopped opening
    # URLs in Ruby 3.0; the block form closes the response IO.
    body = URI.parse(feed.rss_url).open('User-Agent' => 'PolyrexFeedReader') { |io| io.read }
    rss_items = XPath.match(Document.new(body).root, '//item')[0...ITEMS_PER_FEED]
    return if rss_items.empty? # nothing to compare against or to store

    was_empty = feed.records.length <= 0

    # Top up to ITEMS_PER_FEED slots so fetch_items never indexes a missing
    # record. NOTE(review): assumes feed.create.item appends one empty item
    # record, as the original code relied on -- confirm against Polyrex.
    (ITEMS_PER_FEED - feed.records.length).times { feed.create.item }

    # Compare headline text with headline text; comparing the item record
    # itself with a String would always report a change.
    if was_empty || feed.item[0].title.to_s != rss_items[0].text('title').to_s
      fetch_items(rss_items, feed)
    else
      feed.recent = recency(feed.last_modified)
    end
  end

  # Sort key for a last-modified value: seconds since the epoch, or 0 when
  # the value is blank or unparsable so such feeds sort last instead of
  # aborting the whole read.
  def modified_epoch(value)
    Time.parse(value.to_s).to_i
  rescue ArgumentError
    0
  end

  # Merges per-column feed lists round-robin: first feed of every column,
  # then the second of every column, and so on. Columns may differ in length
  # (or be absent altogether); no feed is dropped and no nil is produced.
  #
  # @param columns [Array<Array>] one array of feeds per column
  # @return [Array] the feeds in interleaved order
  def interleave(columns)
    depth = columns.map(&:length).max || 0
    (0...depth).flat_map { |row| columns.map { |column| column[row] } }.compact
  end

  # Builds the XML summary for +feeds+ in a Polyrex-style layout
  # (summary + records at each level).
  #
  # @return [String] the XML text
  def render(feeds)
    buffer = ''
    xml = Builder::XmlMarkup.new(:target => buffer, :indent => 2)
    xml.instruct! :xml, :version => "1.0", :encoding => "UTF-8"

    xml.feeds do
      xml.summary do
        xml.recordx_type 'polyrex'
      end
      xml.records do
        feeds.each do |feed|
          xml.feed do
            xml.summary do
              xml.title feed.title
              xml.last_modified feed.last_modified
              xml.recent feed.recent
              xml.important feed.important
            end
            xml.records do
              feed.records.each do |item|
                xml.item do
                  xml.title item.title
                  xml.description item.description
                end
              end
            end # /records
          end # /feed
        end
      end # /records
    end

    buffer
  end

  # Classifies how long ago +time+ was, using RECENCY_BANDS.
  # Ages below one second, including timestamps slightly in the future,
  # count as 'hot'.
  #
  # @param time [String, Time] the feed's last-modified timestamp
  # @return [String] one of 'hot', 'warm', 'cold', 'coldx1week',
  #   'coldx1month', 'coldx6months'
  # @raise [ArgumentError] if +time+ cannot be parsed
  def recency(time)
    age = Time.now - Time.parse(time.to_s)
    band = RECENCY_BANDS.find { |limit, _label| age <= limit }
    band ? band.last : 'coldx6months'
  end

  # Copies the RSS headlines into the feed's item records, stamps the feed as
  # just modified and 'hot', and for feeds flagged 'important' also stores the
  # first item's description.
  def fetch_items(rss_items, feed)
    important = feed.important.to_s.downcase == 'important'
    feed.last_modified = Time.now
    feed.recent = 'hot'

    rss_items.each_with_index do |rss_item, i|
      feed.item[i].title = rss_item.text('title')
    end

    newest = rss_items.first
    feed.item[0].description = newest.text('description') if important && newest
  end

  # Tells whether the current time falls inside any occurrence listed in +s+.
  # A blank (or nil) schedule means "always".
  #
  # @param s [String, nil] comma-separated occurrences, e.g. "mon, fri 9am"
  # @return [Boolean]
  def scheduled?(s='')
    entries = s.to_s.split(/,/).map(&:strip)
    return true if entries.empty?

    now = Time.now
    # Today's weekday name (full or abbreviated) is rewritten to 'today' so
    # it resolves to the current day rather than to next week's.
    todays_name = /#{Date::DAYNAMES[now.wday]}|#{Date::ABBR_DAYNAMES[now.wday]}/i

    entries.any? do |entry|
      window = occurrence_window(entry.sub(todays_name, 'today'))
      window ? now.between?(*window) : false
    end
  end

  # Resolves one occurrence phrase to a [start, finish] pair of Times.
  # Multi-word phrases use the span Chronic reports; single words are taken
  # to mean the whole calendar day Chronic resolves them to.
  #
  # @return [Array<Time>, nil] nil when Chronic cannot interpret the phrase
  def occurrence_window(entry)
    if entry.split(/\s/).length > 1
      span = Chronic.parse(entry, guess: false)
      span && [span.first, span.last]
    else
      day = Chronic.parse(entry)
      return nil unless day

      midnight = Time.local(day.year, day.month, day.day)
      [midnight, midnight + DAY]
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,84 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: polyrex-feed-reader
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors: []
|
7
|
+
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2010-06-13 00:00:00 +01:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: polyrex
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: builder
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "0"
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: chronic
|
37
|
+
type: :runtime
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: "0"
|
44
|
+
version:
|
45
|
+
description:
|
46
|
+
email:
|
47
|
+
executables: []
|
48
|
+
|
49
|
+
extensions: []
|
50
|
+
|
51
|
+
extra_rdoc_files: []
|
52
|
+
|
53
|
+
files:
|
54
|
+
- lib/polyrex-feed-reader.rb
|
55
|
+
has_rdoc: true
|
56
|
+
homepage:
|
57
|
+
licenses: []
|
58
|
+
|
59
|
+
post_install_message:
|
60
|
+
rdoc_options: []
|
61
|
+
|
62
|
+
require_paths:
|
63
|
+
- lib
|
64
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: "0"
|
69
|
+
version:
|
70
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - ">="
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: "0"
|
75
|
+
version:
|
76
|
+
requirements: []
|
77
|
+
|
78
|
+
rubyforge_project:
|
79
|
+
rubygems_version: 1.3.5
|
80
|
+
signing_key:
|
81
|
+
specification_version: 3
|
82
|
+
summary: polyrex-feed-reader
|
83
|
+
test_files: []
|
84
|
+
|