fastreader 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/fastreader.rb ADDED
@@ -0,0 +1,283 @@
+ $KCODE = 'u'
+ require 'jcode'
+ require 'rubygems' # I thought of using datamapper, but using activerecord will avoid installation
+ # problems for people with macs
+ require 'simple-rss'
+ require 'activerecord'
+ require 'action_view/helpers/date_helper.rb'
+ require 'yaml'
+ require 'feed-normalizer'
+ require 'open-uri'
+ require 'hpricot'
+ $:.unshift File.dirname(__FILE__)
+ require 'curses'
+ require 'curses_extensions'
+ require 'feed'
+ require 'virtual_feed'
+ require 'entry'
+ require 'autodiscovery'
+ require 'character_cleaner'
+ require 'display'
+ require 'entry_controller'
+ require 'feeds_controller'
+ require 'entries_controller'
+ require 'curses_controller'
+ require 'menu_window'
+ require 'entry_window'
+ require 'command_window'
+ require 'menu_pager'
+ require 'opml'
+ require 'logger'
+ require 'fileutils'
+ include FileUtils
+ include ActionView::Helpers::DateHelper
+
+ class Fastreader
+   VERSION = '1.0.0'
+
+   attr_accessor :database_path
+
+   def setup_database(database_path)
+     unless File.exist?(database_path)
+       # copy the stock starter feed database (which just contains a few feed
+       # subscriptions) to the database_path
+       default_db = File.dirname(__FILE__) + "/../db/default.sqlite3"
+       cp(default_db, database_path)
+     end
+   end
+
+   def initialize(options={})
+     @curses = options[:curses] # If true it is in curses mode
+     @debug = false # change if there is an option
+     @environment = options[:environment]
+
+     if ['test', 'development'].include?(@environment)
+       config = File.open(File.dirname(__FILE__) + '/../config/database.yml')
+       dbconfig = YAML::load(config)[@environment]
+     else
+       dbconfig = {"timeout"=>5000, "adapter"=>"sqlite3"}.merge({'database' => options[:database]})
+       database_path = options[:database] || ENV['HOME'] + '/fastreader.sqlite3'
+       setup_database(database_path)
+     end
+
+     ActiveRecord::Base.establish_connection(dbconfig)
+
+     # establish logging if in development mode
+     if @debug
+       log_file_path = File.dirname(__FILE__) + "/../log/#{@environment}.log"
+       log_file = File.open(log_file_path, 'a')
+       log_file.sync = true
+     else
+       log_file = STDOUT
+     end
+     ActiveRecord::Base.logger = Logger.new(log_file)
+     ActiveRecord::Base.logger.level = Logger::INFO
+
+
+     # get Display object
+     @display = Display.new(options)
+   end
+
+   def parse(command)
+     self.instance_eval(command)
+   end
+
+   def auto_discover_and_subscribe(url)
+     uri = URI.parse(url)
+     feed_url = Autodiscovery.new(fetch(url)).discover
+     if feed_url
+       feed_url = uri.merge(feed_url).to_s
+       puts "Found feed: #{feed_url}"
+       return feed_url
+     else
+       puts "Can't find feed for #{url}"
+       return nil
+     end
+   end
+
+   def puts(string)
+     if @output_block
+       @output_block.call(string)
+     else
+       STDOUT.puts( string )
+     end
+   end
+
+   def import_opml(opml)
+     importer = OPMLImporter.new(opml)
+     feeds = importer.feed_urls.each do | url |
+       subscribe(url)
+     end
+   end
+
+   def subscribe(feed_url, &block)
+
+     if @output_block.nil? && (block_given? || block)
+       @output_block = block
+     end
+
+     # try to repair the URL if possible
+     unless feed_url =~ /^http:\/\//
+       feed_url = "http://" + feed_url
+     end
+
+     puts "Subscribing to #{feed_url}"
+     begin
+       xml = fetch(feed_url)
+     rescue SocketError
+       puts "Error trying to load page at #{feed_url}"
+       return
+     end
+     if xml.nil?
+       puts "Can't find any resource at #{feed_url}"
+       return
+     end
+
+     LOGGER.debug( "xml length: %s, feed_url: %s, block: %s" % [xml.length, feed_url, block.class])
+     feed = Feed.create_feed( xml, feed_url.strip, &block )
+     LOGGER.debug(feed.class)
+
+     if feed.nil?
+
+       puts "Can't find feed at #{feed_url}"
+       puts "Attempting autodiscovery..."
+
+       feed_url = auto_discover_and_subscribe(feed_url)
+       if feed_url
+         puts "Subscribing to #{feed_url}"
+         xml = fetch(feed_url)
+
+         feed = Feed.create_feed( xml, feed_url.strip, &block )
+       end
+     end
+     feed
+   end
+
+   def update(options = {}, &block)
+     if @output_block.nil? && (block_given? || block)
+       @output_block = block
+     end
+
+     num = 0
+     if feed_id = options[:feed_id]
+
+       f = Feed.find(feed_id)
+
+       puts "Updating from #{f.feed_url}"
+
+       result = f.update_self( fetch(f.feed_url), options[:force], &block )
+       num += result || 0
+
+     else
+       Feed.find(:all).each {|f|
+
+         begin
+           puts f.feed_url
+           result = f.update_self( fetch(f.feed_url) )
+           num += result || 0
+         rescue
+           puts "Error trying to update from #{f.feed_url}! Skipping for now."
+         end
+
+       }
+     end
+     # Return the number updated
+     return num
+   end
+
+   def delete_all
+     Feed.delete_all
+   end
+
+   # Shows the +number+ most recent posts across all feeds
+   def most_recent(number=10)
+     entries = Entry.find(:all,
+                          :order => "last_updated desc",
+                          :limit => number)
+     @display.display_entries(entries)
+   end
+
+   def list
+     # Add virtual feeds here
+     feeds = Feed.feeds_list
+     @display.list_feeds( feeds )
+   end
+
+   alias_method :ls, :list
+
+   # a simple wrapper over open-uri call. Easier to mock in testing.
+   def fetch(url)
+     begin
+       open(url).read
+     rescue Timeout::Error
+       puts "-> attempt to fetch #{url} timed out"
+     rescue Exception => e
+       puts "-> error trying to fetch #{url}: #{$!}"
+     end
+   end
+
+   def get_binding
+     return binding()
+   end
+ end
+
+
+ # for development
+ def reload
+   puts "Reloading " + __FILE__
+   load __FILE__
+ end
+
+ def preprocess(command_string)
+
+   # Preprocessing steps to make the command a valid Ruby statement:
+
+   # If the command is simply a URL, then subscribe to it
+   if command_string.strip =~ /^http:/
+     command_string = "subscribe " + command_string
+   end
+
+
+   # Surround any URL with quotes:
+   command_string = command_string.gsub(/(http:[^\s]*)/, '"\1"')
+
+   # The default action is to list feeds
+   if command_string.strip == ""
+     command_string = "ls"
+   end
+   command_string
+ end
+
+ # set up logging, especially for the curses part
+ #logfile = File.open(File.dirname(__FILE__) + "/../textfeeds_development.log", "a")
+ logfile = STDOUT # production mode
+ LOGGER = Logger.new(logfile)
+ LOGGER.level = Logger::INFO
+
+ database = ENV['HOME'] + '/fastreader.sqlite3'
+ FASTREADER_CONTROLLER = Fastreader.new(:database => database,
+                                        :no_links => true,
+                                        :simple => true,
+                                        :curses => true,
+                                        :width => 60)
+
+ def run(argv)
+   # If there are arguments, then interpret them directly. Otherwise, start
+   # an interactive session.
+
+   command = preprocess(argv.join(' '))
+
+   # If the command is an OPML file, import the feeds in it
+   if command.strip =~ /\.opml$/
+     puts "Importing OPML: #{command}"
+     FASTREADER_CONTROLLER.import_opml File.read(command.strip)
+   else
+     eval(command, FASTREADER_CONTROLLER.get_binding)
+   end
+
+ end
+
+ if __FILE__ == $0
+   run(ARGV)
+ end
+
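For orientation, a minimal usage sketch of the class above (not taken from the package itself): it assumes the gem's supporting files (display, feed, entry, and so on) are on the load path, and the feed URL is only a placeholder.

    require 'fastreader'

    # Placeholder URL; any feed or site URL is handled the same way.
    reader = Fastreader.new(:database => ENV['HOME'] + '/fastreader.sqlite3')
    reader.subscribe('http://example.com/feed.xml') { |msg| STDERR.puts msg }
    new_items = reader.update           # refresh every subscribed feed
    reader.most_recent(5)               # print the five newest entries
    puts "#{new_items} new items"
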
data/lib/feed.rb ADDED
@@ -0,0 +1,222 @@
+ class Feed < ActiveRecord::Base
+   serialize :urls
+   serialize :authors
+   serialize :skip_hours
+   serialize :skip_days
+   has_many :entries,
+            :order => "date_published desc, created_at desc, id desc",
+            :dependent => :destroy # TODO make sure the user can override this
+
+   # Takes a URL and creates a feed object and subscription
+   def self.create_feed(xml, feed_url, &block)
+
+     if block_given? || block
+       @output_block = block
+     end
+
+     feed = FeedNormalizer::FeedNormalizer.parse(xml, :force_parser => FeedNormalizer::SimpleRssParser)
+     return nil unless feed.is_a?(FeedNormalizer::Feed)
+
+     puts "Looking for #{feed_url} in the database"
+     if found_feed = Feed.find_by_feed_url(feed_url)
+
+       puts "Feed already exists"
+
+       # Update it
+       puts found_feed.import_entries(feed)
+
+       return found_feed
+
+     end
+     puts "Not found. Subscribing."
+
+     new_feed = Feed.create(:feed_id => feed.id,
+                            :title => feed.title.strip,
+                            # It's very important that this is feed_url and not feed.url:
+                            :feed_url => feed_url.strip,
+                            :urls => feed.urls.map {|x| x.strip},
+                            :parser => feed.parser,
+                            :last_updated => feed.last_updated || Time.now,
+                            :authors => feed.authors,
+                            :copyright => feed.copyright,
+                            :image => feed.image,
+                            :generator => feed.generator,
+                            :ttl => feed.ttl,
+                            :skip_hours => feed.skip_hours,
+                            :skip_days => feed.skip_days)
+     # create entries
+     new_feed.import_entries(feed)
+     new_feed
+   end
+
+   def self.feeds_list
+     feeds = []
+     feeds = feeds + Feed.find(:all, :order => "title asc")
+
+     flagged_entries = VirtualFeed.new
+     flagged_entries.title = "Flagged Entries"
+     flagged_entries.finder_params = {:conditions => "flagged is not null", :order => "flagged desc"}
+
+     feeds << flagged_entries
+
+     all_entries = VirtualFeed.new
+     all_entries.title = "All Entries"
+     all_entries.finder_params = {:order => "id desc"}
+
+     feeds << all_entries
+
+     feeds
+   end
+
+   def puts(string)
+     if @output_block
+       @output_block.call(string)
+     else
+       STDOUT.puts( string )
+     end
+   end
+
+   # Takes a FeedNormalizer::Feed object
+   def import_entries(feed)
+
+     num_new_items = 0
+
+     # Reverse the entries because they are most recent first.
+     feed.entries.reverse.each do |entry|
+       # Check if the entry already exists
+       # puts "Looking for existing entry with id #{entry.id}"
+       if (existing_entry = self.entries.find(:first,
+             :conditions => ["entry_id = ?", entry.id ? entry.id : entry.url ]))
+
+         # Do nothing if the entry has not been updated
+         if existing_entry.last_updated == entry.last_updated
+           #puts "Skipping #{entry.title}. Already exists."
+           next
+
+         # The entry has been updated, so update it.
+         else
+           puts "Updating #{entry.title}"
+           update_entry(existing_entry, entry)
+           next
+         end
+
+       else
+         puts "Importing #{entry.title}"
+         num_new_items += 1
+         import_entry(entry)
+       end
+     end
+
+     num_new_items
+   end
+
+   # Takes a FeedNormalizer::Entry object
+   def import_entry(entry)
+     unless entry.id || entry.url
+       puts "Skipping #{entry.title}. Bad item. No entry id or url detected."
+       return
+     end
+
+     self.entries.create(:title => entry.title,
+                         :description => entry.description,
+                         :content => entry.content,
+                         :categories => entry.categories,
+                         :date_published => entry.date_published || entry.last_updated,
+                         :url => entry.url,
+                         :urls => entry.urls,
+                         # If the entry.id is nil, use the entry.url (this
+                         # happens for some reason on Slashdot and maybe other
+                         # websites).
+                         :entry_id => entry.id ? entry.id.strip : entry.url.strip,
+                         :authors => entry.authors,
+                         :copyright => entry.copyright,
+                         # Apparently entry.last_updated is a Time object
+                         :last_updated => entry.last_updated ? entry.last_updated.to_datetime : nil)
+   end
+
+   # The old entry is an ActiveRecord record. The new one is a FeedNormalizer::Entry.
+   def update_entry(old, new)
+     old.update_attributes(:title => new.title,
+                           :description => new.description,
+                           :content => new.content,
+                           :categories => new.categories,
+                           :date_published => new.date_published,
+                           :url => new.url,
+                           :urls => new.urls,
+                           :authors => new.authors,
+                           :copyright => new.copyright,
+                           # Apparently new.last_updated is a Time object
+                           :last_updated => new.last_updated ? new.last_updated.to_datetime : nil)
+
+   end
+
+
+   # This field is used to determine whether an entry in the feed is new, in
+   # which case it is colored in a special way
+   def previously_updated_at
+     unless self['previously_updated_at']
+       return self['created_at']
+     end
+     self['previously_updated_at']
+   end
+
+   # Takes a new version of the feed XML.
+   # Can't call this "update" because that's an important ActiveRecord method.
+   # The block is the output method. If no block is given, a standard block is
+   # created that just outputs to STDOUT.
+   # +puts+ calls the output lambda when it's available; otherwise it prints to
+   # STDOUT.
+
+   def too_soon_to_update?
+     self.updated_at.to_time > (Time.now - 3600)
+   end
+
+   def update_self(xml, force=false, &block)
+     num_new_items = 0
+     if block_given?
+       @output_block = block
+     end
+
+     unless force
+       # To be courteous, don't update feeds that have been downloaded in the last
+       # hour.
+       if too_soon_to_update?
+         puts "-> skipping. last update was within the last hour."
+         return
+       end
+     end
+
+     # :updated_at is used for this program's internal bookkeeping, and tracks when the feed was last
+     # accessed. :last_updated is the property of the feed.
+
+     begin
+
+       # We're forcing the SimpleRssParser because the other one led to errors with DaringFireball's Atom feed.
+       new_feed_content = FeedNormalizer::FeedNormalizer.parse(xml, :force_parser => FeedNormalizer::SimpleRssParser)
+       # Try another parser
+       unless new_feed_content.is_a?(FeedNormalizer::Feed)
+         puts "Failed to update #{self.title}. Try again later."
+         LOGGER.debug("FAILED TO UPDATE #{self.title}")
+         LOGGER.debug(xml)
+         return
+       end
+
+       # At this point we're definitely updating the feed.
+
+       # create entries
+       # The import_entries method should silently skip entries that already exist
+       num_new_items += import_entries(new_feed_content)
+
+       # This updates the last_updated timestamp
+       self.last_updated = Time.now
+
+       self.save
+       puts "-> %s new items found." % num_new_items
+     rescue
+       puts "-> There was an error updating the feed #{self.feed_url}."
+       raise
+     end
+     return num_new_items
+   end
+ end
+
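A rough sketch of exercising the Feed model above directly (again, not from the package itself): it assumes ActiveRecord is already connected the way lib/fastreader.rb sets it up, and the URL is a placeholder.

    require 'open-uri'

    url = 'http://example.com/feed.xml'   # placeholder
    feed = Feed.create_feed(open(url).read, url) { |msg| puts msg }
    if feed
      # Later: re-fetch and fold in new entries; `true` forces the update,
      # bypassing the one-hour courtesy window in too_soon_to_update?.
      added = feed.update_self(open(url).read, true)
      puts "#{added || 0} new entries for #{feed.title}"
    end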