imap-feeder 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. data.tar.gz.sig +0 -0
  2. data/.project +23 -0
  3. data/History.txt +4 -0
  4. data/License.txt +341 -0
  5. data/Manifest.txt +48 -0
  6. data/README.txt +77 -0
  7. data/Rakefile +4 -0
  8. data/TODO.txt +11 -0
  9. data/bin/imap-feeder +68 -0
  10. data/config/hoe.rb +77 -0
  11. data/config/requirements.rb +17 -0
  12. data/lib/imap-feeder.rb +87 -0
  13. data/lib/imap-feeder/createconfigparser.rb +40 -0
  14. data/lib/imap-feeder/fakeserver.rb +23 -0
  15. data/lib/imap-feeder/feedfolder.rb +23 -0
  16. data/lib/imap-feeder/feedreader.rb +81 -0
  17. data/lib/imap-feeder/imapfeederconfig.rb +74 -0
  18. data/lib/imap-feeder/message.rb +152 -0
  19. data/lib/imap-feeder/messagestore.rb +35 -0
  20. data/lib/imap-feeder/opmlreader.rb +50 -0
  21. data/lib/imap-feeder/server.rb +77 -0
  22. data/lib/imap-feeder/version.rb +9 -0
  23. data/script/destroy +14 -0
  24. data/script/generate +14 -0
  25. data/script/txt2html +77 -0
  26. data/settings.rb.example +58 -0
  27. data/setup.rb +1585 -0
  28. data/tasks/deployment.rake +34 -0
  29. data/tasks/environment.rake +7 -0
  30. data/tasks/website.rake +15 -0
  31. data/test/data/encoded.rss +22 -0
  32. data/test/data/erroneous.yml +7 -0
  33. data/test/data/last_messages.yaml +0 -0
  34. data/test/data/rss20_no_body.xml +10 -0
  35. data/test/data/rss20_one_entry.xml +11 -0
  36. data/test/data/rss20_two_entries.xml +17 -0
  37. data/test/data/rss20_with_authors.xml +21 -0
  38. data/test/data/simple.opml +9 -0
  39. data/test/functional_test_server.rb +95 -0
  40. data/test/test_feedreader.rb +92 -0
  41. data/test/test_imap-feeder.rb +66 -0
  42. data/test/test_imapfeederconfig.rb +56 -0
  43. data/test/test_message.rb +312 -0
  44. data/test/test_messagestore.rb +54 -0
  45. data/test/test_opmlreader.rb +73 -0
  46. data/test/testlogger.rb +22 -0
  47. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  48. data/website/stylesheets/screen.css +137 -0
  49. data/website/template.rhtml +49 -0
  50. metadata +175 -0
  51. metadata.gz.sig +0 -0
@@ -0,0 +1,4 @@
1
# Rake entry point: loads the build prerequisites and the Hoe configuration,
# then every task definition found below tasks/.
require 'config/requirements'
require 'config/hoe' # setup Hoe + all gem configuration

Dir.glob('tasks/**/*.rake').each do |task_file|
  load task_file
end
@@ -0,0 +1,11 @@
1
+ Ideas for further development:
2
+ * Adding new feeds through rssimap, without manually editing the file.
3
+ * HTML-Mails (really?)
4
+ * make the position of the link to the original configurable
5
+ * embed images (e.g. for dilbert)
6
+
7
+ * user-hooks that can interact with the feed processing:
8
+ * to ignore items
9
+ * custom body, e.g. fetch content with hpricot
10
+ * code directly in the feedy.yml
11
+ * or an external file, with an ID to identify the feed
@@ -0,0 +1,68 @@
1
#!/usr/bin/env ruby
#
# Command line front end of imap-feeder. Depending on the arguments it
#   * creates a new configuration            (--new-config),
#   * checks an existing feed configuration  (--check-settings), or
#   * loads the settings file given as first argument and runs ImapFeeder
#     against the configured server (or a FakeServer with --pretend).

require 'imap-feeder'
require 'imap-feeder/createconfigparser'
require 'imap-feeder/imapfeederconfig'
require 'imap-feeder/fakeserver.rb'

opts = CreateConfigParser.parse(ARGV)

if opts.create
  $stdout.puts "Creating new configuration."
  ImapFeederConfig.create(opts.create_file, opts.folder)
  $stdout.puts "Please edit your settings.rb file now."

elsif opts.check
  $stdout.puts "Checking configuration #{opts.check}"
  $log = Logger.new($stdout)
  # Block form closes the file once the check is done.
  File.open(opts.check) { |file| ImapFeederConfig.check(file) }

elsif ARGV.first and load(ARGV.first)
  # The loaded settings file is plain Ruby that must assign these globals.
  # $running_instance is included because it is used as the lock file path
  # below and a nil value would only fail later with a TypeError.
  configuration = [$host, $user, $pass, $temp, $config, $running_instance]
  if not configuration.all?
    $stderr.puts "Your settings are incomplete, please review settings.rb:"
    $stderr.puts configuration.inspect
    exit 1
  end

  # Create the lock file atomically: CREAT|EXCL fails with EEXIST if another
  # instance created it first, so there is no gap between test and creation.
  begin
    File.open($running_instance, File::WRONLY | File::CREAT | File::EXCL) { }
  rescue Errno::EEXIST
    $stderr.puts "Already running (#{$running_instance})! Aborting..."
    exit 1
  end

  begin
    store = MessageStore.new($temp)

    server_options = {
      :host => $host,
      :user => $user,
      :pass => $pass,
      :port => $port || "143",
      :use_ssl => $use_ssl || false
    }

    if opts.pretend
      server = FakeServer.new(server_options)
      # In pretend mode nothing may be persisted, so saving becomes a no-op.
      def store.save
      end
      $log = Logger.new(STDOUT)
    else
      begin
        server = Server.new(server_options)
      rescue Exception => e
        # Deliberately broad: any failure to connect ends the run here,
        # because continuing without a server could only crash later.
        message = "Could not open initial connection to server: #{e}"
        if $log
          $log.error message
        else
          $stderr.puts message
        end
        exit 1
      end
    end

    File.open($config) do |config|
      ImapFeeder.new(server, store, config).run
    end
    server.disconnect
  ensure
    # Always release the lock, also when the run aborts with an error or exit.
    File.delete($running_instance) if File.exist?($running_instance)
  end

else
  $stdout.puts "No options given, maybe you need --help?"
end
68
+
@@ -0,0 +1,77 @@
1
+ require 'imap-feeder/version'
2
+
3
require 'yaml' # YAML.load is used by rubyforge_username below

AUTHOR = 'Mirko Stocker' # can also be an array of Authors
EMAIL = "me@misto.ch"
DESCRIPTION = "Imap-feeder pushes your RSS and Atom feeds to an IMAP server."
GEM_NAME = 'imap-feeder' # what ppl will type to install your gem
RUBYFORGE_PROJECT = 'imap-feeder' # The unix name for your project
HOMEPATH = "http://#{RUBYFORGE_PROJECT}.rubyforge.org"
DOWNLOAD_PATH = "http://rubyforge.org/projects/#{RUBYFORGE_PROJECT}"

@config_file = "~/.rubyforge/user-config.yml"
@config = nil
# Placeholder that rubyforge_username overwrites in place with String#replace;
# dup'ed so the constant holds an explicitly mutable string.
RUBYFORGE_USERNAME = "unknown".dup

# Loads the RubyForge user configuration (only on the first call) and copies
# its "username" entry into RUBYFORGE_USERNAME.
#
# Prints an error to stderr and exits with status 1 when the configuration
# file cannot be read or parsed.
def rubyforge_username
  unless @config
    begin
      @config = YAML.load(File.read(File.expand_path(@config_file)))
    rescue StandardError => e
      $stderr.puts <<-EOS
ERROR: No rubyforge config file found: #{@config_file}
Run 'rubyforge setup' to prepare your env for access to Rubyforge
- See http://newgem.rubyforge.org/rubyforge.html for more details
(#{e.class}: #{e.message})
      EOS
      exit 1
    end
  end
  RUBYFORGE_USERNAME.replace @config["username"]
end
29
+
30
+
31
REV = nil
# UNCOMMENT IF REQUIRED:
# REV = `svn info`.each {|line| if line =~ /^Revision:/ then k,v = line.split(': '); break v.chomp; else next; end} rescue nil

# Gem version, with the revision appended when REV is set.
VERS = [ImapFeeder::VERSION::STRING, REV].compact.join('.')

RDOC_OPTS = [
  '--quiet',
  '--title', 'imap-feeder documentation',
  "--opname", "index.html",
  "--line-numbers",
  "--main", "README",
  "--inline-source"
]

class Hoe
  # Returns the extra dependencies after removing any entry named 'hoe'.
  def extra_deps
    @extra_deps.delete_if { |dependency| Array(dependency).first == 'hoe' }
  end
end

# Generate all the Rake tasks
# Run 'rake -T' to see list of generated tasks (from gem root directory)
hoe = Hoe.new(GEM_NAME, VERS) do |project|
  project.developer(AUTHOR, EMAIL)
  project.description = DESCRIPTION
  project.summary = DESCRIPTION
  project.url = HOMEPATH
  project.rubyforge_name = RUBYFORGE_PROJECT if RUBYFORGE_PROJECT
  project.test_globs = ["test/**/test_*.rb"]
  # An array of file patterns to delete on clean.
  project.clean_globs = project.clean_globs | ['**/.*.sw?', '*.gem', '.config', '**/.DS_Store']

  # == Optional
  project.changes = project.paragraphs_of("History.txt", 0..1).join("\n\n")

  project.extra_deps = [
    ['actionmailer', '>= 1.3.2'],
    ['hpricot', '>= 0.5'],
    ['htmlentities', '>= 3.0.1'],
    ['tidy', '>= 1.1.2'],
    ['simple-rss', '>= 1.1']
  ]

  #project.spec_extras = {} # A hash of extra values to set in the gemspec.
end

CHANGES = hoe.paragraphs_of('History.txt', 0..1).join("\\n\\n")
PATH = if RUBYFORGE_PROJECT == GEM_NAME
  RUBYFORGE_PROJECT
else
  "#{RUBYFORGE_PROJECT}/#{GEM_NAME}"
end
hoe.remote_rdoc_dir = File.join(PATH.gsub(/^#{RUBYFORGE_PROJECT}\/?/,''), 'rdoc')
hoe.rsync_args = '-av --delete --ignore-errors'
@@ -0,0 +1,17 @@
1
require 'fileutils'
include FileUtils

require 'rubygems'
# Every gem listed here is required before the Rake tasks are loaded. If one
# is missing, print an installation hint to stderr and stop with a failing
# exit status so that callers can detect the problem.
%w[rake hoe newgem rubigen].each do |req_gem|
  begin
    require req_gem
  rescue LoadError
    $stderr.puts "This Rakefile requires the '#{req_gem}' RubyGem."
    $stderr.puts "Installation: gem install #{req_gem} -y"
    exit 1
  end
end

# Make the gem's own lib directory loadable.
$:.unshift(File.join(File.dirname(__FILE__), %w[.. lib]))

require 'imap-feeder'
@@ -0,0 +1,87 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+
3
+ require 'imap-feeder/server'
4
+ require 'imap-feeder/message'
5
+ require 'imap-feeder/feedreader'
6
+ require 'imap-feeder/messagestore'
7
+
8
+ $KCODE="U"
9
+
10
# Reads a list of feeds from a YAML configuration, fetches each feed and
# hands every item that is not yet archived to the server, recording the
# delivered items in the store afterwards.
class ImapFeeder

  # server:: must answer has_folder?, create_folder and send(message, path)
  # store::  must answer get_archived, add_new and save
  # config:: YAML source (String or IO); a list of entries of the form
  #          {"feed" => {"url" => ..., "path" => ...}}
  def initialize(server, store, config)
    @server = server
    @store = store
    @config = config
  end

  # Processes all configured feeds. A feed that cannot be retrieved is
  # logged and skipped; the remaining feeds are still processed.
  def run
    $log.info "Started"
    # An empty configuration does not parse to an Array; treat it as
    # "no feeds" instead of failing on each.
    feeds = YAML.load(@config) || []

    feeds.each do |feed|
      path = feed['feed']['path']
      $log.info "Processing #{path}"

      create_folder(path) unless check_folder_exists(path)

      reader = open_reader(feed, path)
      next unless reader

      # The store may have nothing recorded for this path yet.
      archive = get_archived(path) || []
      messages = reader.get_new(archive)

      unless messages.empty?
        $log.debug "already processed messages: '#{archive.join("', '")}'"
        send_messages(messages, path, reader.number_of_entries)
      end
    end

    $log.info "Finished"
  end

  private

  # Creates the FeedReader for one configuration entry. Returns nil (after
  # logging) when the feed cannot be retrieved.
  def open_reader(feed, path)
    url = feed['feed']['url']
    FeedReader.new(url)
  rescue OpenURI::HTTPError => e
    $log.warn "Error retrieving #{url}: #{e.message}"
    nil
  rescue Exception => e
    # Deliberately broad so that one broken feed never aborts the whole run.
    $log.error "Unexpected error while retrieving #{path}: #{e.message}"
    nil
  end

  # Sends all messages to the folder and then records them as delivered.
  def send_messages messages, path, number_of_entries
    $log.info "#{messages.size} new message(s)"
    messages.each do |msg|
      send_message(msg, path)
    end
    message_sent(messages, path, number_of_entries)
  end

  # Stores the identifiers of the delivered messages and saves the store.
  def message_sent(messages, path, number_of_entries)
    identifiers = messages.collect do |msg|
      msg.generate_identifier
    end

    @store.add_new(path, identifiers, number_of_entries)
    @store.save
  end

  # Delivers a single message and logs it under the last path segment.
  def send_message(msg, complete_path)
    @server.send(msg, complete_path)
    $log.info "Found in #{complete_path.split(".").last}: #{msg.generate_identifier}"
  end

  def get_archived(path)
    @store.get_archived path
  end

  def create_folder(path)
    $log.info "Creating #{path}"
    @server.create_folder path
  end

  def check_folder_exists(path)
    @server.has_folder? path
  end
end
@@ -0,0 +1,40 @@
1
+ require 'optparse'
2
+ require 'ostruct'
3
+
4
# Parses the command line of the imap-feeder executable.
class CreateConfigParser
  # Removes the recognised options from +args+ and returns an OpenStruct
  # that may carry: check (settings file to check), folder (IMAP root
  # folder), create / create_file (new configuration, optional OPML file)
  # and pretend.
  def self.parse(args)
    options = OpenStruct.new
    parser = OptionParser.new

    parser.banner = <<EOF
Usage: #{$0} [options] to create or check your configuration
or #{$0} SETTINGS_FILE to run the script
EOF

    parser.on("-c", "--check-settings SETTINGS_FILE",
              "Check an existing configuration") { |settings_file| options.check = settings_file }

    parser.on("-r", "--imap-root [FOLDER]",
              "Use this as the root folder for all feeds") { |root| options.folder = root }

    parser.on("-n", "--new-config [OPML_FILE]",
              "Create a new configuration") do |opml_file|
      options.create_file = opml_file
      options.create = true
    end

    parser.on("-p", "--pretend",
              "Don't do anything, just pretend. Prints new items to the console.") do |flag|
      options.pretend = flag
    end

    parser.parse!(args)
    options
  end
end
@@ -0,0 +1,23 @@
1
# Server replacement that only prints what it is asked to do. It reports
# every folder as existing and performs no action for create_folder or
# disconnect.
class FakeServer

  attr_reader :connected

  # params:: Hash read for :host, :port, :use_ssl, :user and :pass
  def initialize(params)
    host, port, ssl = params[:host], params[:port], params[:use_ssl]
    puts "Would connect to #{host}:#{port}, with ssl? #{ssl}"

    user, pass = params[:user], params[:pass]
    puts "Would login with #{user}/#{pass}"
  end

  def disconnect; end

  # Prints the title of the message together with the target folder.
  def send(message, folder="INBOX")
    title = message.title
    puts "Send message to folder #{folder}: #{title}"
  end

  def create_folder(folder); end

  # Always answers true.
  def has_folder?(folder)
    true
  end
end
@@ -0,0 +1,23 @@
1
# A named feed address.
FeedUrl = Struct.new(:name, :url)

# A named folder holding feed urls and nested sub folders.
class FeedFolder
  attr_accessor :name, :children, :urls

  # Starts with no sub folders and no urls.
  def initialize(name)
    @name, @children, @urls = name, [], []
  end

  # Appends a sub folder.
  def add_sub(child)
    @children.push(child)
  end

  # Appends a feed url.
  def add_url(url)
    @urls.push(url)
  end

  # Looks up a sub folder with Array#[] semantics.
  def [](index)
    children = @children
    children[index]
  end
end
@@ -0,0 +1,81 @@
1
+ require 'tempfile'
2
+ require 'open-uri'
3
+ require 'simple-rss'
4
+ require 'htmlentities'
5
+ require 'iconv'
6
+
7
+ $KCODE="U"
8
+
9
# Replaces SimpleRSS#unescape because of an open bug (#10852): this version
# only drops the CDATA start and end markers and strips surrounding
# whitespace.
class SimpleRSS
  def unescape(content)
    without_markers = content.gsub(/(<!\[CDATA\[|\]\]>)/,'')
    without_markers.strip
  end
end
15
+
16
# Fetches and parses one feed and turns its entries into Message objects.
class FeedReader
  attr_reader :messages

  # Retrieves and parses +feed_url+. The encoding is taken from the first
  # encoding="..." attribute found in the feed source; UTF-8 is assumed
  # (with a warning) when there is none.
  # Errors raised while opening or parsing the feed are not handled here.
  def initialize(feed_url)
    @feed_url = feed_url
    # Block form closes the opened handle once the feed has been parsed.
    @feed = open(feed_url) { |io| SimpleRSS.parse(io) }

    @encoding = @feed.source[/encoding=["'](.*?)["']/, 1]
    if not @encoding
      $log.warn "No encoding found for #{feed_url}, defaulting to UTF-8."
      @encoding = "UTF-8"
    end
  end

  # Converts +str+ from the feed's encoding to UTF-8. On an illegal byte
  # sequence the error is logged and +str+ is returned unchanged.
  def conv(str)
    Iconv.iconv("UTF-8", @encoding, str).first
  rescue Iconv::IllegalSequence => e
    $log.error "IConv reports an IllegalSequence: #{e.message} from #{str}"
    return str
  end

  def number_of_entries
    @feed.entries.size
  end

  # Returns the messages of all entries whose identifier is not listed in
  # +archive+ (an Array of identifiers, or nil for "nothing archived").
  # Returns an empty Array if the archive contains an empty identifier.
  def get_new(archive)
    return [] if not @feed

    archive ||= []
    if archive.include?("")
      $log.warn "Title is empty, that should never happen! Aborting #{@feed_url}."
      return []
    end

    messages = []
    @feed.entries.each do |item|

      body = conv(item.content_encoded || item.content ||
                  item.summary || item.description)
      message = Message.new(
        :title => conv(item.title),
        :time => time_from(item),
        :body => body,
        :from => conv(item.author),
        :url => conv(item.link)
      )

      item_identifier = message.generate_identifier

      if archive.include? item_identifier
        # Shorten long titles for the log; the ellipsis is only added when
        # the title really was cut.
        short_name = message.title[0, 30]
        short_name += "…" if message.title.length > 30
        $log.debug "Already have '#{short_name}'."
      else
        messages << message
      end
    end

    messages
  end

  # First timestamp available on +item+, or nil.
  def time_from item
    return nil if not item
    item.published || item.pubDate || item.date_published || item.updated
  end
end
@@ -0,0 +1,74 @@
1
+ require 'yaml'
2
+ require 'uri'
3
+ require 'net/http'
4
+ require 'ftools'
5
+
6
+ require 'imap-feeder/opmlreader'
7
+
8
# Creates and checks the feed configuration (feeds.yml).
class ImapFeederConfig

  # Writes feeds.yml and a copy of settings.rb.example (as settings.rb) to
  # the current directory.
  #
  # opml_file::   path of an OPML file to take the feeds from, or nil to
  #               write two example feeds
  # root_folder:: name of the folder below INBOX that holds all feeds, or
  #               nil to put them directly below INBOX
  def self.create(opml_file, root_folder)

    root_folder = root_folder ? "INBOX.#{root_folder}" : "INBOX"

    if opml_file
      # Block form closes the OPML file when the items have been collected.
      items = File.open(opml_file) do |opml|
        process(OpmlReader.get(opml), root_folder).flatten
      end
    else
      items = [
        {"feed" => {"url" => "http://rubyforge.org/export/rss_sfnews.php", "path" => "#{root_folder}.rubyforge"}},
        {"feed" => {"url" => "http://feeds.feedburner.com/DilbertDailyStrip", "path" => "#{root_folder}.dilbert"}}
      ]
    end

    File.open("#{Dir.pwd}/feeds.yml", "w+") do |file|
      YAML.dump(items, file)
    end

    File.copy "#{File.dirname(__FILE__)}/../../settings.rb.example", "#{Dir.pwd}/settings.rb"
  end

  # Checks url and path of every feed in +configuration+ (YAML source) and
  # reports the results through $log.
  def self.check(configuration)
    YAML.load(configuration).each do |conf_item|
      check_url_connection(conf_item['feed']['url'])
      check_path_name(conf_item['feed']['path'])
    end
  end

  # Helper methods. They are plain public class methods: the bare `private`
  # that used to precede them has no effect on methods defined with
  # `def self.`, so it was removed rather than changing their visibility.

  # Collects the feed entries of +folder+ and, recursively, of its sub
  # folders. Sub folder results are appended as nested Arrays, so callers
  # flatten the result.
  def self.process(folder, parent_path)
    items = []
    path = parent_path + folder.name

    folder.urls.each do |child|
      feed_path = "#{path}.#{child.name}"
      # $log is not necessarily set up when a configuration is created.
      $log.debug "#{feed_path}: #{child.url}" if $log
      items << {"feed" => {"url" => child.url, "path" => feed_path}}
    end

    folder.children.each do |child|
      items << process(child, path + '.')
    end
    items
  end

  # Path plus query string to request for +uri+; "/" stands in for an
  # empty path.
  def self.request_target(uri)
    target = uri.path.empty? ? "/" : uri.path
    target += "?#{uri.query}" if uri.query
    target
  end

  # Sends a HEAD request for +url+ and logs the outcome. Never raises:
  # every failure is logged as a warning.
  def self.check_url_connection(url)
    begin
      uri = URI.parse url

      # The query string is part of the request, otherwise feeds such as
      # "rss.php?id=1" would be checked against the wrong resource.
      response = Net::HTTP.new(uri.host, uri.port).head(request_target(uri), nil)
      if response.code =~ /^[^2]\d/
        $log.info "Connecting to #{url}: #{response.message}, code: #{response.code}"
      else
        $log.info "Connecting to #{url}: OK"
      end
    rescue Exception => e
      # Deliberately broad: this is a best-effort check.
      $log.warn "Exception while connecting to #{url}: #{e}."
    end
  end

  # Logs an error for every run of characters in +path+ that is neither in
  # IMAP_CHARS nor a dot.
  def self.check_path_name(path)
    path.scan(/[^#{IMAP_CHARS}\.]+/) do |char|
      $log.error "Invalid character found in \'#{path}\': #{char}"
    end
  end
end