imap-feeder 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. data.tar.gz.sig +0 -0
  2. data/.project +23 -0
  3. data/History.txt +4 -0
  4. data/License.txt +341 -0
  5. data/Manifest.txt +48 -0
  6. data/README.txt +77 -0
  7. data/Rakefile +4 -0
  8. data/TODO.txt +11 -0
  9. data/bin/imap-feeder +68 -0
  10. data/config/hoe.rb +77 -0
  11. data/config/requirements.rb +17 -0
  12. data/lib/imap-feeder.rb +87 -0
  13. data/lib/imap-feeder/createconfigparser.rb +40 -0
  14. data/lib/imap-feeder/fakeserver.rb +23 -0
  15. data/lib/imap-feeder/feedfolder.rb +23 -0
  16. data/lib/imap-feeder/feedreader.rb +81 -0
  17. data/lib/imap-feeder/imapfeederconfig.rb +74 -0
  18. data/lib/imap-feeder/message.rb +152 -0
  19. data/lib/imap-feeder/messagestore.rb +35 -0
  20. data/lib/imap-feeder/opmlreader.rb +50 -0
  21. data/lib/imap-feeder/server.rb +77 -0
  22. data/lib/imap-feeder/version.rb +9 -0
  23. data/script/destroy +14 -0
  24. data/script/generate +14 -0
  25. data/script/txt2html +77 -0
  26. data/settings.rb.example +58 -0
  27. data/setup.rb +1585 -0
  28. data/tasks/deployment.rake +34 -0
  29. data/tasks/environment.rake +7 -0
  30. data/tasks/website.rake +15 -0
  31. data/test/data/encoded.rss +22 -0
  32. data/test/data/erroneous.yml +7 -0
  33. data/test/data/last_messages.yaml +0 -0
  34. data/test/data/rss20_no_body.xml +10 -0
  35. data/test/data/rss20_one_entry.xml +11 -0
  36. data/test/data/rss20_two_entries.xml +17 -0
  37. data/test/data/rss20_with_authors.xml +21 -0
  38. data/test/data/simple.opml +9 -0
  39. data/test/functional_test_server.rb +95 -0
  40. data/test/test_feedreader.rb +92 -0
  41. data/test/test_imap-feeder.rb +66 -0
  42. data/test/test_imapfeederconfig.rb +56 -0
  43. data/test/test_message.rb +312 -0
  44. data/test/test_messagestore.rb +54 -0
  45. data/test/test_opmlreader.rb +73 -0
  46. data/test/testlogger.rb +22 -0
  47. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  48. data/website/stylesheets/screen.css +137 -0
  49. data/website/template.rhtml +49 -0
  50. metadata +175 -0
  51. metadata.gz.sig +0 -0
@@ -0,0 +1,152 @@
1
+ require 'base64'
2
+ require 'time'
3
+ require 'action_mailer'
4
+ require 'hpricot'
5
+ require 'tidy'
6
+ require 'htmlentities'
7
+ require 'digest/md5'
8
+
9
+ $KCODE="U"
10
+
11
#
# A single feed entry rendered as a plain-text mail message.
#
# The entry is described by a params hash; the keys read by this class are
# :title, :from, :body, :url, :time and :id. HTML in the body is cleaned with
# Tidy and flattened to text with Hpricot, with links collected into a
# numbered list appended to the body.
#
class Message

  include ActionMailer::Quoting

  # params:: hash describing the entry (see the class comment for the keys).
  def initialize(params)
    @params = params
  end

  # Identifier supplied by the creator through the :id key, or nil when none
  # was given. Server#retrieve passes the IMAP search result here and
  # Server#delete reads it back through this method.
  def id
    @params[:id]
  end

  # Entity-decoded title with line breaks collapsed to spaces; "" when the
  # entry has no title.
  def title
    @title ||= (dec((@params[:title] || "").gsub(/(\r\n)|\r|\n/, " ")) || "")
  end

  # Entity-decoded sender, or a placeholder address when the entry has none.
  def from
    @from ||= (dec(@params[:from]) || "Unknown <spam@example.org>")
  end

  # Plain-text body built from :body, falling back to :url and then to "".
  def body
    @body ||= strip_html(@params[:body] || @params[:url] || "")
  end

  # RFC 2822 formatted timestamp of :time, or of the current local time.
  def time
    @time ||= (@params[:time] || Time.now.localtime).rfc2822
  end

  # Passes every run of characters outside the allowed set through
  # quote_if_necessary so the result can be used in a mail header.
  # Returns "" for nil.
  #
  # NOTE(review): inside the character class ` -_` is a range (space up to
  # underscore), not three literal characters. It is left untouched because
  # header punctuation such as <, > and @ passes through unquoted thanks to
  # it - confirm before tightening.
  def quote(str)
    return "" if not str
    str.gsub(/[^a-zA-Z0-9 -_:,\.]+/) { |to_quote| quote_if_necessary(to_quote, "UTF-8") }
  end

  # Title plus an MD5 digest of the body; computed once per instance.
  def generate_identifier
    @cached_identifier ||= "#{title}##{Digest::MD5.hexdigest(body())}"
  end

  # Renders the complete message (headers, blank line, body and, when
  # present, the entry URL) as a newline-separated string.
  def format
    url = @params[:url]
    # The charset line continues the Content-Type header, so it has to start
    # with whitespace to be a valid folded header line.
    return <<-EOF
Date: #{time}
Subject: #{quote(title)}
From: #{quote(from)}
Content-Type: text/plain;
  charset="utf-8"
Content-Transfer-Encoding: 8bit

#{body}#{"\n\n" + url if url}
    EOF
  end

  private

  # Swaps every element matching +element+ for the block's result, or for
  # the element's inner HTML when no block is given.
  def replace(doc, element)
    doc.search(element) do |found|
      replacement = block_given? ? yield(found) : found.innerHTML
      found.swap(replacement)
    end
  end

  # Runs +body+ through Tidy and returns the cleaned markup without the
  # surrounding <html>/<body> wrapper. Returns +body+ unchanged when setting
  # up Tidy raises LoadError.
  def tidy(body)
    begin
      Tidy.path = $tidy_path unless Tidy.path
    rescue LoadError => e
      # Logger's method is #warn; #warning does not exist.
      $log.warn "Tidy not available: #{e.message}"
      return body
    end

    tidy_html = Tidy.open(:show_warnings => true) do |tidy|
      tidy.options.markup = true
      tidy.options.wrap = 0
      tidy.options.logical_emphasis = true
      tidy.options.drop_font_tags = true
      tidy.options.output_encoding = "utf8"
      tidy.options.input_encoding = "utf8"
      tidy.options.doctype = "omit"
      tidy.clean(body)
    end
    tidy_html.strip!
    tidy_html.gsub!(/^<html>(.|\n)*<body>/, "")
    tidy_html.gsub!(/<\/body>(.|\n)*<\/html>$/, "")
    tidy_html.gsub!("\t", " ")
    tidy_html
  end

  # Decodes HTML entities and strips surrounding whitespace; nil stays nil.
  def dec(html)
    HTMLEntities.decode_entities(html).strip if html
  end

  # Converts an HTML fragment to plain text: block and inline markup is
  # replaced by light text conventions, links become numbered references
  # listed at the end, and runs of blank lines are collapsed.
  def strip_html(body)
    doc = Hpricot(tidy(dec(body)))

    replace(doc, 'p') { |paragraph| "\n#{paragraph.innerHTML}\n" }
    replace(doc, 'strong') { |strong| "*#{strong.innerHTML}*" }
    replace(doc, 'b') { |bold| "*#{bold.innerHTML}*" }
    replace(doc, 'em') { |em| "*#{em.innerHTML}*" }
    replace(doc, 'li') { |li| "- #{li.innerHTML}" }
    replace(doc, 'i')
    replace(doc, 'ol')
    replace(doc, 'ul')
    replace(doc, 'abr')
    replace(doc, 'font')
    replace(doc, 'span')
    replace(doc, 'div')
    replace(doc, 'br') { |br| "\n" }
    replace(doc, 'img') { |img| img.attributes['alt'] || "" }
    replace(doc, 'abbr') do |abbr|
      # Only append the expansion when there is one; an interpolated string
      # is never nil, so a missing title used to render as " ()".
      expansion = abbr.attributes['title']
      if expansion.nil? || expansion.to_s.empty?
        abbr.innerHTML
      else
        "#{abbr.innerHTML} (#{expansion})"
      end
    end

    urls = gather_urls(doc)

    body = doc.to_html

    unless urls.empty?
      body << "\n"
      # Width of the widest "\n[N]" label: "[0]" covers the brackets and one
      # digit, log10 adds the remaining digits and +1 the leading newline.
      max_length = "[0]".length + Math.log10(urls.size).floor + 1
      urls.each_with_index do |url, i|
        index = i + 1
        str = "\n[#{index}]".ljust max_length
        body << "#{str} #{url}"
      end
    end

    # Links that gather_urls left in place are reduced to their text.
    doc = Hpricot(body)
    replace(doc, 'a')
    body = doc.to_html

    #sanitize newlines
    body.gsub!(/(\n\s*){3,}/, "\n\n")

    dec(body)
  end

  # Replaces each link whose text differs from its target with
  # "text[N]" and returns the collected targets in order. Links without a
  # parsable href or without a host are skipped.
  def gather_urls(doc)
    urls = []
    doc.search('a') do |link|
      href = URI(link.attributes['href']) rescue nil
      next unless href && href.host
      next if link.innerHTML.strip == href.to_s.strip
      urls << href
      link.swap(link.innerHTML.strip + "[#{urls.length}]")
    end
    urls
  end
end
@@ -0,0 +1,35 @@
1
+ require 'yaml'
2
+
3
#
# Keeps, per folder, a list of recently seen message titles and persists it
# as YAML in a single file.
#
class MessageStore

  MESSAGES_TO_STORE = 100

  # Loads the stored lists from +file+. When the file is missing a warning is
  # logged and the store starts out empty; an empty file also yields an empty
  # store.
  def initialize(file)
    @file = file
    @root = {}

    unless File.exist?(@file)
      $log.warn "#{file} does not exist, creating a new one."
      return
    end

    loaded = File.open(@file) { |io| YAML.load(io) }
    @root = loaded || {}
  end

  # Puts +titles+ in front of the list kept for +folder+, cuts the list down
  # to twice +number_of_entries+ and drops nil entries.
  def add_new(folder, titles, number_of_entries = MESSAGES_TO_STORE)
    entries = (@root[folder] ||= [])
    entries.unshift(*titles)
    entries.slice!((number_of_entries * 2)..-1)
    entries.compact!
  end

  # The list kept for +folder+, or an empty array when there is none.
  def get_archived(folder)
    archived = @root[folder]
    archived ? archived : []
  end

  # Writes all lists back to the file given at construction.
  def save
    File.open(@file, "w") { |io| YAML.dump(@root, io) }
  end
end
@@ -0,0 +1,50 @@
1
+ require 'rexml/document'
2
+ require 'rexml/parsers/sax2parser'
3
+ require 'rexml/sax2listener'
4
+
5
+ require 'imap-feeder/feedfolder'
6
+
7
#
# Defines the characters that can be used as part of an IMAP Folder
# (written as the inside of a regexp character class).
#
IMAP_CHARS = "\\w:,\\-= "

#
# Parses an OPML file and extracts the folders and urls of the feeds. A
# tree-like structure is built from FeedFolders, with FeedUrls as leaves.
#
class OpmlReader

  #
  # Entry point for the parsing process. Takes the file content as a string
  # and returns the root FeedFolder (created with an empty name).
  #
  def self.get(file)
    opml = REXML::Document.new(file)
    parse_opml(opml.elements['opml/body'])
  end

  # NOTE: the helpers below are class methods. A bare `private` keyword does
  # not apply to `def self.` definitions, so they have always been publicly
  # callable; the keyword was dropped rather than changing their visibility.

  #
  # Replaces every run of characters not listed in IMAP_CHARS with a single
  # underscore. A nil name is treated as an empty string.
  #
  def self.replace_bad_chars(name)
    name.to_s.gsub(/[^#{IMAP_CHARS}]+/, "_")
  end

  #
  # Parses the given node and recursively traverses through the children.
  # An outline counts as a folder when it has an isOpen attribute or
  # type="folder"; every other outline is added as a feed url.
  #
  def self.parse_opml(opml_node, folder = FeedFolder.new(""))
    opml_node.elements.each('outline') do |element|
      attrs = element.attributes
      if !attrs['isOpen'].nil? || attrs['type'] == "folder"
        child_folder = FeedFolder.new(replace_bad_chars(attrs['text']))
        folder.add_sub(child_folder)
        parse_opml(element, child_folder)
      else
        # Feeds are not guaranteed to carry a title attribute; fall back to
        # the text attribute instead of failing on nil.
        label = attrs['title'] || attrs['text']
        folder.add_url(FeedUrl.new(replace_bad_chars(label), attrs['xmlUrl']))
      end
    end
    folder
  end
end
@@ -0,0 +1,77 @@
1
+ require 'base64'
2
+ require 'net/imap'
3
+ require 'imap-feeder/message'
4
+
5
+ $KCODE="U"
6
+
7
#
# Wraps a Net::IMAP connection and offers the handful of operations the
# feeder needs: appending, finding and deleting messages and managing
# folders.
#
# NOTE(review): the public #send method shadows Object#send for instances of
# this class; use __send__ when dynamic dispatch on a Server is required.
#
class Server

  # Matches a base64 encoded-word for the UTF-8 charset. Charset and
  # encoding markers are matched case-insensitively and the closing "?=" is
  # kept out of the captured payload.
  ENCODED_SUBJECT = /\A=\?utf-8\?b\?(.*?)(?:\?=)?\s*\z/i

  # Connects and logs in. params keys read here: :host, :port, :use_ssl,
  # :user and :pass.
  def initialize(params)
    @connection = Net::IMAP.new(params[:host], params[:port], params[:use_ssl])
    @connection.login(params[:user], params[:pass])
  end

  # Closes the connection if one was opened.
  def disconnect
    @connection.disconnect if @connection
  end

  # Appends +message+ (anything responding to #format) to +folder+, with
  # line endings converted to CRLF and the current time as internal date.
  def send(message, folder="INBOX")
    @connection.select(folder)
    message = message.format.gsub(/\n/, "\r\n")
    @connection.append(folder, message, nil, Time.now)
  end

  # Searches +folder+ for a message whose subject matches +title+ and
  # returns a Message holding the decoded subject and the search result as
  # :id, or nil when nothing was found.
  def retrieve(title, folder="INBOX")
    @connection.examine(folder)
    found = @connection.search(["SUBJECT", title]).first
    return if not found

    imap_header = @connection.fetch([found], "BODY[HEADER.FIELDS (SUBJECT)]")
    raw_subject = imap_header.first.attr["BODY[HEADER.FIELDS (SUBJECT)]"]
    retr_title = raw_subject.gsub(/(^Subject: )|[\n\r]/, "")

    Message.new(:title => base64decode(retr_title), :id => found)
  end

  # Decodes a base64 encoded-word subject; any other subject is returned
  # unchanged. The decoded bytes are tagged as UTF-8 where the runtime
  # supports string encodings.
  def base64decode(subject)
    encoded = subject[ENCODED_SUBJECT, 1]
    return subject unless encoded

    decoded = Base64.decode64(encoded)
    decoded.force_encoding("UTF-8") if decoded.respond_to?(:force_encoding)
    decoded
  end
  private :base64decode

  # True when #retrieve finds a message for +title+ in +folder+.
  def has?(title, folder)
    !retrieve(title, folder).nil?
  end

  # True when +folder+ can be examined, false when the server answers NO.
  def has_folder?(folder)
    @connection.examine(folder)
    true
  rescue Net::IMAP::NoResponseError
    false
  end

  # Creates +folder+ and every missing parent, treating "." as the
  # hierarchy separator.
  def create_folder(folder)
    path = ''
    folder.split('.').each do |part|
      path << part
      @connection.create(path) unless has_folder?(path)
      path << '.'
    end
  end

  # Deletes +folder+ on the server.
  def delete_folder(folder)
    #Switch to root so we can delete the folder
    @connection.examine("INBOX")
    @connection.delete(folder)
  end

  # Flags the message identified by message.id as deleted in +folder+ and
  # expunges the folder.
  def delete(message, folder="INBOX")
    @connection.select(folder)
    @connection.store(message.id, "+FLAGS", [:Deleted])
    @connection.expunge
  end
end
@@ -0,0 +1,9 @@
1
class ImapFeeder #:nodoc:
  # Version of the gem, as separate components and as a dotted string.
  module VERSION #:nodoc:
    MAJOR = 0
    MINOR = 1
    TINY = 0

    STRING = "#{MAJOR}.#{MINOR}.#{TINY}"
  end
end
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
# Project root: the parent of the directory this script lives in.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Load rubigen directly when it is already on the load path; otherwise go
# through RubyGems first.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# A leading help flag is dropped before the arguments are handed on.
ARGV.shift if %w[--help -h].include?(ARGV.first)
RubiGen::Base.use_component_sources!([:rubygems, :newgem, :newgem_theme, :test_unit])
RubiGen::Scripts::Destroy.new.run(ARGV)
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
# Project root: the parent of the directory this script lives in.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Load rubigen directly when it is already on the load path; otherwise go
# through RubyGems first.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# A leading help flag is dropped before the arguments are handed on.
ARGV.shift if %w[--help -h].include?(ARGV.first)
RubiGen::Base.use_component_sources!([:rubygems, :newgem, :newgem_theme, :test_unit])
RubiGen::Scripts::Generate.new.run(ARGV)
@@ -0,0 +1,77 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rubygems'
4
+ begin
5
+ require 'newgem'
6
+ rescue LoadError
7
+ puts "\n\nGenerating the website requires the newgem RubyGem"
8
+ puts "Install: gem install newgem\n\n"
9
+ exit(1)
10
+ end
11
+ require 'redcloth'
12
+ require 'syntax/convertors/html'
13
+ require 'erb'
14
+ require File.dirname(__FILE__) + '/../lib/imap-feeder/version.rb'
15
+
16
+ version = ImapFeeder::VERSION::STRING
17
+ download = 'http://rubyforge.org/projects/imap-feeder'
18
+
19
# Adds an English ordinal suffix to integers.
#
# Reopened on Integer rather than Fixnum: Fixnum inherits from Integer on
# old interpreters and no longer exists as a separate class on current ones,
# so this definition works on both. The `when N then` form is used because
# the `when N:` colon form is only accepted by Ruby 1.8.
class Integer
  # Returns 'st', 'nd', 'rd' or 'th' for this number (1 -> 'st', 12 -> 'th').
  def ordinal
    # teens
    return 'th' if (10..19).include?(self % 100)
    # others
    case self % 10
    when 1 then 'st'
    when 2 then 'nd'
    when 3 then 'rd'
    else 'th'
    end
  end
end
32
+
33
class Time
  # Day of month with ordinal suffix, full month name and year,
  # e.g. "4th March 2008". Relies on an #ordinal method on integers.
  def pretty
    day_with_suffix = "#{mday}#{mday.ordinal}"
    [day_with_suffix, strftime('%B'), year].join(' ')
  end
end
38
+
39
# Highlights +source+ as +syntax+ and returns the generated HTML with a
# <pre> at the start of a line and a </pre> at the end of a line removed.
def convert_syntax(syntax, source)
  converter = Syntax::Convertors::HTML.for_syntax(syntax)
  highlighted = converter.convert(source)
  highlighted.gsub(%r!^<pre>|</pre>$!, '')
end
42
+
43
# Command line: source.txt [template.rhtml]. Without a template argument the
# website template shipped next to this script is used.
if ARGV.length >= 1
  src, template = ARGV
  template ||= File.join(File.dirname(__FILE__), '/../website/template.rhtml')
else
  puts("Usage: #{File.split($0).last} source.txt [template.rhtml] > output.html")
  exit!
end

# File.read closes the file again; File.open(...).read left the handle open.
template = ERB.new(File.read(template))

# NOTE: title, body, created and modified (like version and download earlier
# in this script) are handed to the template through `binding` at the end, so
# these local names must not be renamed.
title = nil
body = nil
File.open(src) do |fsrc|
  # The first line is the page title, the rest is the Textile body.
  title_text = fsrc.readline.gsub(/^=/, "")
  body_text = fsrc.read
  syntax_items = []
  # Replace every <pre>/<code> element carrying a syntax attribute by a
  # placeholder, keeping the highlighted HTML aside so RedCloth cannot
  # touch it.
  body_text.gsub!(%r!<(pre|code)[^>]*?syntax=['"]([^'"]+)[^>]*>(.*?)</\1>!m){
    ident = syntax_items.length
    element, syntax, source = $1, $2, $3
    syntax_items << "<#{element} class='syntax'>#{convert_syntax(syntax, source)}</#{element}>"
    "syntax-temp-#{ident}"
  }
  # Translate the lightweight heading and list markers to Textile.
  body_text.gsub!(/^===/, "h3.")
  body_text.gsub!(/^==/, "h2.")
  body_text.gsub!(/^- /, "* ")
  title = RedCloth.new(title_text).to_html.gsub(%r!<.*?>!,'').strip
  body = RedCloth.new(body_text).to_html
  # Put the highlighted snippets back in place of their placeholders.
  body.gsub!(%r!(?:<pre><code>)?syntax-temp-(\d+)(?:</code></pre>)?!){ syntax_items[$1.to_i] }
end
stat = File.stat(src)
created = stat.ctime
modified = stat.mtime

$stdout << template.result(binding)
@@ -0,0 +1,58 @@
1
+ # Configuration for imap-feeder
2
+
3
+ # IMAP connection settings.
4
+ $host = ""
5
+ $user = ""
6
+ $pass = ""
7
+
8
+ # If you want to use a secure connection using ssl/tls, then uncomment the
9
+ # following two declarations.
10
+ #$port = "993"
11
+ #$use_ssl = true
12
+
13
+ # Base directory for imap-feeder
14
+ BASEDIR="#{ENV['HOME']}/.imap-feeder"
15
+
16
+ # We use html-tidy to improve the structure of the feed
17
+ $tidy_path = "/usr/lib/libtidy.so"
18
+
19
+
20
+
21
+ # imap-feeder needs a file to store the last message for each feed
22
+ # so it doesn't fetch old entries. Where should that file be?
23
+ $temp = "#{BASEDIR}/processed_feeds.yml"
24
+
25
+ # The configuration file that was generated
26
+ $config = "#{BASEDIR}/feeds.yml"
27
+
28
+ # The file that contains information about the currently running instance
29
+ $running_instance = "#{BASEDIR}/running_instance"
30
+
31
+ # Configuration for the logger (from `ri Logger.new`):
32
+ #
33
+ #------------------------------------------------------------ Logger::new
34
+ # Logger::new(logdev, shift_age = 0, shift_size = 1048576)
35
+ #------------------------------------------------------------------------
36
+ # Synopsis
37
+ # Logger.new(name, shift_age = 7, shift_size = 1048576)
38
+ # Logger.new(name, shift_age = 'weekly')
39
+ #
40
+ # Args
41
+ # +logdev+: The log device. This is a filename (String) or IO
42
+ # object (typically +STDOUT+, +STDERR+, or an open
43
+ # file).
44
+ #
45
+ # +shift_age+: Number of old log files to keep, *or* frequency of
46
+ # rotation (+daily+, +weekly+ or +monthly+).
47
+ #
48
+ # +shift_size+: Maximum logfile size (only applies when +shift_age+
49
+ # is a number).
50
+ #
51
# Logger is not loaded by default; requiring it here keeps this settings
# file loadable on its own. The require is a no-op when the library has
# already been loaded.
require 'logger'
$log = Logger.new "#{BASEDIR}/log.txt", 10

# The level can be set to: DEBUG < INFO < WARN < ERROR < FATAL
$log.level = Logger::WARN

# One line per entry: severity, timestamp and message.
$log.formatter = proc { |severity, datetime, progname, msg|
  "#{severity} (#{datetime}): #{msg}\n"
}