imap-feeder 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. data.tar.gz.sig +0 -0
  2. data/.project +23 -0
  3. data/History.txt +4 -0
  4. data/License.txt +341 -0
  5. data/Manifest.txt +48 -0
  6. data/README.txt +77 -0
  7. data/Rakefile +4 -0
  8. data/TODO.txt +11 -0
  9. data/bin/imap-feeder +68 -0
  10. data/config/hoe.rb +77 -0
  11. data/config/requirements.rb +17 -0
  12. data/lib/imap-feeder.rb +87 -0
  13. data/lib/imap-feeder/createconfigparser.rb +40 -0
  14. data/lib/imap-feeder/fakeserver.rb +23 -0
  15. data/lib/imap-feeder/feedfolder.rb +23 -0
  16. data/lib/imap-feeder/feedreader.rb +81 -0
  17. data/lib/imap-feeder/imapfeederconfig.rb +74 -0
  18. data/lib/imap-feeder/message.rb +152 -0
  19. data/lib/imap-feeder/messagestore.rb +35 -0
  20. data/lib/imap-feeder/opmlreader.rb +50 -0
  21. data/lib/imap-feeder/server.rb +77 -0
  22. data/lib/imap-feeder/version.rb +9 -0
  23. data/script/destroy +14 -0
  24. data/script/generate +14 -0
  25. data/script/txt2html +77 -0
  26. data/settings.rb.example +58 -0
  27. data/setup.rb +1585 -0
  28. data/tasks/deployment.rake +34 -0
  29. data/tasks/environment.rake +7 -0
  30. data/tasks/website.rake +15 -0
  31. data/test/data/encoded.rss +22 -0
  32. data/test/data/erroneous.yml +7 -0
  33. data/test/data/last_messages.yaml +0 -0
  34. data/test/data/rss20_no_body.xml +10 -0
  35. data/test/data/rss20_one_entry.xml +11 -0
  36. data/test/data/rss20_two_entries.xml +17 -0
  37. data/test/data/rss20_with_authors.xml +21 -0
  38. data/test/data/simple.opml +9 -0
  39. data/test/functional_test_server.rb +95 -0
  40. data/test/test_feedreader.rb +92 -0
  41. data/test/test_imap-feeder.rb +66 -0
  42. data/test/test_imapfeederconfig.rb +56 -0
  43. data/test/test_message.rb +312 -0
  44. data/test/test_messagestore.rb +54 -0
  45. data/test/test_opmlreader.rb +73 -0
  46. data/test/testlogger.rb +22 -0
  47. data/website/javascripts/rounded_corners_lite.inc.js +285 -0
  48. data/website/stylesheets/screen.css +137 -0
  49. data/website/template.rhtml +49 -0
  50. metadata +175 -0
  51. metadata.gz.sig +0 -0
@@ -0,0 +1,152 @@
1
+ require 'base64'
2
+ require 'time'
3
+ require 'action_mailer'
4
+ require 'hpricot'
5
+ require 'tidy'
6
+ require 'htmlentities'
7
+ require 'digest/md5'
8
+
9
+ $KCODE="U"
10
+
11
#
# A single feed entry that can be rendered as a plain-text mail message.
#
# All values are derived lazily from the +params+ hash handed to the
# constructor. Keys read by this class: :title, :from, :body, :url, :time
# and :id.
#
class Message

  include ActionMailer::Quoting

  #
  # params:: hash describing the entry (see the class comment for the keys)
  #
  def initialize(params)
    @params = params
  end

  #
  # Identifier supplied as params[:id], or nil when none was given. Defined
  # explicitly so that callers asking for message.id receive the stored value.
  #
  def id
    @params[:id]
  end

  #
  # Entity-decoded title with every line break replaced by a space; "" when
  # no title was given.
  #
  def title
    @title ||= (dec((@params[:title] || "").gsub(/(\r\n)|\r|\n/, " ")) || "")
  end

  #
  # Entity-decoded sender, or a placeholder address when none was given.
  #
  def from
    @from ||= (dec(@params[:from]) || "Unknown <spam@example.org>")
  end

  #
  # Plain-text body, produced from params[:body], falling back to
  # params[:url] and finally to the empty string.
  #
  def body
    @body ||= strip_html(@params[:body] || @params[:url] || "")
  end

  #
  # RFC 2822 formatted timestamp; the current local time when params[:time]
  # is missing.
  #
  def time
    @time ||= (@params[:time] || Time.now.localtime).rfc2822
  end

  #
  # Passes every run of characters outside the allowed set to
  # quote_if_necessary (from ActionMailer::Quoting). Returns "" for nil.
  #
  def quote(str)
    return "" if not str
    # NOTE(review): " -_" inside the character class is a range (space up to
    # underscore), not three literal characters -- confirm this is intended.
    str.gsub(/[^a-zA-Z0-9 -_:,\.]+/) { |to_quote| quote_if_necessary(to_quote, "UTF-8") }
  end

  #
  # Title plus the MD5 hex digest of the body, cached after the first call.
  #
  def generate_identifier
    @cached_identifier ||= "#{title}##{Digest::MD5.hexdigest(body())}"
  end

  #
  # Renders the message as header lines, an empty line and the body. When a
  # URL was given it is appended to the body after an empty line.
  #
  def format
    url = @params[:url]
    headers = [
      "Date: #{time}",
      "Subject: #{quote(title)}",
      "From: #{quote(from)}",
      "Content-Type: text/plain;",
      # A folded header continuation has to start with whitespace.
      "  charset=\"utf-8\"",
      "Content-Transfer-Encoding: 8bit"
    ]
    "#{headers.join("\n")}\n\n#{body}#{"\n\n" + url if url}\n"
  end

  private

  #
  # Swaps every +element+ found in +doc+ for the value returned by the block,
  # or for the element's inner HTML when no block is given.
  #
  def replace(doc, element)
    doc.search(element) do |found|
      replacement = block_given? ? yield(found) : found.innerHTML
      found.swap(replacement)
    end
  end

  #
  # Cleans +body+ with Tidy and strips the surrounding html/body wrapper.
  # Returns +body+ untouched when setting the Tidy library path raises a
  # LoadError.
  #
  def tidy(body)
    begin
      Tidy.path = $tidy_path unless Tidy.path
    rescue LoadError => e
      # Logger's method is #warn; #warning does not exist.
      $log.warn "Tidy not available: #{e.message}"
      return body
    end

    tidy_html = Tidy.open(:show_warnings => true) do |cleaner|
      cleaner.options.markup = true
      cleaner.options.wrap = 0
      cleaner.options.logical_emphasis = true
      cleaner.options.drop_font_tags = true
      cleaner.options.output_encoding = "utf8"
      cleaner.options.input_encoding = "utf8"
      cleaner.options.doctype = "omit"
      cleaner.clean(body)
    end
    tidy_html.strip!
    tidy_html.gsub!(/^<html>(.|\n)*<body>/, "")
    tidy_html.gsub!(/<\/body>(.|\n)*<\/html>$/, "")
    tidy_html.gsub!("\t", " ")
    tidy_html
  end

  #
  # Decodes HTML entities and strips surrounding whitespace; nil stays nil.
  #
  def dec(html)
    HTMLEntities.decode_entities(html).strip if html
  end

  #
  # Converts an HTML fragment to plain text: simple markup is replaced by
  # textual equivalents, links are numbered and listed at the end, and runs
  # of three or more line breaks are collapsed.
  #
  def strip_html(body)
    doc = Hpricot(tidy(dec(body)))

    replace(doc, 'p') { |paragraph| "\n#{paragraph.innerHTML}\n" }
    replace(doc, 'strong') { |strong| "*#{strong.innerHTML}*" }
    replace(doc, 'b') { |bold| "*#{bold.innerHTML}*" }
    replace(doc, 'em') { |em| "*#{em.innerHTML}*" }
    replace(doc, 'li') { |li| "- #{li.innerHTML}" }
    replace(doc, 'i')
    replace(doc, 'ol')
    replace(doc, 'ul')
    # NOTE(review): 'abr' is presumably a typo; <abbr> is handled below.
    replace(doc, 'abr')
    replace(doc, 'font')
    replace(doc, 'span')
    replace(doc, 'div')
    replace(doc, 'br') { |br| "\n" }
    replace(doc, 'img') { |img| img.attributes['alt'] || "" }
    replace(doc, 'abbr') do |abbr|
      # Only append the expansion when a title attribute is present, so that
      # an abbreviation without one is not followed by an empty " ()".
      expansion = abbr.attributes['title']
      expansion.to_s.empty? ? abbr.innerHTML : "#{abbr.innerHTML} (#{expansion})"
    end

    urls = gather_urls(doc)

    body = doc.to_html

    unless urls.empty?
      body << "\n"
      # Pad every "[n]" marker to the width of the widest index.
      max_length = "[0]".length + Math.log10(urls.size).floor + 1
      urls.each_with_index do |url, i|
        index = i + 1
        str = "\n[#{index}]".ljust max_length
        body << "#{str} #{url}"
      end
    end

    # Remaining anchors (those not numbered above) are reduced to their text.
    doc = Hpricot(body)
    replace(doc, 'a')
    body = doc.to_html

    #sanitize newlines
    body.gsub!(/(\n\s*){3,}/, "\n\n")

    dec(body)
  end

  #
  # Collects the targets of all links that have a host and whose text differs
  # from the target, replacing each such link by its text followed by "[n]".
  # Returns the collected URIs in document order.
  #
  def gather_urls(doc)
    urls = []
    doc.search('a') do |link|
      href = URI(link.attributes['href']) rescue nil
      next unless href && href.host
      next if link.innerHTML.strip == href.to_s.strip
      urls << href
      link.swap(link.innerHTML.strip + "[#{urls.length}]")
    end
    urls
  end
end
@@ -0,0 +1,35 @@
1
+ require 'yaml'
2
+
3
#
# Keeps, per folder, a list of already processed message titles and persists
# that list as YAML.
#
class MessageStore

  MESSAGES_TO_STORE = 100

  #
  # Reads the stored lists from +file+. A missing file is logged as a warning
  # and results in an empty store; an empty file also yields an empty store.
  #
  def initialize(file)
    @file = file
    @root = {}
    unless File.exist?(@file)
      $log.warn "#{file} does not exist, creating a new one."
      return
    end
    @root = File.open(@file) { |stream| YAML.load(stream) } || {}
  end

  #
  # Puts +titles+ in front of the list kept for +folder+, cuts the list off
  # after twice +number_of_entries+ items and drops nil entries.
  #
  def add_new(folder, titles, number_of_entries = MESSAGES_TO_STORE)
    stored = (@root[folder] ||= [])
    stored.unshift(*titles)
    limit = number_of_entries * 2
    stored.slice!(limit..-1)
    stored.compact!
  end

  #
  # Returns the list kept for +folder+, or an empty array for unknown folders.
  #
  def get_archived(folder)
    @root[folder] || []
  end

  #
  # Writes the complete store back to the file it was created with.
  #
  def save
    File.open(@file, "w") { |stream| YAML.dump(@root, stream) }
  end
end
@@ -0,0 +1,50 @@
1
+ require 'rexml/document'
2
+ require 'rexml/parsers/sax2parser'
3
+ require 'rexml/sax2listener'
4
+
5
+ require 'imap-feeder/feedfolder'
6
+
7
#
# Defines the characters that can be used as part of an IMAP Folder
#
IMAP_CHARS = "\\w:,\\-= "

#
# Parses an OPML file and extracts the folders and urls of the feeds. A
# tree-like structure is built from FeedFolders, with FeedUrls as leaves.
#
class OpmlReader

  #
  # Entry point for the parsing process. Takes the file content as a string
  # and returns the root folder.
  #
  # Raises ArgumentError when the document has no opml/body element.
  #
  def self.get(file)
    opml = REXML::Document.new(file)
    body = opml.elements['opml/body']
    raise ArgumentError, "OPML document has no <body> element" unless body
    parse_opml(body)
  end

  # The helpers below are internal to the parsing process. They are defined
  # with "def self.", which a bare "private" does not affect, so they remain
  # callable from outside.

  #
  # Replaces every run of disallowed characters in the folder name with "_".
  # A missing name (nil) is treated as the empty string.
  #
  def self.replace_bad_chars(name)
    name.to_s.gsub(/[^#{IMAP_CHARS}]+/, "_")
  end

  #
  # Parses the given node and recursively traverses through the children.
  # Outlines carrying an isOpen attribute or type="folder" become sub folders
  # of +folder+; all other outlines become feed urls. Returns +folder+.
  #
  def self.parse_opml(opml_node, folder = FeedFolder.new(""))
    opml_node.elements.each('outline') do |element|
      attrs = element.attributes
      if attrs['isOpen'] != nil || attrs['type'] == "folder"
        child_folder = FeedFolder.new(replace_bad_chars(attrs['text'] || attrs['title']))
        folder.add_sub(child_folder)
        parse_opml(element, child_folder)
      else
        # "title" is optional in OPML while "text" is required, so fall back
        # to "text" instead of failing on a feed without a title.
        label = attrs['title'] || attrs['text']
        folder.add_url(FeedUrl.new(replace_bad_chars(label), attrs['xmlUrl']))
      end
    end
    folder
  end
end
@@ -0,0 +1,77 @@
1
+ require 'base64'
2
+ require 'net/imap'
3
+ require 'imap-feeder/message'
4
+
5
+ $KCODE="U"
6
+
7
#
# Thin wrapper around a Net::IMAP connection offering the operations needed
# to store, find and delete messages and folders.
#
class Server

  #
  # Connects and logs in. +params+ is a hash with the keys :host, :port,
  # :use_ssl, :user and :pass.
  #
  def initialize(params)
    @connection = Net::IMAP.new(params[:host], params[:port], params[:use_ssl])
    @connection.login(params[:user], params[:pass])
  end

  #
  # Closes the connection, if one was established.
  #
  def disconnect
    @connection.disconnect if @connection
  end

  #
  # Appends the formatted +message+ to +folder+, using CRLF line endings and
  # the current time as the internal date.
  #
  def send(message, folder="INBOX")
    @connection.select(folder)
    message = message.format.gsub(/\n/, "\r\n")
    @connection.append(folder, message, nil, Time.now)
  end

  #
  # Searches +folder+ for a message whose subject matches +title+ and returns
  # it as a Message carrying the decoded subject and the id found by the
  # search. Returns nil when nothing matches.
  #
  def retrieve(title, folder="INBOX")
    @connection.examine(folder)
    found = @connection.search(["SUBJECT", title]).first
    return if not found

    imap_header = @connection.fetch([found], "BODY[HEADER.FIELDS (SUBJECT)]")
    raw_title = imap_header.first.attr["BODY[HEADER.FIELDS (SUBJECT)]"]
    # Work on a copy instead of mutating the string owned by the response.
    retr_title = raw_title.gsub(/(^Subject: )|[\n\r]/, "")

    Message.new(:title => base64decode(retr_title), :id => found)
  end

  #
  # Decodes a subject given as a base64 encoded-word ("=?utf-8?b?...?=").
  # The charset and encoding markers are matched case-insensitively and the
  # closing "?=" is not passed to the decoder. Any other subject is returned
  # unchanged.
  #
  def base64decode(subject)
    encoded = subject[/^=\?utf-8\?b\?(.*?)(?:\?=)?$/i, 1]
    encoded ? Base64.decode64(encoded) : subject
  end
  private :base64decode

  #
  # True when a message with the given title can be retrieved from +folder+.
  #
  def has?(title, folder)
    retrieve(title, folder) != nil
  end

  #
  # True when +folder+ can be examined, false when the server answers NO.
  #
  def has_folder?(folder)
    @connection.examine(folder)
    true
  rescue Net::IMAP::NoResponseError
    false
  end

  #
  # Creates +folder+ including all missing parents. The path is split and
  # re-joined on ".".
  #
  def create_folder(folder)
    path = ''
    folder.split('.').each do |part|
      path << part
      @connection.create(path) unless has_folder?(path)
      path << '.'
    end
  end

  #
  # Deletes +folder+.
  #
  def delete_folder(folder)
    #Switch to root so we can delete the folder
    @connection.examine("INBOX")
    @connection.delete(folder)
  end

  #
  # Flags +message+ as deleted in +folder+ and expunges the folder.
  #
  def delete(message, folder="INBOX")
    @connection.select(folder)
    # NOTE(review): relies on message.id returning the id that #retrieve
    # passed to Message.new -- confirm Message exposes it.
    @connection.store(message.id, "+FLAGS", [:Deleted])
    @connection.expunge
  end
end
@@ -0,0 +1,9 @@
1
class ImapFeeder #:nodoc:
  # Version number of the gem, as separate components and as a dotted string.
  module VERSION #:nodoc:
    MAJOR, MINOR, TINY = 0, 1, 0

    STRING = "#{MAJOR}.#{MINOR}.#{TINY}"
  end
end
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs the RubiGen "destroy" script for this project.

# Directory one level above the one containing this script.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Try a plain require first; load RubyGems and retry when that fails.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/destroy'

# A leading help flag is dropped before the arguments are handed on.
ARGV.shift if %w[--help -h].include?(ARGV.first)

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Destroy.new.run(ARGV)
@@ -0,0 +1,14 @@
1
#!/usr/bin/env ruby
# Runs the RubiGen "generate" script for this project.

# Directory one level above the one containing this script.
APP_ROOT = File.expand_path('..', File.dirname(__FILE__))

# Try a plain require first; load RubyGems and retry when that fails.
begin
  require 'rubigen'
rescue LoadError
  require 'rubygems'
  require 'rubigen'
end
require 'rubigen/scripts/generate'

# A leading help flag is dropped before the arguments are handed on.
ARGV.shift if %w[--help -h].include?(ARGV.first)

component_sources = [:rubygems, :newgem, :newgem_theme, :test_unit]
RubiGen::Base.use_component_sources!(component_sources)
RubiGen::Scripts::Generate.new.run(ARGV)
@@ -0,0 +1,77 @@
1
#!/usr/bin/env ruby
# Converts a text source file into an HTML page for the project website.

require 'rubygems'

# newgem is required for site generation; explain how to get it and stop
# with exit status 1 when it cannot be loaded.
begin
  require 'newgem'
rescue LoadError
  puts "\n\nGenerating the website requires the newgem RubyGem",
       "Install: gem install newgem\n\n"
  exit(1)
end

require 'redcloth'
require 'syntax/convertors/html'
require 'erb'
require File.join(File.dirname(__FILE__), '..', 'lib', 'imap-feeder', 'version.rb')

# NOTE(review): these two locals are not used by the script itself;
# presumably the template reads them through the binding -- confirm.
version  = ImapFeeder::VERSION::STRING
download = 'http://rubyforge.org/projects/imap-feeder'
18
+
19
# Adds an English ordinal suffix lookup to integers. Defined on Integer
# rather than Fixnum so it works both where Fixnum is a subclass of Integer
# and where Fixnum no longer exists.
class Integer
  # Returns the suffix ('st', 'nd', 'rd' or 'th') that turns this number
  # into an ordinal, e.g. 1 -> 'st', 12 -> 'th', 23 -> 'rd'.
  def ordinal
    # teens
    return 'th' if (10..19).include?(self % 100)
    # others
    case self % 10
    when 1 then 'st'
    when 2 then 'nd'
    when 3 then 'rd'
    else 'th'
    end
  end
end
32
+
33
class Time
  # Formats the date as day with ordinal suffix, full month name and year,
  # separated by single spaces.
  def pretty
    day = mday
    [day.to_s + day.ordinal, strftime('%B'), year].join(' ')
  end
end
38
+
39
# Highlights +source+ with the HTML convertor registered for +syntax+ and
# removes a <pre> at the start of a line and a </pre> at the end of a line
# from the result.
def convert_syntax(syntax, source)
  convertor = Syntax::Convertors::HTML.for_syntax(syntax)
  highlighted = convertor.convert(source)
  highlighted.gsub(%r!^<pre>|</pre>$!, '')
end
42
+
43
# Command line: source.txt [template.rhtml]; the generated page is written
# to standard output.
if ARGV.empty?
  # The usage text goes to stderr so that it never ends up in a redirected
  # output file; abort exits with status 1.
  abort("Usage: #{File.split($0).last} source.txt [template.rhtml] > output.html")
end

src, template = ARGV
template ||= File.join(File.dirname(__FILE__), '/../website/template.rhtml')

# File.read closes the template file again, unlike File.open(...).read.
template = ERB.new(File.read(template))

# title and body are declared here so the assignments made inside the block
# below stay visible afterwards.
title = nil
body = nil
File.open(src) do |fsrc|
  # The first line is the title (minus a leading "="), the rest is the body.
  title_text = fsrc.readline.gsub(/^=/, "")
  body_text = fsrc.read

  # Replace every <pre>/<code> element carrying a syntax attribute with a
  # placeholder and remember the highlighted markup for later.
  syntax_items = []
  body_text.gsub!(%r!<(pre|code)[^>]*?syntax=['"]([^'"]+)[^>]*>(.*?)</\1>!m){
    ident = syntax_items.length
    element, syntax, source = $1, $2, $3
    syntax_items << "<#{element} class='syntax'>#{convert_syntax(syntax, source)}</#{element}>"
    "syntax-temp-#{ident}"
  }

  # Translate the heading and list markers into the markup RedCloth expects.
  body_text.gsub!(/^===/, "h3.")
  body_text.gsub!(/^==/, "h2.")
  body_text.gsub!(/^- /, "* ")

  title = RedCloth.new(title_text).to_html.gsub(%r!<.*?>!,'').strip
  body = RedCloth.new(body_text).to_html

  # Put the highlighted snippets back in place of their placeholders.
  body.gsub!(%r!(?:<pre><code>)?syntax-temp-(\d+)(?:</code></pre>)?!){ syntax_items[$1.to_i] }
end

# NOTE(review): created and modified are not used below; presumably the
# template reads them (and title/body) through the binding -- confirm.
stat = File.stat(src)
created = stat.ctime
modified = stat.mtime

$stdout << template.result(binding)
@@ -0,0 +1,58 @@
1
# Configuration for imap-feeder

# IMAP connection settings.
$host = ""
$user = ""
$pass = ""

# If you want to use a secure connection using ssl/tls, then uncomment the
# following two declarations.
#$port = "993"
#$use_ssl = true

# Base directory for imap-feeder
BASEDIR = "#{ENV['HOME']}/.imap-feeder"

# We use html-tidy to improve the structure of the feed
$tidy_path = "/usr/lib/libtidy.so"

# imap-feeder needs a file to store the last message for each feed
# so it doesn't fetch old entries. Where should that file be?
$temp = File.join(BASEDIR, "processed_feeds.yml")

# The configuration file that was generated
$config = File.join(BASEDIR, "feeds.yml")

# The file that contains information about the currently running instance
$running_instance = File.join(BASEDIR, "running_instance")

# Configuration for the logger (from `ri Logger.new`):
#
#------------------------------------------------------------ Logger::new
# Logger::new(logdev, shift_age = 0, shift_size = 1048576)
#------------------------------------------------------------------------
# Synopsis
# Logger.new(name, shift_age = 7, shift_size = 1048576)
# Logger.new(name, shift_age = 'weekly')
#
# Args
# +logdev+: The log device. This is a filename (String) or IO
# object (typically +STDOUT+, +STDERR+, or an open
# file).
#
# +shift_age+: Number of old log files to keep, *or* frequency of
# rotation (+daily+, +weekly+ or +monthly+).
#
# +shift_size+: Maximum logfile size (only applies when +shift_age+
# is a number).
#
$log = Logger.new(File.join(BASEDIR, "log.txt"), 10)

# The level can be set to: DEBUG < INFO < WARN < ERROR < FATAL
$log.level = Logger::WARN

# One line per entry: severity, timestamp in parentheses, then the message.
$log.formatter = lambda do |severity, datetime, _progname, msg|
  "#{severity} (#{datetime}): #{msg}\n"
end