RubyGems - feed2imap - Versions diffs - 1.2.6 - Mend

feed2imap 1.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (38) hide show

checksums.yaml +7 -0
data/COPYING +340 -0
data/ChangeLog +1 -0
data/README +23 -0
data/Rakefile +75 -0
data/bin/feed2imap +49 -0
data/bin/feed2imap-cleaner +32 -0
data/bin/feed2imap-dumpconfig +42 -0
data/bin/feed2imap-opmlimport +48 -0
data/data/doc/feed2imap/examples/feed2imaprc +72 -0
data/data/man/man1/feed2imap-cleaner.1 +43 -0
data/data/man/man1/feed2imap-dumpconfig.1 +28 -0
data/data/man/man1/feed2imap-opmlimport.1 +27 -0
data/data/man/man1/feed2imap.1 +42 -0
data/data/man/man5/feed2imaprc.5 +29 -0
data/lib/feed2imap.rb +1 -0
data/lib/feed2imap/cache.rb +302 -0
data/lib/feed2imap/config.rb +167 -0
data/lib/feed2imap/feed2imap.rb +297 -0
data/lib/feed2imap/html2text-parser.rb +99 -0
data/lib/feed2imap/httpfetcher.rb +122 -0
data/lib/feed2imap/imap.rb +166 -0
data/lib/feed2imap/itemtomail.rb +129 -0
data/lib/feed2imap/maildir.rb +188 -0
data/lib/feed2imap/rexml_patch.rb +47 -0
data/lib/feed2imap/sgml-parser.rb +333 -0
data/lib/feed2imap/version.rb +3 -0
data/setup.rb +1586 -0
data/test/maildir/cur/1376317520.15784_1.debian:2,S +11 -0
data/test/maildir/cur/1376317520.15789_1.debian:2,S +11 -0
data/test/maildir/cur/1376319137.17850_1.debian:2, +11 -0
data/test/maildir/cur/1376320022.18396_5.debian:2,FS +11 -0
data/test/maildir/new/1376320099.18396_7.debian +11 -0
data/test/tc_cache.rb +82 -0
data/test/tc_config.rb +113 -0
data/test/tc_httpfetcher.rb +72 -0
data/test/tc_maildir.rb +97 -0
metadata +95 -0

data/lib/feed2imap/maildir.rb ADDED

@@ -0,0 +1,188 @@
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server, or local Maildir
+Copyright (c) 2009 Andreas Rottmann
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+=end
+require 'uri'
+require 'fileutils'
+require 'fcntl'
+require 'rmail'
+require 'socket'
# Stores mail messages in local Maildir folders.  A folder is a directory
# holding the three standard subdirectories +new+, +cur+ and +tmp+; they are
# created on demand by every operation.
class MaildirAccount
  # Host name embedded in every generated message file name.
  MYHOSTNAME = Socket.gethostname
  # Counter appended to generated file names; incremented for every name
  # handed out by new_maildir_basefn.
  @@seq_num = 0
  # NOTE(review): nothing in this class assigns @uri, so this reader returns
  # nil unless it is set elsewhere -- confirm against the callers.
  attr_reader :uri

  # Writes +mail+ as a new message into +folder+.
  #
  # folder:: folder path (made absolute by folder_dir)
  # mail::   object written with IO#puts
  # date::   time used for the file name (defaults to now)
  #
  # Returns true when the message was stored, false otherwise.
  def putmail(folder, mail, date = Time::now)
    store_message(folder_dir(folder), date, nil) { |f| f.puts(mail) }
  end

  # Replaces the message(s) whose Message-ID equals +idx+ (with or without
  # angle brackets) by +mail+.  The Maildir info suffix (flags) of the first
  # match is carried over to the replacement.  When no message matches, the
  # mail is stored only if +reupload_if_updated+ is true; otherwise nothing
  # is written and nil is returned.
  def updatemail(folder, mail, idx, date = Time::now, reupload_if_updated = true)
    dir = folder_dir(folder)
    guarantee_maildir(dir)
    mail_files = find_mails(dir, idx)
    info = nil
    if mail_files.empty?
      # mail not present, and we don't want to re-upload it
      return unless reupload_if_updated
    else
      # get the info from the first result and delete everything
      info = maildir_file_info(mail_files.first)
      mail_files.each { |f| File.delete(File.join(dir, f)) }
    end
    store_message(dir, date, info) { |f| f.puts(mail) }
  end

  # String form of the account: the string form of +uri+.
  def to_s
    uri.to_s
  end

  # Removes messages in +cur+ that are seen ('S'), not flagged ('F') and whose
  # file modification time is older than three days.  With +dryrun+ true the
  # candidates are only listed.  Progress is printed to standard output.
  #
  # Returns the number of messages removed (or that would be removed).
  def cleanup(folder, dryrun = false)
    dir = folder_dir(folder)
    puts "-- Considering #{dir}:"
    guarantee_maildir(dir)
    del_count = 0
    recent_time = Time.now() - (3 * 24 * 60 * 60) # 3 days
    Dir[File.join(dir, 'cur', '*')].each do |fn|
      flags = maildir_file_info_flags(fn)
      mtime = File.mtime(fn)
      # don't consider not-seen, flagged, or recent messages
      next if !flags.include?('S') || flags.include?('F') || mtime > recent_time
      mail = File.open(fn) { |f| RMail::Parser.read(f) }
      subject = mail.header['Subject']
      if dryrun
        puts "To remove: #{subject} #{mtime}"
      else
        puts "Removing: #{subject} #{mtime}"
        File.delete(fn)
      end
      del_count += 1
    end
    puts "-- Deleted #{del_count} messages"
    del_count
  end

  private

  # Maps a folder name to its directory by anchoring it at the file system root.
  def folder_dir(folder)
    File.join('/', folder)
  end

  # Creates a message file in +dir+ and yields a writable IO for its content.
  #
  # The file is created exclusively in +tmp+ (retrying with a fresh name every
  # two seconds, for roughly 30 seconds, while the name already exists),
  # synced, closed, hard-linked into +new+ and removed from +tmp+.  When
  # +info+ is given the message is then moved to +cur+ with ":<info>" appended
  # to its name.
  #
  # Returns true when the message was stored, false when no temporary file
  # could be created in time.
  def store_message(dir, date, info, &block)
    guarantee_maildir(dir)
    stored = false
    Dir.chdir(dir) do
      fd = nil
      new_fn = tmp_path = new_path = nil
      timer = 30
      while timer >= 0
        new_fn = new_maildir_basefn(date)
        tmp_path = File.join(dir, 'tmp', new_fn)
        new_path = File.join(dir, 'new', new_fn)
        begin
          fd = IO::sysopen(tmp_path,
                           Fcntl::O_WRONLY | Fcntl::O_EXCL | Fcntl::O_CREAT)
          break
        rescue Errno::EEXIST
          sleep 2
          timer -= 2
        end
      end
      if fd
        begin
          # Block form closes the IO (and its descriptor) even when the
          # caller's block raises; the non-block form used to leak it.
          IO.open(fd) do |f|
            # provide a writable interface for the caller
            yield f
            f.fsync
          end
          File.link(tmp_path, new_path)
          stored = true
        ensure
          File.unlink(tmp_path) if File.exist?(tmp_path)
        end
      end
      if stored && info
        File.rename(new_path, File.join(dir, 'cur', new_fn + ':' + info))
      end
    end # Dir.chdir
    stored
  end

  # Returns the paths (relative to +dir+, e.g. "cur/<name>") of all messages
  # in +cur+ and +new+ whose Message-ID header equals +idx+ or "<idx>".
  # Every message file in both directories is parsed.
  # Raises RuntimeError when one of the two subdirectories is missing.
  def find_mails(dir, idx)
    dir_paths = []
    ['cur', 'new'].each do |d|
      subdir = File.join(dir, d)
      raise "#{subdir} not a directory" unless File.directory? subdir
      Dir[File.join(subdir, '*')].each do |fn|
        File.open(fn) do |f|
          mail = RMail::Parser.read(f)
          cache_index = mail.header['Message-ID']
          if cache_index && (cache_index == idx || cache_index == "<#{idx}>")
            dir_paths.push(File.join(d, File.basename(fn)))
          end
        end
      end
    end
    dir_paths
  end

  # Ensure maildir-folderness: creates +new+, +cur+ and +tmp+ below +dir+.
  def guarantee_maildir(dir)
    ['new', 'cur', 'tmp'].each do |d|
      FileUtils.mkdir_p(File.join(dir, d))
    end
  end

  # Returns the info suffix of a message file name (the text after the last
  # ':' of the base name, e.g. "2,S"), or nil when the name has no ':'.
  def maildir_file_info(file)
    basename = File.basename(file)
    colon = basename.rindex(':')
    colon && basename[colon + 1..-1]
  end

  # Re-written and no longer shamelessly taken from
  # http://gitorious.org/sup/mainline/blobs/master/lib/sup/maildir.rb
  #
  # Builds "<epoch seconds>.<sequence number>.<host name>" and advances the
  # sequence number.
  def new_maildir_basefn(date)
    fn = "#{date.to_i}.#{@@seq_num}.#{MYHOSTNAME}"
    @@seq_num += 1
    fn
  end

  # Returns the flag letters of a message file (the text after the last comma
  # of its info suffix), or '' when there are none.  Only the info suffix of
  # the base name is inspected, so commas in directory names or in the unique
  # part of the file name are never mistaken for flags.
  def maildir_file_info_flags(fn)
    info = maildir_file_info(fn)
    return '' unless info
    parts = info.split(',')
    parts.size > 1 ? parts.last : ''
  end
end

data/lib/feed2imap/rexml_patch.rb ADDED

@@ -0,0 +1,47 @@
+=begin
+Feed2Imap - RSS/Atom Aggregator uploading to an IMAP Server
+Copyright (c) 2005 Lucas Nussbaum <lucas@lucas-nussbaum.net>
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2 of the License, or
+(at your option) any later version.
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+=end
+require 'feedparser'
# Patch for REXML
# Very ugly patch to make REXML error-proof.
# The encoding helpers below replace REXML's own: decoding goes through
# String#encode with invalid and unconvertible characters replaced instead of
# raising, and encoding returns the string untouched.
module REXML
  module Encoding
    # Converts +str+ to the current encoding ('utf-8' when none has been
    # assigned yet).  Bytes that are invalid in the source encoding or have no
    # counterpart in the target encoding are replaced rather than raising
    # Encoding::InvalidByteSequenceError / Encoding::UndefinedConversionError.
    def decode(str)
      target = (defined?(@encoding) && @encoding) || 'utf-8'
      str.encode(target, invalid: :replace, undef: :replace)
    end

    # Returns +str+ unchanged.
    def encode(str)
      str
    end

    # Remembers +enc+ as the current encoding; nil selects 'utf-8'.
    # Does nothing when +enc+ is already the current encoding.
    def encoding=(enc)
      # Parentheses matter: `defined? a && b` would test the whole expression.
      return if defined?(@encoding) && enc == @encoding
      @encoding = enc || 'utf-8'
    end
  end

  class Element
    # Exposes the element's @children instance variable.
    def children
      @children
    end
  end
end

data/lib/feed2imap/sgml-parser.rb ADDED

@@ -0,0 +1,333 @@
+# A parser for SGML, using the derived class as static DTD.
+# from http://raa.ruby-lang.org/project/html-parser
# Incremental, callback-driven SGML tokenizer.  Subclasses act as the DTD:
# a tag "foo" is dispatched to start_foo(attrs)/end_foo when start_foo exists
# (the tag is then tracked on the open-element stack), to do_foo(attrs) when
# only that exists (no stack entry), and to unknown_starttag/unknown_endtag
# otherwise.  Text, comments, declarations and references are reported
# through the handle_* / unknown_* hooks, which do nothing by default.
class SGMLParser
  # Regular expressions used for parsing:
  Interesting = /[&<]/
  Incomplete = Regexp.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|' +
                              '<([a-zA-Z][^<>]*|/([a-zA-Z][^<>]*)?|' +
                              '![^<>]*)?')
  Entityref = /&([a-zA-Z][-.a-zA-Z0-9]*)[^-.a-zA-Z0-9]/
  Charref = /&#([0-9]+)[^0-9]/
  Starttagopen = /<[>a-zA-Z]/
  Endtagopen = /<\/[<>a-zA-Z]/
  Endbracket = /[<>]/
  Special = /<![^<>]*>/
  Commentopen = /<!--/
  Commentclose = /--[ \t\n]*>/
  Tagfind = /[a-zA-Z][a-zA-Z0-9.-]*/
  Attrfind = Regexp.compile('[\s,]*([a-zA-Z_][a-zA-Z_0-9.-]*)' +
                            '(\s*=\s*' +
                            "('[^']*'" +
                            '|"[^"]*"' +
                            '|[-~a-zA-Z0-9,./:+*%?!()_#=]*))?')
  # Entity names resolved by handle_entityref.
  Entitydefs =
    {'lt'=>'<', 'gt'=>'>', 'amp'=>'&', 'quot'=>'"', 'apos'=>'\''}

  # verbose:: when true, report_unbalanced prints diagnostics.
  def initialize(verbose=false)
    @verbose = verbose
    reset
  end

  # Discards buffered input and returns the parser to its initial state.
  def reset
    @rawdata = ''
    @stack = []
    @lasttag = '???'
    @nomoretags = false
    @literal = false
  end

  # True when an element named +gi+ is currently open.
  def has_context(gi)
    @stack.include? gi
  end

  # Treats all remaining input as plain data.
  def setnomoretags
    @nomoretags = true
    @literal = true
  end

  # Enters literal mode: start tags, comments and declarations are passed
  # through as data until the next end tag.  Arguments are ignored.
  def setliteral(*args)
    @literal = true
  end

  # Appends +data+ to the buffer and parses as much of it as is complete.
  def feed(data)
    @rawdata << data
    goahead(false)
  end

  # Flushes everything still buffered, treating it as final input.
  def close
    goahead(true)
  end

  # Core scanning loop.  Consumes @rawdata from the front, dispatching to the
  # parse_* / handle_* methods, and keeps the unparsed tail in @rawdata.
  # With +_end+ true, whatever could not be parsed is emitted as data.
  def goahead(_end)
    rawdata = @rawdata
    i = 0
    n = rawdata.length
    while i < n
      if @nomoretags
        handle_data(rawdata[i..(n-1)])
        i = n
        break
      end
      # Emit the plain text up to the next '&' or '<'.
      j = rawdata.index(Interesting, i)
      j = n unless j
      if i < j
        handle_data(rawdata[i..(j-1)])
      end
      i = j
      break if (i == n)
      if rawdata[i] == '<'
        if rawdata.index(Starttagopen, i) == i
          if @literal
            handle_data(rawdata[i, 1])
            i += 1
            next
          end
          k = parse_starttag(i)
          break unless k # tag not complete yet: wait for more input
          i = k
          next
        end
        if rawdata.index(Endtagopen, i) == i
          k = parse_endtag(i)
          break unless k
          i = k
          @literal = false
          next
        end
        if rawdata.index(Commentopen, i) == i
          if @literal
            handle_data(rawdata[i,1])
            i += 1
            next
          end
          k = parse_comment(i) # returns a length, not a position
          break unless k
          i += k
          next
        end
        if rawdata.index(Special, i) == i
          if @literal
            handle_data(rawdata[i, 1])
            i += 1
            next
          end
          k = parse_special(i) # returns a length, not a position
          break unless k
          i += k
          next
        end
      elsif rawdata[i] == '&'
        if rawdata.index(Charref, i) == i
          i += $&.length
          handle_charref($1)
          # The pattern swallows one terminating character; give it back
          # unless it was the ';' that belongs to the reference.
          i -= 1 unless rawdata[i-1] == ';'
          next
        end
        if rawdata.index(Entityref, i) == i
          i += $&.length
          handle_entityref($1)
          i -= 1 unless rawdata[i-1] == ';'
          next
        end
      else
        raise RuntimeError, 'neither < nor & ??'
      end
      # We get here only if incomplete matches but
      # nothing else
      match = rawdata.index(Incomplete, i)
      unless match == i
        handle_data(rawdata[i, 1])
        i += 1
        next
      end
      j = match + $&.length
      break if j == n # Really incomplete
      handle_data(rawdata[i..(j-1)])
      i = j
    end
    # end while
    if _end && i < n
      handle_data(@rawdata[i..(n-1)])
      i = n
    end
    @rawdata = rawdata[i..-1]
  end

  # Parses the comment starting at index +i+ and passes its text to
  # handle_comment.  Returns the number of characters consumed, or nil when
  # the closing "-->" has not arrived yet.
  # Raises RuntimeError when +i+ does not point at "<!--".
  def parse_comment(i)
    rawdata = @rawdata
    if rawdata[i, 4] != '<!--'
      raise RuntimeError, 'unexpected call to handle_comment'
    end
    match = rawdata.index(Commentclose, i)
    return nil unless match
    matched_length = $&.length
    j = match
    handle_comment(rawdata[i+4..(j-1)])
    j = match + matched_length
    return j-i
  end

  # Parses the start tag at index +i+, collects its attributes as
  # [name, value] pairs (names lower-cased, surrounding quotes removed, ''
  # for attributes without a value) and dispatches via finish_starttag.
  # Returns the index just past the tag, or nil when the tag is incomplete.
  def parse_starttag(i)
    rawdata = @rawdata
    j = rawdata.index(Endbracket, i + 1)
    return nil unless j
    attrs = []
    if rawdata[i+1] == '>'
      # SGML shorthand: <> == <last open tag seen>
      k = j
      tag = @lasttag
    else
      match = rawdata.index(Tagfind, i + 1)
      unless match
        raise RuntimeError, 'unexpected call to parse_starttag'
      end
      k = i + 1 + ($&.length)
      tag = $&.downcase
      @lasttag = tag
    end
    while k < j
      # The attribute must start exactly at k.  An unanchored search could
      # match text after the closing bracket (e.g. "<br />hello" used to
      # yield an attribute named "hello") and leave k out of step.
      break unless rawdata.index(Attrfind, k) == k
      matched_length = $&.length
      attrname, rest, attrvalue = $1, $2, $3
      if !rest
        attrvalue = '' # was: = attrname
      elsif (attrvalue[0] == "'" && attrvalue[-1] == "'") ||
            (attrvalue[0] == '"' && attrvalue[-1] == '"')
        attrvalue = attrvalue[1..-2]
      end
      attrs << [attrname.downcase, attrvalue]
      k += matched_length
    end
    if rawdata[j] == '>'
      j += 1
    end
    finish_starttag(tag, attrs)
    return j
  end

  # Parses the end tag at index +i+ and dispatches via finish_endtag.
  # Returns the index just past the tag, or nil when the tag is incomplete.
  def parse_endtag(i)
    rawdata = @rawdata
    j = rawdata.index(Endbracket, i + 1)
    return nil unless j
    tag = (rawdata[i+2..j-1].strip).downcase
    if rawdata[j] == '>'
      j += 1
    end
    finish_endtag(tag)
    return j
  end

  # Dispatches a start tag.  Returns 1 when start_<tag> handled it (the tag is
  # pushed on the stack), 0 when do_<tag> handled it, -1 when it is unknown.
  def finish_starttag(tag, attrs)
    method = 'start_' + tag
    if self.respond_to?(method)
      @stack << tag
      handle_starttag(tag, method, attrs)
      return 1
    else
      method = 'do_' + tag
      if self.respond_to?(method)
        handle_starttag(tag, method, attrs)
        return 0
      else
        unknown_starttag(tag, attrs)
        return -1
      end
    end
  end

  # Dispatches an end tag.  An empty tag ("</>") closes the innermost open
  # element.  A named tag closes the innermost open element of that name
  # together with everything opened inside it; a tag that is not open is
  # reported through unknown_endtag unless an end_<tag> method exists.
  def finish_endtag(tag)
    if tag == ''
      found = @stack.length - 1
      if found < 0
        unknown_endtag(tag)
        return
      end
    else
      unless @stack.include? tag
        method = 'end_' + tag
        unless self.respond_to?(method)
          unknown_endtag(tag)
        end
        return
      end
      # Innermost match: with nested elements of the same name, the first
      # (outermost) index would close the enclosing elements as well.
      found = @stack.rindex(tag)
    end
    while @stack.length > found
      tag = @stack[-1]
      method = 'end_' + tag
      if respond_to?(method)
        handle_endtag(tag, method)
      else
        unknown_endtag(tag)
      end
      @stack.pop
    end
  end

  # Parses the "<!...>" declaration at index +i+ and passes its content to
  # handle_special.  Returns the number of characters consumed, or nil when
  # the declaration is incomplete.
  def parse_special(i)
    rawdata = @rawdata
    match = rawdata.index(Endbracket, i+1)
    return nil unless match
    matched_length = $&.length
    handle_special(rawdata[i+1..(match-1)])
    return match - i + matched_length
  end

  # Invokes the start_/do_ handler named +method+ with the attribute list.
  def handle_starttag(tag, method, attrs)
    self.send(method, attrs)
  end

  # Invokes the end_ handler named +method+.
  def handle_endtag(tag, method)
    self.send(method)
  end

  # Prints the unbalanced end tag and the open-element stack when verbose.
  def report_unbalanced(tag)
    if @verbose
      print '*** Unbalanced </' + tag + '>', "\n"
      # @stack has no accessor; calling self.stack raised NoMethodError.
      print '*** Stack:', @stack, "\n"
    end
  end

  # Handles a numeric character reference.  +name+ holds the decimal digits;
  # values 0..255 are passed to handle_data as a single character, anything
  # else goes to unknown_charref.
  def handle_charref(name)
    # Explicit base 10: Integer("065") would be read as octal and
    # Integer("09") would raise ArgumentError.
    n = Integer(name.to_s, 10)
    if !(0 <= n && n <= 255)
      unknown_charref(name)
      return
    end
    # NOTE(review): Integer#chr returns a binary (ASCII-8BIT) string for
    # 128..255 unless a default internal encoding is set -- confirm that
    # handle_data implementations cope with that.
    handle_data(n.chr)
  end

  # Handles a named entity reference: names listed in Entitydefs are replaced
  # by their text, others go to unknown_entityref.
  def handle_entityref(name)
    table = Entitydefs
    if table.include?(name)
      handle_data(table[name])
    else
      unknown_entityref(name)
      return
    end
  end

  # Hooks for subclasses; the defaults ignore their input.
  def handle_data(data)
  end
  def handle_comment(data)
  end
  def handle_special(data)
  end
  def unknown_starttag(tag, attrs)
  end
  def unknown_endtag(tag)
  end
  def unknown_charref(ref)
  end
  def unknown_entityref(ref)
  end
end