ruby-ole 1.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +60 -0
- data/bin/oletool +35 -0
- data/lib/ole/base.rb +7 -0
- data/lib/ole/file_system.rb +181 -0
- data/lib/ole/io_helpers.rb +184 -0
- data/lib/ole/storage.rb +925 -0
- data/lib/ole/support.rb +51 -0
- data/lib/ole/types.rb +36 -0
- data/test/test_storage.rb +139 -0
- data/test/test_word_6.doc +0 -0
- data/test/test_word_95.doc +0 -0
- data/test/test_word_97.doc +0 -0
- metadata +62 -0
data/Rakefile
ADDED
@@ -0,0 +1,60 @@
|
|
1
|
+
# Build script for the ruby-ole gem: test, rdoc and gem/tar packaging tasks.
require 'rake/rdoctask'
require 'rake/testtask'
require 'rake/packagetask'
require 'rake/gempackagetask'

require 'rbconfig'
require 'fileutils'

# make the in-tree lib/ visible so the version constant below can be loaded
# without installing the gem first.
$:.unshift 'lib'

require 'ole/storage'

PKG_NAME = 'ruby-ole'
# single source of truth for the version — taken from the library itself.
PKG_VERSION = Ole::Storage::VERSION

task :default => [:test]

# run all test/test_*.rb files, with ruby warnings enabled.
Rake::TestTask.new(:test) do |t|
	t.test_files = FileList["test/test_*.rb"]
	t.warning = true
	t.verbose = true
end

# RDocTask wasn't working for me
desc 'Build the rdoc HTML Files'
task :rdoc do
	system "rdoc -S -N --main 'Ole::Storage' --tab-width 2 --title '#{PKG_NAME} documentation' lib"
end

# gem specification: metadata plus an explicit file manifest built from globs.
spec = Gem::Specification.new do |s|
	s.name = PKG_NAME
	s.version = PKG_VERSION
	s.summary = %q{Ruby OLE library.}
	s.description = %q{A library for easy read/write access to OLE compound documents for Ruby.}
	s.authors = ["Charles Lowe"]
	s.email = %q{aquasync@gmail.com}
	s.homepage = %q{http://code.google.com/p/ruby-ole}
	#s.rubyforge_project = %q{ruby-ole}

	s.executables = ['oletool']
	s.files = ['Rakefile']
	s.files += Dir.glob("lib/**/*.rb")
	s.files += Dir.glob("test/test_*.rb") + Dir.glob("test/*.doc")
	s.files += Dir.glob("bin/*")

	s.has_rdoc = true
	s.rdoc_options += ['--main', 'Ole::Storage',
			'--title', "#{PKG_NAME} documentation",
			'--tab-width', '2']

	s.autorequire = 'ole/storage'
end

# produce build/<name>-<version>.gem and a .tgz, but no zip.
Rake::GemPackageTask.new(spec) do |p|
	p.gem_spec = spec
	p.need_tar = true
	p.need_zip = false
	p.package_dir = 'build'
end
|
60
|
+
|
data/bin/oletool
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#! /usr/bin/ruby

require 'optparse'
require 'rubygems'
require 'ole/storage'

# Command-line driver: parse options, then either dump the ole dirent tree
# of each file (default) or repack each file in canonical form.
def oletool
	opts = {:verbose => false, :action => :tree}
	op = OptionParser.new do |op|
		op.banner = "Usage: oletool [options] [files]"
		op.separator ''
		op.on('-t', '--tree', 'Dump ole trees for files (default)') { opts[:action] = :tree }
		op.on('-r', '--repack', 'Repack the ole files in canonical form') { opts[:action] = :repack }
		op.separator ''
		op.on('-v', '--[no-]verbose', 'Run verbosely') { |v| opts[:verbose] = v }
		op.on_tail('-h', '--help', 'Show this message') { puts op; exit }
	end
	# parse returns the non-option arguments, i.e. the file list.
	files = op.parse ARGV
	if files.empty?
		puts 'Must specify 1 or more msg files.'
		puts op
		exit 1
	end
	# even "verbose" only raises the level to WARN; FATAL otherwise keeps the
	# library quiet.
	Ole::Log.level = opts[:verbose] ? Logger::WARN : Logger::FATAL
	files.each do |file|
		case opts[:action]
		when :tree
			# read-only open; block form closes the storage afterwards.
			Ole::Storage.open(file) { |ole| puts ole.root.to_tree }
		when :repack
			# read-write open; &:repack rewrites the file in canonical layout.
			Ole::Storage.open file, 'r+', &:repack
		end
	end
end

oletool
|
data/lib/ole/file_system.rb
ADDED
@@ -0,0 +1,181 @@
|
|
1
|
+
#
|
2
|
+
# = Introduction
|
3
|
+
#
|
4
|
+
# This file intends to provide file system-like api support, a la <tt>zip/zipfilesystem</tt>.
|
5
|
+
#
|
6
|
+
# Ideally, this will be the recommended interface, allowing Ole::Storage, Dir, and
|
7
|
+
# Zip::ZipFile to be used interchangeably. It should be possible to write recursive copy using
|
8
|
+
# the plain api, such that you can copy dirs/files agnostically between any of ole docs, dirs,
|
9
|
+
# and zip files.
|
10
|
+
#
|
11
|
+
# = Usage
|
12
|
+
#
|
13
|
+
# Currently you can do something like the following:
|
14
|
+
#
|
15
|
+
# Ole::Storage.open 'test.doc' do |ole|
|
16
|
+
# ole.dir.entries '/' # => [".", "..", "\001Ole", "1Table", "\001CompObj", ...]
|
17
|
+
# ole.file.read "\001CompObj" # => "\001\000\376\377\003\n\000\000\377\377..."
|
18
|
+
# end
|
19
|
+
#
|
20
|
+
# = Notes
|
21
|
+
#
|
22
|
+
# *** This file is very incomplete
|
23
|
+
#
|
24
|
+
# i think its okay to have an api like this on top, but there are certain things that ole
|
25
|
+
# does that aren't captured.
|
26
|
+
# <tt>Ole::Storage</tt> can have multiple files with the same name, for example, or with
|
27
|
+
# / in the name, and other things that are probably invalid anyway.
|
28
|
+
# i think this should remain an addon, built on top of my core api.
|
29
|
+
# but still the ideas can be reflected in the core, ie, changing the read/write semantics.
|
30
|
+
#
|
31
|
+
# once the core changes are complete, this will be a pretty straight forward file to complete.
|
32
|
+
#
|
33
|
+
|
34
|
+
require 'ole/base'
|
35
|
+
|
36
|
+
module Ole # :nodoc:
	class Storage
		# Returns a ::File-like facade (FileParent) over this storage, a la
		# <tt>zip/zipfilesystem</tt>. Memoized per storage instance.
		def file
			@file ||= FileParent.new self
		end

		# Returns a ::Dir-like facade (DirParent) over this storage. Memoized.
		def dir
			@dir ||= DirParent.new self
		end

		# Resolve +path_str+ (eg "/some/dir/file") to a dirent, walking down
		# from @root. Leading/trailing slashes are ignored, so "" and "/" both
		# yield the root. Raises if a component is missing, or if a
		# non-terminal component is a file.
		def dirent_from_path path_str
			path = path_str.sub(/^\/*/, '').sub(/\/*$/, '')
			dirent = @root
			return dirent if path.empty?
			path = path.split(/\/+/)
			until path.empty?
				raise "invalid path #{path_str.inspect}" if dirent.file?
				if tmp = dirent[path.shift]
					dirent = tmp
				else
					# allow write etc later.
					raise "invalid path #{path_str.inspect}"
				end
			end
			dirent
		end

		# Provides a subset of the ::File class-method interface, operating on
		# file entries within an Ole::Storage.
		class FileParent
			def initialize ole
				@ole = ole
			end

			# Open the file at +path_str+, yielding its io to the block (or
			# returning it when no block is given), as per the dirent's #open.
			# NOTE(review): +mode+ is accepted but currently unused — confirm
			# whether write modes should be honoured here.
			def open path_str, mode='r', &block
				dirent = @ole.dirent_from_path path_str
				# like Errno::EISDIR
				raise "#{path_str.inspect} is a directory" unless dirent.file?
				dirent.open(&block)
			end

			alias new :open

			# Slurp the entire contents of the file at +path+.
			def read path
				open(path) { |f| f.read }
			end

			# Delete the file at +path+. Returns 1, as per ::File.unlink.
			def unlink path
				dirent = @ole.dirent_from_path path
				# EPERM
				raise "operation not permitted #{path.inspect}" unless dirent.file?
				# truncate to 0 first so the file's blocks are freed in the
				# allocation table; removing the dirent from its parent then keeps
				# it out of the bat at save time. repack should zero free blocks.
				open(path) { |f| f.truncate 0 }
				# prepend '/' so a bare top-level name ("foo") resolves to the
				# root as parent, not to the dirent itself.
				parent = @ole.dirent_from_path(('/' + path).sub(/\/[^\/]+$/, ''))
				parent.children.delete dirent
				1 # hmmm. as per ::File ?
			end
		end

		# Provides a subset of the ::Dir interface, operating on directory
		# entries within an Ole::Storage.
		class DirParent
			def initialize ole
				@ole = ole
			end

			# Open the directory at +path_str+ as a Dir object; yields it when
			# a block is given, otherwise returns it.
			def open path_str
				dirent = @ole.dirent_from_path path_str
				# like Errno::ENOTDIR
				raise "#{path_str.inspect} is not a directory" unless dirent.dir?
				dir = Dir.new dirent, path_str
				if block_given?
					yield dir
				else
					dir
				end
			end

			# certain Dir class methods proxy in this fashion:
			def entries path
				open(path) { |dir| dir.entries }
			end

			# there are some other important ones, like:
			# chroot (!), mkdir, chdir, rmdir, glob etc etc. for now, i think
			# mkdir, and rmdir are the main ones we'd need to support
			def rmdir path
				dirent = @ole.dirent_from_path path
				raise "#{path.inspect} is not a directory" unless dirent.dir?
				# ENOTEMPTY:
				raise "directory not empty #{path.inspect}" unless dirent.children.empty?
				# the canonical representation maintained is the root tree and the
				# children arrays, so deletion means removing ourselves from our
				# parent's children. find the parent the same way unlink does:
				# prepend '/' so a bare top-level name ("foo") resolves to the
				# root, not back to the dirent itself. (the previous
				# `path.sub(...) || '/'` never fell back to '/' — `||` bound to
				# sub's never-nil result — making rmdir a silent no-op for such
				# paths.)
				parent = @ole.dirent_from_path(('/' + path).sub(/\/[^\/]+$/, ''))
				# note that the way this currently works, on save and repack time
				# this will get reflected. to make a difference now it would have
				# to re-write the dirent; Ole::Storage#close should handle that.
				parent.children.delete dirent
				0 # hmmm. as per ::Dir ?
			end

			# An opened directory: a snapshot of the entry names of a storage
			# dirent, readable in the style of ::Dir.
			class Dir
				include Enumerable
				attr_reader :dirent, :path, :entries, :pos

				def initialize dirent, path
					@dirent, @path = dirent, path
					@pos = 0
					# FIXME: hack, and probably not really desired
					@entries = %w[. ..] + @dirent.children.map(&:name)
				end

				def each(&block)
					@entries.each(&block)
				end

				# no-op; there is nothing to release.
				def close
				end

				# return the entry at the current position, then advance. past the
				# last entry, returns nil and the position stays clamped.
				def read
					@entries[@pos]
				ensure
					@pos += 1 if @pos < @entries.length
				end

				# seek, clamped to 0..entries.length.
				def pos= pos
					@pos = [[0, pos].max, @entries.length].min
				end

				def rewind
					@pos = 0
				end

				alias tell :pos
				alias seek :pos=
			end
		end
	end
end
|
181
|
+
|
data/lib/ole/io_helpers.rb
ADDED
@@ -0,0 +1,184 @@
|
|
1
|
+
|
2
|
+
# move to support?
|
3
|
+
class IO # :nodoc:
	# Stream the remaining contents of +src+ into +dst+, 4096 bytes at a
	# time, until +src+ is exhausted. Arguments need only be io-like
	# (respond to #eof?, #read and #write); nothing is returned.
	def self.copy src, dst
		dst.write src.read(4096) until src.eof?
	end
end
|
11
|
+
|
12
|
+
#
|
13
|
+
# = Introduction
|
14
|
+
#
|
15
|
+
# +RangesIO+ is a basic class for wrapping another IO object allowing you to arbitrarily reorder
|
16
|
+
# slices of the input file by providing a list of ranges. Intended as an initial measure to curb
|
17
|
+
# inefficiencies in the Dirent#data method just reading all of a file's data in one hit, with
|
18
|
+
# no method to stream it.
|
19
|
+
#
|
20
|
+
# This class will encapsulate the ranges (corresponding to big or small blocks) of any ole file
|
21
|
+
# and thus allow reading/writing directly to the source bytes, in a streamed fashion (so just
|
22
|
+
# getting 16 bytes doesn't read the whole thing).
|
23
|
+
#
|
24
|
+
# In the simplest case it can be used with a single range to provide a limited io to a section of
|
25
|
+
# a file.
|
26
|
+
#
|
27
|
+
# = Limitations
|
28
|
+
#
|
29
|
+
# * No buffering. by design at the moment. Intended for large reads
|
30
|
+
#
|
31
|
+
# = TODO
|
32
|
+
#
|
33
|
+
# On further reflection, this class is something of a joining/optimization of
|
34
|
+
# two separate IO classes. a SubfileIO, for providing access to a range within
|
35
|
+
# a File as a separate IO object, and a ConcatIO, allowing the presentation of
|
36
|
+
# a bunch of io objects as a single unified whole.
|
37
|
+
#
|
38
|
+
# I will need such a ConcatIO if I'm to provide Mime#to_io, a method that will
|
39
|
+
# convert a whole mime message into an IO stream, that can be read from.
|
40
|
+
# It will just be the concatenation of a series of IO objects, corresponding to
|
41
|
+
# headers and boundaries, as StringIO's, and SubfileIO objects, coming from the
|
42
|
+
# original message proper, or RangesIO as provided by the Attachment#data, that
|
43
|
+
# will then get wrapped by Mime in a Base64IO or similar, to get encoded on-the-
|
44
|
+
# fly. Thus the attachment, in its plain or encoded form, and the message as a
|
45
|
+
# whole never exists as a single string in memory, as it does now. This is a
|
46
|
+
# fair bit of work to achieve, but generally useful I believe.
|
47
|
+
#
|
48
|
+
# This class isn't ole specific, maybe move it to my general ruby stream project.
|
49
|
+
#
|
50
|
+
class RangesIO
	attr_reader :io, :ranges, :size, :pos
	# +io+ is the parent io object that we are wrapping.
	#
	# +ranges+ are byte offsets, either
	# 1. an array of ranges [1..2, 4..5, 6..8] or
	# 2. an array of arrays, where the second is length [[1, 1], [4, 1], [6, 2]] for the above
	#    (think the way String indexing works)
	# The +ranges+ provide sequential slices of the file that will be read. they can overlap.
	#
	# +opts+ supports :close_parent (default false) — when true, #close also
	# closes the wrapped +io+.
	def initialize io, ranges, opts={}
		@opts = {:close_parent => false}.merge opts
		@io = io
		# convert ranges to arrays. check for negative ranges?
		@ranges = ranges.map { |r| Range === r ? [r.begin, r.end - r.begin] : r }
		# calculate size: the sum of all the range lengths
		@size = @ranges.inject(0) { |total, (pos, len)| total + len }
		# initial position in the file (a *logical* offset into the
		# concatenated ranges, not an offset in the parent io)
		@pos = 0
	end

	# Seek to logical offset +pos+. Only absolute seeks are supported.
	# NOTE(review): +pos+ is not validated against 0...size here; an out of
	# range value will surface later as a range_and_offset failure.
	def pos= pos, whence=IO::SEEK_SET
		# FIXME support other whence values
		raise NotImplementedError, "#{whence.inspect} not supported" unless whence == IO::SEEK_SET
		# just a simple pos calculation. invalidate buffers if we had them
		@pos = pos
	end

	alias seek :pos=
	alias tell :pos

	# Close the parent io too, but only if we were asked to own it.
	def close
		@io.close if @opts[:close_parent]
	end

	# Map a logical offset +pos+ to [range, offset-within-range] by walking
	# the ranges and accumulating their lengths. Raises if +pos+ lies beyond
	# the last range.
	def range_and_offset pos
		off = nil
		r = ranges.inject(0) do |total, r|
			to = total + r[1]
			if pos <= to
				off = pos - total
				break r
			end
			to
		end
		# should be impossible for any valid pos, (0...size) === pos
		raise "unable to find range for pos #{pos.inspect}" unless off
		[r, off]
	end

	# True when the logical position has reached the total size.
	def eof?
		@pos == @size
	end

	# read bytes from file, to a maximum of +limit+, or all available if unspecified.
	def read limit=nil
		data = ''
		limit ||= size
		# special case eof
		return data if eof?
		r, off = range_and_offset @pos
		i = ranges.index r
		# this may be conceptually nice (create sub-range starting where we are), but
		# for a large range array its pretty wasteful. even the previous way was. but
		# i'm not trying to optimize this atm. it may even go to c later if necessary.
		([[r[0] + off, r[1] - off]] + ranges[i+1..-1]).each do |pos, len|
			@io.seek pos
			if limit < len
				# final, partial chunk: read only what was asked for.
				# FIXME this += isn't correct if there is a read error
				# or something.
				@pos += limit
				break data << @io.read(limit)
			end
			# this can also stuff up. if the ranges are beyond the size of the file, we can get
			# nil here.
			data << @io.read(len)
			@pos += len
			limit -= len
		end
		data
	end

	# you may override this call to update @ranges and @size, if applicable. then write
	# support can grow below
	def truncate size
		raise NotImplementedError, 'truncate not supported'
	end
	# why not? :)
	alias size= :truncate

	# Write +data+ at the current logical position, spanning range boundaries
	# as needed. Grows the io via the #truncate hook when there is not enough
	# room (raising if truncate is unsupported). Returns the number of bytes
	# written.
	def write data
		# short cut. needed because truncate 0 may return no ranges, instead of empty range,
		# thus range_and_offset fails.
		return 0 if data.empty?
		data_pos = 0
		# if we don't have room, we can use the truncate hook to make more space.
		if data.length > @size - @pos
			begin
				truncate @pos + data.length
			rescue NotImplementedError
				# FIXME maybe warn instead, then just truncate the data?
				raise "unable to satisfy write of #{data.length} bytes"
			end
		end
		r, off = range_and_offset @pos
		i = ranges.index r
		([[r[0] + off, r[1] - off]] + ranges[i+1..-1]).each do |pos, len|
			@io.seek pos
			if data_pos + len > data.length
				# final, partial chunk of data for this range.
				chunk = data[data_pos..-1]
				@io.write chunk
				@pos += chunk.length
				data_pos = data.length
				break
			end
			@io.write data[data_pos, len]
			@pos += len
			data_pos += len
		end
		data_pos
	end

	# this will be generalised to a module later
	def each_read blocksize=4096
		yield read(blocksize) until eof?
	end

	def inspect
		# the rescue is for empty files
		pos, len = *(range_and_offset(@pos)[0] rescue [nil, nil])
		range_str = pos ? "#{pos}..#{pos+len}" : 'nil'
		"#<#{self.class} io=#{io.inspect} size=#@size pos=#@pos "\
			"current_range=#{range_str}>"
	end
end
|
184
|
+
|