ruby-msg 1.2.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/FIXES +34 -0
- data/README +121 -0
- data/Rakefile +66 -0
- data/bin/msgtool +63 -0
- data/bin/oletool +35 -0
- data/data/mapitags.yaml +4168 -0
- data/data/named_map.yaml +114 -0
- data/data/types.yaml +15 -0
- data/lib/blah.rb +106 -0
- data/lib/mime-new.rb +210 -0
- data/lib/mime.rb +165 -0
- data/lib/msg/properties.rb +515 -0
- data/lib/msg/rtf.rb +236 -0
- data/lib/msg.rb +505 -0
- data/lib/ole/base.rb +5 -0
- data/lib/ole/file_system.rb +181 -0
- data/lib/ole/io_helpers.rb +184 -0
- data/lib/ole/storage.rb +927 -0
- data/lib/ole/types.rb +36 -0
- data/lib/orderedhash.rb +218 -0
- data/lib/rtf.rb +118 -0
- data/lib/support.rb +51 -0
- data/test/test_mime.rb +22 -0
- data/test/test_storage.rb +139 -0
- data/test/test_word_6.doc +0 -0
- data/test/test_word_95.doc +0 -0
- data/test/test_word_97.doc +0 -0
- metadata +73 -0
data/FIXES
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
FIXES
|
2
|
+
|
3
|
+
recent fixes based on importing results into evolution
|
4
|
+
|
5
|
+
1. was running into some issue with base64 encoded message/rfc822 attachments displaying
|
6
|
+
as empty. encoding them as plain solved the issue (odd).
|
7
|
+
|
8
|
+
2. problem with a large percentage of emails, not displaying as mime. turned out to be
|
9
|
+
all received from blackberry. further, turned out there were 2 content-type headers,
|
10
|
+
"Content-Type", which I add, and "Content-type". normally my override works, but I
|
11
|
+
need to handle it case insensitively, it would appear. more tricky, what's the story
|
12
|
+
with these. fixing that will probably fix that whole class of issues there.
|
13
|
+
evolution was renaming my second content type as X-Invalid-Content-Type or something.
|
14
|
+
|
15
|
+
3. another interesting one. had content-transfer-encoding set in the transport message
|
16
|
+
headers. it was set to base64. i didn't override that, so evolution "decoded" my
|
17
|
+
plaintext message into complete garbage.
|
18
|
+
fix - delete content-transfer-encoding.
|
19
|
+
|
20
|
+
4. added content-location and content-id output in the mime handling of attachments
|
21
|
+
to get some inline html/image mails to work properly.
|
22
|
+
further, the containing mime content-type must be multipart/related, not multipart/mixed,
|
23
|
+
at least for evolution, in order for the images to appear inline.
|
24
|
+
could still improve in this area. if someone drags and drops in an image, it may
|
25
|
+
be inline in the rtf version, but exchange generates crappy html such that the image
|
26
|
+
doesn't display inline. maybe i should correct the html output in these cases as i'm
|
27
|
+
throwing away the rtf version.
|
28
|
+
|
29
|
+
5. note you may need wingdings installed. i had a lot of L and J appear in messages from
|
30
|
+
outlook users. turns out it's smilies in wingdings. i think it's only if word is used
|
31
|
+
as email editor and has autotext messing things up.
|
32
|
+
|
33
|
+
6. still unsure about how to do my "\r" handling.
|
34
|
+
|
data/README
ADDED
@@ -0,0 +1,121 @@
|
|
1
|
+
#summary ruby-msg - A library for reading Outlook msg files, and for converting them to RFC2822 emails.
|
2
|
+
|
3
|
+
= Introduction =
|
4
|
+
|
5
|
+
Generally, the goal of the project is the conversion of .msg files into proper rfc2822
|
6
|
+
emails, independent of outlook, or any platform dependencies etc.
|
7
|
+
In fact it's currently pure ruby, so it should be easy to get started with.
|
8
|
+
|
9
|
+
It draws on `msgconvert.pl`, but tries to take a cleaner and more complete approach.
|
10
|
+
Neither is complete yet; however, I think that this project provides a clean foundation upon which to work on a good converter for msg files, for use in outlook migrations etc.
|
11
|
+
|
12
|
+
I am happy to accept patches, give commit bits etc.
|
13
|
+
|
14
|
+
Please let me know how it works for you, any feedback would be welcomed.
|
15
|
+
|
16
|
+
= Usage =
|
17
|
+
|
18
|
+
Higher level access to the msg can be had through the top level data accessors.
|
19
|
+
|
20
|
+
{{{
|
21
|
+
require 'msg'
|
22
|
+
|
23
|
+
msg = Msg.load open(filename)
|
24
|
+
|
25
|
+
# access to the 3 main data stores, if you want to poke with the msg
|
26
|
+
# internals
|
27
|
+
msg.recipients
|
28
|
+
# => [#<Recipient:'\'Marley, Bob\' <bob.marley@gmail.com>'>]
|
29
|
+
msg.attachments
|
30
|
+
# => [#<Attachment filename='blah1.tif'>, #<Attachment filename='blah2.tif'>]
|
31
|
+
msg.properties
|
32
|
+
# => #<Properties ... normalized_subject='Testing' ...
|
33
|
+
# creation_time=#<DateTime: 2454042.45074714,0,2299161> ...>
|
34
|
+
}}}
|
35
|
+
|
36
|
+
To completely abstract away all msg peculiarities, convert the msg to a mime object.
|
37
|
+
The message as a whole, and some of its main parts support conversion to mime objects.
|
38
|
+
|
39
|
+
{{{
|
40
|
+
msg.attachments.first.to_mime
|
41
|
+
# => #<Mime content_type='application/octet-stream'>
|
42
|
+
mime = msg.to_mime
|
43
|
+
puts mime.to_tree
|
44
|
+
# =>
|
45
|
+
- #<Mime content_type='multipart/mixed'>
|
46
|
+
|- #<Mime content_type='multipart/alternative'>
|
47
|
+
| |- #<Mime content_type='text/plain'>
|
48
|
+
| \- #<Mime content_type='text/html'>
|
49
|
+
|- #<Mime content_type='application/octet-stream'>
|
50
|
+
\- #<Mime content_type='application/octet-stream'>
|
51
|
+
|
52
|
+
# convert mime object to serialised form,
|
53
|
+
# inclusive of attachments etc. (not ideal in memory, but its wip).
|
54
|
+
puts mime.to_s
|
55
|
+
}}}
|
56
|
+
|
57
|
+
You can also access the underlying ole object, and see all the gory details of how msgs are serialised:
|
58
|
+
|
59
|
+
{{{
|
60
|
+
puts msg.ole.root.to_tree
|
61
|
+
# =>
|
62
|
+
- #<OleDir:"Root Entry" size=3840 time="2006-11-03T00:52:53Z">
|
63
|
+
|- #<OleDir:"__nameid_version1.0" size=0 time="2006-11-03T00:52:53Z">
|
64
|
+
| |- #<OleDir:"__substg1.0_00020102" size=16 data="CCAGAAAAAADAAA...">
|
65
|
+
| |- #<OleDir:"__substg1.0_00030102" size=64 data="DoUAAAYAAABShQ...">
|
66
|
+
| |- #<OleDir:"__substg1.0_00040102" size=0 data="">
|
67
|
+
| |- #<OleDir:"__substg1.0_10010102" size=16 data="UoUAAAYAAQAQhQ...">
|
68
|
+
| |- #<OleDir:"__substg1.0_10090102" size=8 data="GIUAAAYABgA=">
|
69
|
+
| |- #<OleDir:"__substg1.0_100A0102" size=8 data="BoUAAAYABwA=">
|
70
|
+
| |- #<OleDir:"__substg1.0_100F0102" size=8 data="A4UAAAYABAA=">
|
71
|
+
| |- #<OleDir:"__substg1.0_10110102" size=8 data="AYUAAAYAAwA=">
|
72
|
+
| |- #<OleDir:"__substg1.0_10120102" size=8 data="DoUAAAYAAAA=">
|
73
|
+
| \- #<OleDir:"__substg1.0_101E0102" size=8 data="VIUAAAYAAgA=">
|
74
|
+
|- #<OleDir:"__substg1.0_001A001E" size=8 data="SVBNLk5vdGU=">
|
75
|
+
...
|
76
|
+
|- #<OleDir:"__substg1.0_8002001E" size=4 data="MTEuMA==">
|
77
|
+
|- #<OleDir:"__properties_version1.0" size=800 data="AAAAAAAAAAABAA...">
|
78
|
+
\- #<OleDir:"__recip_version1.0_#00000000" size=0 time="2006-11-03T00:52:53Z">
|
79
|
+
|
80
|
+
|- #<OleDir:"__substg1.0_0FF60102" size=4 data="AAAAAA==">
|
81
|
+
|- #<OleDir:"__substg1.0_3001001E" size=4 data="YXNkZg==">
|
82
|
+
|- #<OleDir:"__substg1.0_5FF6001E" size=4 data="YXNkZg==">
|
83
|
+
\- #<OleDir:"__properties_version1.0" size=152 data="AAAAAAAAAAAeAA...">
|
84
|
+
}}}
|
85
|
+
|
86
|
+
= Further Details =
|
87
|
+
|
88
|
+
Named properties have recently been implemented, and Msg::Properties now allows associated guids. Keys are represented by Msg::Properties::Key, which contains the relevant code.
|
89
|
+
|
90
|
+
You can now write code like:
|
91
|
+
{{{
|
92
|
+
props = msg.properties
|
93
|
+
|
94
|
+
props[0x0037] # access subject by mapi code
|
95
|
+
props[0x0037, Msg::Properties::PS_MAPI] # equivalent, with explicit GUID.
|
96
|
+
key = Msg::Properties::Key.new 0x0037 # => 0x0037
|
97
|
+
props[key] # same again
|
98
|
+
|
99
|
+
# keys support being converted to symbols, and then use a symbolic lookup
|
100
|
+
key.to_sym # => :subject
|
101
|
+
props[:subject] # as above
|
102
|
+
props.subject # still good
|
103
|
+
}}}
|
104
|
+
|
105
|
+
Under the hood, there is complete support for named properties:
|
106
|
+
{{{
|
107
|
+
# to get the categories as set by outlook
|
108
|
+
props['Keywords', Msg::Properties::PS_PUBLIC_STRINGS]
|
109
|
+
# => ["Business", "Competition", "Favorites"]
|
110
|
+
|
111
|
+
# and as a fallback, the symbolic lookup will automatically use named properties,
|
112
|
+
# which can be seen:
|
113
|
+
props.resolve :keywords
|
114
|
+
# => #<Key {00020329-0000-0000-c000-000000000046}/"Keywords">
|
115
|
+
|
116
|
+
# which allows this to work:
|
117
|
+
props.keywords # as above
|
118
|
+
}}}
|
119
|
+
|
120
|
+
With some more work, the property storage model should be able to reach feature
|
121
|
+
completion.
|
data/Rakefile
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
require 'rake/rdoctask'
|
2
|
+
require 'rake/testtask'
|
3
|
+
require 'rake/packagetask'
|
4
|
+
require 'rake/gempackagetask'
|
5
|
+
|
6
|
+
require 'rbconfig'
|
7
|
+
require 'fileutils'
|
8
|
+
|
9
|
+
$: << './lib'
|
10
|
+
require 'msg.rb'
|
11
|
+
|
12
|
+
PKG_NAME = 'ruby-msg'
|
13
|
+
PKG_VERSION = Msg::VERSION
|
14
|
+
|
15
|
+
task :default => [:test]
|
16
|
+
|
17
|
+
# Defines the :test task: runs every test/test_*.rb file, with ruby warnings
# switched on and verbose output.
Rake::TestTask.new(:test) do |test_task|
  test_task.verbose = true
  test_task.warning = true
  test_task.test_files = FileList["test/test_*.rb"]
end
|
22
|
+
|
23
|
+
# RDocTask wasn't working for me, so the rdoc command is invoked directly.
desc 'Build the rdoc HTML Files'
task :rdoc do
  # sh (unlike Kernel#system) makes the task fail when rdoc exits with a
  # non-zero status, instead of silently reporting success.
  sh "rdoc -S -N -m Msg -w 2 -t '#{PKG_NAME} documentation' lib"
end
|
28
|
+
|
29
|
+
=begin
|
30
|
+
Rake::PackageTask.new(PKG_NAME, PKG_VERSION) do |p|
|
31
|
+
p.need_tar_gz = true
|
32
|
+
p.package_dir = 'build'
|
33
|
+
p.package_files.include("Rakefile", "README")
|
34
|
+
p.package_files.include("contrib/*.c")
|
35
|
+
p.package_files.include("test/test_*.rb", "test/*.doc", "lib/*.rb", "lib/ole/storage.rb")
|
36
|
+
end
|
37
|
+
=end
|
38
|
+
|
39
|
+
# Gem specification for the package. Name and version come from the PKG_NAME
# and PKG_VERSION constants; the file list is assembled from the yaml data
# files, the top level docs, the library sources, the tests (including the
# sample .doc files) and the executables under bin/.
spec = Gem::Specification.new do |gem|
  gem.name = PKG_NAME
  gem.version = PKG_VERSION
  gem.summary = 'Ruby Msg library.'
  gem.description = 'A library for reading Outlook msg files, and for converting them to RFC2822 emails.'
  gem.authors = ["Charles Lowe"]
  gem.email = 'aquasync@gmail.com'
  gem.homepage = 'http://code.google.com/p/ruby-msg'
  #gem.rubyforge_project = %q{ruby-msg}

  gem.executables = ['msgtool', 'oletool']
  # the list is extended step by step (rather than assigned once) so that the
  # order of reads and writes of gem.files is unchanged.
  gem.files = Dir.glob('data/*.yaml') + ['Rakefile', 'README', 'FIXES']
  gem.files = gem.files + Dir.glob("lib/**/*.rb")
  gem.files = gem.files + (Dir.glob("test/test_*.rb") + Dir.glob("test/*.doc"))
  gem.files = gem.files + Dir.glob("bin/*")

  gem.has_rdoc = true

  gem.autorequire = 'msg'
end
|
59
|
+
|
60
|
+
# Packaging task for the gem described by +spec+: also produces a tar archive
# (no zip), writing everything into the build directory.
Rake::GemPackageTask.new(spec) do |pkg|
  pkg.gem_spec = spec
  pkg.package_dir = 'build'
  pkg.need_zip = false
  pkg.need_tar = true
end
|
66
|
+
|
data/bin/msgtool
ADDED
@@ -0,0 +1,63 @@
|
|
1
|
+
#! /usr/bin/ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'rubygems'
|
5
|
+
require 'msg'
|
6
|
+
require 'time'
|
7
|
+
|
8
|
+
# Applies the user supplied header defaults to +mime+.
#
# Each entry of opts[:header_defaults] is a string of the form "Name: value"
# (at least one whitespace character must follow the colon). The value is
# stored as the single value of that header, but only when the header is
# currently empty; headers that already have a value are left alone.
#
# mime:: object whose +headers+ can be indexed by header name.
#        NOTE(review): presumably headers[name] yields an (possibly empty)
#        array for names that are not set yet - confirm against Mime.
# opts:: options hash; only the :header_defaults array is read.
#
# Raises ArgumentError for an entry that is not of the "Name: value" form
# (previously this surfaced as a NoMethodError on nil).
def munge_headers mime, opts
  opts[:header_defaults].each do |header_default|
    parsed = header_default.match(/(.*?):\s+(.*)/)
    unless parsed
      raise ArgumentError, "invalid header default #{header_default.inspect}, expected \"Name: value\""
    end
    name, value = parsed[1], parsed[2]
    mime.headers[name] = [value] if mime.headers[name].empty?
  end
end
|
14
|
+
|
15
|
+
# Command line entry point: parses the options in ARGV, converts every msg
# file named on the command line to a mime message and prints the result to
# standard output.
#
# Options:
#   -c, --convert             print each converted message (default)
#   -m, --convert-mbox        print each message preceded by a "From "
#                             separator line, quoting body lines that would
#                             otherwise look like such a separator
#   -d, --header-default STR  default for a top level header, handed to
#                             munge_headers; may be given several times
#   -v, --[no-]verbose        log at WARN instead of FATAL level
#   -h, --help                print the usage text and exit
#
# Prints an error plus the usage text and exits with status 1 when no files
# are given.
def msgtool
  opts = {:verbose => false, :action => :convert, :header_defaults => []}
  # the block parameter has its own name so it does not shadow the +op+ local.
  op = OptionParser.new do |parser|
    parser.banner = "Usage: msgtool [options] [files]"
    parser.separator ''
    parser.on('-c', '--convert', 'Convert msg files (default)') { opts[:action] = :convert }
    parser.on('-m', '--convert-mbox', 'Convert msg files for mbox usage') { opts[:action] = :convert_mbox }
    parser.on('-d', '--header-default STR', 'Provide a default value for top level mail header') { |hd| opts[:header_defaults] << hd }
    parser.separator ''
    parser.on('-v', '--[no-]verbose', 'Run verbosely') { |v| opts[:verbose] = v }
    parser.on_tail('-h', '--help', 'Show this message') { puts parser; exit }
  end
  msgs = op.parse ARGV
  if msgs.empty?
    puts 'Must specify 1 or more msg files.'
    puts op
    exit 1
  end
  # just shut up and convert a message to eml
  Msg::Log.level = Ole::Log.level = opts[:verbose] ? Logger::WARN : Logger::FATAL
  case opts[:action]
  when :convert
    msgs.each do |filename|
      msg = Msg.open filename
      mime = msg.to_mime
      munge_headers mime, opts
      puts mime.to_s
    end
  when :convert_mbox
    msgs.each do |filename|
      msg = Msg.open filename
      # could use something from the msg in our from line if we wanted
      puts "From msgtool@ruby-msg #{Time.now.rfc2822}"
      mime = msg.to_mime
      munge_headers mime, opts
      # String#each no longer exists as of ruby 1.9; each_line iterates over
      # the lines in the same way and is available on old and new rubies.
      mime.to_s.each_line do |line|
        # we do the append > style mbox quoting (mboxrd i think its called), as it
        # is the only one that can be robustly un-quoted. evolution doesn't use this!
        if line =~ /^>*From /
          print '>' + line
        else
          print line
        end
      end
    end
  end
end
|
62
|
+
|
63
|
+
msgtool
|
data/bin/oletool
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
#! /usr/bin/ruby
|
2
|
+
|
3
|
+
require 'optparse'
|
4
|
+
require 'rubygems'
|
5
|
+
require 'ole/storage'
|
6
|
+
|
7
|
+
# Command line entry point: parses the options in ARGV and then processes
# every ole file named on the command line.
#
# Options:
#   -t, --tree          print the tree of each file's root entry (default)
#   -r, --repack        open each file and call repack on it
#   -v, --[no-]verbose  log at WARN instead of FATAL level
#   -h, --help          print the usage text and exit
#
# Prints an error plus the usage text and exits with status 1 when no files
# are given.
def oletool
  opts = {:verbose => false, :action => :tree}
  # the block parameter has its own name so it does not shadow the +op+ local.
  op = OptionParser.new do |parser|
    parser.banner = "Usage: oletool [options] [files]"
    parser.separator ''
    parser.on('-t', '--tree', 'Dump ole trees for files (default)') { opts[:action] = :tree }
    parser.on('-r', '--repack', 'Repack the ole files in canonical form') { opts[:action] = :repack }
    parser.separator ''
    parser.on('-v', '--[no-]verbose', 'Run verbosely') { |v| opts[:verbose] = v }
    parser.on_tail('-h', '--help', 'Show this message') { puts parser; exit }
  end
  files = op.parse ARGV
  if files.empty?
    # this tool accepts any ole file, not only msg files
    puts 'Must specify 1 or more ole files.'
    puts op
    exit 1
  end
  Ole::Log.level = opts[:verbose] ? Logger::WARN : Logger::FATAL
  files.each do |file|
    case opts[:action]
    when :tree
      Ole::Storage.open(file) { |ole| puts ole.root.to_tree }
    when :repack
      Ole::Storage.open(file, &:repack)
    end
  end
end
|
34
|
+
|
35
|
+
oletool
|