msg_extractor 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +9 -0
- data/LICENSE.txt +21 -0
- data/README.md +88 -0
- data/exe/msg_extractor +5 -0
- data/lib/msg_extractor/appointment.rb +20 -0
- data/lib/msg_extractor/attachment.rb +57 -0
- data/lib/msg_extractor/cfbf/directory.rb +84 -0
- data/lib/msg_extractor/cfbf/fat.rb +75 -0
- data/lib/msg_extractor/cfbf/file.rb +114 -0
- data/lib/msg_extractor/cfbf/header.rb +40 -0
- data/lib/msg_extractor/cli.rb +77 -0
- data/lib/msg_extractor/contact.rb +23 -0
- data/lib/msg_extractor/errors.rb +12 -0
- data/lib/msg_extractor/headers.rb +39 -0
- data/lib/msg_extractor/mapi/decoders.rb +59 -0
- data/lib/msg_extractor/mapi/named_property_map.rb +74 -0
- data/lib/msg_extractor/mapi/property_store.rb +106 -0
- data/lib/msg_extractor/mapi/ptag.rb +55 -0
- data/lib/msg_extractor/message.rb +6 -0
- data/lib/msg_extractor/message_object.rb +100 -0
- data/lib/msg_extractor/recipient.rb +34 -0
- data/lib/msg_extractor/rtf/compressed_rtf.rb +88 -0
- data/lib/msg_extractor/rtf/decapsulator.rb +206 -0
- data/lib/msg_extractor/task.rb +25 -0
- data/lib/msg_extractor/util.rb +72 -0
- data/lib/msg_extractor/version.rb +3 -0
- data/lib/msg_extractor.rb +63 -0
- metadata +74 -0
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
module MsgExtractor
  module Mapi
    # Converts raw MAPI property bytes into Ruby values. All text becomes
    # UTF-8 with invalid sequences replaced; binary stays ASCII-8BIT.
    module Decoders
      # Windows code page number => name of the Ruby encoding used to read it.
      CODE_PAGES = {
        437 => "IBM437", 850 => "IBM850", 932 => "Windows-31J", 936 => "GBK",
        949 => "EUC-KR", 950 => "Big5",
        1250 => "Windows-1250", 1251 => "Windows-1251", 1252 => "Windows-1252",
        1253 => "Windows-1253", 1254 => "Windows-1254", 1255 => "Windows-1255",
        1256 => "Windows-1256", 1257 => "Windows-1257", 1258 => "Windows-1258",
        20127 => "US-ASCII", 28591 => "ISO-8859-1", 28592 => "ISO-8859-2",
        28605 => "ISO8859-15", 65001 => "UTF-8"
      }.freeze

      # Encoding assumed when a code page is unknown, or when Ruby knows the
      # encoding but has no converter from it to UTF-8.
      FALLBACK_ENCODING = "Windows-1252"

      # Seconds between 1601-01-01 (FILETIME epoch) and 1970-01-01 (Unix).
      EPOCH_DELTA = 11_644_473_600

      module_function

      # Decodes +bytes+ according to the MAPI property +type+.
      #
      # For fixed-width types, +bytes+ may be the full 8-byte record value
      # field; unpack reads only the leading bytes it needs.
      #
      # @param type [Integer] one of the PT_* property type constants
      # @param bytes [String] raw property bytes
      # @param codepage [Integer] Windows code page used for PT_STRING8 text
      # @return [String, Integer, Float, Time, Boolean, nil] the decoded value;
      #   types without a decoder are returned as the untouched +bytes+
      def decode(type, bytes, codepage: 1252)
        case type
        when PT_UNICODE then utf16(bytes)
        when PT_STRING8 then string8(bytes, codepage)
        when PT_SYSTIME then filetime(bytes.unpack1("Q<"))
        when PT_LONG    then bytes.unpack1("l<")
        when PT_SHORT   then bytes.unpack1("s<")
        when PT_I8      then bytes.unpack1("q<")
        when PT_DOUBLE  then bytes.unpack1("E")
        when PT_BOOLEAN then (bytes.unpack1("v") || 0) != 0
        else bytes # PT_BINARY, PT_OBJECT, PT_CLSID and anything unknown: raw
        end
      end

      # Decodes UTF-16LE bytes to UTF-8 and strips trailing NUL terminators.
      #
      # @param bytes [String]
      # @return [String] UTF-8 text
      def utf16(bytes)
        to_utf8(bytes, Encoding::UTF_16LE)
      end

      # Decodes 8-bit text in the given Windows code page to UTF-8.
      #
      # Unknown code pages are read as Windows-1252. The same fallback is used
      # when String#encode raises Encoding::ConverterNotFoundError: the
      # invalid:/undef: options do not cover a missing converter, so without
      # the rescue a single property could abort the whole read.
      # NOTE(review): Windows-1258 is reportedly such a page in stock Ruby;
      # confirm on the supported Ruby versions.
      #
      # @param bytes [String]
      # @param codepage [Integer]
      # @return [String] UTF-8 text
      def string8(bytes, codepage)
        to_utf8(bytes, CODE_PAGES.fetch(codepage, FALLBACK_ENCODING))
      rescue Encoding::ConverterNotFoundError
        to_utf8(bytes, FALLBACK_ENCODING)
      end

      # PR_HTML bytes -> UTF-8 string using PR_INTERNET_CPID.
      def bytes_to_utf8(bytes, codepage) = string8(bytes, codepage)

      # Converts a FILETIME tick count (100 ns units since 1601-01-01) to a
      # UTC Time.
      #
      # @param ticks [Integer, nil]
      # @return [Time, nil] nil when +ticks+ is nil or zero
      def filetime(ticks)
        return nil if ticks.nil? || ticks.zero?
        Time.at(Rational(ticks, 10_000_000) - EPOCH_DELTA).utc
      end

      # Re-tags a copy of +bytes+ as +encoding+, transcodes it to UTF-8
      # (replacing invalid or unmappable sequences) and drops trailing NULs.
      def to_utf8(bytes, encoding)
        bytes.dup.force_encoding(encoding)
             .encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
             .sub(/\0+\z/, "")
      end
      private_class_method :to_utf8
    end
  end
end
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
module MsgExtractor
  module Mapi
    # Parses the __nameid_version1.0 storage: maps (property-set GUID, numeric
    # LID or string name) pairs to the 0x8000+ property ids used in this file.
    class NamedPropertyMap
      PS_MAPI = "00020328-0000-0000-c000-000000000046"
      PS_PUBLIC_STRINGS = "00020329-0000-0000-c000-000000000046"

      # Builds the map from the named-property storage below +storage+.
      # Returns an empty map when that storage is absent or is not a storage.
      def self.read(cfbf, storage)
        container = storage.children["__NAMEID_VERSION1.0"]
        return new({}) unless container&.storage?

        # GUID stream, entry stream, string stream - read in that order.
        streams = %w[
          __SUBSTG1.0_00020102
          __SUBSTG1.0_00030102
          __SUBSTG1.0_00040102
        ].map { |stream_name| read_child(cfbf, container, stream_name) }
        parse(*streams)
      end

      # Contents of the child stream +name+, or an empty binary string when
      # the child is missing or is not a stream.
      def self.read_child(cfbf, storage, name)
        entry = storage.children[name]
        return "".b unless entry&.stream?

        cfbf.read_stream(entry)
      end

      # Decodes the three raw streams into a NamedPropertyMap.
      #
      # Each 8-byte entry holds a uint32 (numeric LID, or byte offset into the
      # string stream), a uint16 whose low bit selects "string name" and whose
      # remaining bits are the GUID index, and a uint16 property index.
      # Trailing bytes that do not make up a whole entry are ignored, and a
      # string entry whose length prefix cannot be read is skipped.
      def self.parse(guid_stream, entry_stream, string_stream)
        entry_count = entry_stream.bytesize / 8
        table = (0...entry_count).each_with_object({}) do |index, acc|
          lid_or_offset, flags, slot = entry_stream.byteslice(index * 8, 8).unpack("Vvv")
          guid = guid_for(flags >> 1, guid_stream)
          key = flags.odd? ? string_name(string_stream, lid_or_offset) : lid_or_offset
          next if key.nil?

          acc[[guid, key]] = 0x8000 + slot
        end
        new(table)
      end

      # Property-set GUID for a GUID index: 1 and 2 are the well-known sets,
      # 3 and above index into +guid_stream+, anything else is the nil GUID.
      def self.guid_for(guid_index, guid_stream)
        return PS_MAPI if guid_index == 1
        return PS_PUBLIC_STRINGS if guid_index == 2
        return "00000000-0000-0000-0000-000000000000" if guid_index < 3

        format_guid(guid_stream.byteslice((guid_index - 3) * 16, 16))
      end

      # Lower-cased UTF-8 name stored at +offset+ in the string stream (uint32
      # byte length followed by UTF-16LE text), or nil when the length prefix
      # cannot be read.
      def self.string_name(string_stream, offset)
        size = string_stream.byteslice(offset, 4)&.unpack1("V")
        return nil if size.nil?

        text = string_stream.byteslice(offset + 4, size) || "".b
        text.force_encoding(Encoding::UTF_16LE)
            .encode(Encoding::UTF_8, invalid: :replace, undef: :replace)
            .downcase
      end
      private_class_method :read_child, :guid_for, :string_name

      # Renders 16 raw GUID bytes (first three fields little-endian) as a
      # lower-case dashed string; nil or short input yields the nil GUID.
      def self.format_guid(bytes)
        return "00000000-0000-0000-0000-000000000000" if bytes.nil? || bytes.bytesize < 16

        format("%08x-%04x-%04x-%s-%s", *bytes.unpack("VvvH4H12"))
      end

      def initialize(map)
        @map = map
      end

      # Looks up the property id for +guid+ and a LID (Integer) or name
      # (String). GUIDs and names are matched case-insensitively.
      # Returns nil when there is no mapping.
      def resolve(guid, name_or_lid)
        key = name_or_lid
        key = key.downcase if key.is_a?(::String)
        @map[[guid.downcase, key]]
      end
    end
  end
end
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
module MsgExtractor
  module Mapi
    # Reads the MAPI properties of one storage: fixed-width values from the
    # __properties_version1.0 stream, variable-width values from companion
    # __substg1.0_XXXXYYYY streams. The properties stream header length
    # depends on what kind of storage this is.
    class PropertyStore
      # Length in bytes of the properties-stream header, per storage kind.
      HEADER_SIZES = { root: 32, embedded: 24, attachment: 8, recipient: 8 }.freeze

      # Matches an upper-cased value stream name; capture 1 is the property
      # id and capture 2 the property type, both as four hex digits.
      SUBSTG_RE = /\A__SUBSTG1\.0_([0-9A-F]{4})([0-9A-F]{4})\z/

      # Variable-width types (PT_UNICODE, PT_STRING8, PT_BINARY, PT_OBJECT,
      # PT_CLSID) must be sourced from a substg stream. If one appears only in
      # @records the 8-byte value field is not the actual payload.
      VARIABLE_WIDTH_TYPES = [
        MsgExtractor::Mapi::PT_UNICODE,
        MsgExtractor::Mapi::PT_STRING8,
        MsgExtractor::Mapi::PT_BINARY,
        MsgExtractor::Mapi::PT_OBJECT,
        MsgExtractor::Mapi::PT_CLSID
      ].freeze

      # These counts come directly from the file and are untrusted; the model
      # layer iterates actual storages rather than relying on them.
      attr_reader :recipient_count, :attachment_count

      # @param cfbf [#read_stream] compound-file reader
      # @param storage [#children] storage whose properties are indexed
      # @param kind [Symbol] one of the HEADER_SIZES keys
      # @raise [KeyError] when +kind+ is unknown and the storage has a
      #   properties stream
      def initialize(cfbf, storage, kind)
        @cfbf = cfbf
        @storage = storage
        @kind = kind
        @records = {} # id => [type, 8-byte value field]
        @streams = {} # id => [type, Cfbf::Entry]
        parse
      end

      # True when property +id+ is present as a stream or as a record.
      def key?(id) = @streams.key?(id) || @records.key?(id)

      # Decoded value of property +id+, or nil when it is absent.
      #
      # A property that exists only as a record but whose payload belongs in
      # a stream also yields nil rather than decoded garbage. That covers the
      # variable-width types and every multi-valued (MV_FLAG) type; the
      # latter previously came back as the raw 8-byte value field.
      def [](id)
        if (type_entry = @streams[id])
          type, entry = type_entry
          Decoders.decode(type, @cfbf.read_stream(entry), codepage: codepage)
        elsif (type_value = @records[id])
          type, value = type_value
          return nil if stream_backed?(type)

          Decoders.decode(type, value, codepage: codepage)
        end
      end

      # Raw bytes without decoding (binary props, or the 8-byte record field).
      def raw(id)
        if (type_entry = @streams[id])
          @cfbf.read_stream(type_entry[1])
        elsif (type_value = @records[id])
          type_value[1]
        end
      end

      # Property type recorded for +id+, or nil when the property is absent.
      def type_of(id) = (@streams[id] || @records[id])&.first

      # Code page for PT_STRING8 values: this storage's PR_MESSAGE_CODEPAGE
      # record when present, otherwise 1252. Memoized.
      def codepage
        @codepage ||=
          if (record = @records[PR_MESSAGE_CODEPAGE])
            record[1].unpack1("l<")
          else
            1252
          end
      end

      # Code page from the PR_INTERNET_CPID record, falling back to #codepage.
      def internet_codepage
        if (record = @records[PR_INTERNET_CPID])
          record[1].unpack1("l<")
        else
          codepage
        end
      end

      private

      # True when a value of +type+ lives in a companion stream, so the
      # 8-byte record field cannot be the payload.
      def stream_backed?(type)
        VARIABLE_WIDTH_TYPES.include?(type) || !(type & MV_FLAG).zero?
      end

      # Indexes the value streams first, then the fixed-width records, so a
      # record never shadows a stream with the same property id.
      def parse
        index_streams
        index_records
      end

      # Registers every single-valued __substg1.0_XXXXYYYY child stream.
      def index_streams
        @storage.children.each_value do |entry|
          next unless entry.stream?

          match = SUBSTG_RE.match(entry.name.upcase)
          next unless match

          type = match[2].to_i(16)
          next unless (type & MV_FLAG).zero? # multi-valued props unsupported in v1

          @streams[match[1].to_i(16)] = [type, entry]
        end
      end

      # Reads the properties stream: the two header counts (root and embedded
      # storages only), then one 16-byte record per property.
      def index_records
        properties_entry = @storage.children["__PROPERTIES_VERSION1.0"]
        return unless properties_entry

        data = @cfbf.read_stream(properties_entry)
        if %i[root embedded].include?(@kind) && data.bytesize >= 24
          @recipient_count, @attachment_count = data.byteslice(16, 8).unpack("V2")
        end

        position = HEADER_SIZES.fetch(@kind)
        while position + 16 <= data.bytesize
          # Record layout used here: uint32 tag at +0, value field at +8.
          tag = data.byteslice(position, 4).unpack1("V")
          value = data.byteslice(position + 8, 8)
          id = tag >> 16
          type = tag & 0xFFFF
          @records[id] = [type, value] unless @streams.key?(id)
          position += 16
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
module MsgExtractor
  module Mapi
    # Property types ([MS-OXCDATA] 2.11.1)
    PT_SHORT   = 0x0002
    PT_LONG    = 0x0003
    PT_DOUBLE  = 0x0005
    PT_BOOLEAN = 0x000B
    PT_OBJECT  = 0x000D
    PT_I8      = 0x0014
    PT_STRING8 = 0x001E
    PT_UNICODE = 0x001F
    PT_SYSTIME = 0x0040
    PT_CLSID   = 0x0048
    PT_BINARY  = 0x0102
    # Bit set in a type word to mark a multi-valued property.
    MV_FLAG    = 0x1000

    # Property IDs (MAPI PR_* convention, 16-bit id without the type word),
    # listed in ascending numeric order.
    PR_MESSAGE_CLASS         = 0x001A
    PR_SUBJECT               = 0x0037
    PR_CLIENT_SUBMIT_TIME    = 0x0039
    PR_TRANSPORT_HEADERS     = 0x007D

    PR_RECIPIENT_TYPE        = 0x0C15
    PR_SENDER_NAME           = 0x0C1A
    PR_SENDER_EMAIL          = 0x0C1F

    PR_DISPLAY_BCC           = 0x0E02
    PR_DISPLAY_CC            = 0x0E03
    PR_DISPLAY_TO            = 0x0E04
    PR_MESSAGE_DELIVERY_TIME = 0x0E06

    PR_BODY                  = 0x1000
    PR_RTF_COMPRESSED        = 0x1009
    PR_HTML                  = 0x1013

    PR_DISPLAY_NAME          = 0x3001
    PR_ADDRTYPE              = 0x3002
    PR_EMAIL_ADDRESS         = 0x3003

    PR_ATTACH_DATA           = 0x3701
    PR_ATTACH_FILENAME       = 0x3704
    PR_ATTACH_METHOD         = 0x3705
    PR_ATTACH_LONG_FILENAME  = 0x3707
    PR_ATTACH_MIME_TAG       = 0x370E
    PR_ATTACH_CONTENT_ID     = 0x3712

    PR_SMTP_ADDRESS          = 0x39FE
    PR_GIVEN_NAME            = 0x3A06
    PR_BUSINESS_PHONE        = 0x3A08
    PR_HOME_PHONE            = 0x3A09
    PR_SURNAME               = 0x3A11
    PR_POSTAL_ADDRESS        = 0x3A15
    PR_COMPANY_NAME          = 0x3A16
    PR_JOB_TITLE             = 0x3A17
    PR_MOBILE_PHONE          = 0x3A1C

    PR_INTERNET_CPID         = 0x3FDE
    PR_MESSAGE_CODEPAGE      = 0x3FFD
    PR_SENDER_SMTP           = 0x5D01
    PR_ATTACHMENT_HIDDEN     = 0x7FFE
  end
end
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
require "fileutils"
|
|
2
|
+
|
|
3
|
+
module MsgExtractor
  # Shared base for every MSG item type: property access, bodies, recipients,
  # attachments. Works for the root message and for embedded messages.
  class MessageObject
    attr_reader :properties

    attr_reader :named # :nodoc: internal reuse by MsgExtractor.from_storage

    # @param cfbf compound-file reader; must respond to #root when +storage+
    #   is not given
    # @param storage storage holding this item (defaults to cfbf.root)
    # @param named named-property map to reuse instead of reading one from
    #   +storage+
    # @param kind [Symbol] storage kind handed to Mapi::PropertyStore
    # @param properties pre-built property store to reuse
    def initialize(cfbf, storage: nil, named: nil, kind: :root, properties: nil)
      @cfbf = cfbf
      @storage = storage || cfbf.root
      @kind = kind
      @properties = properties || Mapi::PropertyStore.new(cfbf, @storage, kind)
      @named = named || Mapi::NamedPropertyMap.read(cfbf, @storage)
    end

    def message_class = properties[Mapi::PR_MESSAGE_CLASS]
    def subject = properties[Mapi::PR_SUBJECT]

    # Client submit time, falling back to the delivery time.
    def date
      properties[Mapi::PR_CLIENT_SUBMIT_TIME] || properties[Mapi::PR_MESSAGE_DELIVERY_TIME]
    end

    # Plain-text body: PR_BODY when present, otherwise the HTML body passed
    # through Util.html_to_text. Memoized, including a nil result.
    def body
      return @body if defined?(@body)

      @body = properties[Mapi::PR_BODY] || (html_body && Util.html_to_text(html_body))
    end

    # Decompressed PR_RTF_COMPRESSED content, or nil when the property is
    # absent. Errors raised by Rtf::CompressedRtf.decompress propagate.
    def rtf_body
      return @rtf_body if defined?(@rtf_body)

      raw = properties.raw(Mapi::PR_RTF_COMPRESSED)
      @rtf_body = raw && Rtf::CompressedRtf.decompress(raw)
    end

    # HTML body, tried in this order: PR_HTML stored as Unicode text, PR_HTML
    # bytes decoded with the internet code page, HTML extracted from the RTF
    # body. Memoized, including a nil result.
    def html_body
      return @html_body if defined?(@html_body)

      @html_body =
        if properties.type_of(Mapi::PR_HTML) == Mapi::PT_UNICODE
          properties[Mapi::PR_HTML]
        elsif (raw = properties.raw(Mapi::PR_HTML))
          Mapi::Decoders.bytes_to_utf8(raw, properties.internet_codepage)
        elsif rtf_body
          Rtf::Decapsulator.html_from(rtf_body)
        end
    end

    # Result of Headers.parse on PR_TRANSPORT_HEADERS.
    def headers
      @headers ||= Headers.parse(properties[Mapi::PR_TRANSPORT_HEADERS])
    end

    # Sender as a Recipient with a nil type, or nil when neither a name nor
    # a usable address is available. Memoized.
    def sender
      return @sender if defined?(@sender)

      name = properties[Mapi::PR_SENDER_NAME]
      email = sender_email
      @sender = (name || email) && Recipient.new(name: name, email: email, type: nil)
    end

    def recipients
      @recipients ||= child_storages("__RECIP_VERSION1.0_#")
                      .map { |e| Recipient.from_storage(@cfbf, e) }
    end

    def to = recipients.select { |r| r.type == Recipient::TO }
    def cc = recipients.select { |r| r.type == Recipient::CC }
    def bcc = recipients.select { |r| r.type == Recipient::BCC }

    def attachments
      @attachments ||= child_storages("__ATTACH_VERSION1.0_#")
                       .map { |e| Attachment.new(@cfbf, e, named: @named) }
    end

    # Value of the named property (+guid+, LID or name), or nil when the
    # named-property map has no entry for it.
    def named_value(guid, lid_or_name)
      id = @named.resolve(guid, lid_or_name)
      id && properties[id]
    end

    # Writes the message into a directory under +dir+ named after the
    # subject ("message" when there is none): message.txt and message.html
    # when those bodies exist, plus every attachment that is not an embedded
    # message.
    #
    # @return the directory path that was created
    def save(dir: ".")
      name = Util.sanitize_filename(subject || "message")
      base = Util.dedupe_path(::File.join(dir, name))
      FileUtils.mkdir_p(base)
      ::File.write(::File.join(base, "message.txt"), body, encoding: Encoding::UTF_8) if body
      ::File.binwrite(::File.join(base, "message.html"), html_body.b) if html_body
      attachments.each do |attachment|
        attachment.save(dir: base) unless attachment.embedded_message?
      end
      base
    end

    private

    # First of PR_SENDER_SMTP / PR_SENDER_EMAIL that is a string containing
    # "@", or nil. Checking each candidate separately lets an empty or
    # non-SMTP PR_SENDER_SMTP fall through to PR_SENDER_EMAIL; an empty
    # string is truthy, so `smtp || legacy` never reached the second one.
    def sender_email
      [Mapi::PR_SENDER_SMTP, Mapi::PR_SENDER_EMAIL].each do |id|
        candidate = properties[id]
        return candidate if candidate.is_a?(::String) && candidate.include?("@")
      end
      nil
    end

    # Child storages whose name starts with +prefix+, ordered by name.
    # Both the match and the ordering ignore case, so entries whose names
    # differ only in letter case still sort by their hex index.
    def child_storages(prefix)
      @storage.children.values
              .select { |e| e.storage? && e.name.upcase.start_with?(prefix) }
              .sort_by { |e| e.name.upcase }
    end
  end
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
module MsgExtractor
  # A display name, e-mail address and recipient type. Built from recipient
  # storages via .from_storage; also constructed directly with keyword
  # arguments.
  class Recipient
    TO = 1
    CC = 2
    BCC = 3

    attr_reader :name, :email, :type

    # @param name [String, nil] display name
    # @param email [String, nil] address
    # @param type [Integer, nil] TO, CC or BCC; nil is accepted
    def initialize(name:, email:, type:)
      @name = name
      @email = email
      @type = type
    end

    # Builds a Recipient from one recipient storage.
    #
    # The address is PR_SMTP_ADDRESS when it is non-empty; otherwise
    # PR_EMAIL_ADDRESS, accepted only if it contains "@". An empty
    # PR_SMTP_ADDRESS no longer blocks that fallback. The type defaults to
    # TO when PR_RECIPIENT_TYPE is missing.
    def self.from_storage(cfbf, storage)
      props = Mapi::PropertyStore.new(cfbf, storage, :recipient)
      email = props[Mapi::PR_SMTP_ADDRESS]
      if blank?(email)
        address = props[Mapi::PR_EMAIL_ADDRESS]
        email = address&.include?("@") ? address : nil
      end
      new(name: props[Mapi::PR_DISPLAY_NAME], email: email,
          type: props[Mapi::PR_RECIPIENT_TYPE] || TO)
    end

    # True for nil and for values that report themselves as empty.
    def self.blank?(value)
      value.nil? || (value.respond_to?(:empty?) && value.empty?)
    end
    private_class_method :blank?

    # "Name <email>" when both are present and differ; otherwise whichever
    # one is present, or "" when neither is. Empty strings count as absent,
    # so the result is never "Name <>" or " <email>".
    def to_s
      shown_name = name unless name.to_s.empty?
      shown_email = email unless email.to_s.empty?
      if shown_name && shown_email && shown_name != shown_email
        "#{shown_name} <#{shown_email}>"
      else
        (shown_email || shown_name).to_s
      end
    end
  end
end
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
module MsgExtractor
  module Rtf
    # LZFu decompression for PR_RTF_COMPRESSED per [MS-OXRTFCP].
    module CompressedRtf
      MAGIC_COMPRESSED = 0x75465A4C # "LZFu"
      MAGIC_UNCOMPRESSED = 0x414C454D # "MELA"

      # The fixed 207-byte initial dictionary defined by the spec.
      INITIAL_DICTIONARY = [
        "{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}",
        "{\\f0\\fnil \\froman \\fswiss \\fmodern \\fscript ",
        "\\fdecor MS Sans SerifSymbolArialTimes New RomanCourier",
        "{\\colortbl\\red0\\green0\\blue0\r\n\\par ",
        "\\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx"
      ].join.b.freeze

      # CRC32 table (polynomial 0xEDB88320), init 0, no final XOR - per spec.
      CRC_TABLE = Array.new(256) { |seed|
        8.times.reduce(seed) { |crc, _| (crc >> 1) ^ (crc.odd? ? 0xEDB88320 : 0) }
      }.freeze

      module_function

      # Checksum of +bytes+ using CRC_TABLE, starting from 0 and without a
      # final XOR.
      def crc32(bytes)
        crc = 0
        bytes.each_byte { |byte| crc = CRC_TABLE[(crc ^ byte) & 0xFF] ^ (crc >> 8) }
        crc
      end

      # Returns the RTF carried by a PR_RTF_COMPRESSED value.
      #
      # The 16-byte header is four little-endian uint32s: compressed size,
      # raw size, magic, CRC. "MELA" data is stored uncompressed and is
      # sliced to the raw size; "LZFu" data is CRC-checked and decompressed.
      #
      # @raise [CorruptFileError] for nil or short input, an unknown magic,
      #   a compressed size below 12, a CRC mismatch, or MELA data shorter
      #   than the declared raw size
      def decompress(data)
        raise CorruptFileError, "compressed RTF too short" if data.nil? || data.bytesize < 16

        comp_size, raw_size, magic, crc = data.unpack("V4")

        if magic == MAGIC_UNCOMPRESSED
          available = data.bytesize - 16
          raise CorruptFileError, "MELA RTF truncated" if available < raw_size

          return data.byteslice(16, raw_size)
        end

        unless magic == MAGIC_COMPRESSED
          raise CorruptFileError, format("bad compressed RTF magic 0x%08x", magic)
        end

        # The compressed size counts the 12 header bytes that follow it.
        raise CorruptFileError, "compressed RTF truncated header" if comp_size < 12

        payload = data.byteslice(16, comp_size - 12)
        raise CorruptFileError, "compressed RTF CRC mismatch" if crc32(payload) != crc

        lzfu(payload)
      end

      # Expands an LZFu token stream. Each control byte describes the next
      # eight tokens, lowest bit first: a clear bit is one literal byte, a set
      # bit is a big-endian 16-bit reference (12-bit dictionary offset, 4-bit
      # length minus two). A reference whose offset equals the write position
      # ends the stream. A payload that runs out mid-token yields what has
      # been produced so far.
      def lzfu(payload)
        window = INITIAL_DICTIONARY + ("\0".b * (4096 - INITIAL_DICTIONARY.bytesize))
        cursor = INITIAL_DICTIONARY.bytesize # 207
        result = +"".b

        # Appends one byte to the output and records it in the 4096-byte ring.
        emit = lambda do |byte|
          result << byte
          window.setbyte(cursor, byte)
          cursor = (cursor + 1) % 4096
        end

        index = 0
        total = payload.bytesize
        while index < total
          flags = payload.getbyte(index)
          index += 1
          8.times do |bit|
            if flags[bit] == 1
              hi = payload.getbyte(index)
              lo = payload.getbyte(index + 1)
              return result if hi.nil? || lo.nil?

              index += 2
              token = (hi << 8) | lo
              source = token >> 4
              return result if source == cursor # end-of-stream marker

              # Copy byte by byte: the source run may overlap bytes written
              # by this same copy.
              ((token & 0x0F) + 2).times do
                emit.call(window.getbyte(source))
                source = (source + 1) % 4096
              end
            else
              literal = payload.getbyte(index)
              return result if literal.nil?

              index += 1
              emit.call(literal)
            end
          end
        end
        result
      end
    end
  end
end
|