ruby-msg 1.2.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/FIXES +34 -0
- data/README +121 -0
- data/Rakefile +66 -0
- data/bin/msgtool +63 -0
- data/bin/oletool +35 -0
- data/data/mapitags.yaml +4168 -0
- data/data/named_map.yaml +114 -0
- data/data/types.yaml +15 -0
- data/lib/blah.rb +106 -0
- data/lib/mime-new.rb +210 -0
- data/lib/mime.rb +165 -0
- data/lib/msg/properties.rb +515 -0
- data/lib/msg/rtf.rb +236 -0
- data/lib/msg.rb +505 -0
- data/lib/ole/base.rb +5 -0
- data/lib/ole/file_system.rb +181 -0
- data/lib/ole/io_helpers.rb +184 -0
- data/lib/ole/storage.rb +927 -0
- data/lib/ole/types.rb +36 -0
- data/lib/orderedhash.rb +218 -0
- data/lib/rtf.rb +118 -0
- data/lib/support.rb +51 -0
- data/test/test_mime.rb +22 -0
- data/test/test_storage.rb +139 -0
- data/test/test_word_6.doc +0 -0
- data/test/test_word_95.doc +0 -0
- data/test/test_word_97.doc +0 -0
- metadata +73 -0
data/data/named_map.yaml
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
# this file provides for the mapping of the keys of named properties
|
2
|
+
# to symbolic names (as opposed to mapitags.yaml, which is currently
|
3
|
+
# in a different format, has a different source, and is only fixed
|
4
|
+
# code properties)
|
5
|
+
#
|
6
|
+
# essentially the symbols are slightly munged versions of the names
|
7
|
+
# given to these properties by CDO, or Outlook's object model.
|
8
|
+
# it was parsed out of cdo10.htm, and neatened up a bit.
|
9
|
+
#
|
10
|
+
# interestingly, despite having separate guids, the codes are picked not to
|
11
|
+
# clash. further the names themselves have only 3 clashes in all the below.
|
12
|
+
{
|
13
|
+
[0x8005, PSETID_Address]: file_under,
|
14
|
+
[0x8017, PSETID_Address]: last_name_and_first_name,
|
15
|
+
[0x8018, PSETID_Address]: company_and_full_name,
|
16
|
+
[0x8019, PSETID_Address]: full_name_and_company,
|
17
|
+
[0x801a, PSETID_Address]: home_address,
|
18
|
+
[0x801b, PSETID_Address]: business_address,
|
19
|
+
[0x801c, PSETID_Address]: other_address,
|
20
|
+
[0x8022, PSETID_Address]: selected_address,
|
21
|
+
[0x802b, PSETID_Address]: web_page,
|
22
|
+
[0x802c, PSETID_Address]: yomi_first_name,
|
23
|
+
[0x802d, PSETID_Address]: yomi_last_name,
|
24
|
+
[0x802e, PSETID_Address]: yomi_company_name,
|
25
|
+
[0x8030, PSETID_Address]: last_first_no_space,
|
26
|
+
[0x8031, PSETID_Address]: last_first_space_only,
|
27
|
+
[0x8032, PSETID_Address]: company_last_first_no_space,
|
28
|
+
[0x8033, PSETID_Address]: company_last_first_space_only,
|
29
|
+
[0x8034, PSETID_Address]: last_first_no_space_company,
|
30
|
+
[0x8035, PSETID_Address]: last_first_space_only_company,
|
31
|
+
[0x8036, PSETID_Address]: last_first_and_suffix,
|
32
|
+
[0x8045, PSETID_Address]: business_address_street,
|
33
|
+
[0x8046, PSETID_Address]: business_address_city,
|
34
|
+
[0x8047, PSETID_Address]: business_address_state,
|
35
|
+
[0x8048, PSETID_Address]: business_address_postal_code,
|
36
|
+
[0x8049, PSETID_Address]: business_address_country,
|
37
|
+
[0x804a, PSETID_Address]: business_address_post_office_box,
|
38
|
+
[0x804f, PSETID_Address]: user_field1,
|
39
|
+
[0x8050, PSETID_Address]: user_field2,
|
40
|
+
[0x8051, PSETID_Address]: user_field3,
|
41
|
+
[0x8052, PSETID_Address]: user_field4,
|
42
|
+
[0x8062, PSETID_Address]: imaddress,
|
43
|
+
[0x8082, PSETID_Address]: email_addr_type,
|
44
|
+
[0x8083, PSETID_Address]: email_email_address,
|
45
|
+
[0x8084, PSETID_Address]: email_original_display_name,
|
46
|
+
[0x8085, PSETID_Address]: email_original_entry_id,
|
47
|
+
[0x8092, PSETID_Address]: email2_addr_type,
|
48
|
+
[0x8093, PSETID_Address]: email2_email_address,
|
49
|
+
[0x8094, PSETID_Address]: email2_original_display_name,
|
50
|
+
[0x8095, PSETID_Address]: email2_original_entry_id,
|
51
|
+
[0x80a2, PSETID_Address]: email3_addr_type,
|
52
|
+
[0x80a3, PSETID_Address]: email3_email_address,
|
53
|
+
[0x80a4, PSETID_Address]: email3_original_display_name,
|
54
|
+
[0x80a5, PSETID_Address]: email3_original_entry_id,
|
55
|
+
[0x80d8, PSETID_Address]: internet_free_busy_address,
|
56
|
+
[0x8101, PSETID_Task]: status,
|
57
|
+
[0x8102, PSETID_Task]: percent_complete,
|
58
|
+
[0x8103, PSETID_Task]: team_task,
|
59
|
+
[0x8104, PSETID_Task]: start_date,
|
60
|
+
[0x8105, PSETID_Task]: due_date,
|
61
|
+
[0x8106, PSETID_Task]: duration,
|
62
|
+
[0x810f, PSETID_Task]: date_completed,
|
63
|
+
[0x8110, PSETID_Task]: actual_work,
|
64
|
+
[0x8111, PSETID_Task]: total_work,
|
65
|
+
[0x811c, PSETID_Task]: complete,
|
66
|
+
[0x811f, PSETID_Task]: owner,
|
67
|
+
[0x8126, PSETID_Task]: is_recurring,
|
68
|
+
[0x8205, PSETID_Appointment]: busy_status,
|
69
|
+
[0x8208, PSETID_Appointment]: location,
|
70
|
+
[0x820d, PSETID_Appointment]: start_date,
|
71
|
+
[0x820e, PSETID_Appointment]: end_date,
|
72
|
+
[0x8213, PSETID_Appointment]: duration,
|
73
|
+
[0x8214, PSETID_Appointment]: colors,
|
74
|
+
[0x8216, PSETID_Appointment]: recurrence_state,
|
75
|
+
[0x8218, PSETID_Appointment]: response_status,
|
76
|
+
[0x8222, PSETID_Appointment]: reply_time,
|
77
|
+
[0x8223, PSETID_Appointment]: is_recurring,
|
78
|
+
[0x822e, PSETID_Appointment]: organizer,
|
79
|
+
[0x8231, PSETID_Appointment]: recurrence_type,
|
80
|
+
[0x8232, PSETID_Appointment]: recurrence_pattern,
|
81
|
+
# also had CdoPR_FLAG_DUE_BY, when applied to messages. i don't currently
|
82
|
+
# use message class specific names
|
83
|
+
[0x8502, PSETID_Common]: reminder_time,
|
84
|
+
[0x8503, PSETID_Common]: reminder_set,
|
85
|
+
[0x8516, PSETID_Common]: common_start,
|
86
|
+
[0x8517, PSETID_Common]: common_end,
|
87
|
+
[0x851c, PSETID_Common]: reminder_override,
|
88
|
+
[0x851e, PSETID_Common]: reminder_sound,
|
89
|
+
[0x851f, PSETID_Common]: reminder_file,
|
90
|
+
# this one only listed as CdoPR_FLAG_TEXT. maybe should be
|
91
|
+
# reminder_text
|
92
|
+
[0x8530, PSETID_Common]: flag_text,
|
93
|
+
[0x8534, PSETID_Common]: mileage,
|
94
|
+
[0x8535, PSETID_Common]: billing_information,
|
95
|
+
[0x8539, PSETID_Common]: companies,
|
96
|
+
[0x853a, PSETID_Common]: contact_names,
|
97
|
+
# had CdoPR_FLAG_DUE_BY_NEXT for this one also
|
98
|
+
[0x8560, PSETID_Common]: reminder_next_time,
|
99
|
+
[0x8700, PSETID_Log]: entry,
|
100
|
+
[0x8704, PSETID_Log]: start_date,
|
101
|
+
[0x8705, PSETID_Log]: start_time,
|
102
|
+
[0x8706, PSETID_Log]: start,
|
103
|
+
[0x8707, PSETID_Log]: duration,
|
104
|
+
[0x8708, PSETID_Log]: end,
|
105
|
+
[0x870e, PSETID_Log]: doc_printed,
|
106
|
+
[0x870f, PSETID_Log]: doc_saved,
|
107
|
+
[0x8710, PSETID_Log]: doc_routed,
|
108
|
+
[0x8711, PSETID_Log]: doc_posted,
|
109
|
+
[0x8712, PSETID_Log]: entry_type,
|
110
|
+
[0x8b00, PSETID_Note]: color,
|
111
|
+
[0x8b02, PSETID_Note]: width,
|
112
|
+
[0x8b03, PSETID_Note]: height,
|
113
|
+
["Keywords", PS_PUBLIC_STRINGS]: categories
|
114
|
+
}
|
data/data/types.yaml
ADDED
data/lib/blah.rb
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
#! /usr/bin/ruby
|
2
|
+
|
3
|
+
# A tiny parser-combinator toolkit. A grammar is any object with
# +scan(cursor, buffer)+: it returns a true value when it matches at the
# cursor (appending whatever it matched to +buffer+) and a false value
# otherwise. The cursor must provide +read1after+ (peek) and +skip1next+
# (advance), which are the only two cursor methods called here.
class SimpleGrammar
  # Wraps +grammar+; #scan is simply forwarded to it.
  def initialize(grammar)
    @grammar = grammar
  end

  def scan(cursor, buffer)
    @grammar.scan(cursor, buffer)
  end

  # Alternation: a grammar that tries the receiver first and +other+ only if
  # the receiver did not match.
  def |(other)
    Code.new do |cursor, buffer|
      scan(cursor, buffer) || other.scan(cursor, buffer)
    end
  end

  # Sequence: a grammar that matches the receiver and then +other+.
  def +(other)
    Code.new do |cursor, buffer|
      scan(cursor, buffer) && other.scan(cursor, buffer)
    end
  end

  # A grammar that scans the receiver into a private copy of +buf0+, then
  # appends the result of calling the block on that copy to the real buffer.
  def filter(buf0, &block)
    Code.new do |cursor, buffer|
      scratch = buf0.clone
      scan(cursor, scratch) && buffer.concat(block[scratch])
    end
  end

  # A grammar that matches like the receiver but throws its output away.
  def discard
    Code.new do |cursor, _buffer|
      scan(cursor, [])
    end
  end

  # Grammar whose #scan is an arbitrary block taking (cursor, buffer).
  class Code < SimpleGrammar
    def initialize(&block)
      @block = block
    end

    def scan(cursor, buffer)
      @block[cursor, buffer]
    end
  end

  # Grammar that is handed to its own defining block, so the block can refer
  # to the grammar being defined (recursive rules).
  class Recurse < SimpleGrammar
    def initialize(&block)
      @grammar = block[self]
    end

    def scan(cursor, buffer)
      @grammar.scan(cursor, buffer)
    end
  end

  # Grammar matching a single item: the item under the cursor is accepted
  # when +pattern === item+.
  class Element < SimpleGrammar
    def initialize(pattern)
      @pattern = pattern
    end

    # Returns true (after appending the item and advancing the cursor) on a
    # match, nil otherwise.
    def scan(cursor, buffer)
      item = cursor.read1after
      return unless @pattern === item

      buffer << item
      cursor.skip1next
      true
    end
  end

  # Make methods for our classes that call new for us
  # (an instance method and a class method per nested class defined so far).
  constants.each do |name|
    class_eval <<-RUBY
      def #{name}(*args,&block)
        #{name}.new(*args,&block)
      end
      def self.#{name}(*args,&block)
        #{name}.new(*args,&block)
      end
    RUBY
  end

  # Grammar that always matches and consumes nothing.
  NULL = Code.new { true }
end
|
65
|
+
|
66
|
+
class IO
  # implement just the methods we need to look like a cursor

  # Peek at the next character without consuming it.
  # Returns the character, or nil at end of input.
  def read1after
    c = getc
    # Nothing was read at end of input, so there is nothing to push back;
    # ungetc is not guaranteed to accept nil on every Ruby version.
    ungetc(c) if c
    c
  end

  # Consume one character. Returns true if one was consumed, nil at end of
  # input.
  def skip1next
    getc && true
  end
end
|
71
|
+
|
72
|
+
# Arithmetic expression grammar built from the SimpleGrammar combinators.
# Scanning pushes numbers onto the buffer as Floats and the Code steps fold
# them together, so after a successful scan the value of the expression is
# the last element pushed (buffer[0] when starting from an empty buffer).
#
# Supported: decimal numbers, unary minus, parentheses, * / + -.
# Binary operators are evaluated left to right ("1-2-3" is -4, "8/4/2" is 1).
# Each operator rule is written as "operand, then a tail of (operator,
# operand, apply) steps"; a rule that recursed into itself on the right-hand
# side would group "1-2-3" as 1-(2-3) instead.
class Expression < SimpleGrammar::Recurse
  def initialize
    super() do |expr|
      digit = Element(?0..?9)
      # one or more digits
      int = Recurse { |more| digit + (more | NULL) }
      # digits with an optional fractional part, collected as text and then
      # converted to a single Float
      number = (int + (Element(?.) + int | NULL)).filter("") { |text| [text.to_f] }

      primary = Recurse { |inner|
        number |
        Element(?-).discard + inner + Code { |_, b| b[-1] = -b[-1] } |
        Element(?().discard + expr + Element(?)).discard
      }

      # zero or more "* primary" / "/ primary" steps, each applied to the
      # running value as soon as its operand has been parsed
      product_tail = Recurse { |tail|
        Element(?*).discard + primary + Code { |_, b| b[-2] *= b[-1]; b.pop } + tail |
        Element(?/).discard + primary + Code { |_, b| b[-2] /= b[-1]; b.pop } + tail |
        NULL
      }
      product = primary + product_tail

      # zero or more "+ product" / "- product" steps, applied the same way
      sum_tail = Recurse { |tail|
        Element(?+).discard + product + Code { |_, b| b[-2] += b[-1]; b.pop } + tail |
        Element(?-).discard + product + Code { |_, b| b[-2] -= b[-1]; b.pop } + tail |
        NULL
      }

      # the block's value becomes the grammar this Recurse delegates to
      product + sum_tail
    end
  end
end
|
104
|
+
|
105
|
+
# Script entry point: scan one expression from standard input and, when the
# scan succeeds, print the first value left in the result buffer.
buf = []
p(buf[0]) if Expression.new.scan(STDIN, buf)
|
106
|
+
|
data/lib/mime-new.rb
ADDED
@@ -0,0 +1,210 @@
|
|
1
|
+
require 'stringio'
|
2
|
+
require 'ole/storage'
|
3
|
+
|
4
|
+
# the IO equivalent to StringScanner.
|
5
|
+
# in fact it's mostly scan and scan_until that are useful to me. maybe this can be
|
6
|
+
# mixed in to the underlying io class, as providing a lot of stuff, like simple reads
|
7
|
+
# and peeks seems redundant extra layer cruft.
|
8
|
+
# Wrapper around an io that is intended to offer StringScanner-style
# scanning. Only position handling exists so far; #scan is a stub.
class IOScanner
  # The wrapped io. It needs to be seekable, as we may read more than we
  # should, and don't have a way (that i know of?) to push back those bytes
  # onto a temporary buffer. (ungetc?)
  attr_reader :io

  def initialize(io)
    @io = io
  end

  # Current position of the wrapped io.
  def pos
    io.pos
  end

  # Moves the wrapped io to +pos+.
  def pos=(pos)
    io.pos = pos
  end

  # Unfinished: currently this only builds a Regexp from +rx+ and returns it;
  # no matching against the io is done yet.
  def scan(rx)
    # TODO: now we have to see whether we match
    Regexp.new(rx)
  end
end
|
30
|
+
|
31
|
+
#
|
32
|
+
# = Introduction
|
33
|
+
#
|
34
|
+
# A *basic* mime class for _really_ _basic_ and probably non-standard parsing
|
35
|
+
# and construction of MIME messages.
|
36
|
+
#
|
37
|
+
# Intended for two main purposes in this project:
|
38
|
+
# 1. As the container that is used to build up the message for eventual
|
39
|
+
# serialization as an eml.
|
40
|
+
# 2. For assistance in parsing the +transport_message_headers+ provided in .msg files,
|
41
|
+
# which are then kept through to the final eml.
|
42
|
+
#
|
43
|
+
# = TODO
|
44
|
+
#
|
45
|
+
# * Better streaming support, rather than an all-in-string approach.
|
46
|
+
# * Add +OrderedHash+ optionally, to not lose ordering in headers.
|
47
|
+
# * A fair bit remains to be done for this class, it's fairly immature. But generally I'd like
|
48
|
+
# to see it be more generally useful.
|
49
|
+
# * All sorts of correctness issues, encoding in particular.
|
50
|
+
# * Duplication of work in net/http.rb's +HTTPHeader+? Don't know if the overlap is sufficient.
|
51
|
+
# I don't want to lower case things, just for starters.
|
52
|
+
# * Mime was the original place I wrote #to_tree, intended as a quick debug hack.
|
53
|
+
#
|
54
|
+
class Mime
  # Container class used for the headers: OrderedHash when the 'orderedhash'
  # library can be loaded, otherwise the core Hash.
  Hash = begin
    require 'orderedhash'
    OrderedHash
  rescue LoadError
    Hash
  end

  attr_reader :headers, :body, :parts, :content_type, :preamble, :epilogue

  # The io the message was read from.
  attr_reader :io

  # Create a Mime object from +io+ as an initial serialization, which must contain headers
  # and a body (even if empty). Needs work.
  #
  # +io+ must support +gets+, +pos+ and either +stat.size+ or +size+; with no
  # argument an empty StringIO is used. After construction +headers+ maps each
  # header name to an array of values and +body+ is a RangesIO over the rest
  # of +io+. For multipart messages +preamble+, +parts+ and +epilogue+ are
  # filled in as well.
  #
  # Raises RuntimeError for a multipart message with a non-empty body but no
  # boundary attribute.
  def initialize io=nil
    # use a string backed io if not specified
    @io = io || StringIO.new('')

    # read headers up to the first blank line. flexible about new line
    # endings here: whatever the input used is normalised to "\r\n".
    headers = ''
    while line = @io.gets
      line.chomp!
      break if line.empty?
      headers << line + "\r\n"
    end

    # what remains is the body. maybe rangesio should support infinite ranges
    @body = RangesIO.new @io, [@io.pos...(@io.stat.size rescue @io.size)]

    # don't like the way i'm creating on access.
    @headers = Hash.new { |hash, key| hash[key] = [] }
    # A header is a "Name:" line plus any continuation lines. Continuation
    # lines may start with a space as well as a tab, and only spaces/tabs are
    # skipped after the colon so that a header with an empty value does not
    # swallow the header on the following line.
    headers.scan(/^\S+:[ \t]*.*(?:\n[ \t].*)*/).each do |header|
      @headers[header[/(\S+):/, 1]] << header[/\S+:\s*(.*)/m, 1].gsub(/\s+/m, ' ').strip # this is kind of wrong
    end

    # don't have to have content type i suppose
    @content_type, attrs = nil, {}
    if @headers.include? 'Content-Type'
      @content_type, attrs = Mime.split_header @headers['Content-Type'][0]
    end

    return unless multipart?

    # special case
    if body.size == 0
      @preamble = ''
      @epilogue = ''
      @parts = []
      return
    end

    # we need to split the message at the boundary
    # instead of raising, it should just probably become a single part message.
    boundary = attrs['boundary'] or raise "no boundary for multipart message"

    # splitting the body:
    # talk about crap. all that work to use io, and then i do this :)
    # each multipart should become a sub-io, which gets passed to Mime.new
    chunks = body.read.split(/--#{Regexp.quote boundary}/m)
    # The text after the final boundary must begin with "--". Anchor at the
    # start of the string: "^" would also accept a "--" at the start of any
    # later line and then strip two unrelated characters.
    if chunks[-1] =~ /\A--/
      chunks[-1][0..1] = ''
    else
      warn "bad multipart boundary (missing trailing --)"
    end
    chunks.each_with_index do |chunk, i|
      # drop the line break that belongs to the boundary on either side
      chunk =~ /^(\r?\n)?(.*?)(\r?\n)?\Z/m
      leading, text, trailing = $1, $2, $3
      chunk.replace text
      warn "bad multipart boundary" if (1...chunks.length-1) === i and !(leading && trailing)
    end
    # Fall back to '' so #to_s can always concatenate these, even when the
    # boundary never occurred in the body.
    @preamble = chunks.shift || ''
    @epilogue = chunks.pop || ''
    # The constructor reads from an io, so each part's text is wrapped in one.
    @parts = chunks.map { |chunk| Mime.new StringIO.new(chunk) }
  end

  # True when the content type starts with "multipart".
  def multipart?
    @content_type && @content_type =~ /^multipart/ ? true : false
  end

  def inspect
    # add some extra here.
    "#<Mime content_type=#{@content_type.inspect}>"
  end

  # Returns a multi-line text drawing of the part hierarchy (a debugging aid).
  def to_tree
    if multipart?
      str = "- #{inspect}\n"
      parts.each_with_index do |part, i|
        last = i == parts.length - 1
        part.to_tree.split(/\n/).each_with_index do |line, j|
          str << "  #{last ? (j == 0 ? "\\" : ' ') : '|'}" + line + "\n"
        end
      end
      str
    else
      "- #{inspect}\n"
    end
  end

  # Serializes the message: header lines, a blank line, then the body.
  #
  # For multipart messages a fresh boundary is generated, the body is rebuilt
  # from preamble, parts and epilogue, and both @body and the Content-Type
  # header are overwritten with the new values. +opts+ is used internally to
  # hand :boundary_counter down to nested parts.
  def to_s opts={}
    opts = {:boundary_counter => 0}.merge opts
    if multipart?
      opts[:boundary_counter] += 1
      boundary = Mime.make_boundary opts[:boundary_counter], self
      @body = [preamble, parts.map { |part| "\r\n" + part.to_s(opts) + "\r\n" }, "--\r\n" + epilogue].
        flatten.join("\r\n--" + boundary)
      content_type, attrs = Mime.split_header @headers['Content-Type'][0]
      attrs['boundary'] = boundary
      @headers['Content-Type'] = [([content_type] + attrs.map { |key, val| %{#{key}="#{val}"} }).join('; ')]
    end

    str = ''
    @headers.each do |key, vals|
      vals.each { |val| str << "#{key}: #{val}\r\n" }
    end
    str << "\r\n" + body_text
  end

  # Splits a header value such as 'text/plain; charset="us-ascii"' into
  # ['text/plain', {'charset' => 'us-ascii'}]. Surrounding double quotes are
  # removed from attribute values; a repeated attribute is ignored with a
  # warning.
  def self.split_header header
    # FIXME: haven't read standard. not sure what its supposed to do with " in the name, or if other
    # escapes are allowed. can't test on windows as " isn't allowed anyway. can be fixed with more
    # accurate parser later.
    # maybe move to some sort of Header class. but not all headers should be of it i suppose.
    # at least add a join_header then, taking name and {}. for use in Mime#to_s (for boundary
    # rewrite), and Attachment#to_mime, among others...
    attrs = {}
    header.scan(/;\s*([^\s=]+)\s*=\s*("[^"]*"|[^\s;]*)\s*/m).each do |key, value|
      if attrs[key]
        warn "ignoring duplicate header attribute #{key.inspect}"
      else
        attrs[key] = value[/^"/] ? value[1..-2] : value
      end
    end

    [header[/^[^;]+/].strip, attrs]
  end

  # +i+ is some value that should be unique for all multipart boundaries for a given message
  def self.make_boundary i, extra_obj = Mime
    # the time is formatted as hex from its integer seconds value
    "----_=_NextPart_#{'%03d' % i}_#{'%08x' % extra_obj.object_id}.#{'%08x' % Time.now.to_i}"
  end

  private

  # The body as a String for serialization. #initialize stores an io-like
  # object in @body, which cannot be appended to a String directly, while
  # #to_s stores a plain String for multipart messages.
  def body_text
    return @body unless @body.respond_to?(:read)

    # NOTE(review): start from the beginning when the body object allows
    # repositioning, so an earlier read does not truncate the output -
    # confirm RangesIO supports pos=.
    @body.pos = 0 if @body.respond_to?(:pos=)
    @body.read.to_s
  end
end
|
188
|
+
|
189
|
+
=begin
|
190
|
+
things to consider for header work.
|
191
|
+
encoded words:
|
192
|
+
Subject: =?iso-8859-1?q?p=F6stal?=
|
193
|
+
|
194
|
+
and other mime funkyness:
|
195
|
+
Content-Disposition: attachment;
|
196
|
+
filename*0*=UTF-8''09%20%D7%90%D7%A5;
|
197
|
+
filename*1*=%20%D7%A1%D7%91-;
|
198
|
+
filename*2*=%D7%A7%95%A5.wma
|
199
|
+
Content-Transfer-Encoding: base64
|
200
|
+
|
201
|
+
and another, doing a test with an embedded newline in an attachment name, I
|
202
|
+
get this output from evolution. I get the feeling that this is probably a bug
|
203
|
+
with their implementation though, they weren't expecting new lines in filenames.
|
204
|
+
Content-Disposition: attachment; filename="asdf'b\"c
|
205
|
+
d efgh=i: ;\\j"
|
206
|
+
d efgh=i: ;\\j"; charset=us-ascii
|
207
|
+
Content-Type: text/plain; name="asdf'b\"c"; charset=us-ascii
|
208
|
+
|
209
|
+
=end
|
210
|
+
|
data/lib/mime.rb
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
#
|
2
|
+
# = Introduction
|
3
|
+
#
|
4
|
+
# A *basic* mime class for _really_ _basic_ and probably non-standard parsing
|
5
|
+
# and construction of MIME messages.
|
6
|
+
#
|
7
|
+
# Intended for two main purposes in this project:
|
8
|
+
# 1. As the container that is used to build up the message for eventual
|
9
|
+
# serialization as an eml.
|
10
|
+
# 2. For assistance in parsing the +transport_message_headers+ provided in .msg files,
|
11
|
+
# which are then kept through to the final eml.
|
12
|
+
#
|
13
|
+
# = TODO
|
14
|
+
#
|
15
|
+
# * Better streaming support, rather than an all-in-string approach.
|
16
|
+
# * Add +OrderedHash+ optionally, to not lose ordering in headers.
|
17
|
+
# * A fair bit remains to be done for this class, it's fairly immature. But generally I'd like
|
18
|
+
# to see it be more generally useful.
|
19
|
+
# * All sorts of correctness issues, encoding in particular.
|
20
|
+
# * Duplication of work in net/http.rb's +HTTPHeader+? Don't know if the overlap is sufficient.
|
21
|
+
# I don't want to lower case things, just for starters.
|
22
|
+
# * Mime was the original place I wrote #to_tree, intended as a quick debug hack.
|
23
|
+
#
|
24
|
+
class Mime
  # Container class used for the headers: OrderedHash when the 'orderedhash'
  # library can be loaded, otherwise the core Hash.
  Hash = begin
    require 'orderedhash'
    OrderedHash
  rescue LoadError
    Hash
  end

  attr_reader :headers, :body, :parts, :content_type, :preamble, :epilogue

  # Create a Mime object using +str+ as an initial serialization, which must contain headers
  # and a body (even if empty). Needs work.
  #
  # After construction +headers+ maps each header name to an array of values
  # and +body+ is the text after the first blank line ('' if there is none).
  # Unless +ignore_body+ is true, a multipart body is also split into
  # +preamble+, +parts+ (Mime objects) and +epilogue+.
  #
  # Raises RuntimeError for a multipart message with a non-empty body but no
  # boundary attribute.
  def initialize str, ignore_body=false
    # group 1: header block up to the first blank line, group 2: the rest
    headers, @body = $~[1..-1] if str[/(.*?\r?\n)(?:\r?\n(.*))?\Z/m]

    @headers = Hash.new { |hash, key| hash[key] = [] }
    @body ||= ''
    # A header is a "Name:" line plus any continuation lines. Continuation
    # lines may start with a space as well as a tab, and only spaces/tabs are
    # skipped after the colon so that a header with an empty value does not
    # swallow the header on the following line.
    headers.to_s.scan(/^\S+:[ \t]*.*(?:\n[ \t].*)*/).each do |header|
      @headers[header[/(\S+):/, 1]] << header[/\S+:\s*(.*)/m, 1].gsub(/\s+/m, ' ').strip # this is kind of wrong
    end

    # don't have to have content type i suppose
    # (note: this lookup goes through the default block above, so an empty
    # 'Content-Type' entry is created when the header is absent)
    @content_type, attrs = nil, {}
    if content_type = @headers['Content-Type'][0]
      @content_type, attrs = Mime.split_header content_type
    end

    return if ignore_body
    return unless multipart?

    if body.empty?
      @preamble = ''
      @epilogue = ''
      @parts = []
      return
    end

    # we need to split the message at the boundary
    boundary = attrs['boundary'] or raise "no boundary for multipart message"

    # splitting the body:
    chunks = body.split(/--#{Regexp.quote boundary}/m)
    # The text after the final boundary must begin with "--". Anchor at the
    # start of the string: "^" would also accept a "--" at the start of any
    # later line and then strip two unrelated characters.
    if chunks[-1] =~ /\A--/
      chunks[-1][0..1] = ''
    else
      warn "bad multipart boundary (missing trailing --)"
    end
    chunks.each_with_index do |chunk, i|
      # drop the line break that belongs to the boundary on either side
      chunk =~ /^(\r?\n)?(.*?)(\r?\n)?\Z/m
      leading, text, trailing = $1, $2, $3
      chunk.replace text
      warn "bad multipart boundary" if (1...chunks.length-1) === i and !(leading && trailing)
    end
    # Fall back to '' so #to_s can always concatenate these, even when the
    # boundary never occurred in the body.
    @preamble = chunks.shift || ''
    @epilogue = chunks.pop || ''
    @parts = chunks.map { |chunk| Mime.new chunk }
  end

  # True when the content type starts with "multipart".
  def multipart?
    @content_type && @content_type =~ /^multipart/ ? true : false
  end

  def inspect
    # add some extra here.
    "#<Mime content_type=#{@content_type.inspect}>"
  end

  # Returns a multi-line text drawing of the part hierarchy (a debugging aid).
  def to_tree
    if multipart?
      str = "- #{inspect}\n"
      parts.each_with_index do |part, i|
        last = i == parts.length - 1
        part.to_tree.split(/\n/).each_with_index do |line, j|
          str << "  #{last ? (j == 0 ? "\\" : ' ') : '|'}" + line + "\n"
        end
      end
      str
    else
      "- #{inspect}\n"
    end
  end

  # Serializes the message: header lines, a blank line, then the body.
  #
  # For multipart messages a fresh boundary is generated, the body is rebuilt
  # from preamble, parts and epilogue, and both @body and the Content-Type
  # header are overwritten with the new values. +opts+ is used internally to
  # hand :boundary_counter down to nested parts.
  def to_s opts={}
    opts = {:boundary_counter => 0}.merge opts
    if multipart?
      opts[:boundary_counter] += 1
      boundary = Mime.make_boundary opts[:boundary_counter], self
      @body = [preamble, parts.map { |part| "\r\n" + part.to_s(opts) + "\r\n" }, "--\r\n" + epilogue].
        flatten.join("\r\n--" + boundary)
      content_type, attrs = Mime.split_header @headers['Content-Type'][0]
      attrs['boundary'] = boundary
      @headers['Content-Type'] = [([content_type] + attrs.map { |key, val| %{#{key}="#{val}"} }).join('; ')]
    end

    str = ''
    @headers.each do |key, vals|
      vals.each { |val| str << "#{key}: #{val}\r\n" }
    end
    str << "\r\n" + @body
  end

  # Splits a header value such as 'text/plain; charset="us-ascii"' into
  # ['text/plain', {'charset' => 'us-ascii'}]. Surrounding double quotes are
  # removed from attribute values; a repeated attribute is ignored with a
  # warning.
  def self.split_header header
    # FIXME: haven't read standard. not sure what its supposed to do with " in the name, or if other
    # escapes are allowed. can't test on windows as " isn't allowed anyway. can be fixed with more
    # accurate parser later.
    # maybe move to some sort of Header class. but not all headers should be of it i suppose.
    # at least add a join_header then, taking name and {}. for use in Mime#to_s (for boundary
    # rewrite), and Attachment#to_mime, among others...
    attrs = {}
    header.scan(/;\s*([^\s=]+)\s*=\s*("[^"]*"|[^\s;]*)\s*/m).each do |key, value|
      if attrs[key]
        warn "ignoring duplicate header attribute #{key.inspect}"
      else
        attrs[key] = value[/^"/] ? value[1..-2] : value
      end
    end

    [header[/^[^;]+/].strip, attrs]
  end

  # +i+ is some value that should be unique for all multipart boundaries for a given message
  def self.make_boundary i, extra_obj = Mime
    # the time is formatted as hex from its integer seconds value
    "----_=_NextPart_#{'%03d' % i}_#{'%08x' % extra_obj.object_id}.#{'%08x' % Time.now.to_i}"
  end
end
|
143
|
+
|
144
|
+
=begin
|
145
|
+
things to consider for header work.
|
146
|
+
encoded words:
|
147
|
+
Subject: =?iso-8859-1?q?p=F6stal?=
|
148
|
+
|
149
|
+
and other mime funkyness:
|
150
|
+
Content-Disposition: attachment;
|
151
|
+
filename*0*=UTF-8''09%20%D7%90%D7%A5;
|
152
|
+
filename*1*=%20%D7%A1%D7%91-;
|
153
|
+
filename*2*=%D7%A7%95%A5.wma
|
154
|
+
Content-Transfer-Encoding: base64
|
155
|
+
|
156
|
+
and another, doing a test with an embedded newline in an attachment name, I
|
157
|
+
get this output from evolution. I get the feeling that this is probably a bug
|
158
|
+
with their implementation though, they weren't expecting new lines in filenames.
|
159
|
+
Content-Disposition: attachment; filename="asdf'b\"c
|
160
|
+
d efgh=i: ;\\j"
|
161
|
+
d efgh=i: ;\\j"; charset=us-ascii
|
162
|
+
Content-Type: text/plain; name="asdf'b\"c"; charset=us-ascii
|
163
|
+
|
164
|
+
=end
|
165
|
+
|