libis-mapi 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/mapi/rtf.rb ADDED
@@ -0,0 +1,297 @@
1
+ require 'stringio'
2
+ require 'strscan'
3
+
4
# Compatibility shim: interpreters whose StringIO has no +getbyte+ get one
# aliased to +getc+; where +getbyte+ already exists nothing is changed.
class StringIO # :nodoc:
  unless method_defined?(:getbyte) || private_method_defined?(:getbyte)
    alias_method :getbyte, :getc
  end
end
11
+
12
+ module Mapi
13
+ #
14
+ # = Introduction
15
+ #
16
+ # The +RTF+ module contains a few helper functions for dealing with rtf
17
+ # in mapi messages: +rtfdecompr+, and <tt>rtf2html</tt>.
18
+ #
19
+ # Both were ported from their original C versions for simplicity's sake.
20
+ #
21
+ module RTF
22
# Splits an rtf stream into a flat sequence of tokens.
class Tokenizer
  # Reads +io+ until EOF and yields one token per block call:
  #
  #   :open_group                   for "{"
  #   :close_group                  for "}"
  #   :text, str                    one literal character, an escaped "{", "}" or
  #                                 "\", or the byte(s) decoded from a \'hh escape
  #   :control_word, name, num      e.g. "\fs20" => 'fs', 20 (+num+ is nil when the
  #                                 word has no numeric parameter)
  #   :control_symbol, char         a backslash followed by any other character
  #
  # Bare CR and LF characters are skipped. +io+ must support +getc+, +read+
  # and +seek+ (e.g. a StringIO).
  #
  # Raises RuntimeError ("invalid rtf stream") when the stream ends directly
  # after a backslash or inside a \' escape, or when a control word has a "-"
  # that is not followed by digits.
  def self.process io
    while true do
      case c = io.getc
      when ?{ then yield :open_group
      when ?} then yield :close_group
      when ?\\
        case c = io.getc
        when ?{, ?}, ?\\ then yield :text, c.chr
        when ?'
          hex = io.read(2)
          # EOF right after \' - there is nothing to decode
          raise "invalid rtf stream" unless hex
          yield :text, [hex].pack('H*')
        when ?a..?z, ?A..?Z
          # read control word
          str = c.chr
          str << c while c = io.read(1) and c =~ /[a-zA-Z]/
          neg = 1
          if c == '-'
            neg = -1
            c = io.read(1)
          end
          num = nil
          if c and c =~ /[0-9]/
            digits = c
            digits << c while c = io.read(1) and c =~ /[0-9]/
            num = digits.to_i * neg
          end
          raise "invalid rtf stream" if neg == -1 and !num # ???? \blahblah- some text
          # A single space delimiter belongs to the control word; any other
          # character is pushed back. At EOF (c is nil) nothing was read, so
          # seeking back would replay the last character as bogus text.
          io.seek(-1, IO::SEEK_CUR) if c and c != ' '
          yield :control_word, str, num
        when nil
          raise "invalid rtf stream" # \EOF
        else
          # other kind of control symbol
          yield :control_symbol, c.chr
        end
      when nil
        return
      when ?\r, ?\n
        # ignore
      else yield :text, c.chr
      end
    end
  end
end
61
+
62
# Very rough rtf to text/html conversion.
class Converter
  # Replacement text for the control words that rtf2text understands.
  # Everything else contributes no text. More belong in here (e.g. \bullet).
  CONTROL_WORD_TEXT = {
    'par' => "\n", 'line' => "\n", 'page' => "\n",
    'tab' => "\t", 'cell' => "\t",
    'endash' => '-', 'emdash' => '-',
    'emspace' => ' ', 'enspace' => ' ', 'qmspace' => ' ',
    'ldblquote' => '"', 'rdblquote' => '"',
    'lquote' => "'", 'rquote' => "'"
  }.freeze

  # Characters escaped when producing html, and their entity names.
  HTML_ENTITIES = { '<' => 'lt', '>' => 'gt', '&' => 'amp', '"' => 'quot', "'" => 'apos' }.freeze

  # this is pretty crap, its just to ensure there is always something readable if
  # there is an rtf only body, with no html encapsulation.
  #
  # +str+ is the rtf source. +format+ is :text (default) or :html; for :html the
  # result is wrapped in <html><body>, special characters are escaped, newlines
  # get a <br>, and \b / \i become <b> / <i> tags that are closed when their
  # group closes.
  #
  # Returns the converted String. Errors raised by RTF::Tokenizer for an
  # invalid stream propagate. Unbalanced "}" and formatting words outside any
  # group are tolerated rather than crashing.
  def self.rtf2text str, format=:text
    group = 0
    text = ''
    text << "<html>\n<body>" if format == :html
    group_type = []
    group_tags = []
    RTF::Tokenizer.process(StringIO.new(str)) do |a, b, c|
      add_text = ''
      case a
      when :open_group
        group += 1
        group_type[group] = nil
        group_tags[group] = []
      when :close_group
        # group_tags[group] is nil at level 0 (stray "}" or tags never opened)
        (group_tags[group] || []).reverse_each { |t| text << "</#{t}>" }
        group -= 1 if group > 0
      when :control_word
        # the first control word of a group decides the group's type
        group_type[group] ||= b
        # maybe change this to use utf8 where possible
        add_text = CONTROL_WORD_TEXT.fetch(b, '')
        if (b == 'b' || b == 'i') && format == :html
          open_tags = (group_tags[group] ||= [])
          if c == 0
            # \b0 / \i0 switch the attribute off
            text << "</#{b}>"
            open_tags.delete b
          else
            text << "<#{b}>"
            open_tags << b
          end
        end
      when :control_symbol
        group_type[group] ||= b
        add_text = ' ' if b == '~' # non-breakable space
        add_text = '-' if b == '_' # non-breakable hyphen
      when :text
        # only top level text, or text of \rtlch groups that are not nested
        # inside a \* destination, is considered body text
        if group <= 1 || (group_type[group] == 'rtlch' && !group_type[0...group].include?('*'))
          add_text = b
        end
      end
      if format == :html
        text << add_text.gsub(/[<>&"']/) { |ch| "&#{HTML_ENTITIES[ch]};" }
        text << '<br>' if add_text == "\n"
      else
        text << add_text
      end
    end
    text << "</body>\n</html>\n" if format == :html
    text
  end
end
124
+
125
# Text preloaded into the 4096 byte dictionary before decompressing an
# "LZFu" stream; back references may point into it.
# NOTE(review): the line break below is written LF CR here; the published
# compressed rtf format description should be checked for the intended byte
# order before relying on references into these two bytes.
RTF_PREBUF =
  "{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}" \
  "{\\f0\\fnil \\froman \\fswiss \\fmodern \\fscript " \
  "\\fdecor MS Sans SerifSymbolArialTimes New RomanCourier" \
  "{\\colortbl\\red0\\green0\\blue0\n\r\\par " \
  "\\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx"

# Decompresses compressed rtf +data+, as found in the mapi property
# +PR_RTF_COMPRESSED+. Code converted from my C version, which in turn
# I wrote from a Java source, in JTNEF I believe.
#
# C version was modified to use circular buffer for back references,
# instead of the optimization of the Java version to index directly into
# output buffer. This was in preparation to support streaming in a
# read/write neutral fashion.
#
# +data+ is a String holding the 16 byte header (four little endian 32 bit
# fields: compressed size, uncompressed size, magic, crc32) followed by the
# payload. Only the uncompressed size and the magic are used; the crc is not
# verified.
#
# Returns the rtf as a String. A payload that ends early yields whatever
# could be decoded up to that point.
#
# Raises RuntimeError if the header is shorter than 16 bytes or the magic
# number is not recognised.
def rtfdecompr data
  io = StringIO.new data
  buf = RTF_PREBUF + "\x00" * (4096 - RTF_PREBUF.length)
  wp = RTF_PREBUF.length
  rtf = ''

  # get header fields (as defined in RTFLIB.H)
  header = io.read(16)
  raise "truncated compressed-RTF header" unless header && header.length == 16
  _compr_size, uncompr_size, magic, _crc32 = header.unpack 'V*'
  #warn "compressed-RTF data size mismatch" unless io.size == data.compr_size + 4

  # process the data
  case magic
  when 0x414c454d # "MELA" magic number that identifies the stream as a uncompressed stream
    # read returns nil when nothing is left; treat that as an empty body
    rtf = io.read(uncompr_size) || ''
  when 0x75465a4c # "LZFu" magic number that identifies the stream as a compressed stream
    flag_count = -1
    flags = nil
    while rtf.length < uncompr_size and !io.eof?
      # each flag byte flags 8 literals/references, 1 per bit
      flags = ((flag_count += 1) % 8 == 0) ? io.getbyte : flags >> 1
      if 1 == (flags & 1) # each flag bit is 1 for reference, 0 for literal
        rp, l = io.getbyte, io.getbyte
        # stream ended in the middle of a reference
        break unless rp && l
        # offset is a 12 bit number. 2^12 is 4096, so thats fine
        rp = (rp << 4) | (l >> 4) # the offset relative to block start
        l = (l & 0xf) + 2 # the number of bytes to copy
        l.times do
          rtf << (buf[wp] = buf[rp])
          wp = (wp + 1) % 4096
          rp = (rp + 1) % 4096
        end
      else
        literal = io.getbyte
        # stream ended right after a flag byte
        break unless literal
        rtf << (buf[wp] = literal.chr)
        wp = (wp + 1) % 4096
      end
    end
  else # unknown magic number
    raise "Unknown compression type (magic number 0x%08x)" % magic
  end

  # not sure if its due to a bug in the above code. doesn't seem to be
  # in my tests, but sometimes there's a trailing null. we chomp it here,
  # which actually makes the resultant rtf smaller than its advertised
  # size (+uncompr_size+).
  rtf.chomp! 0.chr
  rtf
end
186
+
187
+ # Note, this is a conversion of the original C code. Not great - needs tests and
188
+ # some refactoring, and an attempt to correct some inaccuracies. Hacky but works.
189
+ #
190
+ # Returns +nil+ if it doesn't look like html encapsulated in rtf.
191
+ #
192
+ # Some cases that the original didn't deal with have been patched up, eg from
193
+ # this chunk, where there are tags outside of the htmlrtf ignore block.
194
+ #
195
+ # "{\\*\\htmltag116 <br />}\\htmlrtf \\line \\htmlrtf0 \\line {\\*\\htmltag84 <a href..."
196
+ #
197
+ # We take the approach of ignoring all rtf tags not explicitly handled. A proper
198
+ # parse tree would be nicer to work with. will need to look for ruby rtf library
199
+ #
200
+ # Some of the original comment to the c code is excerpted here:
201
+ #
202
+ # Sometimes in MAPI, the PR_BODY_HTML property contains the HTML of a message.
203
+ # But more usually, the HTML is encoded inside the RTF body (which you get in the
204
+ # PR_RTF_COMPRESSED property). These routines concern the decoding of the HTML
205
+ # from this RTF body.
206
+ #
207
+ # An encoded htmlrtf file is a valid RTF document, but which contains additional
208
+ # html markup information in its comments, and sometimes contains the equivalent
209
+ # rtf markup outside the comments. Therefore, when it is displayed by a plain
210
+ # simple RTF reader, the html comments are ignored and only the rtf markup has
211
+ # effect. Typically, this rtf markup is not as rich as the html markup would have been.
212
+ # But for an html-aware reader (such as the code below), we can ignore all the
213
+ # rtf markup, and extract the html markup out of the comments, and get a valid
214
+ # html document.
215
+ #
216
+ # There are actually two kinds of html markup in comments. Most of them are
217
+ # prefixed by "\*\htmltagNNN", for some number NNN. But sometimes there's one
218
+ # prefixed by "\*\mhtmltagNNN" followed by "\*\htmltagNNN". In this case,
219
+ # the two are equivalent, but the m-tag is for a MIME Multipart/Mixed Message
220
+ # and contains tags that refer to content-ids (e.g. img src="cid:072344a7")
221
+ # while the normal tag just refers to a name (e.g. img src="fred.jpg")
222
+ # The code below keeps the m-tag and discards the normal tag.
223
+ # If there are any m-tags like this, then the message also contains an
224
+ # attachment with a PR_CONTENT_ID property e.g. "072344a7". Actually,
225
+ # sometimes the m-tag is e.g. img src="http://outlook/welcome.html" and the
226
+ # attachment has a PR_CONTENT_LOCATION "http://outlook/welcome.html" instead
227
+ # of a PR_CONTENT_ID.
228
+ #
229
+ def rtf2html rtf
230
+ scan = StringScanner.new rtf
231
+ # require \fromhtml. is this worth keeping? apparently you see \\fromtext if it
232
+ # was converted from plain text.
233
+ return nil unless rtf["\\fromhtml"]
234
+ # https://github.com/Scompler/ruby-msg/commit/c9046a2448bb6f4e08a8024d2b6637e2c12c23e5
235
+ # if scan.scan_until(/\\ansicpg/)
236
+ # code_page = "cp" + scan.scan(/\d+/)
237
+ # scan.pos = 0
238
+ # else
239
+ # code_page = 'ascii'
240
+ # end
241
+
242
+ last_pos = scan.pos
243
+ if scan.scan_until(/\\ansicpg/)
244
+ code_page = "cp" + scan.scan(/\d+/)
245
+ else
246
+ code_page = 'ascii'
247
+ end
248
+ scan.pos = last_pos
249
+
250
+ html = ''
251
+ ignore_tag = nil
252
+ # skip up to the first htmltag. return nil if we don't ever find one
253
+ return nil unless scan.scan_until /(?=\{\\\*\\htmltag)/
254
+ until scan.empty?
255
+ if scan.scan /\{/
256
+ elsif scan.scan /\}/
257
+ elsif scan.scan /\\\*\\htmltag(\d+) ?/
258
+ #p scan[1]
259
+ if ignore_tag == scan[1]
260
+ scan.scan_until /\}/
261
+ ignore_tag = nil
262
+ end
263
+ elsif scan.scan /\\\*\\mhtmltag(\d+) ?/
264
+ ignore_tag = scan[1]
265
+ elsif scan.scan /\\par ?/
266
+ html << "\r\n"
267
+ elsif scan.scan /\\tab ?/
268
+ html << "\t"
269
+ elsif scan.scan /\\'([0-9A-Za-z]{2})/
270
+ html << scan[1].hex.chr
271
+ elsif scan.scan /\\pntext/
272
+ scan.scan_until /\}/
273
+ elsif scan.scan /\\htmlrtf/
274
+ scan.scan_until /\\htmlrtf0 ?/
275
+ # a generic throw away unknown tags thing.
276
+ # the above 2 however, are handled specially
277
+ elsif scan.scan /\\[a-z-]+(\d+)? ?/
278
+ #elsif scan.scan /\\li(\d+) ?/
279
+ #elsif scan.scan /\\fi-(\d+) ?/
280
+ elsif scan.scan /[\r\n]/
281
+ elsif scan.scan /\\([{}\\])/
282
+ html << scan[1]
283
+ elsif scan.scan /(.)/
284
+ html << scan[1]
285
+ else
286
+ p :wtf
287
+ end
288
+ end
289
+ html.strip.empty? ? nil : html.force_encoding(code_page)
290
+ # https://github.com/Scompler/ruby-msg/commit/c9046a2448bb6f4e08a8024d2b6637e2c12c23e5
291
+ # html.strip.empty? ? nil : html.enode('utf-8', code_page)
292
+ end
293
+
294
+ module_function :rtf2html, :rtfdecompr
295
+ end
296
+ end
297
+
data/lib/mapi/types.rb ADDED
@@ -0,0 +1,51 @@
1
+ require 'rubygems'
2
+ require 'ole/types'
3
+
4
+ module Mapi
5
+ Log = Logger.new_with_callstack
6
+
7
module Types
  #
  # Mapi property types, taken from http://msdn2.microsoft.com/en-us/library/bb147591.aspx.
  #
  # Each entry maps the numeric type code to [mapi name, variant name, description].
  # Maybe this should just be a struct.
  #
  # Some synonyms have been seen, like PT_I8 vs PT_LONG, as well as things like
  # PT_SRESTRICTION (not sure what that is); look at
  # `grep ' PT_' data/mapitags.yaml | sort -u`. There are also names like
  # PT_MV_BINARY, where _MV_ probably means multi value and is likely just
  # defined to | in 0x1000.
  #
  # Note that the last 2 are the only ones where the Mapi value differs from the
  # Variant value for the corresponding variant type. Odd. They are currently
  # commented out here because of the clash.
  #
  # Note 2 - the strings here say VT_BSTR, which is not defined in Ole::Types;
  # maybe they should be changed to match. PT_TSTRING has also been seen, which
  # is defined as some sort of "try unicode first, fall back to ansi".
  #
  DATA = {
    0x0001 => ['PT_NULL', 'VT_NULL', 'Null (no valid data)'],
    0x0002 => ['PT_SHORT', 'VT_I2', '2-byte integer (signed)'],
    0x0003 => ['PT_LONG', 'VT_I4', '4-byte integer (signed)'],
    0x0004 => ['PT_FLOAT', 'VT_R4', '4-byte real (floating point)'],
    0x0005 => ['PT_DOUBLE', 'VT_R8', '8-byte real (floating point)'],
    0x0006 => ['PT_CURRENCY', 'VT_CY', '8-byte integer (scaled by 10,000)'],
    0x000a => ['PT_ERROR', 'VT_ERROR', 'SCODE value; 32-bit unsigned integer'],
    0x000b => ['PT_BOOLEAN', 'VT_BOOL', 'Boolean'],
    0x000d => ['PT_OBJECT', 'VT_UNKNOWN', 'Data object'],
    0x001e => ['PT_STRING8', 'VT_BSTR', 'String'],
    0x001f => ['PT_UNICODE', 'VT_BSTR', 'String'],
    0x0040 => ['PT_SYSTIME', 'VT_DATE', '8-byte real (date in integer, time in fraction)'],
    #0x0102 => ['PT_BINARY', 'VT_BLOB', 'Binary (unknown format)'],
    #0x0102 => ['PT_CLSID', 'VT_CLSID', 'OLE GUID']
  }

  # Defines one constant per DATA entry, named after the mapi name and
  # holding the numeric type code (e.g. PT_LONG = 0x0003).
  module Constants
    DATA.each_pair do |code, fields|
      const_set fields.first, code
    end
  end

  include Constants
end
50
+ end
51
+
@@ -0,0 +1,3 @@
1
module Mapi
  # Release number of the gem; the gemspec reads it as the package version.
  VERSION = "0.3.1"
end
data/lib/mapi.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'mapi/version'
2
+ require 'mapi/base'
3
+ require 'mapi/types'
4
+ require 'mapi/property_set'
5
+ require 'mapi/convert'
data/ruby-msg.gemspec ADDED
@@ -0,0 +1,26 @@
1
# Put the gem's own lib directory first on the load path so that the version
# constant can be read from the source tree.
$:.unshift "#{File.dirname(__FILE__)}/lib"
require 'mapi/version'

PKG_NAME = 'libis-mapi'
PKG_VERSION = Mapi::VERSION

# Gem description: metadata, packaged files and runtime dependencies.
Gem::Specification.new do |spec|
  spec.name = PKG_NAME
  spec.version = PKG_VERSION
  spec.licenses = ['MIT']
  spec.summary = 'Ruby Msg library.'
  spec.description = 'A library for reading and converting Outlook msg and pst files (mapi message stores).'
  spec.authors = ['Charles Lowe', 'kenjiuno']
  spec.email = 'ku@digitaldolphins.jp'
  spec.homepage = 'https://github.com/HiraokaHyperTools/ruby-msg'

  spec.executables = ['mapitool']
  # fixed top level files, followed by everything found under data/, lib/ and bin/
  spec.files = ['Home.md', 'COPYING', 'Rakefile', 'ChangeLog', 'ruby-msg.gemspec'] +
               Dir.glob('data/*.yaml') +
               Dir.glob('lib/**/*.rb') +
               Dir.glob('bin/*')

  spec.add_runtime_dependency 'ruby-ole', '~> 1.2', '>=1.2.8'
  spec.add_runtime_dependency 'vpim', '~> 13.0'
end
26
+
metadata ADDED
@@ -0,0 +1,102 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: libis-mapi
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.1
5
+ platform: ruby
6
+ authors:
7
+ - Charles Lowe
8
+ - kenjiuno
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2023-08-30 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: ruby-ole
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '1.2'
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.8
24
+ type: :runtime
25
+ prerelease: false
26
+ version_requirements: !ruby/object:Gem::Requirement
27
+ requirements:
28
+ - - "~>"
29
+ - !ruby/object:Gem::Version
30
+ version: '1.2'
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 1.2.8
34
+ - !ruby/object:Gem::Dependency
35
+ name: vpim
36
+ requirement: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '13.0'
41
+ type: :runtime
42
+ prerelease: false
43
+ version_requirements: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '13.0'
48
+ description: A library for reading and converting Outlook msg and pst files (mapi
49
+ message stores).
50
+ email: ku@digitaldolphins.jp
51
+ executables:
52
+ - mapitool
53
+ extensions: []
54
+ extra_rdoc_files: []
55
+ files:
56
+ - COPYING
57
+ - ChangeLog
58
+ - Home.md
59
+ - Rakefile
60
+ - bin/mapitool
61
+ - data/mapitags.yaml
62
+ - data/named_map.yaml
63
+ - data/types.yaml
64
+ - lib/mapi.rb
65
+ - lib/mapi/base.rb
66
+ - lib/mapi/convert.rb
67
+ - lib/mapi/convert/contact.rb
68
+ - lib/mapi/convert/note-mime.rb
69
+ - lib/mapi/convert/note-tmail.rb
70
+ - lib/mapi/helper.rb
71
+ - lib/mapi/mime.rb
72
+ - lib/mapi/msg.rb
73
+ - lib/mapi/property_set.rb
74
+ - lib/mapi/pst.rb
75
+ - lib/mapi/rtf.rb
76
+ - lib/mapi/types.rb
77
+ - lib/mapi/version.rb
78
+ - ruby-msg.gemspec
79
+ homepage: https://github.com/HiraokaHyperTools/ruby-msg
80
+ licenses:
81
+ - MIT
82
+ metadata: {}
83
+ post_install_message:
84
+ rdoc_options: []
85
+ require_paths:
86
+ - lib
87
+ required_ruby_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ required_rubygems_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ requirements: []
98
+ rubygems_version: 3.3.7
99
+ signing_key:
100
+ specification_version: 4
101
+ summary: Ruby Msg library.
102
+ test_files: []