ruby-msg-nx 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/mapi/rtf.rb ADDED
@@ -0,0 +1,288 @@
1
+ require 'stringio'
2
+ require 'strscan'
3
+
4
# Compatibility shim: guarantees that StringIO responds to +getbyte+, which
# the decompression code in this file relies on. When the running Ruby does
# not already provide StringIO#getbyte, it is aliased to +getc+; otherwise
# the native method is left untouched.
class StringIO # :nodoc:
  # Ask the class directly instead of provoking (and rescuing) a NameError.
  alias_method :getbyte, :getc unless method_defined?(:getbyte)
end
11
+
12
+ module Mapi
13
+ #
14
+ # = Introduction
15
+ #
16
+ # The +RTF+ module contains a few helper functions for dealing with rtf
17
+ # in mapi messages: +rtfdecompr+, and <tt>rtf2html</tt>.
18
+ #
19
+ # Both were ported from their original C versions for simplicity's sake.
20
+ #
21
+ module RTF
22
# A minimal RTF lexer. It does not build a parse tree; it only chops the
# input stream into a flat sequence of tokens.
class Tokenizer
  # Reads +io+ until EOF and yields one token per call to the block:
  #
  # * <tt>:open_group</tt> / <tt>:close_group</tt> for an unescaped "{" / "}"
  # * <tt>:text, str</tt> for a literal character, an escaped "{", "}" or
  #   "\", or the byte(s) decoded from a <tt>\'hh</tt> hex escape
  # * <tt>:control_word, name, num</tt> for <tt>\name</tt> optionally
  #   followed by a (possibly negative) number; +num+ is an Integer or +nil+
  # * <tt>:control_symbol, char</tt> for a backslash followed by any other
  #   character
  #
  # Bare CR and LF characters are skipped. A single space directly after a
  # control word is treated as its delimiter and swallowed.
  #
  # Raises RuntimeError ("invalid rtf stream") when the stream ends right
  # after a backslash or inside a <tt>\'</tt> escape, or when a control word
  # is followed by "-" without digits.
  def self.process io
    while true
      case c = io.getc
      when ?{
        yield :open_group
      when ?}
        yield :close_group
      when ?\\
        case c = io.getc
        when ?{, ?}, ?\\
          yield :text, c.chr
        when ?'
          hex = io.read(2)
          raise "invalid rtf stream" unless hex # \' at EOF
          yield :text, [hex].pack('H*')
        when ?a..?z, ?A..?Z
          # read control word
          str = c.chr
          str << c while (c = io.read(1)) && c =~ /[a-zA-Z]/
          # optional numeric parameter, possibly negative
          sign = 1
          if c == '-'
            sign = -1
            c = io.read(1)
          end
          num = nil
          if c && c =~ /[0-9]/
            digits = c
            digits << c while (c = io.read(1)) && c =~ /[0-9]/
            num = digits.to_i * sign
          end
          raise "invalid rtf stream" if sign == -1 and !num # ???? \blahblah- some text
          # Push back the terminating character unless it was the delimiting
          # space. At EOF (c is nil) nothing was read, so there is nothing to
          # push back; rewinding here would replay the word's last letter.
          io.seek(-1, IO::SEEK_CUR) if c && c != ' '
          yield :control_word, str, num
        when nil
          raise "invalid rtf stream" # \EOF
        else
          # other kind of control symbol
          yield :control_symbol, c.chr
        end
      when nil
        return
      when ?\r, ?\n
        # ignore
      else
        yield :text, c.chr
      end
    end
  end
end
61
+
62
# Very rough RTF to text/html conversion, used only as a fallback.
class Converter
  # Characters escaped in html output and their entity replacements.
  HTML_ENTITIES = {
    '<' => '&lt;', '>' => '&gt;', '&' => '&amp;', '"' => '&quot;', "'" => '&apos;'
  }.freeze

  # this is pretty crap, its just to ensure there is always something readable if
  # there is an rtf only body, with no html encapsulation.
  #
  # +str+ is the raw rtf source. +format+ is <tt>:text</tt> (default) or
  # <tt>:html</tt>; in html mode the output is wrapped in <html>/<body>,
  # special characters are escaped, newlines get a <br>, and \b / \i are
  # turned into <b> / <i> tags. Returns the converted String.
  #
  # Unbalanced input is tolerated: a "}" without a matching "{" is ignored,
  # and formatting words seen before the first group are tracked at depth 0.
  def self.rtf2text str, format=:text
    html = format == :html
    group = 0
    text = ''
    text << "<html>\n<body>" if html
    group_type = []
    # slot 0 exists up front so tags opened outside any group have a home
    group_tags = [[]]
    RTF::Tokenizer.process(StringIO.new(str)) do |a, b, c|
      add_text = ''
      case a
      when :open_group
        group += 1
        group_type[group] = nil
        group_tags[group] = []
      when :close_group
        # only close what was actually opened; never drop below depth 0
        if group > 0
          group_tags[group].reverse_each { |t| text << "</#{t}>" }
          group -= 1
        end
      when :control_word
        # the first control word/symbol of a group determines its type
        group_type[group] ||= b
        # maybe change this to use utf8 where possible
        add_text = case b
          when 'par', 'line', 'page' then "\n"
          when 'tab', 'cell' then "\t"
          when 'endash', 'emdash' then '-'
          when 'emspace', 'enspace', 'qmspace' then ' '
          when 'ldblquote', 'rdblquote' then '"'
          when 'lquote', 'rquote' then "'"
          else ''
        end
        if html and (b == 'b' || b == 'i')
          if c == 0
            # \b0 / \i0 switch the attribute off again
            text << "</#{b}>"
            group_tags[group].delete b
          else
            text << "<#{b}>"
            group_tags[group] << b
          end
        end
        # lot of other ones belong in here, eg \bullet (bullet character).
      when :control_symbol
        group_type[group] ||= b
        add_text = ' ' if b == '~' # non-breakable space
        add_text = '-' if b == '_' # non-breakable hyphen
      when :text
        # keep top level text, and text of 'rtlch' groups that are not nested
        # inside a group whose type is '*'
        add_text = b if group <= 1 or group_type[group] == 'rtlch' && !group_type[0...group].include?('*')
      end
      if html
        text << add_text.gsub(/[<>&"']/) { |ch| HTML_ENTITIES[ch] }
        text << '<br>' if add_text == "\n"
      else
        text << add_text
      end
    end
    text << "</body>\n</html>\n" if html
    text
  end
end
124
+
125
# Initial contents of the 4096 byte dictionary used by compressed rtf.
# Back references in a compressed stream may point into this text.
#
# NOTE(review): the line break below is written as "\n\r"; the published
# compressed-rtf format description appears to use CR LF ("\r\n") at this
# position - confirm against the spec before changing it.
RTF_PREBUF =
  "{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}" \
  "{\\f0\\fnil \\froman \\fswiss \\fmodern \\fscript " \
  "\\fdecor MS Sans SerifSymbolArialTimes New RomanCourier" \
  "{\\colortbl\\red0\\green0\\blue0\n\r\\par " \
  "\\pard\\plain\\f0\\fs20\\b\\i\\u\\tab\\tx"

# Decompresses compressed rtf +data+, as found in the mapi property
# +PR_RTF_COMPRESSED+. Code converted from my C version, which in turn
# I wrote from a Java source, in JTNEF I believe.
#
# C version was modified to use circular buffer for back references,
# instead of the optimization of the Java version to index directly into
# output buffer. This was in preparation to support streaming in a
# read/write neutral fashion.
#
# +data+ is the raw property value: a 16 byte header of four little endian
# 32 bit integers (compressed size, uncompressed size, magic, crc32)
# followed by the payload. Returns the rtf as a String.
#
# Raises RuntimeError if the header is shorter than 16 bytes or the magic
# number is unknown. The crc32 field is not verified. A payload that ends
# early yields whatever could be decoded up to that point.
def rtfdecompr data
  io = StringIO.new data
  buf = RTF_PREBUF + "\x00" * (4096 - RTF_PREBUF.length)
  wp = RTF_PREBUF.length
  rtf = ''

  # get header fields (as defined in RTFLIB.H)
  header = io.read 16
  raise "Invalid compressed-RTF header (need 16 bytes)" unless header and header.length == 16
  _compr_size, uncompr_size, magic, _crc32 = header.unpack 'V*'

  # process the data
  case magic
  when 0x414c454d # "MELA" magic number that identifies the stream as a uncompressed stream
    # read returns nil when nothing is left; normalise to an empty string
    rtf = io.read(uncompr_size) || ''
  when 0x75465a4c # "LZFu" magic number that identifies the stream as a compressed stream
    flag_count = -1
    flags = nil
    while rtf.length < uncompr_size and !io.eof?
      # each flag byte flags 8 literals/references, 1 per bit
      flags = ((flag_count += 1) % 8 == 0) ? io.getbyte : flags >> 1
      if 1 == (flags & 1) # each flag bit is 1 for reference, 0 for literal
        rp, l = io.getbyte, io.getbyte
        break unless rp and l # stream truncated in the middle of a reference
        # offset is a 12 bit number. 2^12 is 4096, so thats fine
        rp = (rp << 4) | (l >> 4) # the offset relative to block start
        # A reference that points at the current write position is the
        # end-of-stream marker, not data. Copying it would append junk when
        # the header overstates the uncompressed size.
        break if rp == wp
        l = (l & 0xf) + 2 # the number of bytes to copy
        l.times do
          rtf << (buf[wp] = buf[rp])
          wp = (wp + 1) % 4096
          rp = (rp + 1) % 4096
        end
      else
        byte = io.getbyte
        break unless byte # stream truncated right after a flag byte
        rtf << (buf[wp] = byte.chr)
        wp = (wp + 1) % 4096
      end
    end
  else # unknown magic number
    raise "Unknown compression type (magic number 0x%08x)" % magic
  end

  # not sure if its due to a bug in the above code. doesn't seem to be
  # in my tests, but sometimes there's a trailing null. we chomp it here,
  # which actually makes the resultant rtf smaller than its advertised
  # size (+uncompr_size+).
  rtf.chomp! 0.chr
  rtf
end
186
+
187
+ # Note, this is a conversion of the original C code. Not great - needs tests and
188
+ # some refactoring, and an attempt to correct some inaccuracies. Hacky but works.
189
+ #
190
+ # Returns +nil+ if it doesn't look like html encapsulated in rtf.
191
+ #
192
+ # Some cases that the original didn't deal with have been patched up, eg from
193
+ # this chunk, where there are tags outside of the htmlrtf ignore block.
194
+ #
195
+ # "{\\*\\htmltag116 <br />}\\htmlrtf \\line \\htmlrtf0 \\line {\\*\\htmltag84 <a href..."
196
+ #
197
+ # We take the approach of ignoring all rtf tags not explicitly handled. A proper
198
+ # parse tree would be nicer to work with. will need to look for ruby rtf library
199
+ #
200
+ # Some of the original comment to the c code is excerpted here:
201
+ #
202
+ # Sometimes in MAPI, the PR_BODY_HTML property contains the HTML of a message.
203
+ # But more usually, the HTML is encoded inside the RTF body (which you get in the
204
+ # PR_RTF_COMPRESSED property). These routines concern the decoding of the HTML
205
+ # from this RTF body.
206
+ #
207
+ # An encoded htmlrtf file is a valid RTF document, but which contains additional
208
+ # html markup information in its comments, and sometimes contains the equivalent
209
+ # rtf markup outside the comments. Therefore, when it is displayed by a plain
210
+ # simple RTF reader, the html comments are ignored and only the rtf markup has
211
+ # effect. Typically, this rtf markup is not as rich as the html markup would have been.
212
+ # But for an html-aware reader (such as the code below), we can ignore all the
213
+ # rtf markup, and extract the html markup out of the comments, and get a valid
214
+ # html document.
215
+ #
216
+ # There are actually two kinds of html markup in comments. Most of them are
217
+ # prefixed by "\*\htmltagNNN", for some number NNN. But sometimes there's one
218
+ # prefixed by "\*\mhtmltagNNN" followed by "\*\htmltagNNN". In this case,
219
+ # the two are equivalent, but the m-tag is for a MIME Multipart/Mixed Message
220
+ # and contains tags that refer to content-ids (e.g. img src="cid:072344a7")
221
+ # while the normal tag just refers to a name (e.g. img src="fred.jpg")
222
+ # The code below keeps the m-tag and discards the normal tag.
223
+ # If there are any m-tags like this, then the message also contains an
224
+ # attachment with a PR_CONTENT_ID property e.g. "072344a7". Actually,
225
+ # sometimes the m-tag is e.g. img src="http://outlook/welcome.html" and the
226
+ # attachment has a PR_CONTENT_LOCATION "http://outlook/welcome.html" instead
227
+ # of a PR_CONTENT_ID.
228
+ #
229
# Extracts the html that is encapsulated in the rtf source +rtf+.
#
# Returns the html as a String tagged with the encoding named by the
# document's \ansicpg control word ("cp<number>", or 'ascii' when none is
# usable). If Ruby does not know that code page the string is returned with
# its encoding left as it was. Returns +nil+ when +rtf+ has no \fromhtml
# marker, contains no {\*\htmltag group, or yields only whitespace.
def rtf2html rtf
  scan = StringScanner.new rtf
  # require \fromhtml. is this worth keeping? apparently you see \\fromtext if it
  # was converted from plain text.
  return nil unless rtf["\\fromhtml"]

  # peek ahead for the code page, then rewind
  last_pos = scan.pos
  digits = scan.scan(/\d+/) if scan.scan_until(/\\ansicpg/)
  code_page = digits ? "cp" + digits : 'ascii'
  scan.pos = last_pos

  html = ''
  ignore_tag = nil
  # skip up to the first htmltag. return nil if we don't ever find one
  return nil unless scan.scan_until(/(?=\{\\\*\\htmltag)/)
  until scan.eos?
    if scan.scan(/\{/)
      # group delimiters carry no html content
    elsif scan.scan(/\}/)
    elsif scan.scan(/\\\*\\htmltag(\d+) ?/)
      # an htmltag that duplicates the preceding mhtmltag is dropped whole
      if ignore_tag == scan[1]
        scan.scan_until(/\}/)
        ignore_tag = nil
      end
    elsif scan.scan(/\\\*\\mhtmltag(\d+) ?/)
      ignore_tag = scan[1]
    # the lookaheads keep \par and \tab from matching the start of a longer
    # control word such as \pard, which belongs to the generic branch below
    elsif scan.scan(/\\par(?![a-z]) ?/)
      html << "\r\n"
    elsif scan.scan(/\\tab(?![a-z]) ?/)
      html << "\t"
    elsif scan.scan(/\\'([0-9A-Za-z]{2})/)
      html << scan[1].hex.chr
    elsif scan.scan(/\\pntext/)
      scan.scan_until(/\}/)
    elsif scan.scan(/\\htmlrtf/)
      scan.scan_until(/\\htmlrtf0 ?/)
    # a generic throw away unknown tags thing.
    # the above 2 however, are handled specially
    elsif scan.scan(/\\[a-z-]+(\d+)? ?/)
    elsif scan.scan(/[\r\n]/)
    elsif scan.scan(/\\([{}\\])/)
      html << scan[1]
    else
      # any other character is html content; getch always consumes one
      # character here, so the loop is guaranteed to make progress
      html << scan.getch
    end
  end
  return nil if html.strip.empty?
  begin
    html.force_encoding code_page
  rescue ArgumentError
    # code page unknown to Ruby: hand back the bytes untagged rather than
    # losing the whole body
    html
  end
end
284
+
285
+ module_function :rtf2html, :rtfdecompr
286
+ end
287
+ end
288
+
data/lib/mapi/types.rb ADDED
@@ -0,0 +1,51 @@
1
+ require 'rubygems'
2
+ require 'ole/types'
3
+
4
+ module Mapi
5
+ Log = Logger.new_with_callstack
6
+
7
module Types
  #
  # Table of mapi property types, keyed by numeric type code. Taken from
  # http://msdn2.microsoft.com/en-us/library/bb147591.aspx.
  #
  # Each value is [mapi name, variant name, description]. A struct might be a
  # better fit.
  #
  # There are synonyms around (PT_I8 vs PT_LONG), and other names such as
  # PT_SRESTRICTION whose meaning is unclear; see
  # `grep ' PT_' data/mapitags.yaml | sort -u`. Names like PT_MV_BINARY
  # probably mean "multi value" and are likely just the base type with 0x1000
  # or-ed in.
  #
  # The two 0x0102 entries are the only ones where the mapi value differs from
  # the variant value of the corresponding variant type. They are commented
  # out because they clash with each other.
  #
  # The string entries say VT_BSTR, which is not defined in Ole::Types; maybe
  # they should be changed to match. There is also PT_TSTRING, described as
  # some sort of "unicode first, fall back to ansi".
  #
  DATA = {
    0x0001 => ['PT_NULL', 'VT_NULL', 'Null (no valid data)'],
    0x0002 => ['PT_SHORT', 'VT_I2', '2-byte integer (signed)'],
    0x0003 => ['PT_LONG', 'VT_I4', '4-byte integer (signed)'],
    0x0004 => ['PT_FLOAT', 'VT_R4', '4-byte real (floating point)'],
    0x0005 => ['PT_DOUBLE', 'VT_R8', '8-byte real (floating point)'],
    0x0006 => ['PT_CURRENCY', 'VT_CY', '8-byte integer (scaled by 10,000)'],
    0x000a => ['PT_ERROR', 'VT_ERROR', 'SCODE value; 32-bit unsigned integer'],
    0x000b => ['PT_BOOLEAN', 'VT_BOOL', 'Boolean'],
    0x000d => ['PT_OBJECT', 'VT_UNKNOWN', 'Data object'],
    0x001e => ['PT_STRING8', 'VT_BSTR', 'String'],
    0x001f => ['PT_UNICODE', 'VT_BSTR', 'String'],
    0x0040 => ['PT_SYSTIME', 'VT_DATE', '8-byte real (date in integer, time in fraction)'],
    #0x0102 => ['PT_BINARY', 'VT_BLOB', 'Binary (unknown format)'],
    #0x0102 => ['PT_CLSID', 'VT_CLSID', 'OLE GUID']
  }

  # One constant per DATA entry, named after the mapi name and holding the
  # numeric type code (eg PT_LONG == 0x0003).
  module Constants
    DATA.each_pair do |code, fields|
      const_set fields.first, code
    end
  end

  include Constants
end
50
+ end
51
+
@@ -0,0 +1,3 @@
1
module Mapi
  # Version of the library. Frozen so the shared constant cannot be mutated
  # in place.
  VERSION = '0.3.0'.freeze
end
data/lib/mapi.rb ADDED
@@ -0,0 +1,5 @@
1
+ require 'mapi/version'
2
+ require 'mapi/base'
3
+ require 'mapi/types'
4
+ require 'mapi/property_set'
5
+ require 'mapi/convert'
data/ruby-msg.gemspec ADDED
@@ -0,0 +1,26 @@
1
# Gem specification for ruby-msg-nx. The version number is read from
# lib/mapi/version.rb, so lib is put on the load path first.
$LOAD_PATH.unshift File.dirname(__FILE__) + '/lib'
require 'mapi/version'

PKG_NAME = 'ruby-msg-nx'
PKG_VERSION = Mapi::VERSION

Gem::Specification.new do |spec|
  # identity
  spec.name = PKG_NAME
  spec.version = PKG_VERSION
  spec.licenses = ['MIT']
  spec.summary = 'Ruby Msg library.'
  spec.description = 'A library for reading and converting Outlook msg and pst files (mapi message stores).'
  spec.authors = ['Charles Lowe', 'kenjiuno']
  spec.email = 'ku@digitaldolphins.jp'
  spec.homepage = 'https://github.com/HiraokaHyperTools/ruby-msg'

  # packaged files: fixed top level files, then data, library and bin files
  spec.executables = ['mapitool']
  packaged = ['Home.md', 'COPYING', 'Rakefile', 'ChangeLog', 'ruby-msg.gemspec']
  ['data/*.yaml', 'lib/**/*.rb', 'bin/*'].each do |pattern|
    packaged += Dir.glob(pattern)
  end
  spec.files = packaged

  # runtime dependencies
  spec.add_runtime_dependency 'ruby-ole', '~> 1.2', '>=1.2.8'
  spec.add_runtime_dependency 'vpim', '~> 0.360'
end
26
+
metadata ADDED
@@ -0,0 +1,102 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: ruby-msg-nx
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.3.0
5
+ platform: ruby
6
+ authors:
7
+ - Charles Lowe
8
+ - kenjiuno
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2022-01-13 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: ruby-ole
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - "~>"
19
+ - !ruby/object:Gem::Version
20
+ version: '1.2'
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.8
24
+ type: :runtime
25
+ prerelease: false
26
+ version_requirements: !ruby/object:Gem::Requirement
27
+ requirements:
28
+ - - "~>"
29
+ - !ruby/object:Gem::Version
30
+ version: '1.2'
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 1.2.8
34
+ - !ruby/object:Gem::Dependency
35
+ name: vpim
36
+ requirement: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.360'
41
+ type: :runtime
42
+ prerelease: false
43
+ version_requirements: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '0.360'
48
+ description: A library for reading and converting Outlook msg and pst files (mapi
49
+ message stores).
50
+ email: ku@digitaldolphins.jp
51
+ executables:
52
+ - mapitool
53
+ extensions: []
54
+ extra_rdoc_files: []
55
+ files:
56
+ - COPYING
57
+ - ChangeLog
58
+ - Home.md
59
+ - Rakefile
60
+ - bin/mapitool
61
+ - data/mapitags.yaml
62
+ - data/named_map.yaml
63
+ - data/types.yaml
64
+ - lib/mapi.rb
65
+ - lib/mapi/base.rb
66
+ - lib/mapi/convert.rb
67
+ - lib/mapi/convert/contact.rb
68
+ - lib/mapi/convert/note-mime.rb
69
+ - lib/mapi/convert/note-tmail.rb
70
+ - lib/mapi/helper.rb
71
+ - lib/mapi/mime.rb
72
+ - lib/mapi/msg.rb
73
+ - lib/mapi/property_set.rb
74
+ - lib/mapi/pst.rb
75
+ - lib/mapi/rtf.rb
76
+ - lib/mapi/types.rb
77
+ - lib/mapi/version.rb
78
+ - ruby-msg.gemspec
79
+ homepage: https://github.com/HiraokaHyperTools/ruby-msg
80
+ licenses:
81
+ - MIT
82
+ metadata: {}
83
+ post_install_message:
84
+ rdoc_options: []
85
+ require_paths:
86
+ - lib
87
+ required_ruby_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ required_rubygems_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ requirements: []
98
+ rubygems_version: 3.2.33
99
+ signing_key:
100
+ specification_version: 4
101
+ summary: Ruby Msg library.
102
+ test_files: []