ruby-msg 1.5.0 → 1.5.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/mapi.rb +1 -1
- data/lib/mapi/msg.rb +0 -1
- data/lib/mapi/property_set.rb +29 -10
- data/lib/mapi/rtf.rb +103 -1
- data/test/test_msg.rb +4 -6
- metadata +87 -79
- data/lib/rtf.rb +0 -109
data/lib/mapi.rb
CHANGED
data/lib/mapi/msg.rb
CHANGED
data/lib/mapi/property_set.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
require 'mapi/types'
|
3
3
|
require 'mapi/rtf'
|
4
|
-
require 'rtf'
|
5
4
|
|
6
5
|
module Mapi
|
7
6
|
#
|
@@ -241,7 +240,7 @@ module Mapi
|
|
241
240
|
# last resort
|
242
241
|
if !@body or @body.strip.empty?
|
243
242
|
Log.warn 'creating text body from rtf'
|
244
|
-
@body = (
|
243
|
+
@body = (RTF::Converter.rtf2text body_rtf rescue nil)
|
245
244
|
end
|
246
245
|
@body
|
247
246
|
end
|
@@ -249,18 +248,38 @@ module Mapi
|
|
249
248
|
# for providing rtf decompression
|
250
249
|
def body_rtf
|
251
250
|
return @body_rtf if defined?(@body_rtf)
|
252
|
-
@body_rtf =
|
251
|
+
@body_rtf = nil
|
252
|
+
if self[:rtf_compressed]
|
253
|
+
begin
|
254
|
+
@body_rtf = RTF.rtfdecompr self[:rtf_compressed].read
|
255
|
+
rescue
|
256
|
+
Log.warn 'unable to decompress rtf'
|
257
|
+
end
|
258
|
+
end
|
259
|
+
@body_rtf
|
253
260
|
end
|
254
261
|
|
255
|
-
# for providing rtf to html conversion
|
262
|
+
# for providing rtf to html extraction or conversion
|
256
263
|
def body_html
|
257
264
|
return @body_html if defined?(@body_html)
|
258
|
-
@body_html =
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
265
|
+
@body_html = self[:body_html]
|
266
|
+
# sometimes body_html is a stream, and sometimes a string
|
267
|
+
@body_html = @body_html.read if @body_html.respond_to?(:read)
|
268
|
+
@body_html = nil if @body_html.to_s.strip.empty?
|
269
|
+
if body_rtf and !@body_html
|
270
|
+
begin
|
271
|
+
@body_html = RTF.rtf2html body_rtf
|
272
|
+
rescue
|
273
|
+
Log.warn 'unable to extract html from rtf'
|
274
|
+
end
|
275
|
+
if !@body_html
|
276
|
+
Log.warn 'creating html body from rtf'
|
277
|
+
begin
|
278
|
+
@body_html = RTF::Converter.rtf2text body_rtf, :html
|
279
|
+
rescue
|
280
|
+
Log.warn 'unable to convert rtf to html'
|
281
|
+
end
|
282
|
+
end
|
264
283
|
end
|
265
284
|
@body_html
|
266
285
|
end
|
data/lib/mapi/rtf.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require 'stringio'
|
2
2
|
require 'strscan'
|
3
|
-
require 'rtf'
|
4
3
|
|
5
4
|
class StringIO # :nodoc:
|
6
5
|
begin
|
@@ -20,6 +19,109 @@ module Mapi
|
|
20
19
|
# Both were ported from their original C versions for simplicity's sake.
|
21
20
|
#
|
22
21
|
module RTF
|
22
|
+
class Tokenizer
|
23
|
+
def self.process io
|
24
|
+
while true do
|
25
|
+
case c = io.getc
|
26
|
+
when ?{; yield :open_group
|
27
|
+
when ?}; yield :close_group
|
28
|
+
when ?\\
|
29
|
+
case c = io.getc
|
30
|
+
when ?{, ?}, ?\\; yield :text, c.chr
|
31
|
+
when ?'; yield :text, [io.read(2)].pack('H*')
|
32
|
+
when ?a..?z, ?A..?Z
|
33
|
+
# read control word
|
34
|
+
str = c.chr
|
35
|
+
str << c while c = io.read(1) and c =~ /[a-zA-Z]/
|
36
|
+
neg = 1
|
37
|
+
neg = -1 and c = io.read(1) if c == '-'
|
38
|
+
num = if c =~ /[0-9]/
|
39
|
+
num = c
|
40
|
+
num << c while c = io.read(1) and c =~ /[0-9]/
|
41
|
+
num.to_i * neg
|
42
|
+
end
|
43
|
+
raise "invalid rtf stream" if neg == -1 and !num # ???? \blahblah- some text
|
44
|
+
io.seek(-1, IO::SEEK_CUR) if c != ' '
|
45
|
+
yield :control_word, str, num
|
46
|
+
when nil
|
47
|
+
raise "invalid rtf stream" # \EOF
|
48
|
+
else
|
49
|
+
# other kind of control symbol
|
50
|
+
yield :control_symbol, c.chr
|
51
|
+
end
|
52
|
+
when nil
|
53
|
+
return
|
54
|
+
when ?\r, ?\n
|
55
|
+
# ignore
|
56
|
+
else yield :text, c.chr
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
class Converter
|
63
|
+
# this is pretty crap, its just to ensure there is always something readable if
|
64
|
+
# there is an rtf only body, with no html encapsulation.
|
65
|
+
def self.rtf2text str, format=:text
|
66
|
+
group = 0
|
67
|
+
text = ''
|
68
|
+
text << "<html>\n<body>" if format == :html
|
69
|
+
group_type = []
|
70
|
+
group_tags = []
|
71
|
+
RTF::Tokenizer.process(StringIO.new(str)) do |a, b, c|
|
72
|
+
add_text = ''
|
73
|
+
case a
|
74
|
+
when :open_group; group += 1; group_type[group] = nil; group_tags[group] = []
|
75
|
+
when :close_group; group_tags[group].reverse.each { |t| text << "</#{t}>" }; group -= 1;
|
76
|
+
when :control_word; # ignore
|
77
|
+
group_type[group] ||= b
|
78
|
+
# maybe change this to use utf8 where possible
|
79
|
+
add_text = if b == 'par' || b == 'line' || b == 'page'; "\n"
|
80
|
+
elsif b == 'tab' || b == 'cell'; "\t"
|
81
|
+
elsif b == 'endash' || b == 'emdash'; "-"
|
82
|
+
elsif b == 'emspace' || b == 'enspace' || b == 'qmspace'; " "
|
83
|
+
elsif b == 'ldblquote'; '"'
|
84
|
+
else ''
|
85
|
+
end
|
86
|
+
if b == 'b' || b == 'i' and format == :html
|
87
|
+
close = c == 0 ? '/' : ''
|
88
|
+
text << "<#{close}#{b}>"
|
89
|
+
if c == 0
|
90
|
+
group_tags[group].delete b
|
91
|
+
else
|
92
|
+
group_tags[group] << b
|
93
|
+
end
|
94
|
+
end
|
95
|
+
# lot of other ones belong in here.\
|
96
|
+
=begin
|
97
|
+
\bullet Bullet character.
|
98
|
+
\lquote Left single quotation mark.
|
99
|
+
\rquote Right single quotation mark.
|
100
|
+
\ldblquote Left double quotation mark.
|
101
|
+
\rdblquote
|
102
|
+
=end
|
103
|
+
when :control_symbol; # ignore
|
104
|
+
group_type[group] ||= b
|
105
|
+
add_text = ' ' if b == '~' # non-breakable space
|
106
|
+
add_text = '-' if b == '_' # non-breakable hypen
|
107
|
+
when :text
|
108
|
+
add_text = b if group <= 1 or group_type[group] == 'rtlch' && !group_type[0...group].include?('*')
|
109
|
+
end
|
110
|
+
if format == :html
|
111
|
+
text << add_text.gsub(/([<>&"'])/) do
|
112
|
+
ent = { '<' => 'lt', '>' => 'gt', '&' => 'amp', '"' => 'quot', "'" => 'apos' }[$1]
|
113
|
+
"&#{ent};"
|
114
|
+
end
|
115
|
+
text << '<br>' if add_text == "\n"
|
116
|
+
else
|
117
|
+
text << add_text
|
118
|
+
end
|
119
|
+
end
|
120
|
+
text << "</body>\n</html>\n" if format == :html
|
121
|
+
text
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
23
125
|
RTF_PREBUF =
|
24
126
|
"{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}" \
|
25
127
|
"{\\f0\\fnil \\froman \\fswiss \\fmodern \\fscript " \
|
data/test/test_msg.rb
CHANGED
@@ -5,6 +5,7 @@ $: << "#{TEST_DIR}/../lib"
|
|
5
5
|
|
6
6
|
require 'test/unit'
|
7
7
|
require 'mapi/msg'
|
8
|
+
require 'mapi/convert'
|
8
9
|
|
9
10
|
class TestMsg < Test::Unit::TestCase
|
10
11
|
def test_blammo
|
@@ -17,12 +18,9 @@ class TestMsg < Test::Unit::TestCase
|
|
17
18
|
assert_equal 66, msg.properties.raw.length
|
18
19
|
# this is unique named properties
|
19
20
|
assert_equal 48, msg.properties.to_h.length
|
20
|
-
#
|
21
|
-
|
22
|
-
assert_equal '
|
23
|
-
assert_equal 'Yippee555', msg.properties[keys[0]]
|
24
|
-
assert_equal '66666666-6666-6666-c000-000000000046', keys[1].guid.format
|
25
|
-
assert_equal 'Yippee666', msg.properties[keys[1]]
|
21
|
+
# test accessing the named property keys - same name but different namespace
|
22
|
+
assert_equal 'Yippee555', msg.props['Name4', Ole::Types::Clsid.parse('55555555-5555-5555-c000-000000000046')]
|
23
|
+
assert_equal 'Yippee666', msg.props['Name4', Ole::Types::Clsid.parse('66666666-6666-6666-c000-000000000046')]
|
26
24
|
end
|
27
25
|
end
|
28
26
|
end
|
metadata
CHANGED
@@ -1,116 +1,124 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-msg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
+
hash: 1
|
4
5
|
prerelease: false
|
5
6
|
segments:
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
version: 1.5.
|
7
|
+
- 1
|
8
|
+
- 5
|
9
|
+
- 1
|
10
|
+
version: 1.5.1
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
|
-
|
13
|
+
- Charles Lowe
|
13
14
|
autorequire:
|
14
15
|
bindir: bin
|
15
16
|
cert_chain: []
|
16
17
|
|
17
|
-
date:
|
18
|
+
date: 2012-07-03 00:00:00 +10:00
|
18
19
|
default_executable:
|
19
20
|
dependencies:
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: ruby-ole
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 15
|
30
|
+
segments:
|
31
|
+
- 1
|
32
|
+
- 2
|
33
|
+
- 8
|
34
|
+
version: 1.2.8
|
35
|
+
type: :runtime
|
36
|
+
version_requirements: *id001
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: vpim
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
hash: 731
|
46
|
+
segments:
|
47
|
+
- 0
|
48
|
+
- 360
|
49
|
+
version: "0.360"
|
50
|
+
type: :runtime
|
51
|
+
version_requirements: *id002
|
47
52
|
description: A library for reading and converting Outlook msg and pst files (mapi message stores).
|
48
53
|
email: aquasync@gmail.com
|
49
54
|
executables:
|
50
|
-
|
55
|
+
- mapitool
|
51
56
|
extensions: []
|
52
57
|
|
53
58
|
extra_rdoc_files:
|
54
|
-
|
59
|
+
- README
|
55
60
|
files:
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
- test/test_msg.rb
|
61
|
+
- data/mapitags.yaml
|
62
|
+
- data/types.yaml
|
63
|
+
- data/named_map.yaml
|
64
|
+
- Rakefile
|
65
|
+
- README
|
66
|
+
- FIXES
|
67
|
+
- bin/mapitool
|
68
|
+
- lib/mapi.rb
|
69
|
+
- lib/mime.rb
|
70
|
+
- lib/orderedhash.rb
|
71
|
+
- lib/mapi/rtf.rb
|
72
|
+
- lib/mapi/property_set.rb
|
73
|
+
- lib/mapi/convert/contact.rb
|
74
|
+
- lib/mapi/convert/note-mime.rb
|
75
|
+
- lib/mapi/convert/note-tmail.rb
|
76
|
+
- lib/mapi/pst.rb
|
77
|
+
- lib/mapi/convert.rb
|
78
|
+
- lib/mapi/types.rb
|
79
|
+
- lib/mapi/msg.rb
|
80
|
+
- test/test_property_set.rb
|
81
|
+
- test/test_convert_note.rb
|
82
|
+
- test/test_mime.rb
|
83
|
+
- test/test_convert_contact.rb
|
84
|
+
- test/test_types.rb
|
85
|
+
- test/test_msg.rb
|
82
86
|
has_rdoc: true
|
83
87
|
homepage: http://code.google.com/p/ruby-msg
|
84
88
|
licenses: []
|
85
89
|
|
86
90
|
post_install_message:
|
87
91
|
rdoc_options:
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
92
|
+
- --main
|
93
|
+
- README
|
94
|
+
- --title
|
95
|
+
- ruby-msg documentation
|
96
|
+
- --tab-width
|
97
|
+
- "2"
|
94
98
|
require_paths:
|
95
|
-
|
99
|
+
- lib
|
96
100
|
required_ruby_version: !ruby/object:Gem::Requirement
|
101
|
+
none: false
|
97
102
|
requirements:
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
+
- - ">="
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
hash: 3
|
106
|
+
segments:
|
107
|
+
- 0
|
108
|
+
version: "0"
|
103
109
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
110
|
+
none: false
|
104
111
|
requirements:
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
112
|
+
- - ">="
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
hash: 3
|
115
|
+
segments:
|
116
|
+
- 0
|
117
|
+
version: "0"
|
110
118
|
requirements: []
|
111
119
|
|
112
120
|
rubyforge_project: ruby-msg
|
113
|
-
rubygems_version: 1.3.
|
121
|
+
rubygems_version: 1.3.7
|
114
122
|
signing_key:
|
115
123
|
specification_version: 3
|
116
124
|
summary: Ruby Msg library.
|
data/lib/rtf.rb
DELETED
@@ -1,109 +0,0 @@
|
|
1
|
-
require 'stringio'
|
2
|
-
|
3
|
-
# this file is pretty crap, its just to ensure there is always something readable if
|
4
|
-
# there is an rtf only body, with no html encapsulation.
|
5
|
-
|
6
|
-
module RTF
|
7
|
-
class Tokenizer
|
8
|
-
def self.process io
|
9
|
-
while true do
|
10
|
-
case c = io.getc
|
11
|
-
when ?{; yield :open_group
|
12
|
-
when ?}; yield :close_group
|
13
|
-
when ?\\
|
14
|
-
case c = io.getc
|
15
|
-
when ?{, ?}, ?\\; yield :text, c.chr
|
16
|
-
when ?'; yield :text, [io.read(2)].pack('H*')
|
17
|
-
when ?a..?z, ?A..?Z
|
18
|
-
# read control word
|
19
|
-
str = c.chr
|
20
|
-
str << c while c = io.read(1) and c =~ /[a-zA-Z]/
|
21
|
-
neg = 1
|
22
|
-
neg = -1 and c = io.read(1) if c == '-'
|
23
|
-
num = if c =~ /[0-9]/
|
24
|
-
num = c
|
25
|
-
num << c while c = io.read(1) and c =~ /[0-9]/
|
26
|
-
num.to_i * neg
|
27
|
-
end
|
28
|
-
raise "invalid rtf stream" if neg == -1 and !num # ???? \blahblah- some text
|
29
|
-
io.seek(-1, IO::SEEK_CUR) if c != ' '
|
30
|
-
yield :control_word, str, num
|
31
|
-
when nil
|
32
|
-
raise "invalid rtf stream" # \EOF
|
33
|
-
else
|
34
|
-
# other kind of control symbol
|
35
|
-
yield :control_symbol, c.chr
|
36
|
-
end
|
37
|
-
when nil
|
38
|
-
return
|
39
|
-
when ?\r, ?\n
|
40
|
-
# ignore
|
41
|
-
else yield :text, c.chr
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
class Converter
|
48
|
-
# crappy
|
49
|
-
def self.rtf2text str, format=:text
|
50
|
-
group = 0
|
51
|
-
text = ''
|
52
|
-
text << "<html>\n<body>" if format == :html
|
53
|
-
group_type = []
|
54
|
-
group_tags = []
|
55
|
-
RTF::Tokenizer.process(StringIO.new(str)) do |a, b, c|
|
56
|
-
add_text = ''
|
57
|
-
case a
|
58
|
-
when :open_group; group += 1; group_type[group] = nil; group_tags[group] = []
|
59
|
-
when :close_group; group_tags[group].reverse.each { |t| text << "</#{t}>" }; group -= 1;
|
60
|
-
when :control_word; # ignore
|
61
|
-
group_type[group] ||= b
|
62
|
-
# maybe change this to use utf8 where possible
|
63
|
-
add_text = if b == 'par' || b == 'line' || b == 'page'; "\n"
|
64
|
-
elsif b == 'tab' || b == 'cell'; "\t"
|
65
|
-
elsif b == 'endash' || b == 'emdash'; "-"
|
66
|
-
elsif b == 'emspace' || b == 'enspace' || b == 'qmspace'; " "
|
67
|
-
elsif b == 'ldblquote'; '"'
|
68
|
-
else ''
|
69
|
-
end
|
70
|
-
if b == 'b' || b == 'i' and format == :html
|
71
|
-
close = c == 0 ? '/' : ''
|
72
|
-
text << "<#{close}#{b}>"
|
73
|
-
if c == 0
|
74
|
-
group_tags[group].delete b
|
75
|
-
else
|
76
|
-
group_tags[group] << b
|
77
|
-
end
|
78
|
-
end
|
79
|
-
# lot of other ones belong in here.\
|
80
|
-
=begin
|
81
|
-
\bullet Bullet character.
|
82
|
-
\lquote Left single quotation mark.
|
83
|
-
\rquote Right single quotation mark.
|
84
|
-
\ldblquote Left double quotation mark.
|
85
|
-
\rdblquote
|
86
|
-
=end
|
87
|
-
when :control_symbol; # ignore
|
88
|
-
group_type[group] ||= b
|
89
|
-
add_text = ' ' if b == '~' # non-breakable space
|
90
|
-
add_text = '-' if b == '_' # non-breakable hypen
|
91
|
-
when :text
|
92
|
-
add_text = b if group <= 1 or group_type[group] == 'rtlch' && !group_type[0...group].include?('*')
|
93
|
-
end
|
94
|
-
if format == :html
|
95
|
-
text << add_text.gsub(/([<>&"'])/) do
|
96
|
-
ent = { '<' => 'lt', '>' => 'gt', '&' => 'amp', '"' => 'quot', "'" => 'apos' }[$1]
|
97
|
-
"&#{ent};"
|
98
|
-
end
|
99
|
-
text << '<br>' if add_text == "\n"
|
100
|
-
else
|
101
|
-
text << add_text
|
102
|
-
end
|
103
|
-
end
|
104
|
-
text << "</body>\n</html>\n" if format == :html
|
105
|
-
text
|
106
|
-
end
|
107
|
-
end
|
108
|
-
end
|
109
|
-
|