ruby-msg 1.5.0 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/mapi.rb +1 -1
- data/lib/mapi/msg.rb +0 -1
- data/lib/mapi/property_set.rb +29 -10
- data/lib/mapi/rtf.rb +103 -1
- data/test/test_msg.rb +4 -6
- metadata +87 -79
- data/lib/rtf.rb +0 -109
data/lib/mapi.rb
CHANGED
data/lib/mapi/msg.rb
CHANGED
data/lib/mapi/property_set.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
require 'mapi/types'
|
3
3
|
require 'mapi/rtf'
|
4
|
-
require 'rtf'
|
5
4
|
|
6
5
|
module Mapi
|
7
6
|
#
|
@@ -241,7 +240,7 @@ module Mapi
|
|
241
240
|
# last resort
|
242
241
|
if !@body or @body.strip.empty?
|
243
242
|
Log.warn 'creating text body from rtf'
|
244
|
-
@body = (
|
243
|
+
@body = (RTF::Converter.rtf2text body_rtf rescue nil)
|
245
244
|
end
|
246
245
|
@body
|
247
246
|
end
|
@@ -249,18 +248,38 @@ module Mapi
|
|
249
248
|
# for providing rtf decompression
|
250
249
|
def body_rtf
|
251
250
|
return @body_rtf if defined?(@body_rtf)
|
252
|
-
@body_rtf =
|
251
|
+
@body_rtf = nil
|
252
|
+
if self[:rtf_compressed]
|
253
|
+
begin
|
254
|
+
@body_rtf = RTF.rtfdecompr self[:rtf_compressed].read
|
255
|
+
rescue
|
256
|
+
Log.warn 'unable to decompress rtf'
|
257
|
+
end
|
258
|
+
end
|
259
|
+
@body_rtf
|
253
260
|
end
|
254
261
|
|
255
|
-
# for providing rtf to html conversion
|
262
|
+
# for providing rtf to html extraction or conversion
|
256
263
|
def body_html
|
257
264
|
return @body_html if defined?(@body_html)
|
258
|
-
@body_html =
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
265
|
+
@body_html = self[:body_html]
|
266
|
+
# sometimes body_html is a stream, and sometimes a string
|
267
|
+
@body_html = @body_html.read if @body_html.respond_to?(:read)
|
268
|
+
@body_html = nil if @body_html.to_s.strip.empty?
|
269
|
+
if body_rtf and !@body_html
|
270
|
+
begin
|
271
|
+
@body_html = RTF.rtf2html body_rtf
|
272
|
+
rescue
|
273
|
+
Log.warn 'unable to extract html from rtf'
|
274
|
+
end
|
275
|
+
if !@body_html
|
276
|
+
Log.warn 'creating html body from rtf'
|
277
|
+
begin
|
278
|
+
@body_html = RTF::Converter.rtf2text body_rtf, :html
|
279
|
+
rescue
|
280
|
+
Log.warn 'unable to convert rtf to html'
|
281
|
+
end
|
282
|
+
end
|
264
283
|
end
|
265
284
|
@body_html
|
266
285
|
end
|
data/lib/mapi/rtf.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require 'stringio'
|
2
2
|
require 'strscan'
|
3
|
-
require 'rtf'
|
4
3
|
|
5
4
|
class StringIO # :nodoc:
|
6
5
|
begin
|
@@ -20,6 +19,109 @@ module Mapi
|
|
20
19
|
# Both were ported from their original C versions for simplicity's sake.
|
21
20
|
#
|
22
21
|
module RTF
|
22
|
+
class Tokenizer
|
23
|
+
def self.process io
|
24
|
+
while true do
|
25
|
+
case c = io.getc
|
26
|
+
when ?{; yield :open_group
|
27
|
+
when ?}; yield :close_group
|
28
|
+
when ?\\
|
29
|
+
case c = io.getc
|
30
|
+
when ?{, ?}, ?\\; yield :text, c.chr
|
31
|
+
when ?'; yield :text, [io.read(2)].pack('H*')
|
32
|
+
when ?a..?z, ?A..?Z
|
33
|
+
# read control word
|
34
|
+
str = c.chr
|
35
|
+
str << c while c = io.read(1) and c =~ /[a-zA-Z]/
|
36
|
+
neg = 1
|
37
|
+
neg = -1 and c = io.read(1) if c == '-'
|
38
|
+
num = if c =~ /[0-9]/
|
39
|
+
num = c
|
40
|
+
num << c while c = io.read(1) and c =~ /[0-9]/
|
41
|
+
num.to_i * neg
|
42
|
+
end
|
43
|
+
raise "invalid rtf stream" if neg == -1 and !num # ???? \blahblah- some text
|
44
|
+
io.seek(-1, IO::SEEK_CUR) if c != ' '
|
45
|
+
yield :control_word, str, num
|
46
|
+
when nil
|
47
|
+
raise "invalid rtf stream" # \EOF
|
48
|
+
else
|
49
|
+
# other kind of control symbol
|
50
|
+
yield :control_symbol, c.chr
|
51
|
+
end
|
52
|
+
when nil
|
53
|
+
return
|
54
|
+
when ?\r, ?\n
|
55
|
+
# ignore
|
56
|
+
else yield :text, c.chr
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
class Converter
|
63
|
+
# this is pretty crap, its just to ensure there is always something readable if
|
64
|
+
# there is an rtf only body, with no html encapsulation.
|
65
|
+
def self.rtf2text str, format=:text
|
66
|
+
group = 0
|
67
|
+
text = ''
|
68
|
+
text << "<html>\n<body>" if format == :html
|
69
|
+
group_type = []
|
70
|
+
group_tags = []
|
71
|
+
RTF::Tokenizer.process(StringIO.new(str)) do |a, b, c|
|
72
|
+
add_text = ''
|
73
|
+
case a
|
74
|
+
when :open_group; group += 1; group_type[group] = nil; group_tags[group] = []
|
75
|
+
when :close_group; group_tags[group].reverse.each { |t| text << "</#{t}>" }; group -= 1;
|
76
|
+
when :control_word; # ignore
|
77
|
+
group_type[group] ||= b
|
78
|
+
# maybe change this to use utf8 where possible
|
79
|
+
add_text = if b == 'par' || b == 'line' || b == 'page'; "\n"
|
80
|
+
elsif b == 'tab' || b == 'cell'; "\t"
|
81
|
+
elsif b == 'endash' || b == 'emdash'; "-"
|
82
|
+
elsif b == 'emspace' || b == 'enspace' || b == 'qmspace'; " "
|
83
|
+
elsif b == 'ldblquote'; '"'
|
84
|
+
else ''
|
85
|
+
end
|
86
|
+
if b == 'b' || b == 'i' and format == :html
|
87
|
+
close = c == 0 ? '/' : ''
|
88
|
+
text << "<#{close}#{b}>"
|
89
|
+
if c == 0
|
90
|
+
group_tags[group].delete b
|
91
|
+
else
|
92
|
+
group_tags[group] << b
|
93
|
+
end
|
94
|
+
end
|
95
|
+
# lot of other ones belong in here.\
|
96
|
+
=begin
|
97
|
+
\bullet Bullet character.
|
98
|
+
\lquote Left single quotation mark.
|
99
|
+
\rquote Right single quotation mark.
|
100
|
+
\ldblquote Left double quotation mark.
|
101
|
+
\rdblquote
|
102
|
+
=end
|
103
|
+
when :control_symbol; # ignore
|
104
|
+
group_type[group] ||= b
|
105
|
+
add_text = ' ' if b == '~' # non-breakable space
|
106
|
+
add_text = '-' if b == '_' # non-breakable hypen
|
107
|
+
when :text
|
108
|
+
add_text = b if group <= 1 or group_type[group] == 'rtlch' && !group_type[0...group].include?('*')
|
109
|
+
end
|
110
|
+
if format == :html
|
111
|
+
text << add_text.gsub(/([<>&"'])/) do
|
112
|
+
ent = { '<' => 'lt', '>' => 'gt', '&' => 'amp', '"' => 'quot', "'" => 'apos' }[$1]
|
113
|
+
"&#{ent};"
|
114
|
+
end
|
115
|
+
text << '<br>' if add_text == "\n"
|
116
|
+
else
|
117
|
+
text << add_text
|
118
|
+
end
|
119
|
+
end
|
120
|
+
text << "</body>\n</html>\n" if format == :html
|
121
|
+
text
|
122
|
+
end
|
123
|
+
end
|
124
|
+
|
23
125
|
RTF_PREBUF =
|
24
126
|
"{\\rtf1\\ansi\\mac\\deff0\\deftab720{\\fonttbl;}" \
|
25
127
|
"{\\f0\\fnil \\froman \\fswiss \\fmodern \\fscript " \
|
data/test/test_msg.rb
CHANGED
@@ -5,6 +5,7 @@ $: << "#{TEST_DIR}/../lib"
|
|
5
5
|
|
6
6
|
require 'test/unit'
|
7
7
|
require 'mapi/msg'
|
8
|
+
require 'mapi/convert'
|
8
9
|
|
9
10
|
class TestMsg < Test::Unit::TestCase
|
10
11
|
def test_blammo
|
@@ -17,12 +18,9 @@ class TestMsg < Test::Unit::TestCase
|
|
17
18
|
assert_equal 66, msg.properties.raw.length
|
18
19
|
# this is unique named properties
|
19
20
|
assert_equal 48, msg.properties.to_h.length
|
20
|
-
#
|
21
|
-
|
22
|
-
assert_equal '
|
23
|
-
assert_equal 'Yippee555', msg.properties[keys[0]]
|
24
|
-
assert_equal '66666666-6666-6666-c000-000000000046', keys[1].guid.format
|
25
|
-
assert_equal 'Yippee666', msg.properties[keys[1]]
|
21
|
+
# test accessing the named property keys - same name but different namespace
|
22
|
+
assert_equal 'Yippee555', msg.props['Name4', Ole::Types::Clsid.parse('55555555-5555-5555-c000-000000000046')]
|
23
|
+
assert_equal 'Yippee666', msg.props['Name4', Ole::Types::Clsid.parse('66666666-6666-6666-c000-000000000046')]
|
26
24
|
end
|
27
25
|
end
|
28
26
|
end
|
metadata
CHANGED
@@ -1,116 +1,124 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-msg
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
+
hash: 1
|
4
5
|
prerelease: false
|
5
6
|
segments:
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
version: 1.5.0
|
7
|
+
- 1
|
8
|
+
- 5
|
9
|
+
- 1
|
10
|
+
version: 1.5.1
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
|
-
|
13
|
+
- Charles Lowe
|
13
14
|
autorequire:
|
14
15
|
bindir: bin
|
15
16
|
cert_chain: []
|
16
17
|
|
17
|
-
date:
|
18
|
+
date: 2012-07-03 00:00:00 +10:00
|
18
19
|
default_executable:
|
19
20
|
dependencies:
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: ruby-ole
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 15
|
30
|
+
segments:
|
31
|
+
- 1
|
32
|
+
- 2
|
33
|
+
- 8
|
34
|
+
version: 1.2.8
|
35
|
+
type: :runtime
|
36
|
+
version_requirements: *id001
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: vpim
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
hash: 731
|
46
|
+
segments:
|
47
|
+
- 0
|
48
|
+
- 360
|
49
|
+
version: "0.360"
|
50
|
+
type: :runtime
|
51
|
+
version_requirements: *id002
|
47
52
|
description: A library for reading and converting Outlook msg and pst files (mapi message stores).
|
48
53
|
email: aquasync@gmail.com
|
49
54
|
executables:
|
50
|
-
|
55
|
+
- mapitool
|
51
56
|
extensions: []
|
52
57
|
|
53
58
|
extra_rdoc_files:
|
54
|
-
|
59
|
+
- README
|
55
60
|
files:
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
- test/test_msg.rb
|
61
|
+
- data/mapitags.yaml
|
62
|
+
- data/types.yaml
|
63
|
+
- data/named_map.yaml
|
64
|
+
- Rakefile
|
65
|
+
- README
|
66
|
+
- FIXES
|
67
|
+
- bin/mapitool
|
68
|
+
- lib/mapi.rb
|
69
|
+
- lib/mime.rb
|
70
|
+
- lib/orderedhash.rb
|
71
|
+
- lib/mapi/rtf.rb
|
72
|
+
- lib/mapi/property_set.rb
|
73
|
+
- lib/mapi/convert/contact.rb
|
74
|
+
- lib/mapi/convert/note-mime.rb
|
75
|
+
- lib/mapi/convert/note-tmail.rb
|
76
|
+
- lib/mapi/pst.rb
|
77
|
+
- lib/mapi/convert.rb
|
78
|
+
- lib/mapi/types.rb
|
79
|
+
- lib/mapi/msg.rb
|
80
|
+
- test/test_property_set.rb
|
81
|
+
- test/test_convert_note.rb
|
82
|
+
- test/test_mime.rb
|
83
|
+
- test/test_convert_contact.rb
|
84
|
+
- test/test_types.rb
|
85
|
+
- test/test_msg.rb
|
82
86
|
has_rdoc: true
|
83
87
|
homepage: http://code.google.com/p/ruby-msg
|
84
88
|
licenses: []
|
85
89
|
|
86
90
|
post_install_message:
|
87
91
|
rdoc_options:
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
92
|
+
- --main
|
93
|
+
- README
|
94
|
+
- --title
|
95
|
+
- ruby-msg documentation
|
96
|
+
- --tab-width
|
97
|
+
- "2"
|
94
98
|
require_paths:
|
95
|
-
|
99
|
+
- lib
|
96
100
|
required_ruby_version: !ruby/object:Gem::Requirement
|
101
|
+
none: false
|
97
102
|
requirements:
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
+
- - ">="
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
hash: 3
|
106
|
+
segments:
|
107
|
+
- 0
|
108
|
+
version: "0"
|
103
109
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
110
|
+
none: false
|
104
111
|
requirements:
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
112
|
+
- - ">="
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
hash: 3
|
115
|
+
segments:
|
116
|
+
- 0
|
117
|
+
version: "0"
|
110
118
|
requirements: []
|
111
119
|
|
112
120
|
rubyforge_project: ruby-msg
|
113
|
-
rubygems_version: 1.3.
|
121
|
+
rubygems_version: 1.3.7
|
114
122
|
signing_key:
|
115
123
|
specification_version: 3
|
116
124
|
summary: Ruby Msg library.
|
data/lib/rtf.rb
DELETED
@@ -1,109 +0,0 @@
|
|
1
|
-
require 'stringio'
|
2
|
-
|
3
|
-
# this file is pretty crap, its just to ensure there is always something readable if
|
4
|
-
# there is an rtf only body, with no html encapsulation.
|
5
|
-
|
6
|
-
module RTF
|
7
|
-
class Tokenizer
|
8
|
-
def self.process io
|
9
|
-
while true do
|
10
|
-
case c = io.getc
|
11
|
-
when ?{; yield :open_group
|
12
|
-
when ?}; yield :close_group
|
13
|
-
when ?\\
|
14
|
-
case c = io.getc
|
15
|
-
when ?{, ?}, ?\\; yield :text, c.chr
|
16
|
-
when ?'; yield :text, [io.read(2)].pack('H*')
|
17
|
-
when ?a..?z, ?A..?Z
|
18
|
-
# read control word
|
19
|
-
str = c.chr
|
20
|
-
str << c while c = io.read(1) and c =~ /[a-zA-Z]/
|
21
|
-
neg = 1
|
22
|
-
neg = -1 and c = io.read(1) if c == '-'
|
23
|
-
num = if c =~ /[0-9]/
|
24
|
-
num = c
|
25
|
-
num << c while c = io.read(1) and c =~ /[0-9]/
|
26
|
-
num.to_i * neg
|
27
|
-
end
|
28
|
-
raise "invalid rtf stream" if neg == -1 and !num # ???? \blahblah- some text
|
29
|
-
io.seek(-1, IO::SEEK_CUR) if c != ' '
|
30
|
-
yield :control_word, str, num
|
31
|
-
when nil
|
32
|
-
raise "invalid rtf stream" # \EOF
|
33
|
-
else
|
34
|
-
# other kind of control symbol
|
35
|
-
yield :control_symbol, c.chr
|
36
|
-
end
|
37
|
-
when nil
|
38
|
-
return
|
39
|
-
when ?\r, ?\n
|
40
|
-
# ignore
|
41
|
-
else yield :text, c.chr
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
class Converter
|
48
|
-
# crappy
|
49
|
-
def self.rtf2text str, format=:text
|
50
|
-
group = 0
|
51
|
-
text = ''
|
52
|
-
text << "<html>\n<body>" if format == :html
|
53
|
-
group_type = []
|
54
|
-
group_tags = []
|
55
|
-
RTF::Tokenizer.process(StringIO.new(str)) do |a, b, c|
|
56
|
-
add_text = ''
|
57
|
-
case a
|
58
|
-
when :open_group; group += 1; group_type[group] = nil; group_tags[group] = []
|
59
|
-
when :close_group; group_tags[group].reverse.each { |t| text << "</#{t}>" }; group -= 1;
|
60
|
-
when :control_word; # ignore
|
61
|
-
group_type[group] ||= b
|
62
|
-
# maybe change this to use utf8 where possible
|
63
|
-
add_text = if b == 'par' || b == 'line' || b == 'page'; "\n"
|
64
|
-
elsif b == 'tab' || b == 'cell'; "\t"
|
65
|
-
elsif b == 'endash' || b == 'emdash'; "-"
|
66
|
-
elsif b == 'emspace' || b == 'enspace' || b == 'qmspace'; " "
|
67
|
-
elsif b == 'ldblquote'; '"'
|
68
|
-
else ''
|
69
|
-
end
|
70
|
-
if b == 'b' || b == 'i' and format == :html
|
71
|
-
close = c == 0 ? '/' : ''
|
72
|
-
text << "<#{close}#{b}>"
|
73
|
-
if c == 0
|
74
|
-
group_tags[group].delete b
|
75
|
-
else
|
76
|
-
group_tags[group] << b
|
77
|
-
end
|
78
|
-
end
|
79
|
-
# lot of other ones belong in here.\
|
80
|
-
=begin
|
81
|
-
\bullet Bullet character.
|
82
|
-
\lquote Left single quotation mark.
|
83
|
-
\rquote Right single quotation mark.
|
84
|
-
\ldblquote Left double quotation mark.
|
85
|
-
\rdblquote
|
86
|
-
=end
|
87
|
-
when :control_symbol; # ignore
|
88
|
-
group_type[group] ||= b
|
89
|
-
add_text = ' ' if b == '~' # non-breakable space
|
90
|
-
add_text = '-' if b == '_' # non-breakable hypen
|
91
|
-
when :text
|
92
|
-
add_text = b if group <= 1 or group_type[group] == 'rtlch' && !group_type[0...group].include?('*')
|
93
|
-
end
|
94
|
-
if format == :html
|
95
|
-
text << add_text.gsub(/([<>&"'])/) do
|
96
|
-
ent = { '<' => 'lt', '>' => 'gt', '&' => 'amp', '"' => 'quot', "'" => 'apos' }[$1]
|
97
|
-
"&#{ent};"
|
98
|
-
end
|
99
|
-
text << '<br>' if add_text == "\n"
|
100
|
-
else
|
101
|
-
text << add_text
|
102
|
-
end
|
103
|
-
end
|
104
|
-
text << "</body>\n</html>\n" if format == :html
|
105
|
-
text
|
106
|
-
end
|
107
|
-
end
|
108
|
-
end
|
109
|
-
|