mailcvt 0.2.27 → 0.2.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/encoded_word.rb +48 -8
- data/lib/mailcvt/version.rb +1 -1
- metadata +1 -1
data/lib/encoded_word.rb
CHANGED
@@ -12,13 +12,42 @@ class EncodedWord
|
|
12
12
|
end
|
13
13
|
|
14
14
|
def decode
|
15
|
+
decode_all_mlog
|
16
|
+
combine_all_mlog_plain
|
17
|
+
end
|
18
|
+
|
19
|
+
def combine_all_mlog_plain
|
20
|
+
File.open(File.join(@inputdir, 'all_mlog.csv'), 'w:utf-8') do |out|
|
21
|
+
out.puts '"key","","","date","from","to","cc","bcc","subject","attach"'
|
22
|
+
Dir.glob(File.join(@inputdir, File.join('**', '*.plain'))).select do |f|
|
23
|
+
puts f
|
24
|
+
File.open(f, 'r:utf-8').each_line do |line|
|
25
|
+
parts = mysplit(line)
|
26
|
+
key = parts[3]
|
27
|
+
date = parts[0]
|
28
|
+
subject = parts[2]
|
29
|
+
from = parts[3]
|
30
|
+
to = parts[4].gsub("\a", ';')
|
31
|
+
cc = parts[5].gsub("\a", ';')
|
32
|
+
attach = parts[6].gsub("\a", ';')
|
33
|
+
out.puts %Q("#{key}","","","#{date}","#{from}","#{to}","#{cc}","","#{subject}","#{attach}")
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def decode_all_mlog
|
15
40
|
Dir.glob(File.join(@inputdir, File.join('**', '*.mlog'))).select do |f|
|
16
41
|
puts f
|
17
|
-
|
42
|
+
mids = {}
|
43
|
+
File.open(f + '.plain', 'w:utf-8') do |out|
|
18
44
|
File.open(f) do |input|
|
19
45
|
@input_enc = input.external_encoding
|
20
46
|
input.each_line do |line|
|
21
47
|
parts = mysplit(line)
|
48
|
+
next if mids.has_key?(parts[1])
|
49
|
+
mids[parts[1]] = 0
|
50
|
+
|
22
51
|
newparts = []
|
23
52
|
newparts << format_date(parts[0]) #date
|
24
53
|
newparts << parts[1] #message-id
|
@@ -26,7 +55,7 @@ class EncodedWord
|
|
26
55
|
newparts << trim_emails(parts[3]) #from
|
27
56
|
newparts << trim_emails(parts[4]) #to
|
28
57
|
newparts << trim_emails(parts[5]) #cc
|
29
|
-
newparts <<
|
58
|
+
newparts << decode_attaches(parts)
|
30
59
|
out.puts newparts.join("\t")
|
31
60
|
end
|
32
61
|
end
|
@@ -39,6 +68,7 @@ class EncodedWord
|
|
39
68
|
return [] unless line.length > 0
|
40
69
|
|
41
70
|
parts = []
|
71
|
+
last = -1
|
42
72
|
pos1 = -1
|
43
73
|
while true do
|
44
74
|
pos1 += 1
|
@@ -47,7 +77,8 @@ class EncodedWord
|
|
47
77
|
parts << line[pos1...pos2]
|
48
78
|
pos1 = pos2
|
49
79
|
else
|
50
|
-
|
80
|
+
last -= 1 if line[last] == "\n"
|
81
|
+
parts << line[pos1..last]
|
51
82
|
break
|
52
83
|
end
|
53
84
|
end
|
@@ -99,15 +130,24 @@ class EncodedWord
|
|
99
130
|
line[pos1..pos2]
|
100
131
|
end
|
101
132
|
|
102
|
-
def
|
103
|
-
|
104
|
-
|
105
|
-
|
133
|
+
def decode_attaches(parts)
|
134
|
+
attaches = []
|
135
|
+
6.upto(parts.length-1) do |i|
|
136
|
+
attaches << decode_attach(parts[i])
|
137
|
+
end
|
138
|
+
return '' unless attaches.length > 0
|
139
|
+
attaches.join("\a")
|
140
|
+
end
|
141
|
+
|
142
|
+
def decode_attach(attach)
|
143
|
+
return '' unless attach and attach.length > 0
|
144
|
+
attach = attach.encode('utf-8', @input_enc, :undef=>:replace, :invalid=>:replace)
|
145
|
+
parts = mysplit(attach, "\a")
|
106
146
|
newparts = []
|
107
147
|
parts.each do |p|
|
108
148
|
newparts << mime_decode(p)
|
109
149
|
end
|
110
|
-
newparts.join(
|
150
|
+
newparts.join('')
|
111
151
|
end
|
112
152
|
|
113
153
|
def mime_decode(input, out_charset = 'utf-8')
|
data/lib/mailcvt/version.rb
CHANGED