mbox2csv 0.1.1 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/mbox2csv.rb +51 -14
- metadata +21 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fff3b927d3dc15547ce87943b6bfe1b747423222391f82cb336dbb03b710a056
|
4
|
+
data.tar.gz: 143acbaaf7f95ae5f2b61df3e242bf976d328dfa780d4e3439d764a294ddffd7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f68e2ca1627bcb9cf8d1325000b6b00d96b7f856227b1ea2a768962312def6bbd29dd4cce478c286ad1a610915fd5b0bbe90310fd0c370a28ca916681f2e4b35
|
7
|
+
data.tar.gz: 0cf68cb50e9886e174d55844f1b68533795c23cb3a38824bf5c9c3d8c90e2b5454317db10b681ca1328eb483a0a7fca1222d18d1b20106e1a81ecfab336452fc
|
data/lib/mbox2csv.rb
CHANGED
@@ -1,12 +1,14 @@
|
|
1
1
|
require 'base64'
|
2
2
|
require 'csv'
|
3
3
|
require 'mail'
|
4
|
+
require 'fileutils'
|
5
|
+
require 'ruby-progressbar'
|
4
6
|
|
5
7
|
module Mbox2CSV
|
6
|
-
# Main class
|
8
|
+
# Main class for parsing MBOX files and saving email data and statistics to CSV files.
|
7
9
|
class MboxParser
|
8
10
|
# Initializes the MboxParser with file paths for the MBOX file, output CSV file,
|
9
|
-
# and statistics CSV files for sender statistics.
|
11
|
+
# and statistics CSV files for sender and recipient statistics.
|
10
12
|
#
|
11
13
|
# @param [String] mbox_file Path to the MBOX file to be parsed.
|
12
14
|
# @param [String] csv_file Path to the output CSV file where parsed email data will be saved.
|
@@ -18,30 +20,35 @@ module Mbox2CSV
|
|
18
20
|
@statistics = EmailStatistics.new
|
19
21
|
@stats_csv_file = stats_csv_file
|
20
22
|
@recipient_stats_csv_file = recipient_stats_csv_file
|
23
|
+
@senders_folder = 'senders/'
|
24
|
+
FileUtils.mkdir_p(@senders_folder) # Create the senders folder if it doesn't exist
|
21
25
|
end
|
22
26
|
|
23
27
|
# Parses the MBOX file and writes the email data to the specified CSV file.
|
24
28
|
# It also saves sender and recipient statistics to separate CSV files.
|
29
|
+
# A progress bar is displayed during the parsing process.
|
25
30
|
def parse
|
31
|
+
total_lines = File.foreach(@mbox_file).inject(0) { |c, _line| c + 1 }
|
32
|
+
progressbar = ProgressBar.create(title: "Parsing Emails", total: total_lines, format: "%t: |%B| %p%%")
|
33
|
+
|
26
34
|
CSV.open(@csv_file, 'w') do |csv|
|
27
|
-
# Write CSV header
|
28
35
|
csv << ['From', 'To', 'Subject', 'Date', 'Body']
|
29
36
|
|
30
37
|
File.open(@mbox_file, 'r') do |mbox|
|
31
38
|
buffer = ""
|
32
39
|
mbox.each_line do |line|
|
40
|
+
progressbar.increment
|
33
41
|
if line.start_with?("From ")
|
34
42
|
process_email_block(buffer, csv) unless buffer.empty?
|
35
|
-
buffer = ""
|
43
|
+
buffer = ""
|
36
44
|
end
|
37
|
-
buffer << line
|
45
|
+
buffer << line
|
38
46
|
end
|
39
|
-
process_email_block(buffer, csv) unless buffer.empty?
|
47
|
+
process_email_block(buffer, csv) unless buffer.empty?
|
40
48
|
end
|
41
49
|
end
|
42
50
|
puts "Parsing completed. Data saved to #{@csv_file}"
|
43
51
|
|
44
|
-
# Save and print statistics after parsing
|
45
52
|
@statistics.save_sender_statistics(@stats_csv_file)
|
46
53
|
@statistics.save_recipient_statistics(@recipient_stats_csv_file)
|
47
54
|
rescue => e
|
@@ -51,7 +58,8 @@ module Mbox2CSV
|
|
51
58
|
private
|
52
59
|
|
53
60
|
# Processes an individual email block from the MBOX file, extracts the email fields,
|
54
|
-
# and writes them to the CSV. Also records email statistics for analysis
|
61
|
+
# and writes them to the CSV. Also records email statistics for analysis and creates
|
62
|
+
# sender-specific CSV files.
|
55
63
|
#
|
56
64
|
# @param [String] buffer The email block from the MBOX file.
|
57
65
|
# @param [CSV] csv The CSV object where email data is written.
|
@@ -62,14 +70,13 @@ module Mbox2CSV
|
|
62
70
|
to = ensure_utf8(mail.to ? mail.to.join(", ") : '', 'UTF-8')
|
63
71
|
subject = ensure_utf8(mail.subject ? mail.subject : '', 'UTF-8')
|
64
72
|
date = ensure_utf8(mail.date ? mail.date.to_s : '', 'UTF-8')
|
65
|
-
|
66
73
|
body = decode_body(mail)
|
67
74
|
|
68
|
-
# Write to CSV
|
69
75
|
csv << [from, to, subject, date, body]
|
70
76
|
|
71
|
-
# Record email for statistics
|
72
77
|
@statistics.record_email(from, to, body.length)
|
78
|
+
|
79
|
+
save_email_to_sender_csv(from, to, subject, date, body)
|
73
80
|
rescue => e
|
74
81
|
puts "Error processing email block: #{e.message}"
|
75
82
|
end
|
@@ -108,15 +115,45 @@ module Mbox2CSV
|
|
108
115
|
text = text.force_encoding(charset) if charset
|
109
116
|
text.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
|
110
117
|
end
|
118
|
+
|
119
|
+
# Saves an email to a sender-specific CSV file.
|
120
|
+
#
|
121
|
+
# @param [String] from The sender of the email.
|
122
|
+
# @param [String] to The recipient(s) of the email.
|
123
|
+
# @param [String] subject The subject of the email.
|
124
|
+
# @param [String] date The date of the email.
|
125
|
+
# @param [String] body The body of the email.
|
126
|
+
def save_email_to_sender_csv(from, to, subject, date, body)
|
127
|
+
return if from.empty?
|
128
|
+
|
129
|
+
sender_file = File.join(@senders_folder, "#{sanitize_filename(from)}.csv")
|
130
|
+
|
131
|
+
CSV.open(sender_file, 'a') do |csv|
|
132
|
+
if File.size(sender_file).zero?
|
133
|
+
csv << ['From', 'To', 'Subject', 'Date', 'Body'] # Add header if file is new
|
134
|
+
end
|
135
|
+
csv << [from, to, subject, date, body]
|
136
|
+
end
|
137
|
+
rescue => e
|
138
|
+
puts "Error writing to sender CSV file for #{from}: #{e.message}"
|
139
|
+
end
|
140
|
+
|
141
|
+
# Sanitizes filenames by replacing invalid characters with underscores.
|
142
|
+
#
|
143
|
+
# @param [String] filename The input filename.
|
144
|
+
# @return [String] A sanitized version of the filename.
|
145
|
+
def sanitize_filename(filename)
|
146
|
+
filename.gsub(/[^0-9A-Za-z\-]/, '_')
|
147
|
+
end
|
111
148
|
end
|
112
149
|
|
113
150
|
# The EmailStatistics class is responsible for gathering and writing statistics related to emails.
|
114
151
|
# It tracks sender frequency, recipient frequency, and calculates the average email body length per sender.
|
115
152
|
class EmailStatistics
|
116
153
|
def initialize
|
117
|
-
@sender_counts = Hash.new(0)
|
118
|
-
@recipient_counts = Hash.new(0)
|
119
|
-
@body_lengths = Hash.new { |hash, key| hash[key] = [] }
|
154
|
+
@sender_counts = Hash.new(0)
|
155
|
+
@recipient_counts = Hash.new(0)
|
156
|
+
@body_lengths = Hash.new { |hash, key| hash[key] = [] }
|
120
157
|
end
|
121
158
|
|
122
159
|
# Records an email's sender, recipients, and body length for statistical purposes.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mbox2csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- firefly-cpp
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-01-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: base64
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: 2.8.1
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: ruby-progressbar
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.11'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.11'
|
55
69
|
description:
|
56
70
|
email:
|
57
71
|
- iztok@iztok-jr-fister.eu
|
@@ -62,13 +76,13 @@ files:
|
|
62
76
|
- LICENSE
|
63
77
|
- README.md
|
64
78
|
- lib/mbox2csv.rb
|
65
|
-
homepage: https://
|
79
|
+
homepage: https://github.com/firefly-cpp/mbox2csv
|
66
80
|
licenses:
|
67
81
|
- MIT
|
68
82
|
metadata:
|
69
|
-
homepage_uri: https://
|
70
|
-
source_code_uri: https://
|
71
|
-
changelog_uri: https://
|
83
|
+
homepage_uri: https://github.com/firefly-cpp/mbox2csv
|
84
|
+
source_code_uri: https://github.com/firefly-cpp/mbox2csv
|
85
|
+
changelog_uri: https://github.com/firefly-cpp/mbox2csv
|
72
86
|
post_install_message:
|
73
87
|
rdoc_options: []
|
74
88
|
require_paths:
|
@@ -84,7 +98,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
84
98
|
- !ruby/object:Gem::Version
|
85
99
|
version: '0'
|
86
100
|
requirements: []
|
87
|
-
rubygems_version: 3.5.
|
101
|
+
rubygems_version: 3.5.22
|
88
102
|
signing_key:
|
89
103
|
specification_version: 4
|
90
104
|
summary: Parse MBOX files and export email data into CSV format
|