mbox2csv 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/mbox2csv.rb +51 -14
- metadata +21 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fff3b927d3dc15547ce87943b6bfe1b747423222391f82cb336dbb03b710a056
|
4
|
+
data.tar.gz: 143acbaaf7f95ae5f2b61df3e242bf976d328dfa780d4e3439d764a294ddffd7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f68e2ca1627bcb9cf8d1325000b6b00d96b7f856227b1ea2a768962312def6bbd29dd4cce478c286ad1a610915fd5b0bbe90310fd0c370a28ca916681f2e4b35
|
7
|
+
data.tar.gz: 0cf68cb50e9886e174d55844f1b68533795c23cb3a38824bf5c9c3d8c90e2b5454317db10b681ca1328eb483a0a7fca1222d18d1b20106e1a81ecfab336452fc
|
data/lib/mbox2csv.rb
CHANGED
@@ -1,12 +1,14 @@
|
|
1
1
|
require 'base64'
|
2
2
|
require 'csv'
|
3
3
|
require 'mail'
|
4
|
+
require 'fileutils'
|
5
|
+
require 'ruby-progressbar'
|
4
6
|
|
5
7
|
module Mbox2CSV
|
6
|
-
# Main class
|
8
|
+
# Main class for parsing MBOX files and saving email data and statistics to CSV files.
|
7
9
|
class MboxParser
|
8
10
|
# Initializes the MboxParser with file paths for the MBOX file, output CSV file,
|
9
|
-
# and statistics CSV files for sender statistics.
|
11
|
+
# and statistics CSV files for sender and recipient statistics.
|
10
12
|
#
|
11
13
|
# @param [String] mbox_file Path to the MBOX file to be parsed.
|
12
14
|
# @param [String] csv_file Path to the output CSV file where parsed email data will be saved.
|
@@ -18,30 +20,35 @@ module Mbox2CSV
|
|
18
20
|
@statistics = EmailStatistics.new
|
19
21
|
@stats_csv_file = stats_csv_file
|
20
22
|
@recipient_stats_csv_file = recipient_stats_csv_file
|
23
|
+
@senders_folder = 'senders/'
|
24
|
+
FileUtils.mkdir_p(@senders_folder) # Create the senders folder if it doesn't exist
|
21
25
|
end
|
22
26
|
|
23
27
|
# Parses the MBOX file and writes the email data to the specified CSV file.
|
24
28
|
# It also saves sender and recipient statistics to separate CSV files.
|
29
|
+
# A progress bar is displayed during the parsing process.
|
25
30
|
def parse
|
31
|
+
total_lines = File.foreach(@mbox_file).inject(0) { |c, _line| c + 1 }
|
32
|
+
progressbar = ProgressBar.create(title: "Parsing Emails", total: total_lines, format: "%t: |%B| %p%%")
|
33
|
+
|
26
34
|
CSV.open(@csv_file, 'w') do |csv|
|
27
|
-
# Write CSV header
|
28
35
|
csv << ['From', 'To', 'Subject', 'Date', 'Body']
|
29
36
|
|
30
37
|
File.open(@mbox_file, 'r') do |mbox|
|
31
38
|
buffer = ""
|
32
39
|
mbox.each_line do |line|
|
40
|
+
progressbar.increment
|
33
41
|
if line.start_with?("From ")
|
34
42
|
process_email_block(buffer, csv) unless buffer.empty?
|
35
|
-
buffer = ""
|
43
|
+
buffer = ""
|
36
44
|
end
|
37
|
-
buffer << line
|
45
|
+
buffer << line
|
38
46
|
end
|
39
|
-
process_email_block(buffer, csv) unless buffer.empty?
|
47
|
+
process_email_block(buffer, csv) unless buffer.empty?
|
40
48
|
end
|
41
49
|
end
|
42
50
|
puts "Parsing completed. Data saved to #{@csv_file}"
|
43
51
|
|
44
|
-
# Save and print statistics after parsing
|
45
52
|
@statistics.save_sender_statistics(@stats_csv_file)
|
46
53
|
@statistics.save_recipient_statistics(@recipient_stats_csv_file)
|
47
54
|
rescue => e
|
@@ -51,7 +58,8 @@ module Mbox2CSV
|
|
51
58
|
private
|
52
59
|
|
53
60
|
# Processes an individual email block from the MBOX file, extracts the email fields,
|
54
|
-
# and writes them to the CSV. Also records email statistics for analysis
|
61
|
+
# and writes them to the CSV. Also records email statistics for analysis and creates
|
62
|
+
# sender-specific CSV files.
|
55
63
|
#
|
56
64
|
# @param [String] buffer The email block from the MBOX file.
|
57
65
|
# @param [CSV] csv The CSV object where email data is written.
|
@@ -62,14 +70,13 @@ module Mbox2CSV
|
|
62
70
|
to = ensure_utf8(mail.to ? mail.to.join(", ") : '', 'UTF-8')
|
63
71
|
subject = ensure_utf8(mail.subject ? mail.subject : '', 'UTF-8')
|
64
72
|
date = ensure_utf8(mail.date ? mail.date.to_s : '', 'UTF-8')
|
65
|
-
|
66
73
|
body = decode_body(mail)
|
67
74
|
|
68
|
-
# Write to CSV
|
69
75
|
csv << [from, to, subject, date, body]
|
70
76
|
|
71
|
-
# Record email for statistics
|
72
77
|
@statistics.record_email(from, to, body.length)
|
78
|
+
|
79
|
+
save_email_to_sender_csv(from, to, subject, date, body)
|
73
80
|
rescue => e
|
74
81
|
puts "Error processing email block: #{e.message}"
|
75
82
|
end
|
@@ -108,15 +115,45 @@ module Mbox2CSV
|
|
108
115
|
text = text.force_encoding(charset) if charset
|
109
116
|
text.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
|
110
117
|
end
|
118
|
+
|
119
|
+
# Saves an email to a sender-specific CSV file.
|
120
|
+
#
|
121
|
+
# @param [String] from The sender of the email.
|
122
|
+
# @param [String] to The recipient(s) of the email.
|
123
|
+
# @param [String] subject The subject of the email.
|
124
|
+
# @param [String] date The date of the email.
|
125
|
+
# @param [String] body The body of the email.
|
126
|
+
def save_email_to_sender_csv(from, to, subject, date, body)
|
127
|
+
return if from.empty?
|
128
|
+
|
129
|
+
sender_file = File.join(@senders_folder, "#{sanitize_filename(from)}.csv")
|
130
|
+
|
131
|
+
CSV.open(sender_file, 'a') do |csv|
|
132
|
+
if File.size(sender_file).zero?
|
133
|
+
csv << ['From', 'To', 'Subject', 'Date', 'Body'] # Add header if file is new
|
134
|
+
end
|
135
|
+
csv << [from, to, subject, date, body]
|
136
|
+
end
|
137
|
+
rescue => e
|
138
|
+
puts "Error writing to sender CSV file for #{from}: #{e.message}"
|
139
|
+
end
|
140
|
+
|
141
|
+
# Sanitizes filenames by replacing invalid characters with underscores.
|
142
|
+
#
|
143
|
+
# @param [String] filename The input filename.
|
144
|
+
# @return [String] A sanitized version of the filename.
|
145
|
+
def sanitize_filename(filename)
|
146
|
+
filename.gsub(/[^0-9A-Za-z\-]/, '_')
|
147
|
+
end
|
111
148
|
end
|
112
149
|
|
113
150
|
# The EmailStatistics class is responsible for gathering and writing statistics related to emails.
|
114
151
|
# It tracks sender frequency, recipient frequency, and calculates the average email body length per sender.
|
115
152
|
class EmailStatistics
|
116
153
|
def initialize
|
117
|
-
@sender_counts = Hash.new(0)
|
118
|
-
@recipient_counts = Hash.new(0)
|
119
|
-
@body_lengths = Hash.new { |hash, key| hash[key] = [] }
|
154
|
+
@sender_counts = Hash.new(0)
|
155
|
+
@recipient_counts = Hash.new(0)
|
156
|
+
@body_lengths = Hash.new { |hash, key| hash[key] = [] }
|
120
157
|
end
|
121
158
|
|
122
159
|
# Records an email's sender, recipients, and body length for statistical purposes.
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mbox2csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- firefly-cpp
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-01-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: base64
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: 2.8.1
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: ruby-progressbar
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '1.11'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '1.11'
|
55
69
|
description:
|
56
70
|
email:
|
57
71
|
- iztok@iztok-jr-fister.eu
|
@@ -62,13 +76,13 @@ files:
|
|
62
76
|
- LICENSE
|
63
77
|
- README.md
|
64
78
|
- lib/mbox2csv.rb
|
65
|
-
homepage: https://
|
79
|
+
homepage: https://github.com/firefly-cpp/mbox2csv
|
66
80
|
licenses:
|
67
81
|
- MIT
|
68
82
|
metadata:
|
69
|
-
homepage_uri: https://
|
70
|
-
source_code_uri: https://
|
71
|
-
changelog_uri: https://
|
83
|
+
homepage_uri: https://github.com/firefly-cpp/mbox2csv
|
84
|
+
source_code_uri: https://github.com/firefly-cpp/mbox2csv
|
85
|
+
changelog_uri: https://github.com/firefly-cpp/mbox2csv
|
72
86
|
post_install_message:
|
73
87
|
rdoc_options: []
|
74
88
|
require_paths:
|
@@ -84,7 +98,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
84
98
|
- !ruby/object:Gem::Version
|
85
99
|
version: '0'
|
86
100
|
requirements: []
|
87
|
-
rubygems_version: 3.5.
|
101
|
+
rubygems_version: 3.5.22
|
88
102
|
signing_key:
|
89
103
|
specification_version: 4
|
90
104
|
summary: Parse MBOX files and export email data into CSV format
|