mbox2csv 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/mbox2csv.rb +51 -14
  3. metadata +21 -7
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1f4a5c7ffcfb3c8deffd7c05068268b129e5a37ffc2ba01f3024e3e9790c3d2d
4
- data.tar.gz: c20e300f2ddd8b2a002bb782109af205532e7be93b539f197937480c4ec0400d
3
+ metadata.gz: fff3b927d3dc15547ce87943b6bfe1b747423222391f82cb336dbb03b710a056
4
+ data.tar.gz: 143acbaaf7f95ae5f2b61df3e242bf976d328dfa780d4e3439d764a294ddffd7
5
5
  SHA512:
6
- metadata.gz: b81692ac0aa4648ee89b5f2d3cd3e704bd9a889b140a82f078009c4075067a1a6d09b58398898a4b215352bc4431d83ec3fbbe8e70213388eb4680442468b4ba
7
- data.tar.gz: 15442b56f31f99c8cf17db145057b11d6345f912146e4569de628098b6b0574e5bcc3aed9c1791d58cbc5cfc83a29ba5c4685798dbbf579b5225ad933bc900cc
6
+ metadata.gz: f68e2ca1627bcb9cf8d1325000b6b00d96b7f856227b1ea2a768962312def6bbd29dd4cce478c286ad1a610915fd5b0bbe90310fd0c370a28ca916681f2e4b35
7
+ data.tar.gz: 0cf68cb50e9886e174d55844f1b68533795c23cb3a38824bf5c9c3d8c90e2b5454317db10b681ca1328eb483a0a7fca1222d18d1b20106e1a81ecfab336452fc
data/lib/mbox2csv.rb CHANGED
@@ -1,12 +1,14 @@
1
1
  require 'base64'
2
2
  require 'csv'
3
3
  require 'mail'
4
+ require 'fileutils'
5
+ require 'ruby-progressbar'
4
6
 
5
7
  module Mbox2CSV
6
- # Main class
8
+ # Main class for parsing MBOX files and saving email data and statistics to CSV files.
7
9
  class MboxParser
8
10
  # Initializes the MboxParser with file paths for the MBOX file, output CSV file,
9
- # and statistics CSV files for sender statistics.
11
+ # and statistics CSV files for sender and recipient statistics.
10
12
  #
11
13
  # @param [String] mbox_file Path to the MBOX file to be parsed.
12
14
  # @param [String] csv_file Path to the output CSV file where parsed email data will be saved.
@@ -18,30 +20,35 @@ module Mbox2CSV
18
20
  @statistics = EmailStatistics.new
19
21
  @stats_csv_file = stats_csv_file
20
22
  @recipient_stats_csv_file = recipient_stats_csv_file
23
+ @senders_folder = 'senders/'
24
+ FileUtils.mkdir_p(@senders_folder) # Create the senders folder if it doesn't exist
21
25
  end
22
26
 
23
27
  # Parses the MBOX file and writes the email data to the specified CSV file.
24
28
  # It also saves sender and recipient statistics to separate CSV files.
29
+ # A progress bar is displayed during the parsing process.
25
30
  def parse
31
+ total_lines = File.foreach(@mbox_file).inject(0) { |c, _line| c + 1 }
32
+ progressbar = ProgressBar.create(title: "Parsing Emails", total: total_lines, format: "%t: |%B| %p%%")
33
+
26
34
  CSV.open(@csv_file, 'w') do |csv|
27
- # Write CSV header
28
35
  csv << ['From', 'To', 'Subject', 'Date', 'Body']
29
36
 
30
37
  File.open(@mbox_file, 'r') do |mbox|
31
38
  buffer = ""
32
39
  mbox.each_line do |line|
40
+ progressbar.increment
33
41
  if line.start_with?("From ")
34
42
  process_email_block(buffer, csv) unless buffer.empty?
35
- buffer = "" # Reset buffer
43
+ buffer = ""
36
44
  end
37
- buffer << line # Append line to buffer
45
+ buffer << line
38
46
  end
39
- process_email_block(buffer, csv) unless buffer.empty? # Process last email block
47
+ process_email_block(buffer, csv) unless buffer.empty?
40
48
  end
41
49
  end
42
50
  puts "Parsing completed. Data saved to #{@csv_file}"
43
51
 
44
- # Save and print statistics after parsing
45
52
  @statistics.save_sender_statistics(@stats_csv_file)
46
53
  @statistics.save_recipient_statistics(@recipient_stats_csv_file)
47
54
  rescue => e
@@ -51,7 +58,8 @@ module Mbox2CSV
51
58
  private
52
59
 
53
60
  # Processes an individual email block from the MBOX file, extracts the email fields,
54
- # and writes them to the CSV. Also records email statistics for analysis.
61
+ # and writes them to the CSV. Also records email statistics for analysis and creates
62
+ # sender-specific CSV files.
55
63
  #
56
64
  # @param [String] buffer The email block from the MBOX file.
57
65
  # @param [CSV] csv The CSV object where email data is written.
@@ -62,14 +70,13 @@ module Mbox2CSV
62
70
  to = ensure_utf8(mail.to ? mail.to.join(", ") : '', 'UTF-8')
63
71
  subject = ensure_utf8(mail.subject ? mail.subject : '', 'UTF-8')
64
72
  date = ensure_utf8(mail.date ? mail.date.to_s : '', 'UTF-8')
65
-
66
73
  body = decode_body(mail)
67
74
 
68
- # Write to CSV
69
75
  csv << [from, to, subject, date, body]
70
76
 
71
- # Record email for statistics
72
77
  @statistics.record_email(from, to, body.length)
78
+
79
+ save_email_to_sender_csv(from, to, subject, date, body)
73
80
  rescue => e
74
81
  puts "Error processing email block: #{e.message}"
75
82
  end
@@ -108,15 +115,45 @@ module Mbox2CSV
108
115
  text = text.force_encoding(charset) if charset
109
116
  text.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
110
117
  end
118
+
119
+ # Saves an email to a sender-specific CSV file.
120
+ #
121
+ # @param [String] from The sender of the email.
122
+ # @param [String] to The recipient(s) of the email.
123
+ # @param [String] subject The subject of the email.
124
+ # @param [String] date The date of the email.
125
+ # @param [String] body The body of the email.
126
+ def save_email_to_sender_csv(from, to, subject, date, body)
127
+ return if from.empty?
128
+
129
+ sender_file = File.join(@senders_folder, "#{sanitize_filename(from)}.csv")
130
+
131
+ CSV.open(sender_file, 'a') do |csv|
132
+ if File.size(sender_file).zero?
133
+ csv << ['From', 'To', 'Subject', 'Date', 'Body'] # Add header if file is new
134
+ end
135
+ csv << [from, to, subject, date, body]
136
+ end
137
+ rescue => e
138
+ puts "Error writing to sender CSV file for #{from}: #{e.message}"
139
+ end
140
+
141
+ # Sanitizes filenames by replacing invalid characters with underscores.
142
+ #
143
+ # @param [String] filename The input filename.
144
+ # @return [String] A sanitized version of the filename.
145
+ def sanitize_filename(filename)
146
+ filename.gsub(/[^0-9A-Za-z\-]/, '_')
147
+ end
111
148
  end
112
149
 
113
150
  # The EmailStatistics class is responsible for gathering and writing statistics related to emails.
114
151
  # It tracks sender frequency, recipient frequency, and calculates the average email body length per sender.
115
152
  class EmailStatistics
116
153
  def initialize
117
- @sender_counts = Hash.new(0) # Keeps count of emails per sender
118
- @recipient_counts = Hash.new(0) # Keeps count of emails per recipient
119
- @body_lengths = Hash.new { |hash, key| hash[key] = [] } # Stores body lengths per sender
154
+ @sender_counts = Hash.new(0)
155
+ @recipient_counts = Hash.new(0)
156
+ @body_lengths = Hash.new { |hash, key| hash[key] = [] }
120
157
  end
121
158
 
122
159
  # Records an email's sender, recipients, and body length for statistical purposes.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mbox2csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - firefly-cpp
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-09-21 00:00:00.000000000 Z
11
+ date: 2025-01-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: base64
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: 2.8.1
55
+ - !ruby/object:Gem::Dependency
56
+ name: ruby-progressbar
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.11'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.11'
55
69
  description:
56
70
  email:
57
71
  - iztok@iztok-jr-fister.eu
@@ -62,13 +76,13 @@ files:
62
76
  - LICENSE
63
77
  - README.md
64
78
  - lib/mbox2csv.rb
65
- homepage: https://codeberg.org/firefly-cpp/mbox2csv
79
+ homepage: https://github.com/firefly-cpp/mbox2csv
66
80
  licenses:
67
81
  - MIT
68
82
  metadata:
69
- homepage_uri: https://codeberg.org/firefly-cpp/mbox2csv
70
- source_code_uri: https://codeberg.org/firefly-cpp/mbox2csv
71
- changelog_uri: https://codeberg.org/firefly-cpp/mbox2csv
83
+ homepage_uri: https://github.com/firefly-cpp/mbox2csv
84
+ source_code_uri: https://github.com/firefly-cpp/mbox2csv
85
+ changelog_uri: https://github.com/firefly-cpp/mbox2csv
72
86
  post_install_message:
73
87
  rdoc_options: []
74
88
  require_paths:
@@ -84,7 +98,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
84
98
  - !ruby/object:Gem::Version
85
99
  version: '0'
86
100
  requirements: []
87
- rubygems_version: 3.5.16
101
+ rubygems_version: 3.5.22
88
102
  signing_key:
89
103
  specification_version: 4
90
104
  summary: Parse MBOX files and export email data into CSV format