mbox2csv 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/mbox2csv.rb +51 -14
  3. metadata +21 -7
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 1f4a5c7ffcfb3c8deffd7c05068268b129e5a37ffc2ba01f3024e3e9790c3d2d
4
- data.tar.gz: c20e300f2ddd8b2a002bb782109af205532e7be93b539f197937480c4ec0400d
3
+ metadata.gz: fff3b927d3dc15547ce87943b6bfe1b747423222391f82cb336dbb03b710a056
4
+ data.tar.gz: 143acbaaf7f95ae5f2b61df3e242bf976d328dfa780d4e3439d764a294ddffd7
5
5
  SHA512:
6
- metadata.gz: b81692ac0aa4648ee89b5f2d3cd3e704bd9a889b140a82f078009c4075067a1a6d09b58398898a4b215352bc4431d83ec3fbbe8e70213388eb4680442468b4ba
7
- data.tar.gz: 15442b56f31f99c8cf17db145057b11d6345f912146e4569de628098b6b0574e5bcc3aed9c1791d58cbc5cfc83a29ba5c4685798dbbf579b5225ad933bc900cc
6
+ metadata.gz: f68e2ca1627bcb9cf8d1325000b6b00d96b7f856227b1ea2a768962312def6bbd29dd4cce478c286ad1a610915fd5b0bbe90310fd0c370a28ca916681f2e4b35
7
+ data.tar.gz: 0cf68cb50e9886e174d55844f1b68533795c23cb3a38824bf5c9c3d8c90e2b5454317db10b681ca1328eb483a0a7fca1222d18d1b20106e1a81ecfab336452fc
data/lib/mbox2csv.rb CHANGED
@@ -1,12 +1,14 @@
1
1
  require 'base64'
2
2
  require 'csv'
3
3
  require 'mail'
4
+ require 'fileutils'
5
+ require 'ruby-progressbar'
4
6
 
5
7
  module Mbox2CSV
6
- # Main class
8
+ # Main class for parsing MBOX files and saving email data and statistics to CSV files.
7
9
  class MboxParser
8
10
  # Initializes the MboxParser with file paths for the MBOX file, output CSV file,
9
- # and statistics CSV files for sender statistics.
11
+ # and statistics CSV files for sender and recipient statistics.
10
12
  #
11
13
  # @param [String] mbox_file Path to the MBOX file to be parsed.
12
14
  # @param [String] csv_file Path to the output CSV file where parsed email data will be saved.
@@ -18,30 +20,35 @@ module Mbox2CSV
18
20
  @statistics = EmailStatistics.new
19
21
  @stats_csv_file = stats_csv_file
20
22
  @recipient_stats_csv_file = recipient_stats_csv_file
23
+ @senders_folder = 'senders/'
24
+ FileUtils.mkdir_p(@senders_folder) # Create the senders folder if it doesn't exist
21
25
  end
22
26
 
23
27
  # Parses the MBOX file and writes the email data to the specified CSV file.
24
28
  # It also saves sender and recipient statistics to separate CSV files.
29
+ # A progress bar is displayed during the parsing process.
25
30
  def parse
31
+ total_lines = File.foreach(@mbox_file).inject(0) { |c, _line| c + 1 }
32
+ progressbar = ProgressBar.create(title: "Parsing Emails", total: total_lines, format: "%t: |%B| %p%%")
33
+
26
34
  CSV.open(@csv_file, 'w') do |csv|
27
- # Write CSV header
28
35
  csv << ['From', 'To', 'Subject', 'Date', 'Body']
29
36
 
30
37
  File.open(@mbox_file, 'r') do |mbox|
31
38
  buffer = ""
32
39
  mbox.each_line do |line|
40
+ progressbar.increment
33
41
  if line.start_with?("From ")
34
42
  process_email_block(buffer, csv) unless buffer.empty?
35
- buffer = "" # Reset buffer
43
+ buffer = ""
36
44
  end
37
- buffer << line # Append line to buffer
45
+ buffer << line
38
46
  end
39
- process_email_block(buffer, csv) unless buffer.empty? # Process last email block
47
+ process_email_block(buffer, csv) unless buffer.empty?
40
48
  end
41
49
  end
42
50
  puts "Parsing completed. Data saved to #{@csv_file}"
43
51
 
44
- # Save and print statistics after parsing
45
52
  @statistics.save_sender_statistics(@stats_csv_file)
46
53
  @statistics.save_recipient_statistics(@recipient_stats_csv_file)
47
54
  rescue => e
@@ -51,7 +58,8 @@ module Mbox2CSV
51
58
  private
52
59
 
53
60
  # Processes an individual email block from the MBOX file, extracts the email fields,
54
- # and writes them to the CSV. Also records email statistics for analysis.
61
+ # and writes them to the CSV. Also records email statistics for analysis and creates
62
+ # sender-specific CSV files.
55
63
  #
56
64
  # @param [String] buffer The email block from the MBOX file.
57
65
  # @param [CSV] csv The CSV object where email data is written.
@@ -62,14 +70,13 @@ module Mbox2CSV
62
70
  to = ensure_utf8(mail.to ? mail.to.join(", ") : '', 'UTF-8')
63
71
  subject = ensure_utf8(mail.subject ? mail.subject : '', 'UTF-8')
64
72
  date = ensure_utf8(mail.date ? mail.date.to_s : '', 'UTF-8')
65
-
66
73
  body = decode_body(mail)
67
74
 
68
- # Write to CSV
69
75
  csv << [from, to, subject, date, body]
70
76
 
71
- # Record email for statistics
72
77
  @statistics.record_email(from, to, body.length)
78
+
79
+ save_email_to_sender_csv(from, to, subject, date, body)
73
80
  rescue => e
74
81
  puts "Error processing email block: #{e.message}"
75
82
  end
@@ -108,15 +115,45 @@ module Mbox2CSV
108
115
  text = text.force_encoding(charset) if charset
109
116
  text.encode('UTF-8', invalid: :replace, undef: :replace, replace: '?')
110
117
  end
118
+
119
+ # Saves an email to a sender-specific CSV file.
120
+ #
121
+ # @param [String] from The sender of the email.
122
+ # @param [String] to The recipient(s) of the email.
123
+ # @param [String] subject The subject of the email.
124
+ # @param [String] date The date of the email.
125
+ # @param [String] body The body of the email.
126
+ def save_email_to_sender_csv(from, to, subject, date, body)
127
+ return if from.empty?
128
+
129
+ sender_file = File.join(@senders_folder, "#{sanitize_filename(from)}.csv")
130
+
131
+ CSV.open(sender_file, 'a') do |csv|
132
+ if File.size(sender_file).zero?
133
+ csv << ['From', 'To', 'Subject', 'Date', 'Body'] # Add header if file is new
134
+ end
135
+ csv << [from, to, subject, date, body]
136
+ end
137
+ rescue => e
138
+ puts "Error writing to sender CSV file for #{from}: #{e.message}"
139
+ end
140
+
141
+ # Sanitizes filenames by replacing invalid characters with underscores.
142
+ #
143
+ # @param [String] filename The input filename.
144
+ # @return [String] A sanitized version of the filename.
145
+ def sanitize_filename(filename)
146
+ filename.gsub(/[^0-9A-Za-z\-]/, '_')
147
+ end
111
148
  end
112
149
 
113
150
  # The EmailStatistics class is responsible for gathering and writing statistics related to emails.
114
151
  # It tracks sender frequency, recipient frequency, and calculates the average email body length per sender.
115
152
  class EmailStatistics
116
153
  def initialize
117
- @sender_counts = Hash.new(0) # Keeps count of emails per sender
118
- @recipient_counts = Hash.new(0) # Keeps count of emails per recipient
119
- @body_lengths = Hash.new { |hash, key| hash[key] = [] } # Stores body lengths per sender
154
+ @sender_counts = Hash.new(0)
155
+ @recipient_counts = Hash.new(0)
156
+ @body_lengths = Hash.new { |hash, key| hash[key] = [] }
120
157
  end
121
158
 
122
159
  # Records an email's sender, recipients, and body length for statistical purposes.
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mbox2csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - firefly-cpp
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-09-21 00:00:00.000000000 Z
11
+ date: 2025-01-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: base64
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: 2.8.1
55
+ - !ruby/object:Gem::Dependency
56
+ name: ruby-progressbar
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.11'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.11'
55
69
  description:
56
70
  email:
57
71
  - iztok@iztok-jr-fister.eu
@@ -62,13 +76,13 @@ files:
62
76
  - LICENSE
63
77
  - README.md
64
78
  - lib/mbox2csv.rb
65
- homepage: https://codeberg.org/firefly-cpp/mbox2csv
79
+ homepage: https://github.com/firefly-cpp/mbox2csv
66
80
  licenses:
67
81
  - MIT
68
82
  metadata:
69
- homepage_uri: https://codeberg.org/firefly-cpp/mbox2csv
70
- source_code_uri: https://codeberg.org/firefly-cpp/mbox2csv
71
- changelog_uri: https://codeberg.org/firefly-cpp/mbox2csv
83
+ homepage_uri: https://github.com/firefly-cpp/mbox2csv
84
+ source_code_uri: https://github.com/firefly-cpp/mbox2csv
85
+ changelog_uri: https://github.com/firefly-cpp/mbox2csv
72
86
  post_install_message:
73
87
  rdoc_options: []
74
88
  require_paths:
@@ -84,7 +98,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
84
98
  - !ruby/object:Gem::Version
85
99
  version: '0'
86
100
  requirements: []
87
- rubygems_version: 3.5.16
101
+ rubygems_version: 3.5.22
88
102
  signing_key:
89
103
  specification_version: 4
90
104
  summary: Parse MBOX files and export email data into CSV format