whatsapp-chat-parser 0.1.0 → 0.1.2

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c63551e684919c385110d12bcbbe8e15bae0eef6e4ffe31a0617970f45f24bd6
4
- data.tar.gz: a510d3a34c51154faae21a8f69d7629e6f0f0ca103a4cfddfbb7571013cc8d3c
3
+ metadata.gz: 4e92031c7088862cb6488753801997b0cb67453b61275068a4b2e430d85f83a0
4
+ data.tar.gz: 0df5654a6a3590b01596d0fa00e7081699a498db115ab4b8f337b4ababc0c6f2
5
5
  SHA512:
6
- metadata.gz: c35b864edcaef50faed6aff4c32d80c8d12e7a624819165766a3e935052b7148aff429dfb674c714dae32b938b2f0e3a654e0fbf8628f2147ead5c46e46dfaa6
7
- data.tar.gz: 16c197ee4c6ffc1f1e5ecfc94aadabde22a2b5329ab0b7059ffc2897b18afeb3f6b879a799649b98ac162506162d9a965a0ecfcd6e9423c0f11192a2f9880d48
6
+ metadata.gz: 6167122bde0e631130963719035eb70bce2764c59664c03f451f0ea4c7108511f1bff5a26ee1a3b88706763413b2f20d6c1eff96b91bd13f2292a9689810ef49
7
+ data.tar.gz: fed7b89133841f13cbef30a270303caea6895db23e8d4b64df6d4350b2d4dfcce1baf83d796a67519c8dae317273673d4421bfb9a79ceeb5d688a08004bbcdc9
data/CHANGELOG.md CHANGED
@@ -5,6 +5,18 @@ All notable changes to this project will be documented in this file.
5
5
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
6
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
7
 
8
+ ## [0.1.2] - 2026-02-26
9
+
10
+ ### Fixed
11
+ - Fixed `Encoding::CompatibilityError` caused by stripping an unscrubbed message line
12
+ - Fixed `FrozenError` caused by mutating a frozen `message` string during message line accumulation
13
+
14
+ ## [0.1.1] - 2026-02-22
15
+
16
+ ### Changed
17
+ - Refactored platform-specific parsers (`Android`, `Ios`) into a shared `Base` module to improve code maintainability (DRY).
18
+ - Optimized timestamp extraction logic in `Base` module to handle optional seconds across all platforms.
19
+
8
20
  ## [0.1.0] - 2026-02-18
9
21
 
10
22
  ### Added
data/README.md CHANGED
@@ -1,5 +1,8 @@
1
1
  # WhatsApp Chat Parser
2
2
 
3
+ [![Gem Version](https://badge.fury.io/rb/whatsapp-chat-parser.svg?icon=si%3Arubygems)](https://badge.fury.io/rb/whatsapp-chat-parser)
4
+ [![CI](https://github.com/emmaakachukwu/whatsapp-chat-parser-rb/actions/workflows/ci.yml/badge.svg)](https://github.com/emmaakachukwu/whatsapp-chat-parser-rb/actions/workflows/ci.yml)
5
+
3
6
  A Ruby library that parses exported WhatsApp chat `.txt` files and converts them into structured, machine-readable data. Designed for downstream processing such as analytics, ETL pipelines, storage, and transformation - not for rendering UI or interacting with the WhatsApp API.
4
7
 
5
8
  ## Features
@@ -22,7 +22,7 @@ module WhatsappChatParser
22
22
  )
23
23
  end
24
24
 
25
- str
25
+ str.scrub(' ').squeeze(' ')
26
26
  end
27
27
 
28
28
  private
@@ -30,7 +30,7 @@ module WhatsappChatParser
30
30
  end
31
31
 
32
32
  def accumulate_messages(io, &block)
33
- message = ''
33
+ message = +''
34
34
 
35
35
  io.each_line do |line|
36
36
  if message_starts_here?(line)
@@ -4,81 +4,29 @@ module WhatsappChatParser
4
4
  module Platforms
5
5
  # Parser for Android WhatsApp chat exports.
6
6
  module Android
7
+ extend Base
8
+
7
9
  class << self
8
10
  # Parses a line from an Android export.
9
11
  # @param line [String] The exported line.
10
12
  # @return [Models::Message, nil]
11
13
  def parse(line)
12
- match = line.match(Pattern.regex)
13
- return unless match
14
-
15
- timestamp = extract_timestamp(match)
16
- author = extract(match, :author)
17
- body = extract(match, :body)
18
-
19
- Models::Message.new(timestamp: timestamp, author: author, body: body, platform: :android)
14
+ super(line, :android, Pattern)
20
15
  end
21
16
 
22
17
  # Checks if a line matches the Android format.
23
18
  # @param line [String]
24
19
  # @return [Boolean]
25
20
  def matches?(line)
26
- Pattern.regex.match?(line)
21
+ super(line, Pattern)
27
22
  end
28
23
 
29
24
  private
30
25
 
31
- def extract(match, key)
32
- index = Pattern::PATTERNS.keys.index(key)
33
- match[index + 1]
34
- end
35
-
36
- def extract_timestamp(match)
37
- date_components = extract_date_components(match)
38
- time_components = extract_time_components(match)
39
-
40
- format_sql_timestamp(date_components, time_components)
41
- end
42
-
43
- def extract_date_components(match)
44
- month = extract(match, :month)
45
- day = extract(match, :day)
46
- year = extract(match, :year).to_i + 2000
47
-
48
- { month: month, day: day, year: year }
49
- end
50
-
51
- def extract_time_components(match)
52
- hour = extract(match, :hour).to_i
53
- minute = extract(match, :minute).to_i
54
- meridiem = extract(match, :meridiem)
55
- hour = convert_to_24_hour(hour, meridiem)
56
-
57
- { hour: hour, minute: minute }
58
- end
59
-
60
- def convert_to_24_hour(hour, meridiem)
61
- meridiem = meridiem.upcase
62
- if meridiem == 'PM' && hour < 12
63
- hour + 12
64
- elsif meridiem == 'AM' && hour == 12
65
- 0
66
- else
67
- hour
68
- end
69
- end
70
-
71
- def format_sql_timestamp(date, time)
72
- # rubocop:disable Layout/HashAlignment
73
- format(
74
- '%<year>04d-%<month>02d-%<day>02d %<hour>02d:%<minute>02d:00',
75
- year: date[:year],
76
- month: date[:month],
77
- day: date[:day],
78
- hour: time[:hour],
79
- minute: time[:minute]
80
- )
81
- # rubocop:enable Layout/HashAlignment
26
+ def extract_date_components(match, pattern_module)
27
+ components = super
28
+ components[:year] = components[:year].to_i + 2000
29
+ components
82
30
  end
83
31
  end
84
32
  end
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
+ module WhatsappChatParser
4
+ module Platforms
5
+ # Base module for shared platform parser logic
6
+ module Base
7
+ # Parses a line from an export
8
+ # @param line [String] The exported line
9
+ # @param platform [Symbol] The platform identifier
10
+ # @param pattern_module [Module] The pattern module for the platform
11
+ # @return [Models::Message, nil]
12
+ def parse(line, platform, pattern_module)
13
+ match = line.match(pattern_module.regex)
14
+ return unless match
15
+
16
+ timestamp = extract_timestamp(match, pattern_module)
17
+ author = extract(match, pattern_module, :author)
18
+ body = extract(match, pattern_module, :body)
19
+
20
+ Models::Message.new(timestamp: timestamp, author: author, body: body, platform: platform)
21
+ end
22
+
23
+ # Checks if a line matches the platform format
24
+ # @param line [String]
25
+ # @param pattern_module [Module]
26
+ # @return [Boolean]
27
+ def matches?(line, pattern_module)
28
+ pattern_module.regex.match?(line)
29
+ end
30
+
31
+ private
32
+
33
+ # Extracts a value from a match based on a pattern key
34
+ # @param match [MatchData]
35
+ # @param pattern_module [Module]
36
+ # @param key [Symbol]
37
+ # @return [String, nil]
38
+ def extract(match, pattern_module, key)
39
+ index = pattern_module::PATTERNS.keys.index(key)
40
+ return unless index
41
+
42
+ match[index + 1]
43
+ end
44
+
45
+ # Extracts and formats a timestamp from a match
46
+ # @param match [MatchData]
47
+ # @param pattern_module [Module]
48
+ # @return [String]
49
+ def extract_timestamp(match, pattern_module)
50
+ date_components = extract_date_components(match, pattern_module)
51
+ time_components = extract_time_components(match, pattern_module)
52
+
53
+ format_sql_timestamp(date_components, time_components)
54
+ end
55
+
56
+ # Extracts date components from a match
57
+ # @param match [MatchData]
58
+ # @param pattern_module [Module]
59
+ # @return [Hash]
60
+ def extract_date_components(match, pattern_module)
61
+ {
62
+ month: extract(match, pattern_module, :month),
63
+ day: extract(match, pattern_module, :day),
64
+ year: extract(match, pattern_module, :year)
65
+ }
66
+ end
67
+
68
+ # Extracts time components from a match
69
+ # @param match [MatchData]
70
+ # @param pattern_module [Module]
71
+ # @return [Hash]
72
+ def extract_time_components(match, pattern_module)
73
+ hour = extract(match, pattern_module, :hour).to_i
74
+ minute = extract(match, pattern_module, :minute).to_i
75
+ second = extract(match, pattern_module, :second).to_i
76
+ meridiem = extract(match, pattern_module, :meridiem)
77
+ hour = convert_to_24_hour(hour, meridiem)
78
+
79
+ { hour: hour, minute: minute, second: second }
80
+ end
81
+
82
+ # Converts an hour to 24-hour format
83
+ # @param hour [Integer]
84
+ # @param meridiem [String, nil]
85
+ # @return [Integer]
86
+ def convert_to_24_hour(hour, meridiem)
87
+ return hour unless meridiem
88
+
89
+ meridiem = meridiem.upcase
90
+ if meridiem == 'PM' && hour < 12
91
+ hour + 12
92
+ elsif meridiem == 'AM' && hour == 12
93
+ 0
94
+ else
95
+ hour
96
+ end
97
+ end
98
+
99
+ # Formats date and time components into an SQL timestamp
100
+ # @param date [Hash]
101
+ # @param time [Hash]
102
+ # @return [String]
103
+ def format_sql_timestamp(date, time)
104
+ # rubocop:disable Layout/HashAlignment
105
+ format(
106
+ '%<year>04d-%<month>02d-%<day>02d %<hour>02d:%<minute>02d:%<second>02d',
107
+ year: date[:year],
108
+ month: date[:month],
109
+ day: date[:day],
110
+ hour: time[:hour],
111
+ minute: time[:minute],
112
+ second: time[:second] || 0
113
+ )
114
+ # rubocop:enable Layout/HashAlignment
115
+ end
116
+ end
117
+ end
118
+ end
@@ -2,84 +2,23 @@
2
2
 
3
3
  module WhatsappChatParser
4
4
  module Platforms
5
- # Parser for iOS WhatsApp chat exports.
5
+ # Parser for iOS WhatsApp chat exports
6
6
  module Ios
7
+ extend Base
8
+
7
9
  class << self
8
- # Parses a line from an iOS export.
9
- # @param line [String] The exported line.
10
+ # Parses a line from an iOS export
11
+ # @param line [String] The exported line
10
12
  # @return [Models::Message, nil]
11
13
  def parse(line)
12
- match = line.match(Pattern.regex)
13
- return unless match
14
-
15
- timestamp = extract_timestamp(match)
16
- author = extract(match, :author)
17
- body = extract(match, :body)
18
-
19
- Models::Message.new(timestamp: timestamp, author: author, body: body, platform: :ios)
14
+ super(line, :ios, Pattern)
20
15
  end
21
16
 
22
- # Checks if a line matches the iOS format.
17
+ # Checks if a line matches the iOS format
23
18
  # @param line [String]
24
19
  # @return [Boolean]
25
20
  def matches?(line)
26
- Pattern.regex.match?(line)
27
- end
28
-
29
- private
30
-
31
- def extract(match, key)
32
- index = Pattern::PATTERNS.keys.index(key)
33
- match[index + 1]
34
- end
35
-
36
- def extract_timestamp(match)
37
- date_components = extract_date_components(match)
38
- time_components = extract_time_components(match)
39
-
40
- format_sql_timestamp(date_components, time_components)
41
- end
42
-
43
- def extract_date_components(match)
44
- month = extract(match, :month)
45
- day = extract(match, :day)
46
- year = extract(match, :year)
47
-
48
- { month: month, day: day, year: year }
49
- end
50
-
51
- def extract_time_components(match)
52
- hour = extract(match, :hour).to_i
53
- minute = extract(match, :minute).to_i
54
- second = extract(match, :second)
55
- meridiem = extract(match, :meridiem)
56
- hour = convert_to_24_hour(hour, meridiem)
57
-
58
- { hour: hour, minute: minute, second: second }
59
- end
60
-
61
- def convert_to_24_hour(hour, meridiem)
62
- if meridiem == 'PM' && hour < 12
63
- hour + 12
64
- elsif meridiem == 'AM' && hour == 12
65
- 0
66
- else
67
- hour
68
- end
69
- end
70
-
71
- def format_sql_timestamp(date, time)
72
- # rubocop:disable Layout/HashAlignment
73
- format(
74
- '%<year>04d-%<month>02d-%<day>02d %<hour>02d:%<minute>02d:%<second>02d',
75
- year: date[:year],
76
- month: date[:month],
77
- day: date[:day],
78
- hour: time[:hour],
79
- minute: time[:minute],
80
- second: time[:second]
81
- )
82
- # rubocop:enable Layout/HashAlignment
21
+ super(line, Pattern)
83
22
  end
84
23
  end
85
24
  end
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative 'encoding'
4
+ require_relative 'platforms/base'
4
5
  require_relative 'platforms/android'
5
6
  require_relative 'platforms/ios'
6
7
  require_relative 'platforms/android/pattern'
@@ -31,7 +32,7 @@ module WhatsappChatParser
31
32
  end
32
33
 
33
34
  def sanitize(line)
34
- Encoding.normalize_to_utf8(line).strip.scrub(' ').squeeze(' ')
35
+ Encoding.normalize_to_utf8(line).strip
35
36
  end
36
37
  end
37
38
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module WhatsappChatParser
4
- VERSION = '0.1.0'
4
+ VERSION = '0.1.2'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: whatsapp-chat-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Emmanuel Akachukwu
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-02-18 00:00:00.000000000 Z
11
+ date: 2026-02-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rspec
@@ -100,6 +100,7 @@ files:
100
100
  - lib/whatsapp-chat-parser/platforms.rb
101
101
  - lib/whatsapp-chat-parser/platforms/android.rb
102
102
  - lib/whatsapp-chat-parser/platforms/android/pattern.rb
103
+ - lib/whatsapp-chat-parser/platforms/base.rb
103
104
  - lib/whatsapp-chat-parser/platforms/ios.rb
104
105
  - lib/whatsapp-chat-parser/platforms/ios/pattern.rb
105
106
  - lib/whatsapp-chat-parser/platforms/pattern_helpers.rb
@@ -110,9 +111,9 @@ licenses:
110
111
  metadata:
111
112
  homepage_uri: https://github.com/emmaakachukwu/whatsapp-chat-parser-rb
112
113
  bug_tracker_uri: https://github.com/emmaakachukwu/whatsapp-chat-parser-rb/issues
113
- changelog_uri: https://github.com/emmaakachukwu/whatsapp-chat-parser-rb/blob/v0.1.0/CHANGELOG.md
114
- documentation_uri: https://www.rubydoc.info/gems/whatsapp-chat-parser/0.1.0
115
- source_code_uri: https://github.com/emmaakachukwu/whatsapp-chat-parser-rb/tree/v0.1.0
114
+ changelog_uri: https://github.com/emmaakachukwu/whatsapp-chat-parser-rb/blob/v0.1.2/CHANGELOG.md
115
+ documentation_uri: https://www.rubydoc.info/gems/whatsapp-chat-parser/0.1.2
116
+ source_code_uri: https://github.com/emmaakachukwu/whatsapp-chat-parser-rb/tree/v0.1.2
116
117
  keywords: whatsapp chat parser whatsapp-chat-parser text export android ios
117
118
  rubygems_mfa_required: 'true'
118
119
  post_install_message: