whatsapp-chat-parser 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -0
- data/README.md +3 -0
- data/lib/whatsapp-chat-parser/encoding.rb +1 -1
- data/lib/whatsapp-chat-parser/file_processor.rb +1 -1
- data/lib/whatsapp-chat-parser/platforms/android.rb +8 -60
- data/lib/whatsapp-chat-parser/platforms/base.rb +118 -0
- data/lib/whatsapp-chat-parser/platforms/ios.rb +8 -69
- data/lib/whatsapp-chat-parser/platforms.rb +2 -1
- data/lib/whatsapp-chat-parser/version.rb +1 -1
- metadata +6 -5
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4e92031c7088862cb6488753801997b0cb67453b61275068a4b2e430d85f83a0
|
|
4
|
+
data.tar.gz: 0df5654a6a3590b01596d0fa00e7081699a498db115ab4b8f337b4ababc0c6f2
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6167122bde0e631130963719035eb70bce2764c59664c03f451f0ea4c7108511f1bff5a26ee1a3b88706763413b2f20d6c1eff96b91bd13f2292a9689810ef49
|
|
7
|
+
data.tar.gz: fed7b89133841f13cbef30a270303caea6895db23e8d4b64df6d4350b2d4dfcce1baf83d796a67519c8dae317273673d4421bfb9a79ceeb5d688a08004bbcdc9
|
data/CHANGELOG.md
CHANGED
|
@@ -5,6 +5,18 @@ All notable changes to this project will be documented in this file.
|
|
|
5
5
|
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
6
|
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
7
|
|
|
8
|
+
## [0.1.2] - 2026-02-26
|
|
9
|
+
|
|
10
|
+
### Fixed
|
|
11
|
+
- Fixed `Encoding::CompatibilityError` caused by stripping an unscrubbed message line
|
|
12
|
+
- Fixed `FrozenError` caused by mutating a frozen `message` string during message line accumulation
|
|
13
|
+
|
|
14
|
+
## [0.1.1] - 2026-02-22
|
|
15
|
+
|
|
16
|
+
### Changed
|
|
17
|
+
- Refactored platform-specific parsers (`Android`, `Ios`) into a shared `Base` module to improve code maintainability (DRY).
|
|
18
|
+
- Optimized timestamp extraction logic in `Base` module to handle optional seconds across all platforms.
|
|
19
|
+
|
|
8
20
|
## [0.1.0] - 2026-02-18
|
|
9
21
|
|
|
10
22
|
### Added
|
data/README.md
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
# WhatsApp Chat Parser
|
|
2
2
|
|
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/whatsapp-chat-parser.svg)](https://badge.fury.io/rb/whatsapp-chat-parser)
|
|
4
|
+
[![CI](https://github.com/emmaakachukwu/whatsapp-chat-parser-rb/actions/workflows/ci.yml/badge.svg)](https://github.com/emmaakachukwu/whatsapp-chat-parser-rb/actions/workflows/ci.yml)
|
|
5
|
+
|
|
3
6
|
A Ruby library that parses exported WhatsApp chat `.txt` files and converts them into structured, machine-readable data. Designed for downstream processing such as analytics, ETL pipelines, storage, and transformation - not for rendering UI or interacting with the WhatsApp API.
|
|
4
7
|
|
|
5
8
|
## Features
|
|
@@ -4,81 +4,29 @@ module WhatsappChatParser
|
|
|
4
4
|
module Platforms
|
|
5
5
|
# Parser for Android WhatsApp chat exports.
|
|
6
6
|
module Android
|
|
7
|
+
extend Base
|
|
8
|
+
|
|
7
9
|
class << self
|
|
8
10
|
# Parses a line from an Android export.
|
|
9
11
|
# @param line [String] The exported line.
|
|
10
12
|
# @return [Models::Message, nil]
|
|
11
13
|
def parse(line)
|
|
12
|
-
|
|
13
|
-
return unless match
|
|
14
|
-
|
|
15
|
-
timestamp = extract_timestamp(match)
|
|
16
|
-
author = extract(match, :author)
|
|
17
|
-
body = extract(match, :body)
|
|
18
|
-
|
|
19
|
-
Models::Message.new(timestamp: timestamp, author: author, body: body, platform: :android)
|
|
14
|
+
super(line, :android, Pattern)
|
|
20
15
|
end
|
|
21
16
|
|
|
22
17
|
# Checks if a line matches the Android format.
|
|
23
18
|
# @param line [String]
|
|
24
19
|
# @return [Boolean]
|
|
25
20
|
def matches?(line)
|
|
26
|
-
|
|
21
|
+
super(line, Pattern)
|
|
27
22
|
end
|
|
28
23
|
|
|
29
24
|
private
|
|
30
25
|
|
|
31
|
-
def
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def extract_timestamp(match)
|
|
37
|
-
date_components = extract_date_components(match)
|
|
38
|
-
time_components = extract_time_components(match)
|
|
39
|
-
|
|
40
|
-
format_sql_timestamp(date_components, time_components)
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
def extract_date_components(match)
|
|
44
|
-
month = extract(match, :month)
|
|
45
|
-
day = extract(match, :day)
|
|
46
|
-
year = extract(match, :year).to_i + 2000
|
|
47
|
-
|
|
48
|
-
{ month: month, day: day, year: year }
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
def extract_time_components(match)
|
|
52
|
-
hour = extract(match, :hour).to_i
|
|
53
|
-
minute = extract(match, :minute).to_i
|
|
54
|
-
meridiem = extract(match, :meridiem)
|
|
55
|
-
hour = convert_to_24_hour(hour, meridiem)
|
|
56
|
-
|
|
57
|
-
{ hour: hour, minute: minute }
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
def convert_to_24_hour(hour, meridiem)
|
|
61
|
-
meridiem = meridiem.upcase
|
|
62
|
-
if meridiem == 'PM' && hour < 12
|
|
63
|
-
hour + 12
|
|
64
|
-
elsif meridiem == 'AM' && hour == 12
|
|
65
|
-
0
|
|
66
|
-
else
|
|
67
|
-
hour
|
|
68
|
-
end
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
def format_sql_timestamp(date, time)
|
|
72
|
-
# rubocop:disable Layout/HashAlignment
|
|
73
|
-
format(
|
|
74
|
-
'%<year>04d-%<month>02d-%<day>02d %<hour>02d:%<minute>02d:00',
|
|
75
|
-
year: date[:year],
|
|
76
|
-
month: date[:month],
|
|
77
|
-
day: date[:day],
|
|
78
|
-
hour: time[:hour],
|
|
79
|
-
minute: time[:minute]
|
|
80
|
-
)
|
|
81
|
-
# rubocop:enable Layout/HashAlignment
|
|
26
|
+
def extract_date_components(match, pattern_module)
|
|
27
|
+
components = super
|
|
28
|
+
components[:year] = components[:year].to_i + 2000
|
|
29
|
+
components
|
|
82
30
|
end
|
|
83
31
|
end
|
|
84
32
|
end
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module WhatsappChatParser
|
|
4
|
+
module Platforms
|
|
5
|
+
# Base module for shared platform parser logic
|
|
6
|
+
module Base
|
|
7
|
+
# Parses a line from an export
|
|
8
|
+
# @param line [String] The exported line
|
|
9
|
+
# @param platform [Symbol] The platform identifier
|
|
10
|
+
# @param pattern_module [Module] The pattern module for the platform
|
|
11
|
+
# @return [Models::Message, nil]
|
|
12
|
+
def parse(line, platform, pattern_module)
|
|
13
|
+
match = line.match(pattern_module.regex)
|
|
14
|
+
return unless match
|
|
15
|
+
|
|
16
|
+
timestamp = extract_timestamp(match, pattern_module)
|
|
17
|
+
author = extract(match, pattern_module, :author)
|
|
18
|
+
body = extract(match, pattern_module, :body)
|
|
19
|
+
|
|
20
|
+
Models::Message.new(timestamp: timestamp, author: author, body: body, platform: platform)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# Checks if a line matches the platform format
|
|
24
|
+
# @param line [String]
|
|
25
|
+
# @param pattern_module [Module]
|
|
26
|
+
# @return [Boolean]
|
|
27
|
+
def matches?(line, pattern_module)
|
|
28
|
+
pattern_module.regex.match?(line)
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
private
|
|
32
|
+
|
|
33
|
+
# Extracts a value from a match based on a pattern key
|
|
34
|
+
# @param match [MatchData]
|
|
35
|
+
# @param pattern_module [Module]
|
|
36
|
+
# @param key [Symbol]
|
|
37
|
+
# @return [String, nil]
|
|
38
|
+
def extract(match, pattern_module, key)
|
|
39
|
+
index = pattern_module::PATTERNS.keys.index(key)
|
|
40
|
+
return unless index
|
|
41
|
+
|
|
42
|
+
match[index + 1]
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Extracts and formats a timestamp from a match
|
|
46
|
+
# @param match [MatchData]
|
|
47
|
+
# @param pattern_module [Module]
|
|
48
|
+
# @return [String]
|
|
49
|
+
def extract_timestamp(match, pattern_module)
|
|
50
|
+
date_components = extract_date_components(match, pattern_module)
|
|
51
|
+
time_components = extract_time_components(match, pattern_module)
|
|
52
|
+
|
|
53
|
+
format_sql_timestamp(date_components, time_components)
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# Extracts date components from a match
|
|
57
|
+
# @param match [MatchData]
|
|
58
|
+
# @param pattern_module [Module]
|
|
59
|
+
# @return [Hash]
|
|
60
|
+
def extract_date_components(match, pattern_module)
|
|
61
|
+
{
|
|
62
|
+
month: extract(match, pattern_module, :month),
|
|
63
|
+
day: extract(match, pattern_module, :day),
|
|
64
|
+
year: extract(match, pattern_module, :year)
|
|
65
|
+
}
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# Extracts time components from a match
|
|
69
|
+
# @param match [MatchData]
|
|
70
|
+
# @param pattern_module [Module]
|
|
71
|
+
# @return [Hash]
|
|
72
|
+
def extract_time_components(match, pattern_module)
|
|
73
|
+
hour = extract(match, pattern_module, :hour).to_i
|
|
74
|
+
minute = extract(match, pattern_module, :minute).to_i
|
|
75
|
+
second = extract(match, pattern_module, :second).to_i
|
|
76
|
+
meridiem = extract(match, pattern_module, :meridiem)
|
|
77
|
+
hour = convert_to_24_hour(hour, meridiem)
|
|
78
|
+
|
|
79
|
+
{ hour: hour, minute: minute, second: second }
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
# Converts an hour to 24-hour format
|
|
83
|
+
# @param hour [Integer]
|
|
84
|
+
# @param meridiem [String, nil]
|
|
85
|
+
# @return [Integer]
|
|
86
|
+
def convert_to_24_hour(hour, meridiem)
|
|
87
|
+
return hour unless meridiem
|
|
88
|
+
|
|
89
|
+
meridiem = meridiem.upcase
|
|
90
|
+
if meridiem == 'PM' && hour < 12
|
|
91
|
+
hour + 12
|
|
92
|
+
elsif meridiem == 'AM' && hour == 12
|
|
93
|
+
0
|
|
94
|
+
else
|
|
95
|
+
hour
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
# Formats date and time components into an SQL timestamp
|
|
100
|
+
# @param date [Hash]
|
|
101
|
+
# @param time [Hash]
|
|
102
|
+
# @return [String]
|
|
103
|
+
def format_sql_timestamp(date, time)
|
|
104
|
+
# rubocop:disable Layout/HashAlignment
|
|
105
|
+
format(
|
|
106
|
+
'%<year>04d-%<month>02d-%<day>02d %<hour>02d:%<minute>02d:%<second>02d',
|
|
107
|
+
year: date[:year],
|
|
108
|
+
month: date[:month],
|
|
109
|
+
day: date[:day],
|
|
110
|
+
hour: time[:hour],
|
|
111
|
+
minute: time[:minute],
|
|
112
|
+
second: time[:second] || 0
|
|
113
|
+
)
|
|
114
|
+
# rubocop:enable Layout/HashAlignment
|
|
115
|
+
end
|
|
116
|
+
end
|
|
117
|
+
end
|
|
118
|
+
end
|
|
@@ -2,84 +2,23 @@
|
|
|
2
2
|
|
|
3
3
|
module WhatsappChatParser
|
|
4
4
|
module Platforms
|
|
5
|
-
# Parser for iOS WhatsApp chat exports
|
|
5
|
+
# Parser for iOS WhatsApp chat exports
|
|
6
6
|
module Ios
|
|
7
|
+
extend Base
|
|
8
|
+
|
|
7
9
|
class << self
|
|
8
|
-
# Parses a line from an iOS export
|
|
9
|
-
# @param line [String] The exported line
|
|
10
|
+
# Parses a line from an iOS export
|
|
11
|
+
# @param line [String] The exported line
|
|
10
12
|
# @return [Models::Message, nil]
|
|
11
13
|
def parse(line)
|
|
12
|
-
|
|
13
|
-
return unless match
|
|
14
|
-
|
|
15
|
-
timestamp = extract_timestamp(match)
|
|
16
|
-
author = extract(match, :author)
|
|
17
|
-
body = extract(match, :body)
|
|
18
|
-
|
|
19
|
-
Models::Message.new(timestamp: timestamp, author: author, body: body, platform: :ios)
|
|
14
|
+
super(line, :ios, Pattern)
|
|
20
15
|
end
|
|
21
16
|
|
|
22
|
-
# Checks if a line matches the iOS format
|
|
17
|
+
# Checks if a line matches the iOS format
|
|
23
18
|
# @param line [String]
|
|
24
19
|
# @return [Boolean]
|
|
25
20
|
def matches?(line)
|
|
26
|
-
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
private
|
|
30
|
-
|
|
31
|
-
def extract(match, key)
|
|
32
|
-
index = Pattern::PATTERNS.keys.index(key)
|
|
33
|
-
match[index + 1]
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
def extract_timestamp(match)
|
|
37
|
-
date_components = extract_date_components(match)
|
|
38
|
-
time_components = extract_time_components(match)
|
|
39
|
-
|
|
40
|
-
format_sql_timestamp(date_components, time_components)
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
def extract_date_components(match)
|
|
44
|
-
month = extract(match, :month)
|
|
45
|
-
day = extract(match, :day)
|
|
46
|
-
year = extract(match, :year)
|
|
47
|
-
|
|
48
|
-
{ month: month, day: day, year: year }
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
def extract_time_components(match)
|
|
52
|
-
hour = extract(match, :hour).to_i
|
|
53
|
-
minute = extract(match, :minute).to_i
|
|
54
|
-
second = extract(match, :second)
|
|
55
|
-
meridiem = extract(match, :meridiem)
|
|
56
|
-
hour = convert_to_24_hour(hour, meridiem)
|
|
57
|
-
|
|
58
|
-
{ hour: hour, minute: minute, second: second }
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
def convert_to_24_hour(hour, meridiem)
|
|
62
|
-
if meridiem == 'PM' && hour < 12
|
|
63
|
-
hour + 12
|
|
64
|
-
elsif meridiem == 'AM' && hour == 12
|
|
65
|
-
0
|
|
66
|
-
else
|
|
67
|
-
hour
|
|
68
|
-
end
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
def format_sql_timestamp(date, time)
|
|
72
|
-
# rubocop:disable Layout/HashAlignment
|
|
73
|
-
format(
|
|
74
|
-
'%<year>04d-%<month>02d-%<day>02d %<hour>02d:%<minute>02d:%<second>02d',
|
|
75
|
-
year: date[:year],
|
|
76
|
-
month: date[:month],
|
|
77
|
-
day: date[:day],
|
|
78
|
-
hour: time[:hour],
|
|
79
|
-
minute: time[:minute],
|
|
80
|
-
second: time[:second]
|
|
81
|
-
)
|
|
82
|
-
# rubocop:enable Layout/HashAlignment
|
|
21
|
+
super(line, Pattern)
|
|
83
22
|
end
|
|
84
23
|
end
|
|
85
24
|
end
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative 'encoding'
|
|
4
|
+
require_relative 'platforms/base'
|
|
4
5
|
require_relative 'platforms/android'
|
|
5
6
|
require_relative 'platforms/ios'
|
|
6
7
|
require_relative 'platforms/android/pattern'
|
|
@@ -31,7 +32,7 @@ module WhatsappChatParser
|
|
|
31
32
|
end
|
|
32
33
|
|
|
33
34
|
def sanitize(line)
|
|
34
|
-
Encoding.normalize_to_utf8(line).strip
|
|
35
|
+
Encoding.normalize_to_utf8(line).strip
|
|
35
36
|
end
|
|
36
37
|
end
|
|
37
38
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: whatsapp-chat-parser
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
4
|
+
version: 0.1.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Emmanuel Akachukwu
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-02-
|
|
11
|
+
date: 2026-02-26 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: rspec
|
|
@@ -100,6 +100,7 @@ files:
|
|
|
100
100
|
- lib/whatsapp-chat-parser/platforms.rb
|
|
101
101
|
- lib/whatsapp-chat-parser/platforms/android.rb
|
|
102
102
|
- lib/whatsapp-chat-parser/platforms/android/pattern.rb
|
|
103
|
+
- lib/whatsapp-chat-parser/platforms/base.rb
|
|
103
104
|
- lib/whatsapp-chat-parser/platforms/ios.rb
|
|
104
105
|
- lib/whatsapp-chat-parser/platforms/ios/pattern.rb
|
|
105
106
|
- lib/whatsapp-chat-parser/platforms/pattern_helpers.rb
|
|
@@ -110,9 +111,9 @@ licenses:
|
|
|
110
111
|
metadata:
|
|
111
112
|
homepage_uri: https://github.com/emmaakachukwu/whatsapp-chat-parser-rb
|
|
112
113
|
bug_tracker_uri: https://github.com/emmaakachukwu/whatsapp-chat-parser-rb/issues
|
|
113
|
-
changelog_uri: https://github.com/emmaakachukwu/whatsapp-chat-parser-rb/blob/v0.1.0/CHANGELOG.md
|
|
114
|
-
documentation_uri: https://www.rubydoc.info/gems/whatsapp-chat-parser/0.1.0
|
|
115
|
-
source_code_uri: https://github.com/emmaakachukwu/whatsapp-chat-parser-rb/tree/v0.1.0
|
|
114
|
+
changelog_uri: https://github.com/emmaakachukwu/whatsapp-chat-parser-rb/blob/v0.1.2/CHANGELOG.md
|
|
115
|
+
documentation_uri: https://www.rubydoc.info/gems/whatsapp-chat-parser/0.1.2
|
|
116
|
+
source_code_uri: https://github.com/emmaakachukwu/whatsapp-chat-parser-rb/tree/v0.1.2
|
|
116
117
|
keywords: whatsapp chat parser whatsapp-chat-parser text export android ios
|
|
117
118
|
rubygems_mfa_required: 'true'
|
|
118
119
|
post_install_message:
|