pidgin2adium 1.0.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +6 -0
- data/Manifest.txt +10 -0
- data/README.rdoc +106 -0
- data/Rakefile.rb +26 -0
- data/bin/pidgin2adium +72 -0
- data/lib/pidgin2adium.rb +120 -0
- data/lib/pidgin2adium/{balance-tags.rb → balance_tags.rb} +32 -29
- data/lib/pidgin2adium/log_converter.rb +68 -0
- data/lib/pidgin2adium/log_file.rb +101 -0
- data/lib/pidgin2adium/log_parser.rb +590 -0
- metadata +39 -19
- data/bin/pidgin2adium_logs +0 -67
- data/bin/pidgin2adium_status +0 -15
- data/lib/pidgin2adium/ChatFileGenerator.rb +0 -59
- data/lib/pidgin2adium/SrcFileParse.rb +0 -485
- data/lib/pidgin2adium/logs.rb +0 -250
- data/lib/pidgin2adium/status.rb +0 -113
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'pidgin2adium'
|
2
|
+
|
3
|
+
module Pidgin2Adium
|
4
|
+
# An easy way to batch-process a directory. Used by the pidgin2adium
|
5
|
+
# command-line script.
|
6
|
+
class LogConverter
|
7
|
+
include Pidgin2Adium
|
8
|
+
# You can add options using the _opts_ hash, which can have the
|
9
|
+
# following keys, all of which are optional:
|
10
|
+
# * *overwrite*: If true, then overwrite even if log is found.
|
11
|
+
# Defaults to false.
|
12
|
+
# * *output_dir*: The top-level dir to put the logs in.
|
13
|
+
# Logs under output_dir are still each in their own folders, etc.
|
14
|
+
# Defaults to Pidgin2Adium::ADIUM_LOG_DIR
|
15
|
+
# Stores the conversion settings and validates the source directory.
#
# pidgin_log_dir:: directory holding the Pidgin logs; stored as an
#                  absolute path.
# aliases::        the user's aliases, passed through to parse_and_generate.
# opts::           optional settings hash, passed through to
#                  parse_and_generate.
#
# Raises Errno::ENOENT (carrying the offending path) if pidgin_log_dir does
# not exist or is not a directory.
def initialize(pidgin_log_dir, aliases, opts = {})
  # parse_and_generate will process the options for us
  @opts = opts
  @my_aliases = aliases
  @pidgin_log_dir = File.expand_path(pidgin_log_dir)

  return if File.directory?(@pidgin_log_dir)

  puts "Source directory #{@pidgin_log_dir} does not exist or is not a directory."
  # Put the path in the exception so a caller that rescues it can report
  # more than a bare "No such file or directory".
  raise Errno::ENOENT, @pidgin_log_dir
end
|
27
|
+
|
28
|
+
# Runs Pidgin2Adium::parse_and_generate on every log file in directory
|
29
|
+
# provided in new, then deletes Adium's search indexes to force
|
30
|
+
# it to rescan logs on startup.
|
31
|
+
# Converts every chat file found under @pidgin_log_dir, reporting progress
# through log_msg, then calls delete_search_indexes.
#
# Raises Errno::EACCES (the original exception, with its message and
# backtrace intact) if the log directory cannot be read.
def start
  log_msg "Begin converting."
  begin
    files_path = get_all_chat_files(@pidgin_log_dir)
  rescue Errno::EACCES => bang
    error("Sorry, permission denied for getting Pidgin chat files from #{@pidgin_log_dir}.")
    error("Details: #{bang.message}")
    # A bare raise re-raises the exception we just caught instead of
    # replacing it with a fresh, detail-free Errno::EACCES.
    raise
  end

  total_files = files_path.size
  total_successes = 0
  log_msg("#{total_files} files to convert.")
  files_path.each_with_index do |fname, i|
    log_msg(sprintf("[%d/%d] Converting %s...", (i + 1), total_files, fname))
    result = parse_and_generate(fname, @my_aliases, @opts)
    # Deliberately compared against true: only an exact true counts as a
    # successful conversion, any other return value does not.
    total_successes += 1 if result == true
  end

  delete_search_indexes()

  log_msg "Finished converting! Converted #{total_successes} files of #{total_files} total."
end
|
57
|
+
|
58
|
+
###########
|
59
|
+
private
|
60
|
+
###########
|
61
|
+
|
62
|
+
# Returns the paths of all .htm, .html and .txt files anywhere below _dir_.
# Returns an empty array when _dir_ itself is a ".system" directory.
def get_all_chat_files(dir)
  return [] if File.basename(dir) == ".system"
  # "**" makes the glob recurse into each subdir. Search the directory we
  # were given rather than always searching @pidgin_log_dir.
  # NOTE(review): BAD_DIRS is defined outside this file; subtracting it from
  # a list of file paths is kept as-is -- confirm it still has an effect.
  Dir.glob("#{dir}/**/*.{htm,html,txt}") - BAD_DIRS
end
|
67
|
+
end
|
68
|
+
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
# ADD DOCUMENTATION
|
2
|
+
|
3
|
+
require 'fileutils'
|
4
|
+
|
5
|
+
module Pidgin2Adium
|
6
|
+
# A holding object for the result of LogParser.parse. It makes the
|
7
|
+
# instance variable @chat_lines available, which is an array of objects
|
8
|
+
# which each have at least the instance variables _sender_, _time_, and
|
9
|
+
# _buddy_alias_ available. Some objects in @chat_lines have more variables
|
10
|
+
# available, specifically:
|
11
|
+
# * XMLMessage, AutoReplyMessage, and Event:: _body_
|
12
|
+
# * Event:: _event_type_
|
13
|
+
# * StatusMessage:: _status_
|
14
|
+
class LogFile
|
15
|
+
include Pidgin2Adium
|
16
|
+
# Builds a LogFile from already-parsed chat lines.
#
# chat_lines::            array of message objects; each must respond to to_s.
# service::               Pidgin's service name (e.g. "aim"); it is mapped to
#                         the name used in the <service>.<screenname> part
#                         of the output path.
# user_SN, partner_SN::   screen names of the user and the chat partner.
# adium_chat_time_start:: start time of the chat, already formatted.
def initialize(chat_lines, service, user_SN, partner_SN, adium_chat_time_start)
  @chat_lines = chat_lines
  @user_SN = user_SN
  @partner_SN = partner_SN
  @adium_chat_time_start = adium_chat_time_start

  # @chat_str is generated when to_s is called
  @chat_str = nil

  # key is for Pidgin, value is for Adium
  # Just used for <service>.<screenname> in directory structure
  service_name_map = {
    'aim'    => 'AIM',
    'jabber' => 'jabber',
    'gtalk'  => 'GTalk',
    'icq'    => 'ICQ',
    'qq'     => 'QQ',
    'msn'    => 'MSN',
    'yahoo'  => 'Yahoo'
  }

  # Fall back to the name we were given: a nil service would otherwise
  # turn the output directory into ".<screenname>".
  @service = service_name_map.fetch(service.downcase, service)
end
|
37
|
+
|
38
|
+
attr_reader :chat_lines, :service, :user_SN, :partner_SN, :adium_chat_time_start
|
39
|
+
|
40
|
+
# Returns contents of log file
|
41
|
+
# Returns the whole chat as one string: the to_s of every entry in
# @chat_lines, concatenated. The string is built on the first call and
# cached in @chat_str afterwards.
def to_s
  return @chat_str unless @chat_str.nil?

  # map + join is faster than inject() or each()
  @chat_str = @chat_lines.map { |chat_line| chat_line.to_s }.join
end
|
48
|
+
|
49
|
+
# Yields each entry of @chat_lines in order and returns @chat_lines.
# Without a block it returns an Enumerator over the lines (the block is
# forwarded to Array#each) instead of raising LocalJumpError.
def each(&blk)
  @chat_lines.each(&blk)
end
|
52
|
+
|
53
|
+
# Set overwrite=true to create a logfile even if logfile already exists.
|
54
|
+
# Returns one of:
|
55
|
+
# * false (if an error occurred),
|
56
|
+
# * Pidgin2Adium::FILE_EXISTS if the file to be generated already exists and overwrite=false, or
|
57
|
+
# * the path to the new Adium log file.
|
58
|
+
# Writes the chat to
#   <output_dir_base>/<service>.<user>/<partner>/<partner> (<time>).chatlog/<partner> (<time>).xml
#
# overwrite::       when true, an existing log is replaced. If no log with
#                   this exact name exists, a log for the same chat whose
#                   name differs only in time zone is targeted instead.
# output_dir_base:: top-level directory to write under.
#
# Returns false if the directory or file could not be created, FILE_EXISTS
# if the log already exists and overwrite is false, otherwise the path of
# the file that was written.
def write_out(overwrite = false, output_dir_base = ADIUM_LOG_DIR)
  log_name = "#{@partner_SN} (#{@adium_chat_time_start})"
  partner_dir = File.join(output_dir_base, "#{@service}.#{@user_SN}", @partner_SN)
  # partner_dir + "/buddyname (2009-08-04T18.38.50-0700).chatlog"
  output_dir = File.join(partner_dir, "#{log_name}.chatlog")
  # output_dir + "/buddyname (2009-08-04T18.38.50-0700).xml"
  output_path = output_dir + '/' + "#{log_name}.xml"

  if File.exist?(output_path)
    return FILE_EXISTS unless overwrite
  elsif overwrite
    # File doesn't exist, but maybe it does with a different time zone.
    # Look for "<partner> (<time without zone>*).chatlog/*.xml" next to
    # where this log would go and, if found, target that file instead.
    # NOTE(review): screen names containing glob metacharacters ([, *, ?, {)
    # are not escaped here.
    zoneless_time = @adium_chat_time_start.sub(/[-+]\d{4}\z/, '')
    pattern = File.join(partner_dir, "#{@partner_SN} (#{zoneless_time}*).chatlog", '*.xml')
    other_zone = Dir.glob(pattern).first
    output_path = other_zone if other_zone
  end

  # Create the directory only once the final target is known, so no empty
  # .chatlog directory is left behind when an existing log is reused.
  begin
    FileUtils.mkdir_p(File.dirname(output_path))
  rescue => bang
    error "Could not create destination directory for log file. (Details: #{bang.class}: #{bang.message})"
    return false
  end

  begin
    outfile = File.new(output_path, 'w')
  rescue => bang
    error "Could not open log file for writing. (Details: #{bang.class}: #{bang.message})"
    return false
  end

  # no \n before </chat> because @chat_str (from to_s) has it already
  template = %(<?xml version="1.0" encoding="UTF-8" ?>\n) +
             %(<chat xmlns="http://purl.org/net/ulf/ns/0.4-02" account="%s" service="%s">\n) +
             '%s</chat>'
  begin
    outfile.printf(template, @user_SN, @service, self.to_s)
  ensure
    # Close the handle even if the write raises.
    outfile.close
  end

  return output_path
end
|
100
|
+
end
|
101
|
+
end
|
@@ -0,0 +1,590 @@
|
|
1
|
+
# Contains the class BasicParser and its subclasses, HtmlLogParser and
|
2
|
+
# TextLogParser, which parse the file passed into them and return a LogFile
|
3
|
+
# object.
|
4
|
+
#
|
5
|
+
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
|
6
|
+
# using these classes directly.
|
7
|
+
require 'parsedate'
|
8
|
+
|
9
|
+
require 'pidgin2adium/balance_tags'
|
10
|
+
require 'pidgin2adium/log_file'
|
11
|
+
|
12
|
+
module Pidgin2Adium
|
13
|
+
# Empty class. Raised by BasicParser#pre_parse if the first line of a log is not
|
14
|
+
# parseable.
|
15
|
+
# Marker exception with no behavior of its own: signals that the first line
# of a log could not be parsed.
class InvalidFirstLineError < StandardError
end
|
16
|
+
|
17
|
+
# BasicParser is a base class. Its subclasses are TextLogParser and
|
18
|
+
# HtmlLogParser.
|
19
|
+
#
|
20
|
+
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
|
21
|
+
# using this class directly.
|
22
|
+
class BasicParser
|
23
|
+
include Pidgin2Adium
|
24
|
+
# Reads the log at _src_path_, extracts the header information from its
# first line (via pre_parse) and sets up the regex tables used while
# parsing.
#
# src_path::     path of the Pidgin log file.
# user_aliases:: comma-separated string of the user's aliases.
#
# If the first line cannot be parsed (this includes an empty file),
# @first_line_is_valid is set to false, an error is logged and the rest of
# the setup is skipped; parse then returns false.
def initialize(src_path, user_aliases)
  @src_path = src_path
  # Whitespace is removed for easy matching later on.
  @user_aliases = user_aliases.split(',').map!{|x| x.downcase.gsub(/\s+/,'') }.uniq
  # @user_alias is set each time get_sender_by_alias is called. It is a
  # non-normalized alias.
  # Set an initial value just in case the first message doesn't give
  # us an alias.
  @user_alias = user_aliases.split(',')[0]

  @tz_offset = get_time_zone_offset()

  # The block form closes the file even if reading raises.
  File.open(@src_path, 'r') do |file|
    # gets (unlike readline) returns nil at EOF, so an empty log ends up as
    # an invalid first line below instead of escaping as an EOFError.
    @first_line = file.gets || ''
    @file_content = file.read
  end

  # Time regexes must be set before pre_parse().
  # "4/18/2007 11:02:00 AM" => %w{4, 18, 2007, 11, 02, 00, AM}
  # ONLY used (if at all) in first line of chat ("Conversation with...at...")
  @time_regex_first_line = %r{(\d{1,2})/(\d{1,2})/(\d{4}) (\d{1,2}):(\d{2}):(\d{2}) ([AP]M)}
  # "2007-04-17 12:33:13" => %w{2007, 04, 17, 12, 33, 13}
  @time_regex = /(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})/
  # sometimes a line in a chat doesn't have a full timestamp
  # "04:22:05 AM" => %w{04 22 05 AM}
  @minimal_time_regex = /(\d{1,2}):(\d{2}):(\d{2})( [AP]M)?/

  # Whether or not the first line is parseable.
  @first_line_is_valid = true
  begin
    # @basic_time_info is for files that only have the full timestamp at
    # the top; we can use it to fill in the minimal per-line timestamps.
    # It has only 3 elements (year, month, dayofmonth) because you should
    # be able to fill everything else in. If you can't, something's wrong.
    # @adium_chat_time_start is when the chat started, in Adium's format.
    @service,
      @user_SN,
      @partner_SN,
      @basic_time_info,
      @adium_chat_time_start = pre_parse()
  rescue InvalidFirstLineError
    @first_line_is_valid = false
    error("Parsing of #{@src_path} failed (could not find valid first line).")
    return # stop processing
  end

  # @status_map, @lib_purple_events, and @event_map are used in
  # create_status_or_event_msg
  @status_map = {
    /(.+) logged in\.$/ => 'online',
    /(.+) logged out\.$/ => 'offline',
    /(.+) has signed on\.$/ => 'online',
    /(.+) has signed off\.$/ => 'offline',
    /(.+) has gone away\.$/ => 'away',
    /(.+) is no longer away\.$/ => 'available',
    /(.+) has become idle\.$/ => 'idle',
    /(.+) is no longer idle\.$/ => 'available'
  }

  # lib_purple_events are all of event_type libPurple
  @lib_purple_events = [
    # file transfer
    /Starting transfer of .+ from (.+)/,
    /^Offering to send .+ to (.+)$/,
    /(.+) is offering to send file/,
    /^Transfer of file .+ complete$/,
    # The alternation is grouped: ungrouped, "reading|writing|accessing"
    # made any line that merely contains "writing" count as an event.
    /Error (?:reading|writing|accessing) .+: .+/,
    /You cancelled the transfer of/,
    /File transfer cancelled/,
    /(.+) cancelled the transfer of/,
    /(.+) cancelled the file transfer/,
    # Direct IM - actual (dis)connect events are their own types
    /^Attempting to connect to (.+) at .+ for Direct IM\./,
    /^Asking (.+) to connect to us at .+ for Direct IM\./,
    /^Attempting to connect via proxy server\.$/,
    /^Direct IM with (.+) failed/,
    # encryption
    /Received message encrypted with wrong key/,
    /^Requesting key\.\.\.$/,
    /^Outgoing message lost\.$/,
    /^Conflicting Key Received!$/,
    /^Error in decryption- asking for resend\.\.\.$/,
    /^Making new key pair\.\.\.$/,
    # file transfer
    /You canceled the transfer of/,
    /(.+?) canceled the transfer of/,
    # sending errors
    /^Last outgoing message not received properly- resetting$/,
    /Resending\.\.\./,
    # connection errors
    /Lost connection with the remote user:.+/,
    # chats
    /^.+ entered the room\.$/,
    /^.+ left the room\.$/
  ]

  # non-libpurple events
  # Each key maps to an event_type string. The keys will be matched against
  # a line of chat and the partner's alias will be in regex group 1, IF the
  # alias is matched.
  @event_map = {
    # .+ is not an alias, it's a proxy server so no grouping
    /^Attempting to connect to .+\.$/ => 'direct-im-connect',
    # NB: pidgin doesn't track when Direct IM is disconnected, AFAIK
    /^Direct IM established$/ => 'directIMConnected',
    /Unable to send message/ => 'chat-error',
    /You missed .+ messages from (.+) because they were too large/ => 'chat-error',
    /User information not available/ => 'chat-error'
  }

  @ignore_events = [
    # Adium ignores SN/alias changes.
    /^.+? is now known as .+?\.<br\/?>$/
  ]
end
|
139
|
+
|
140
|
+
# This method returns a LogFile instance, or false if an error occurred.
|
141
|
+
# Turns the raw file content into message objects.
#
# Returns a LogFile instance, or false if the first line of the log was not
# parseable. Lines that match neither @line_regex nor @line_regex_status
# are reported through error and skipped.
def parse
  return false unless @first_line_is_valid
  @file_content = cleanup(@file_content).split("\n")

  @file_content.map! do |line|
    # Blank lines (empty or whitespace-only) carry no message.
    next if line =~ /\A\s*\z/
    if line =~ @line_regex
      create_msg($~.captures)
    elsif line =~ @line_regex_status
      create_status_or_event_msg($~.captures)
    else
      error "Could not parse line:"
      p line
      # Only abort when debugging; otherwise one odd line would end the
      # whole run.
      exit 1 if $DEBUG
      # Explicit nil so compact! drops the line: p returns its argument on
      # Ruby >= 1.9, which would leave a raw String among the messages.
      nil
    end
  end
  @file_content.compact!
  return LogFile.new(@file_content, @service, @user_SN, @partner_SN, @adium_chat_time_start)
end
|
159
|
+
|
160
|
+
#################
|
161
|
+
private
|
162
|
+
#################
|
163
|
+
|
164
|
+
# Pulls the numeric UTC offset (e.g. "-0800") out of @src_path, where it
# precedes a three-letter zone name and the .txt/.htm/.html extension.
# Returns '' when the path contains no such offset.
def get_time_zone_offset()
  found = /([-\+]\d+)[A-Z]{3}\.(?:txt|htm|html)/.match(@src_path)
  found ? found[1] : ''
end
|
169
|
+
|
170
|
+
#--
|
171
|
+
# Adium time format: YYYY-MM-DD\THH.MM.SS[+-]TZ_HRS like:
|
172
|
+
# 2008-10-05T22.26.20-0800
|
173
|
+
#++
|
174
|
+
# Converts a pidgin datestamp to an Adium one.
|
175
|
+
# Converts a Pidgin timestamp into Adium's format,
# YYYY-MM-DDTHH.MM.SS followed by @tz_offset.
#
# time::          the timestamp text. Three shapes are understood:
#                 "2007-04-17 12:33:13", "4/18/2007 11:02:00 AM" (first
#                 line only) and the time-only "04:22:05 [AM|PM]", whose
#                 date is taken from @basic_time_info.
# is_first_line:: true when _time_ comes from the "Conversation with..."
#                 header line.
#
# Anything else is reported and handed to Date._parse as a last resort.
def create_adium_time(time, is_first_line = false)
  if (m = @time_regex.match(time))
    year, month, day, hour, min, sec = m.captures.map { |part| part.to_i }
  elsif is_first_line && (m = @time_regex_first_line.match(time))
    month, day, year, hour, min, sec = m.captures[0, 6].map { |part| part.to_i }
    # 12-hour clock: 12 AM is hour 0 and 12 PM stays 12.
    hour = hour % 12 + (m[7] == 'PM' ? 12 : 0)
  elsif (m = @minimal_time_regex.match(time))
    # "04:22:05" => %w{04 22 05}
    hour, min, sec = m.captures[0, 3].map { |part| part.to_i }
    # Group 4 includes its leading space (" PM"), so strip before comparing.
    # It is nil for 24-hour timestamps, which need no adjustment.
    meridiem = m[4] && m[4].strip
    hour = hour % 12 + (meridiem == 'PM' ? 12 : 0) if meridiem
    year, month, day = @basic_time_info
  else
    error("You have found an odd timestamp. Please report it to the developer.")
    log_msg("The timestamp: #{time}")
    log_msg("Continuing...")
    # Loaded lazily: only this rare fallback needs it. Date._parse with
    # comp=false is what ParseDate.parsedate (gone since Ruby 1.9) wrapped.
    require 'date'
    year, month, day, hour, min, sec =
      Date._parse(time, false).values_at(:year, :mon, :mday, :hour, :min, :sec)
  end
  # The offset is appended rather than interpolated so it is never
  # interpreted as part of the strftime format.
  return Time.local(year, month, day, hour, min, sec).strftime('%Y-%m-%dT%H.%M.%S') + @tz_offset.to_s
end
|
212
|
+
|
213
|
+
# Extract required data from the first line of the file. Run by initialize.
|
214
|
+
# Parses the "Conversation with ... at ... on ... (...)" header held in
# @first_line.
#
# Returns [service, user_SN, partner_SN, basic_time_info,
# adium_chat_time_start], where basic_time_info is [year, month, day] (or
# nil if the header carries no recognisable date).
#
# Raises InvalidFirstLineError if the header does not match.
def pre_parse
  # The first line is special. It tells us (in order of regex groups):
  # 1) who we're talking to
  # 2) what time/date
  # 3) what SN we used
  # 4) what protocol (AIM, icq, jabber...)
  first_line_match = /Conversation with (.+?) at (.+?) on (.+?) \((.+?)\)/.match(@first_line)
  raise InvalidFirstLineError if first_line_match.nil?

  partner_SN, pidgin_chat_time_start, raw_user_SN, service = first_line_match.captures
  # user_SN is normalized to avoid "AIM.name" and "AIM.na me" folders
  user_SN = raw_user_SN.downcase.tr(' ', '')

  # "when ... then" parses on every Ruby version; the old "when ...:" form
  # is a syntax error from 1.9 on.
  basic_time_info = case @first_line
                    when @time_regex then [$1.to_i, $2.to_i, $3.to_i]
                    when @time_regex_first_line then [$3.to_i, $1.to_i, $2.to_i]
                    end

  adium_chat_time_start = create_adium_time(pidgin_chat_time_start, true)
  return [service, user_SN, partner_SN, basic_time_info, adium_chat_time_start]
end
|
243
|
+
|
244
|
+
# Maps an alias seen in the log to a screen name.
#
# A leading "***" is ignored. If what remains -- lowercased and with
# whitespace removed -- is one of @user_aliases, the un-normalized alias
# is remembered in @user_alias and @user_SN is returned. Any other alias
# is taken to be the partner and @partner_SN is returned.
def get_sender_by_alias(alias_name)
  bare_alias = alias_name.sub(/^\*{3}/, '')
  normalized = bare_alias.downcase.gsub(/\s+/, '')
  return @partner_SN unless @user_aliases.include?(normalized)

  # Set the current alias being used of the ones in @user_aliases
  @user_alias = bare_alias
  @user_SN
end
|
254
|
+
|
255
|
+
#--
|
256
|
+
# create_msg takes an array of captures from matching against
|
257
|
+
# @line_regex and returns a Message object or one of its subclasses.
|
258
|
+
# It can be used for TextLogParser and HtmlLogParser because both of
|
259
|
+
# them return data in the same indexes in the matches array.
|
260
|
+
#++
|
261
|
+
# Builds a message object from the captures of a @line_regex match.
#
# matches:: [timestamp, alias, auto-reply marker or nil, body]
#
# Returns an AutoReplyMessage when the marker is present, otherwise an
# XMLMessage.
def create_msg(matches)
  raw_time, buddy_alias, auto_reply_marker, body = matches[0], matches[1], matches[2], matches[3]
  time = create_adium_time(raw_time)
  sender = get_sender_by_alias(buddy_alias)
  message_class = auto_reply_marker ? AutoReplyMessage : XMLMessage
  message_class.new(sender, time, buddy_alias, body)
end
|
276
|
+
|
277
|
+
#--
|
278
|
+
# create_status_or_event_msg takes an array of +MatchData+ captures from
|
279
|
+
# matching against @line_regex_status and returns an Event or Status.
|
280
|
+
#++
|
281
|
+
# Builds a StatusMessage or Event from the captures of a
# @line_regex_status match.
#
# matches:: [timestamp, status-or-event text],
#           e.g. ["22:58:00", "BuddyName logged in."]
#
# Returns nil for text matching @ignore_events (the caller compacts it
# out), a StatusMessage for text in @status_map, and an Event for text in
# @lib_purple_events or @event_map. Text matching none of these is
# reported and the process exits with status 1.
def create_status_or_event_msg(matches)
  time = create_adium_time(matches[0])
  str = matches[1]
  # Return nil, which will get compact'ed out
  return nil if @ignore_events.detect { |rx| str =~ rx }

  regex, status = @status_map.detect { |rx, _status| str =~ rx }
  if regex && status
    # Status message
    buddy_alias = regex.match(str)[1]
    sender = get_sender_by_alias(buddy_alias)
    return StatusMessage.new(sender, time, buddy_alias, status)
  end

  # Test for event
  regex = @lib_purple_events.detect { |rx| str =~ rx }
  if regex
    event_type = 'libpurpleEvent'
  else
    # Not a libpurple event, try others. Take the [regex, type] pair that
    # detect returns; assigning $1 and $2 here put regex *captures* into
    # these variables, so no @event_map event was ever created.
    regex, event_type = @event_map.detect { |rx, _type| str =~ rx }
  end

  unless regex && event_type
    error("Could not match string to status or event!")
    error(sprintf("matches: %p", matches))
    error(sprintf("str: %p", str))
    exit 1
  end

  # Event message
  regex_matches = regex.match(str)
  if regex_matches.size == 1
    # No alias - this means it's the user
    buddy_alias = @user_alias
    sender = @user_SN
  else
    buddy_alias = regex_matches[1]
    sender = get_sender_by_alias(buddy_alias)
  end
  return Event.new(sender, time, buddy_alias, str, event_type)
end
|
328
|
+
end
|
329
|
+
|
330
|
+
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
|
331
|
+
# using this class directly.
|
332
|
+
class TextLogParser < BasicParser
|
333
|
+
# Sets up the regexes for plain-text (.txt) logs after the shared setup in
# the superclass.
def initialize(src_path, user_aliases)
  super(src_path, user_aliases)
  @timestamp_rx = '\((\d{1,2}:\d{1,2}:\d{1,2})\)'

  # @line_regex matches a line in a TXT log file other than the first
  # @line_regex matchdata:
  # 0: timestamp
  # 1: screen name or alias, if alias set
  # 2: the auto-reply marker, or nil
  # 3: message body
  # The marker is accepted both raw ("<AUTO-REPLY>") and entity-escaped
  # ("&lt;AUTO-REPLY&gt;"): once "<" has been escaped in the text, the raw
  # form alone can never match.
  @line_regex = /#{@timestamp_rx} (.*?) ?((?:<|&lt;)AUTO-REPLY(?:>|&gt;))?: (.*)/o
  # @line_regex_status matches a status line
  # @line_regex_status matchdata:
  # 0: timestamp
  # 1: status message
  @line_regex_status = /#{@timestamp_rx} ([^:]+)/o
end
|
350
|
+
|
351
|
+
#################
|
352
|
+
private
|
353
|
+
#################
|
354
|
+
|
355
|
+
# Prepares the body of a plain-text log for line-by-line parsing.
# _text_ is modified in place and also returned.
#
# * carriage returns are removed
# * &, <, >, " and ' are replaced by XML entities
# * every newline that does not end a chat line (i.e. is not followed by a
#   timestamp or the end of the text) becomes "<br/>"
def cleanup(text)
  text.tr!("\r", '')
  # Escape entities since this will be in XML. This happens before the
  # <br/> insertion so the inserted tags are not escaped as well.
  text.gsub!('&', '&amp;') # escape '&' first
  text.gsub!('<', '&lt;')
  text.gsub!('>', '&gt;')
  text.gsub!('"', '&quot;')
  text.gsub!("'", '&apos;')
  # Replace newlines with "<br/>" unless they end a chat line.
  text.gsub!(/\n(?!#{@timestamp_rx}|\Z)/, '<br/>')
  return text
end
|
367
|
+
end
|
368
|
+
|
369
|
+
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead
|
370
|
+
# of using this class directly.
|
371
|
+
class HtmlLogParser < BasicParser
|
372
|
+
# Sets up the regexes for HTML logs after the shared setup in the
# superclass.
def initialize(src_path, user_aliases)
  super(src_path, user_aliases)
  @timestamp_rx = '\(((?:\d{4}-\d{2}-\d{2} )?\d{1,2}:\d{1,2}:\d{1,2}(?: [AP]M)?)\)'

  # @line_regex matches a line in an HTML log file other than the first.
  # The time matches on either "2008-11-17 14:12" or "14:12".
  # @line_regex match obj:
  # 0: timestamp, extended or not
  # 1: screen name or alias, if alias set
  # 2: the auto-reply marker, or nil
  # 3: message body
  # The ":" is optional to allow for strings like
  # "(17:12:21) <b>***Gabe B-W</b> is confused<br/>"
  # The marker is accepted both raw ("<AUTO-REPLY>") and entity-escaped
  # ("&lt;AUTO-REPLY&gt;"), since inside HTML the marker text is escaped.
  @line_regex = /#{@timestamp_rx} ?<b>(.+?) ?((?:<|&lt;)AUTO-REPLY(?:>|&gt;))?:?<\/b> ?(.+)<br ?\/>/o
  # @line_regex_status matches a status line
  # @line_regex_status match obj:
  # 0: timestamp
  # 1: status message
  @line_regex_status = /#{@timestamp_rx} ?<b> (.+)<\/b><br ?\/>/o
end
|
391
|
+
|
392
|
+
#################
|
393
|
+
private
|
394
|
+
#################
|
395
|
+
|
396
|
+
# Returns a cleaned string.
|
397
|
+
# Removes the following tags from _text_:
|
398
|
+
# * html
|
399
|
+
# * body
|
400
|
+
# * font
|
401
|
+
# * a with no innertext, e.g. <a href="blah"></a>
|
402
|
+
# And removes the following style declarations:
|
403
|
+
# * color: #000000 (just turns text black)
|
404
|
+
# * font-family
|
405
|
+
# * font-size
|
406
|
+
# * background
|
407
|
+
# * em (really it's changed to <span style="font-style: italic;">)
|
408
|
+
# Since each <span> has only one style declaration, spans with these
|
409
|
+
# declarations are removed (but the text inside them is preserved).
|
410
|
+
# Prepares the body of an HTML log for line-by-line parsing.
# _text_ is modified in place and also returned.
#
# Removes <html>, <body> and <font> tags and empty links, turns newlines
# inside a message into "<br/>", switches attribute quotes from single to
# double, escapes the remaining apostrophes, and prunes <span> styles:
# black text color, font-family, font-size and background are removed,
# every other declaration (font-weight, for one) is kept. A span left
# without declarations is replaced by its inner text. <em> becomes an
# italic <span>.
def cleanup(text)
  # Sometimes this is in there. I don't know why.
  text.gsub!(%r{</FONT HSPACE='\d'>}, '')
  # We can remove <font> safely since Pidgin and Adium both show bold
  # using <span style="font-weight: bold;"> except Pidgin uses single
  # quotes while Adium uses double quotes.
  text.gsub!(/<\/?(?:html|body|font)(?: .+?)?>/, '') # very important!

  text.tr!("\r", '')
  # Remove empty lines. Matching the whole run also handles three or more
  # consecutive newlines, which pairwise replacement left behind.
  text.gsub!(/\n{2,}/, "\n")

  # Remove newlines that end the file, since they screw up the
  # newline -> <br/> conversion
  text.gsub!(/\n\Z/, '')

  # Replace newlines with "<br/>" unless they end a chat line.
  # This must go after we remove <font> tags.
  text.gsub!(/\n(?!#{@timestamp_rx})/, '<br/>')

  # These empty links are sometimes appended to every line in a chat,
  # for some weird reason. Remove them. The character class matches either
  # quote style; ('") only matched the two-character sequence '".
  text.gsub!(%r{<a href=(['"]).+?\1>\s*?</a>}, '')

  # Replace single quotes inside tags with double quotes so we can
  # easily change single quotes to entities.
  # For spans, removes a space after the final declaration if it exists.
  text.gsub!(/<span style='([^']+?;) ?'>/, '<span style="\1">')
  text.gsub!(/([a-z]+=)'(.+?)'/, '\1"\2"')
  text.gsub!("'", '&apos;')

  text.gsub!(%r{<span style="(.+?)">(.*?)</span>}) do |s|
    # style = style declaration
    # innertext = text inside <span>
    # Read the captures first: the matches below overwrite $1 and $2.
    style, innertext = $1, $2
    # Remove empty spans.
    next '' if innertext == ''

    # TODO: replace double quotes with "&quot;", but only outside tags;
    # may still be tags inside spans

    styleparts = style.split(/; ?/).map do |decl|
      if decl =~ /^color/
        # Regarding the bit with the ">", sometimes this happens:
        # <span style="color: #000000>today;">today was busy</span>
        # Then decl = "color: #000000>today"
        # Or it can end in ">;", with no text before the semicolon.
        # So remove the ">" and anything following it.
        if decl =~ /color: #000000/
          # black is the default; drop it
          nil
        elsif decl =~ /(color: #[0-9a-fA-F]{6})(>.*)?/
          # Keep the color but remove the bit after it
          $1
        end
        # NOTE(review): non-hex colors (e.g. "color: red") are still
        # dropped here, as before -- confirm that is intended.
      elsif decl =~ /^(?:font-family|font-size|background)/
        nil
      else
        # Keep everything else (font-weight in particular). Falling out of
        # the case with nil used to drop these declarations too.
        decl
      end
    end.compact

    unless styleparts.empty?
      innertext = "<span style=\"#{styleparts.join('; ')};\">#{innertext}</span>"
    end
    innertext
  end

  # Pidgin uses <em>, Adium uses <span>
  if text.gsub!('<em>', '<span style="font-style: italic;">')
    text.gsub!('</em>', '</span>')
  end
  return text
end
|
495
|
+
end
|
496
|
+
|
497
|
+
# A holding object for each line of the chat. It is subclassed as
|
498
|
+
# appropriate (eg AutoReplyMessage). Each subclass (but not Message
|
499
|
+
# itself) has its own to_s which prints out its information in a format
|
500
|
+
# appropriate for putting in an Adium log file.
|
501
|
+
# Subclasses: XMLMessage, AutoReplyMessage, StatusMessage, Event.
|
502
|
+
# Base holder for one line of a chat: who sent it, when, and under which
# alias. All three values are readable and writable.
class Message
  attr_accessor :sender, :time, :buddy_alias

  # sender::      screen name of the sender
  # time::        timestamp of the line
  # buddy_alias:: alias shown for the sender
  def initialize(sender, time, buddy_alias)
    @sender, @time, @buddy_alias = sender, time, buddy_alias
  end
end
|
510
|
+
|
511
|
+
# Basic message with body text (as opposed to pure status messages, which
|
512
|
+
# have no body).
|
513
|
+
class XMLMessage < Message
|
514
|
+
include Pidgin2Adium
|
515
|
+
# Stores the message and prepares the markup emitted by to_s.
#
# body:: the message text
def initialize(sender, time, buddy_alias, body)
  super(sender, time, buddy_alias)
  @body = body
  # Normalize first: @styled_body is a formatted copy of @body, so building
  # it before normalize_body! kept the normalized text out of to_s.
  normalize_body!()
  @styled_body = '<div><span style="font-family: Helvetica; font-size: 12pt;">%s</span></div>' % @body
end
|
521
|
+
attr_accessor :body
|
522
|
+
|
523
|
+
# Renders the message as one <message> element followed by a newline.
def to_s
  template = %(<message sender="%s" time="%s" alias="%s">%s</message>\n)
  template % [@sender, @time, @buddy_alias, @styled_body]
end
|
527
|
+
|
528
|
+
#################
|
529
|
+
private
|
530
|
+
#################
|
531
|
+
|
532
|
+
# Balances mismatched tags, normalizes body style, and fixes actions
|
533
|
+
# so they are in Adium style (Pidgin uses "***Buddy waves at you", Adium uses
|
534
|
+
# "*Buddy waves at you*").
|
535
|
+
# Escapes entities in @body, balances its tags and converts a Pidgin
# action into Adium style: Pidgin marks a /me action by prefixing the
# alias with "***", Adium wraps the body in asterisks instead.
def normalize_body!
  normalize_body_entities!()
  # Fix mismatched tags. Yes, it's faster to do it per-message
  # than all at once.
  @body = balance_tags(@body)
  return unless @buddy_alias[0, 3] == '***'

  # "***<alias>" is what pidgin sets as the alias for a /me action
  @buddy_alias.slice!(0, 3)
  @body = '*' << @body << '*'
end
|
546
|
+
|
547
|
+
# Escapes entities.
|
548
|
+
# Escapes bare ampersands in @body, in place.
#
# An "&" is left alone only when it starts a complete entity: one of
# &lt; &gt; &amp; &quot; &apos;, or a numeric reference such as &#39; or
# &#x27;. Requiring the closing ";" keeps text like "&ampersand" from
# being mistaken for an entity.
def normalize_body_entities!
  @body.gsub!(/&(?!(?:lt|gt|amp|quot|apos|#\d+|#x[0-9a-fA-F]+);)/, '&amp;')
end
|
552
|
+
end
|
553
|
+
|
554
|
+
# An auto reply message.
|
555
|
+
class AutoReplyMessage < XMLMessage
|
556
|
+
# Renders the auto-reply as a <message> element carrying auto="true",
# followed by a newline.
def to_s
  template = %(<message sender="%s" time="%s" auto="true" alias="%s">%s</message>\n)
  format(template, @sender, @time, @buddy_alias, @styled_body)
end
|
560
|
+
end
|
561
|
+
|
562
|
+
# A message saying e.g. "Blahblah has gone away."
|
563
|
+
class StatusMessage < Message
|
564
|
+
# status:: the status string stored in @status and emitted by to_s as the
#          "type" attribute
# The remaining arguments are passed to the superclass unchanged.
def initialize(sender, time, buddy_alias, status)
  super(sender, time, buddy_alias)
  @status = status
end
|
568
|
+
attr_accessor :status
|
569
|
+
|
570
|
+
# Renders the status change as a self-closing <status> element followed by
# a newline.
def to_s
  attributes = [@status, @sender, @time, @buddy_alias]
  '<status type="%s" sender="%s" time="%s" alias="%s"/>' % attributes + "\n"
end
|
573
|
+
end
|
574
|
+
|
575
|
+
# Pidgin does not have Events, but Adium does. Pidgin mostly uses system
|
576
|
+
# messages to display what Adium calls events. These include sending a file,
|
577
|
+
# starting a Direct IM connection, or an error in chat.
|
578
|
+
class Event < XMLMessage
|
579
|
+
# event_type:: the event type string stored in @event_type and emitted by
#              to_s as the "type" attribute
# The remaining arguments are passed to the superclass unchanged.
def initialize(sender, time, buddy_alias, body, event_type)
  super(sender, time, buddy_alias, body)
  @event_type = event_type
end
|
583
|
+
attr_accessor :event_type
|
584
|
+
|
585
|
+
# Renders the event as one <event> element followed by a newline. The
# newline matches what the other message classes emit, so each entry ends
# up on its own line once the entries are joined into a log.
def to_s
  return sprintf('<event type="%s" sender="%s" time="%s" alias="%s">%s</event>' << "\n",
                 @event_type, @sender, @time, @buddy_alias, @styled_body)
end
|
589
|
+
end
|
590
|
+
end # end module
|