pidgin2adium 1.0.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,68 @@
1
+ require 'pidgin2adium'
2
+
3
module Pidgin2Adium
  # An easy way to batch-process a directory. Used by the pidgin2adium
  # command-line script.
  class LogConverter
    include Pidgin2Adium

    # pidgin_log_dir:: directory to search for Pidgin logs; it is expanded
    #                  with File.expand_path.
    # aliases::        handed unchanged to parse_and_generate for every file.
    # opts::           options hash, handed unchanged to parse_and_generate.
    #                  All keys are optional:
    #                  * *overwrite*: If true, then overwrite even if log is
    #                    found. Defaults to false.
    #                  * *output_dir*: The top-level dir to put the logs in.
    #                    Logs under output_dir are still each in their own
    #                    folders, etc. Defaults to Pidgin2Adium::ADIUM_LOG_DIR
    #
    # Raises Errno::ENOENT (after printing a message) if pidgin_log_dir is
    # not an existing directory.
    def initialize(pidgin_log_dir, aliases, opts = {})
      # parse_and_generate will process it for us
      @opts = opts
      @pidgin_log_dir = File.expand_path(pidgin_log_dir)
      @my_aliases = aliases

      return if File.directory?(@pidgin_log_dir)

      puts "Source directory #{@pidgin_log_dir} does not exist or is not a directory."
      # Carry the offending path in the exception instead of raising it bare.
      raise Errno::ENOENT, @pidgin_log_dir
    end

    # Runs Pidgin2Adium::parse_and_generate on every log file in directory
    # provided in new, then calls delete_search_indexes.
    #
    # A file counts as converted only when parse_and_generate returns
    # exactly +true+. Re-raises Errno::EACCES if the directory cannot be
    # listed.
    def start
      log_msg "Begin converting."
      begin
        files_path = get_all_chat_files(@pidgin_log_dir)
      rescue Errno::EACCES => bang
        error("Sorry, permission denied for getting Pidgin chat files from #{@pidgin_log_dir}.")
        error("Details: #{bang.message}")
        # Bare raise re-raises the rescued exception, keeping its message
        # and backtrace.
        raise
      end

      total_files = files_path.size
      total_successes = 0
      log_msg("#{total_files} files to convert.")
      files_path.each_with_index do |fname, i|
        log_msg(sprintf("[%d/%d] Converting %s...", i + 1, total_files, fname))
        result = parse_and_generate(fname, @my_aliases, @opts)
        total_successes += 1 if result == true
      end

      delete_search_indexes

      log_msg "Finished converting! Converted #{total_successes} files of #{total_files} total."
    end

    ###########
    private
    ###########

    # Returns the paths of all .htm, .html and .txt files found anywhere
    # below +dir+, minus any entries listed in BAD_DIRS. Returns an empty
    # array when +dir+ itself is named ".system".
    def get_all_chat_files(dir)
      return [] if File.basename(dir) == ".system"

      # "**" makes the glob recurse into each subdir
      Dir.glob("#{dir}/**/*.{htm,html,txt}") - BAD_DIRS
    end
  end
end
@@ -0,0 +1,101 @@
1
+ # ADD DOCUMENTATION
2
+
3
+ require 'fileutils'
4
+
5
module Pidgin2Adium
  # A holding object for the result of LogParser.parse. It makes the
  # instance variable @chat_lines available, which is an array of objects
  # which each have at least the instance variables _sender_, _time_, and
  # _buddy_alias_ available. Some objects in @chat_lines have more variables
  # available, specifically:
  # * XMLMessage, AutoReplyMessage, and Event:: _body_
  # * Event:: _event_type_
  # * StatusMessage:: _status_
  class LogFile
    include Pidgin2Adium

    # key is for Pidgin, value is for Adium
    # Just used for <service>.<screenname> in directory structure
    SERVICE_NAME_MAP = {
      'aim'    => 'AIM',
      'jabber' => 'jabber',
      'gtalk'  => 'GTalk',
      'icq'    => 'ICQ',
      'qq'     => 'QQ',
      'msn'    => 'MSN',
      'yahoo'  => 'Yahoo'
    }.freeze

    attr_reader :chat_lines, :service, :user_SN, :partner_SN, :adium_chat_time_start

    # chat_lines::            array of objects responding to to_s
    # service::               Pidgin service name (matched case-insensitively
    #                         against SERVICE_NAME_MAP)
    # user_SN, partner_SN::   screen names used to build the output path
    # adium_chat_time_start:: start time string used in the output file name
    def initialize(chat_lines, service, user_SN, partner_SN, adium_chat_time_start)
      @chat_lines = chat_lines
      @user_SN = user_SN
      @partner_SN = partner_SN
      @adium_chat_time_start = adium_chat_time_start

      # @chat_str is generated when to_s is called
      @chat_str = nil

      # An unmapped service falls back to the name as given rather than nil,
      # which would produce a ".<screenname>" directory.
      # NOTE(review): confirm the spelling Adium expects for such services.
      @service = SERVICE_NAME_MAP.fetch(service.downcase, service)
    end

    # Returns contents of log file: every chat line's to_s, concatenated.
    # The string is built once and cached.
    def to_s
      @chat_str ||= @chat_lines.map { |l| l.to_s }.join
    end

    # Yields each chat line in order.
    def each(&blk)
      @chat_lines.each(&blk)
    end

    # Set overwrite=true to create a logfile even if logfile already exists.
    # Returns one of:
    # * false (if an error occurred),
    # * Pidgin2Adium::FILE_EXISTS if the file to be generated already exists and overwrite=false, or
    # * the path to the new Adium log file.
    def write_out(overwrite = false, output_dir_base = ADIUM_LOG_DIR)
      # partner_dir + "/buddyname (2009-08-04T18.38.50-0700).chatlog/buddyname (2009-08-04T18.38.50-0700).xml"
      log_name = "#{@partner_SN} (#{@adium_chat_time_start})"
      partner_dir = File.join(output_dir_base, "#{@service}.#{@user_SN}", @partner_SN)
      output_path = File.join(partner_dir, "#{log_name}.chatlog", "#{log_name}.xml")

      if File.exist?(output_path)
        return FILE_EXISTS unless overwrite
      elsif overwrite
        # File doesn't exist, but maybe it does with a different time zone.
        # If so, target that file instead of creating a near-duplicate.
        output_path = find_log_in_other_time_zone(partner_dir) || output_path
      end

      begin
        FileUtils.mkdir_p(File.dirname(output_path))
      rescue => bang
        error "Could not create destination directory for log file. (Details: #{bang.class}: #{bang.message})"
        return false
      end

      begin
        outfile = File.new(output_path, 'w')
      rescue => bang
        error "Could not open log file for writing. (Details: #{bang.class}: #{bang.message})"
        return false
      end

      begin
        # no \n before </chat> because @chat_str (from to_s) has it already
        outfile.write(
          %(<?xml version="1.0" encoding="UTF-8" ?>\n) +
          %(<chat xmlns="http://purl.org/net/ulf/ns/0.4-02" account="#{@user_SN}" service="#{@service}">\n) +
          "#{to_s}</chat>"
        )
      ensure
        # Close the handle even if the write raises.
        outfile.close
      end

      output_path
    end

    private

    # Looks in +partner_dir+ for an existing "<name>.chatlog/<name>.xml" whose
    # name matches this chat's partner and start time but may carry a
    # different (or no) time zone suffix. Returns its path, or nil.
    def find_log_in_other_time_zone(partner_dir)
      time_without_zone = @adium_chat_time_start.sub(/[-+]\d{4}\z/, '')
      wanted_prefix = "#{@partner_SN} (#{time_without_zone}"
      Dir.glob(File.join(partner_dir, '*.chatlog', '*.xml')).find do |path|
        File.basename(path, '.xml').start_with?(wanted_prefix)
      end
    end
  end
end
@@ -0,0 +1,590 @@
1
+ # Contains the class BasicParser and its subclasses, HtmlLogParser and
2
+ # TextLogParser, which parse the file passed into it and return a LogFile
3
+ # object.
4
+ #
5
+ # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
6
+ # using these classes directly.
7
+ require 'parsedate'
8
+
9
+ require 'pidgin2adium/balance_tags'
10
+ require 'pidgin2adium/log_file'
11
+
12
+ module Pidgin2Adium
13
+ # Empty class. Raise'd by LogParser if the first line of a log is not
14
+ # parseable.
15
# Marker exception with no behavior of its own; raised when the first
# line of a log cannot be parsed.
class InvalidFirstLineError < StandardError
end
16
+
17
+ # BasicParser is a base class. Its subclasses are TextLogParser and
18
+ # HtmlLogParser.
19
+ #
20
+ # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
21
+ # using this class directly.
22
+ class BasicParser
23
+ include Pidgin2Adium
24
# Reads the log at +src_path+ and sets up the state used by #parse.
#
# src_path::     path to the Pidgin log; its first line and the remaining
#                content are read immediately.
# user_aliases:: comma-separated String of the user's aliases.
#
# If pre_parse raises InvalidFirstLineError, an error is reported,
# @first_line_is_valid is set to false and the rest of the setup is skipped.
def initialize(src_path, user_aliases)
  @src_path = src_path
  raw_aliases = user_aliases.split(',')
  # Whitespace is removed for easy matching later on.
  @user_aliases = raw_aliases.map { |x| x.downcase.gsub(/\s+/, '') }.uniq
  # @user_alias is set each time get_sender_by_alias is called. It is a
  # non-normalized alias.
  # Set an initial value just in case the first message doesn't give
  # us an alias.
  @user_alias = raw_aliases[0]

  @tz_offset = get_time_zone_offset()

  # The block form closes the file even if reading raises. gets returns nil
  # on an empty file (readline would raise EOFError), so an empty log ends
  # up on the "invalid first line" path below instead of crashing.
  File.open(@src_path, 'r') do |file|
    @first_line = file.gets.to_s
    @file_content = file.read
  end

  # Time regexes must be set before pre_parse().
  # "4/18/2007 11:02:00 AM" => %w{4, 18, 2007, 11, 02, 00, AM}
  # ONLY used (if at all) in first line of chat ("Conversation with...at...")
  @time_regex_first_line = %r{(\d{1,2})/(\d{1,2})/(\d{4}) (\d{1,2}):(\d{2}):(\d{2}) ([AP]M)}
  # "2007-04-17 12:33:13" => %w{2007, 04, 17, 12, 33, 13}
  @time_regex = /(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})/
  # sometimes a line in a chat doesn't have a full timestamp
  # "04:22:05 AM" => %w{04 22 05 AM}
  @minimal_time_regex = /(\d{1,2}):(\d{2}):(\d{2})( [AP]M)?/

  # Whether or not the first line is parseable.
  @first_line_is_valid = true
  begin
    # @basic_time_info is for files that only have the full timestamp at
    # the top; we can use it to fill in the minimal per-line timestamps.
    # It has only 3 elements (year, month, dayofmonth).
    # @adium_chat_time_start is when the chat started, in Adium's format.
    @service,
      @user_SN,
      @partner_SN,
      @basic_time_info,
      @adium_chat_time_start = pre_parse()
  rescue InvalidFirstLineError
    @first_line_is_valid = false
    error("Parsing of #{@src_path} failed (could not find valid first line).")
    return # stop processing
  end

  # @status_map, @lib_purple_events, and @events are used in
  # create_status_or_event_msg
  @status_map = {
    /(.+) logged in\.$/ => 'online',
    /(.+) logged out\.$/ => 'offline',
    /(.+) has signed on\.$/ => 'online',
    /(.+) has signed off\.$/ => 'offline',
    /(.+) has gone away\.$/ => 'away',
    /(.+) is no longer away\.$/ => 'available',
    /(.+) has become idle\.$/ => 'idle',
    /(.+) is no longer idle\.$/ => 'available'
  }

  # lib_purple_events are all of event_type libPurple
  @lib_purple_events = [
    # file transfer
    /Starting transfer of .+ from (.+)/,
    /^Offering to send .+ to (.+)$/,
    /(.+) is offering to send file/,
    /^Transfer of file .+ complete$/,
    # Grouped so the alternation covers only the verb; ungrouped, any line
    # containing "writing" matched.
    /Error (?:reading|writing|accessing) .+: .+/,
    /You cancelled the transfer of/,
    /File transfer cancelled/,
    /(.+) cancelled the transfer of/,
    /(.+) cancelled the file transfer/,
    # Direct IM - actual (dis)connect events are their own types
    /^Attempting to connect to (.+) at .+ for Direct IM\./,
    /^Asking (.+) to connect to us at .+ for Direct IM\./,
    /^Attempting to connect via proxy server\.$/,
    /^Direct IM with (.+) failed/,
    # encryption
    /Received message encrypted with wrong key/,
    /^Requesting key\.\.\.$/,
    /^Outgoing message lost\.$/,
    /^Conflicting Key Received!$/,
    /^Error in decryption- asking for resend\.\.\.$/,
    /^Making new key pair\.\.\.$/,
    # file transfer
    /You canceled the transfer of/,
    /(.+?) canceled the transfer of/,
    # sending errors
    /^Last outgoing message not received properly- resetting$/,
    /Resending\.\.\./,
    # connection errors
    /Lost connection with the remote user:.+/,
    # chats
    /^.+ entered the room\.$/,
    /^.+ left the room\.$/
  ]

  # non-libpurple events
  # Each key maps to an event_type string. The keys will be matched against a line of chat
  # and the partner's alias will be in regex group 1, IF the alias is matched.
  @event_map = {
    # .+ is not an alias, it's a proxy server so no grouping
    /^Attempting to connect to .+\.$/ => 'direct-im-connect',
    # NB: pidgin doesn't track when Direct IM is disconnected, AFAIK
    /^Direct IM established$/ => 'directIMConnected',
    /Unable to send message/ => 'chat-error',
    /You missed .+ messages from (.+) because they were too large/ => 'chat-error',
    /User information not available/ => 'chat-error'
  }

  @ignore_events = [
    # Adium ignores SN/alias changes.
    /^.+? is now known as .+?\.<br\/?>$/
  ]
end
139
+
140
+ # This method returns a LogFile instance, or false if an error occurred.
141
# Converts the log content into message objects.
#
# Returns a LogFile instance, or false if an error occurred: either the
# first line was not parseable, or a line matched neither @line_regex nor
# @line_regex_status. Lines for which the create_* helpers return nil are
# left out.
def parse
  return false unless @first_line_is_valid

  messages = []
  # Work on a copy: the cleanup implementations edit their argument in
  # place, and @file_content should stay intact if we bail out below.
  cleanup(@file_content.dup).split("\n").each do |line|
    # Skip empty as well as whitespace-only lines.
    next if line.strip.empty?

    msg =
      if line =~ @line_regex
        create_msg($~.captures)
      elsif line =~ @line_regex_status
        create_status_or_event_msg($~.captures)
      else
        error "Could not parse line:"
        p line
        # Report failure to the caller instead of terminating the whole
        # process from library code.
        return false
      end
    messages << msg unless msg.nil?
  end

  LogFile.new(messages, @service, @user_SN, @partner_SN, @adium_chat_time_start)
end
159
+
160
+ #################
161
+ private
162
+ #################
163
+
164
# Extracts the numeric time zone offset (e.g. "-0800") from a file name in
# @src_path of the form "...-0800PST.txt" (also .htm/.html).
# Returns an empty string when the path carries no such offset.
def get_time_zone_offset()
  tz_match = /([-\+]\d+)[A-Z]{3}\.(?:txt|htm|html)/.match(@src_path)
  # Explicit nil check instead of an inline rescue, which would also hide
  # unrelated errors.
  tz_match ? tz_match[1] : ''
end
169
+
170
+ #--
171
+ # Adium time format: YYYY-MM-DD\THH.MM.SS[+-]TZ_HRS like:
172
+ # 2008-10-05T22.26.20-0800
173
+ #++
174
+ # Converts a pidgin datestamp to an Adium one.
175
#--
# Adium time format: YYYY-MM-DD\THH.MM.SS[+-]TZ_HRS like:
# 2008-10-05T22.26.20-0800
#++
# Converts a pidgin datestamp to an Adium one.
#
# time::          timestamp string in one of three shapes:
#                 "2007-04-17 12:33:13" (@time_regex),
#                 "4/18/2007 11:02:00 AM" (@time_regex_first_line, only tried
#                 when is_first_line is true), or
#                 "04:22:05" / "04:22:05 PM" (@minimal_time_regex; the date
#                 comes from @basic_time_info).
# is_first_line:: whether +time+ comes from the log's first line.
#
# Returns the formatted time with @tz_offset appended.
def create_adium_time(time, is_first_line = false)
  if (m = @time_regex.match(time))
    year, month, day, hour, min, sec = m.captures.map { |part| part.to_i }
  elsif is_first_line && (m = @time_regex_first_line.match(time))
    month, day, year, hour, min, sec = m.captures.first(6).map { |part| part.to_i }
    hour = hour_in_24h(hour, m[7])
  elsif (m = @minimal_time_regex.match(time))
    year, month, day = @basic_time_info
    hour, min, sec = m.captures.first(3).map { |part| part.to_i }
    # Group 4 is " AM"/" PM" *with* a leading space (or nil).
    hour = hour_in_24h(hour, m[4])
  else
    error("You have found an odd timestamp. Please report it to the developer.")
    log_msg("The timestamp: #{time}")
    log_msg("Continuing...")
    # Loaded lazily: only this rare fallback needs it. Date._parse is used
    # because the ParseDate library is not available after Ruby 1.8.
    require 'date'
    year, month, day, hour, min, sec =
      Date._parse(time).values_at(:year, :mon, :mday, :hour, :min, :sec)
  end
  Time.local(year, month, day, hour, min, sec).strftime("%Y-%m-%dT%H.%M.%S#{@tz_offset}")
end

# Converts a 12-hour clock +hour+ to 24-hour form. +meridiem+ may be
# "AM"/"PM" with surrounding whitespace, or nil/empty for a time that is
# already in 24-hour form (returned unchanged).
def hour_in_24h(hour, meridiem)
  case meridiem.to_s.strip
  when 'PM' then hour < 12 ? hour + 12 : hour
  when 'AM' then hour == 12 ? 0 : hour
  else hour
  end
end
212
+
213
+ # Extract required data from the file. Run by parse.
214
# Extract required data from the first line of the file (@first_line).
# Called from initialize.
#
# The first line tells us (in order of regex groups):
# 1) who we're talking to
# 2) what time/date
# 3) what SN we used
# 4) what protocol (AIM, icq, jabber...)
#
# Returns [service, user_SN, partner_SN, basic_time_info,
# adium_chat_time_start], where basic_time_info is [year, month, day] (or
# nil if the first line contains no timestamp either time regex recognizes).
#
# Raises InvalidFirstLineError if the first line has an unexpected shape.
def pre_parse
  first_line_match = /Conversation with (.+?) at (.+?) on (.+?) \((.+?)\)/.match(@first_line)
  raise InvalidFirstLineError if first_line_match.nil?

  partner_SN, pidgin_chat_time_start, raw_user_SN, service = first_line_match.captures
  # user_SN is normalized to avoid "AIM.name" and "AIM.na me" folders
  user_SN = raw_user_SN.downcase.tr(' ', '')

  # "when ... then" parses on Ruby 1.8 and later; the "when ...:" form is
  # 1.8-only.
  basic_time_info =
    case @first_line
    when @time_regex then [$1.to_i, $2.to_i, $3.to_i]
    when @time_regex_first_line then [$3.to_i, $1.to_i, $2.to_i]
    end

  adium_chat_time_start = create_adium_time(pidgin_chat_time_start, true)
  [service, user_SN, partner_SN, basic_time_info, adium_chat_time_start]
end
243
+
244
# Maps an alias seen in the log to a screen name.
#
# A leading "***" is dropped from +alias_name+; the rest is lowercased and
# stripped of whitespace before being looked up in @user_aliases. On a hit
# the un-normalized alias is remembered in @user_alias and @user_SN is
# returned; otherwise @partner_SN is returned.
def get_sender_by_alias(alias_name)
  plain_alias = alias_name.sub(/^\*{3}/, '')
  normalized = plain_alias.downcase.gsub(/\s+/, '')
  return @partner_SN unless @user_aliases.include?(normalized)

  # Set the current alias being used of the ones in @user_aliases
  @user_alias = plain_alias
  @user_SN
end
254
+
255
+ #--
256
+ # create_msg takes an array of captures from matching against
257
+ # @line_regex and returns a Message object or one of its subclasses.
258
+ # It can be used for TextLogParser and HtmlLogParser because both of
259
+ # them return data in the same indexes in the matches array.
260
+ #++
261
#--
# Builds a message object from the captures of a match against
# @line_regex:
#   0: timestamp
#   1: alias
#   2: auto-reply marker (truthy only for auto-replies)
#   3: message body
# Returns an AutoReplyMessage when capture 2 is set, else an XMLMessage.
#++
def create_msg(matches)
  timestamp, buddy_alias, auto_reply_marker, body =
    matches[0], matches[1], matches[2], matches[3]
  time = create_adium_time(timestamp)
  sender = get_sender_by_alias(buddy_alias)
  message_class = auto_reply_marker ? AutoReplyMessage : XMLMessage
  message_class.new(sender, time, buddy_alias, body)
end
276
+
277
+ #--
278
+ # create_status_or_event_msg takes an array of +MatchData+ captures from
279
+ # matching against @line_regex_status and returns an Event or Status.
280
+ #++
281
#--
# create_status_or_event_msg takes an array of +MatchData+ captures from
# matching against @line_regex_status and returns an Event or Status.
#   ["22:58:00", "BuddyName logged in."]
#   0: time
#   1: status message or event
#
# Returns nil for lines matching @ignore_events. Lookup order is
# @status_map, then @lib_purple_events (event type 'libpurpleEvent'), then
# @event_map. If nothing matches, the problem is reported through error and
# the process exits with status 1.
#++
def create_status_or_event_msg(matches)
  time = create_adium_time(matches[0])
  str = matches[1]
  # Return nil, which the caller drops
  return nil if @ignore_events.detect { |rx| str =~ rx }

  status_rx, status = @status_map.detect { |rx, _status| str =~ rx }
  if status_rx && status
    # Status message
    buddy_alias = status_rx.match(str)[1]
    sender = get_sender_by_alias(buddy_alias)
    return StatusMessage.new(sender, time, buddy_alias, status)
  end

  # Test for event
  event_rx = @lib_purple_events.detect { |rx| str =~ rx }
  if event_rx
    event_type = 'libpurpleEvent'
  else
    # Not a libpurple event, try others. detect returns the matching
    # [regex, event_type] pair (or nil); take the pair itself, not the
    # regex capture globals.
    event_rx, event_type = @event_map.detect { |rx, _type| str =~ rx }
  end

  unless event_rx && event_type
    error("Could not match string to status or event!")
    error(sprintf("matches: %p", matches))
    error(sprintf("str: %p", str))
    exit 1
  end

  regex_matches = event_rx.match(str)
  if regex_matches.size == 1
    # No capture group, hence no alias - this means it's the user
    buddy_alias = @user_alias
    sender = @user_SN
  else
    buddy_alias = regex_matches[1]
    sender = get_sender_by_alias(buddy_alias)
  end
  Event.new(sender, time, buddy_alias, str, event_type)
end
328
+ end
329
+
330
+ # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
331
+ # using this class directly.
332
# Parser for plain-text (.txt) Pidgin logs.
#
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
# using this class directly.
class TextLogParser < BasicParser
  def initialize(src_path, user_aliases)
    super(src_path, user_aliases)
    @timestamp_rx = '\((\d{1,2}:\d{1,2}:\d{1,2})\)'

    # @line_regex matches a line in a TXT log file other than the first.
    # It is applied to text that has already been through cleanup, so the
    # auto-reply marker appears entity-escaped.
    # @line_regex matchdata:
    # 0: timestamp
    # 1: screen name or alias, if alias set
    # 2: "&lt;AUTO-REPLY&gt;" or nil
    # 3: message body
    @line_regex = /#{@timestamp_rx} (.*?) ?(&lt;AUTO-REPLY&gt;)?: (.*)/
    # @line_regex_status matches a status line
    # @line_regex_status matchdata:
    # 0: timestamp
    # 1: status message
    @line_regex_status = /#{@timestamp_rx} ([^:]+)/
  end

  #################
  private
  #################

  # Prepares raw log text for line-by-line parsing. +text+ is modified in
  # place and also returned.
  #
  # Steps: drop carriage returns, escape XML entities, then turn every
  # newline that is not followed by a timestamp (and is not the final
  # newline) into "<br/>" so that a multi-line message ends up on one line.
  def cleanup(text)
    text.tr!("\r", '')
    # Escape entities since this will be in XML. This has to happen before
    # the <br/> tags are inserted, otherwise they would be escaped as well.
    text.gsub!('&', '&amp;') # escape '&' first
    text.gsub!('<', '&lt;')
    text.gsub!('>', '&gt;')
    text.gsub!('"', '&quot;')
    text.gsub!("'", '&apos;')
    # Replace newlines with "<br/>" unless they end a chat line.
    text.gsub!(/\n(?!#{@timestamp_rx}|\Z)/, '<br/>')
    text
  end
end
368
+
369
+ # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead
370
+ # of using this class directly.
371
# Parser for HTML (.htm/.html) Pidgin logs.
#
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead
# of using this class directly.
class HtmlLogParser < BasicParser
  def initialize(src_path, user_aliases)
    super(src_path, user_aliases)
    @timestamp_rx = '\(((?:\d{4}-\d{2}-\d{2} )?\d{1,2}:\d{1,2}:\d{1,2}(?: [AP]M)?)\)'

    # @line_regex matches a line in an HTML log file other than the
    # first. The time matches on either "2008-11-17 14:12" or "14:12".
    # @line_regex match obj:
    # 0: timestamp, extended or not
    # 1: screen name or alias, if alias set
    # 2: "&lt;AUTO-REPLY&gt;" or nil
    # 3: message body
    # The ":" is optional to allow for strings like "(17:12:21) <b>***Gabe B-W</b> is confused<br/>"
    @line_regex = /#{@timestamp_rx} ?<b>(.+?) ?(&lt;AUTO-REPLY&gt;)?:?<\/b> ?(.+)<br ?\/>/o
    # @line_regex_status matches a status line
    # @line_regex_status match obj:
    # 0: timestamp
    # 1: status message
    @line_regex_status = /#{@timestamp_rx} ?<b> (.+)<\/b><br ?\/>/o
  end

  #################
  private
  #################

  # Returns a cleaned string (+text+ is also modified in place).
  # Removes the following tags from _text_:
  # * html
  # * body
  # * font
  # * a with no innertext, e.g. <a href="blah"></a>
  # And removes the following style declarations:
  # * color: #000000 (just turns text black)
  # * font-family
  # * font-size
  # * background
  # <em> is changed to <span style="font-style: italic;">.
  # Spans left without any declaration are removed (but the text inside
  # them is preserved); empty spans are removed entirely.
  def cleanup(text)
    # Sometimes this is in there. I don't know why.
    text.gsub!(%r{&lt;/FONT HSPACE='\d'>}, '')
    # We can remove <font> safely since Pidgin and Adium both show bold
    # using <span style="font-weight: bold;"> except Pidgin uses single
    # quotes while Adium uses double quotes.
    text.gsub!(/<\/?(?:html|body|font)(?: .+?)?>/, '') # very important!

    text.tr!("\r", '')
    # Remove empty lines (runs of any length, not just pairs)
    text.gsub!(/\n{2,}/, "\n")

    # Remove newlines that end the file, since they screw up the
    # newline -> <br/> conversion
    text.gsub!(/\n\Z/, '')

    # Replace newlines with "<br/>" unless they end a chat line.
    # This must go after we remove <font> tags.
    text.gsub!(/\n(?!#{@timestamp_rx})/, '<br/>')

    # These empty links are sometimes appended to every line in a chat,
    # for some weird reason. Remove them. The href may be quoted with
    # either kind of quote; the value is not allowed to contain a quote so
    # the match cannot run on into a later link.
    text.gsub!(%r{<a href=(['"])[^'"]*\1>\s*</a>}, '')

    # Replace single quotes inside tags with double quotes so we can
    # easily change single quotes to entities.
    # For spans, removes a space after the final declaration if it exists.
    text.gsub!(/<span style='([^']+?;) ?'>/, '<span style="\1">')
    text.gsub!(/([a-z]+=)'(.+?)'/, '\1"\2"')
    text.gsub!("'", '&apos;')

    text.gsub!(%r{<span style="(.+?)">(.*?)</span>}) do
      # style = style declaration
      # innertext = text inside <span>
      style, innertext = $1, $2
      # Remove empty spans.
      next '' if innertext == ''

      kept = style.split(/; ?/).map { |decl| kept_style_declaration(decl) }.compact
      if kept.empty?
        innertext
      else
        "<span style=\"#{kept.join('; ')};\">#{innertext}</span>"
      end
    end

    # Pidgin uses <em>, Adium uses <span>
    if text.gsub!('<em>', '<span style="font-style: italic;">')
      text.gsub!('</em>', '</span>')
    end
    text
  end

  # Decides what to do with one CSS declaration taken from a span's style
  # attribute. Returns the declaration to keep (possibly trimmed), or nil
  # if it should be dropped.
  def kept_style_declaration(declaration)
    if declaration =~ /^color/
      # Regarding the bit with the ">", sometimes this happens:
      # <span style="color: #000000>today;">today was busy</span>
      # Then declaration = "color: #000000>today"
      # Or it can end in ">;", with no text before the semicolon.
      # So remove the ">" and anything following it.
      return nil if declaration =~ /color: #000000/

      # Keep the color but remove the bit after it. Colors not written as
      # six hex digits are dropped.
      declaration =~ /(color: #[0-9a-fA-F]{6})(>.*)?/ ? $1 : nil
    elsif declaration =~ /^(?:font-family|font-size|background)/
      nil
    else
      # Everything else (e.g. font-weight) is kept as is.
      declaration
    end
  end
end
496
+
497
+ # A holding object for each line of the chat. It is subclassed as
498
+ # appropriate (eg AutoReplyMessage). Each subclass (but not Message
499
+ # itself) has its own to_s which prints out its information in a format
500
+ # appropriate for putting in an Adium log file.
501
+ # Subclasses: XMLMessage, AutoReplyMessage, StatusMessage, Event.
502
# Base holder for one line of the chat: who sent it, when, and under which
# alias. Defines no to_s of its own; the subclasses XMLMessage,
# AutoReplyMessage, StatusMessage and Event each provide one.
class Message
  attr_accessor :sender, :time, :buddy_alias

  def initialize(sender, time, buddy_alias)
    @sender, @time, @buddy_alias = sender, time, buddy_alias
  end
end
510
+
511
+ # Basic message with body text (as opposed to pure status messages, which
512
+ # have no body).
513
# Basic message with body text (as opposed to pure status messages, which
# have no body).
class XMLMessage < Message
  include Pidgin2Adium

  attr_accessor :body

  # The body is normalized first and only then wrapped into @styled_body,
  # so that what to_s emits is the normalized text.
  def initialize(sender, time, buddy_alias, body)
    super(sender, time, buddy_alias)
    @body = body
    normalize_body!
    @styled_body = "<div><span style=\"font-family: Helvetica; font-size: 12pt;\">#{@body}</span></div>"
  end

  # Returns the <message> element for this line, terminated by a newline.
  def to_s
    sprintf("<message sender=\"%s\" time=\"%s\" alias=\"%s\">%s</message>\n",
            @sender, @time, @buddy_alias, @styled_body)
  end

  #################
  private
  #################

  # Balances mismatched tags, normalizes body style, and fixes actions
  # so they are in Adium style (Pidgin uses "***Buddy waves at you", Adium uses
  # "*Buddy waves at you*").
  def normalize_body!
    normalize_body_entities!
    # Fix mismatched tags, per message.
    # NOTE(review): balance_tags is not defined in this file; presumably it
    # comes from pidgin2adium/balance_tags via the Pidgin2Adium mixin.
    @body = balance_tags(@body)
    if @buddy_alias[0, 3] == '***'
      # "***<alias>" is what pidgin sets as the alias for a /me action
      @buddy_alias = @buddy_alias[3..-1]
      @body = "*#{@body}*"
    end
  end

  # Escapes every '&' that does not start an entity: one of the five
  # predefined XML entities or a numeric character reference, ending in ';'.
  def normalize_body_entities!
    @body = @body.gsub(/&(?!(?:lt|gt|amp|quot|apos|#\d+|#x[0-9a-fA-F]+);)/, '&amp;')
  end
end
553
+
554
+ # An auto reply message.
555
# An auto reply message. Rendered like an XMLMessage, with an added
# auto="true" attribute.
class AutoReplyMessage < XMLMessage
  def to_s
    template = '<message sender="%s" time="%s" auto="true" alias="%s">%s</message>' + "\n"
    template % [@sender, @time, @buddy_alias, @styled_body]
  end
end
561
+
562
+ # A message saying e.g. "Blahblah has gone away."
563
# A message saying e.g. "Blahblah has gone away."
# Carries a status string instead of a body.
class StatusMessage < Message
  attr_accessor :status

  def initialize(sender, time, buddy_alias, status)
    super(sender, time, buddy_alias)
    @status = status
  end

  # Returns the self-closing <status> element, terminated by a newline.
  def to_s
    template = '<status type="%s" sender="%s" time="%s" alias="%s"/>' + "\n"
    template % [@status, @sender, @time, @buddy_alias]
  end
end
574
+
575
+ # Pidgin does not have Events, but Adium does. Pidgin mostly uses system
576
+ # messages to display what Adium calls events. These include sending a file,
577
+ # starting a Direct IM connection, or an error in chat.
578
# Pidgin does not have Events, but Adium does. Pidgin mostly uses system
# messages to display what Adium calls events. These include sending a file,
# starting a Direct IM connection, or an error in chat.
class Event < XMLMessage
  attr_accessor :event_type

  def initialize(sender, time, buddy_alias, body, event_type)
    super(sender, time, buddy_alias, body)
    @event_type = event_type
  end

  # Returns the <event> element for this line. Like the other message
  # classes' to_s it ends with a newline, so consecutive lines (and the
  # closing </chat> tag) do not run together.
  def to_s
    sprintf("<event type=\"%s\" sender=\"%s\" time=\"%s\" alias=\"%s\">%s</event>\n",
            @event_type, @sender, @time, @buddy_alias, @styled_body)
  end
end
590
+ end # end module