pidgin2adium 1.0.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,68 @@
1
+ require 'pidgin2adium'
2
+
3
module Pidgin2Adium
  # An easy way to batch-process a directory. Used by the pidgin2adium
  # command-line script.
  class LogConverter
    include Pidgin2Adium

    # pidgin_log_dir:: directory to search for Pidgin logs. It is expanded
    #                  with File.expand_path before use.
    # aliases::        handed to parse_and_generate unchanged.
    #
    # You can add options using the _opts_ hash, which can have the
    # following keys, all of which are optional:
    # * *overwrite*: If true, then overwrite even if log is found.
    #   Defaults to false.
    # * *output_dir*: The top-level dir to put the logs in.
    #   Logs under output_dir are still each in their own folders, etc.
    #   Defaults to Pidgin2Adium::ADIUM_LOG_DIR
    #
    # Raises Errno::ENOENT (whose message names the expanded path) if
    # _pidgin_log_dir_ does not exist or is not a directory.
    def initialize(pidgin_log_dir, aliases, opts = {})
      # parse_and_generate will process it for us
      @opts = opts
      @pidgin_log_dir = File.expand_path(pidgin_log_dir)
      @my_aliases = aliases

      return if File.directory?(@pidgin_log_dir)

      puts "Source directory #{@pidgin_log_dir} does not exist or is not a directory."
      # Attach the path so the exception is useful to callers that rescue it.
      raise Errno::ENOENT, @pidgin_log_dir
    end

    # Runs Pidgin2Adium::parse_and_generate on every log file in directory
    # provided in new, then deletes Adium's search indexes to force
    # it to rescan logs on startup.
    #
    # Raises Errno::EACCES if the log directory cannot be read.
    def start
      log_msg "Begin converting."
      begin
        files_path = get_all_chat_files(@pidgin_log_dir)
      rescue Errno::EACCES => bang
        error("Sorry, permission denied for getting Pidgin chat files from #{@pidgin_log_dir}.")
        error("Details: #{bang.message}")
        # Re-raise the original exception so its message and backtrace survive.
        raise
      end

      total_files = files_path.size
      total_successes = 0
      log_msg("#{total_files} files to convert.")
      files_path.each_with_index do |fname, i|
        log_msg(sprintf("[%d/%d] Converting %s...", i + 1, total_files, fname))
        result = parse_and_generate(fname, @my_aliases, @opts)
        # Only a literal true counts as a success; any other return value
        # (false included) does not.
        total_successes += 1 if result == true
      end

      delete_search_indexes()

      log_msg "Finished converting! Converted #{total_successes} files of #{total_files} total."
    end

    ###########
    private
    ###########

    # Returns the paths of every .htm, .html and .txt file below _dir_
    # (searched recursively), minus any entry listed in BAD_DIRS.
    # Returns [] when _dir_ itself is a ".system" directory.
    def get_all_chat_files(dir)
      return [] if File.basename(dir) == ".system"
      # Search the directory that was asked for, not always the top-level one.
      Dir.glob("#{dir}/**/*.{htm,html,txt}") - BAD_DIRS
    end
  end
end
@@ -0,0 +1,101 @@
1
+ # ADD DOCUMENTATION
2
+
3
+ require 'fileutils'
4
+
5
module Pidgin2Adium
  # A holding object for the result of BasicParser#parse. It makes the
  # instance variable @chat_lines available, which is an array of objects
  # which each have at least the instance variables _sender_, _time_, and
  # _buddy_alias_ available. Some objects in @chat_lines have more variables
  # available, specifically:
  # * XMLMessage, AutoReplyMessage, and Event:: _body_
  # * Event:: _event_type_
  # * StatusMessage:: _status_
  class LogFile
    include Pidgin2Adium

    # key is for Pidgin, value is for Adium
    # Just used for <service>.<screenname> in directory structure
    SERVICE_NAME_MAP = {
      'aim'    => 'AIM',
      'jabber' => 'jabber',
      'gtalk'  => 'GTalk',
      'icq'    => 'ICQ',
      'qq'     => 'QQ',
      'msn'    => 'MSN',
      'yahoo'  => 'Yahoo'
    }.freeze

    # Matches the "<offset>).chatlog" tail of a log directory name, e.g.
    # "-0700).chatlog".
    TIME_ZONE_SUFFIX = /[-+]\d{4}\)\.chatlog\z/

    attr_reader :chat_lines, :service, :user_SN, :partner_SN, :adium_chat_time_start

    # chat_lines::            array of message objects; each must respond to to_s.
    # service::               Pidgin's name for the service (case-insensitive).
    # user_SN, partner_SN::   screen names used in the output path.
    # adium_chat_time_start:: start time, already in Adium's format.
    def initialize(chat_lines, service, user_SN, partner_SN, adium_chat_time_start)
      @chat_lines = chat_lines
      @user_SN = user_SN
      @partner_SN = partner_SN
      @adium_chat_time_start = adium_chat_time_start

      # @chat_str is generated when to_s is called
      @chat_str = nil

      # Keep the name as given when the service is not in the map, so that
      # the output directory never starts with an empty service name.
      @service = SERVICE_NAME_MAP[service.downcase] || service
    end

    # Returns contents of log file (every chat line's to_s, concatenated).
    # The result is built once and cached.
    def to_s
      @chat_str ||= @chat_lines.map { |l| l.to_s }.join
    end

    # Calls the given block once for each chat line.
    def each(&blk)
      @chat_lines.each(&blk)
    end

    # Set overwrite=true to create a logfile even if logfile already exists.
    # Returns one of:
    # * false (if an error occurred),
    # * Pidgin2Adium::FILE_EXISTS if the file to be generated already exists and overwrite=false, or
    # * the path to the new Adium log file.
    def write_out(overwrite = false, output_dir_base = ADIUM_LOG_DIR)
      log_name = "#{@partner_SN} (#{@adium_chat_time_start})"
      # output_dir_base + "/<service>.<user>/buddyname/buddyname (2009-08-04T18.38.50-0700).chatlog"
      output_dir = File.join(output_dir_base, "#{@service}.#{@user_SN}", @partner_SN, "#{log_name}.chatlog")
      # output_dir + "/buddyname (2009-08-04T18.38.50-0700).xml"
      output_path = output_dir + '/' + "#{log_name}.xml"

      if File.exist?(output_path)
        return FILE_EXISTS unless overwrite
      elsif overwrite
        # File doesn't exist, but maybe it does with a different time
        # zone. If so, overwrite that one instead of creating a duplicate.
        other = find_log_in_other_time_zone(output_dir)
        if other
          output_path = other
          output_dir = File.dirname(other)
        end
      end

      begin
        FileUtils.mkdir_p(output_dir)
      rescue => bang
        error "Could not create destination directory for log file. (Details: #{bang.class}: #{bang.message})"
        return false
      end

      begin
        outfile = File.new(output_path, 'w')
      rescue => bang
        error "Could not open log file for writing. (Details: #{bang.class}: #{bang.message})"
        return false
      end

      begin
        # no \n before </chat> because @chat_str (from to_s) has it already
        outfile.printf("<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n" +
                       "<chat xmlns=\"http://purl.org/net/ulf/ns/0.4-02\" account=\"%s\" service=\"%s\">\n" +
                       "%s</chat>",
                       @user_SN, @service, self.to_s)
      ensure
        # Close the handle even if writing fails.
        outfile.close
      end

      output_path
    end

    ###########
    private
    ###########

    # Looks next to _output_dir_ for a .chatlog directory whose name differs
    # only in its time zone offset and which already contains its .xml log.
    # Returns the path of that .xml file, or nil if there is none.
    def find_log_in_other_time_zone(output_dir)
      parent = File.dirname(output_dir)
      wanted = File.basename(output_dir)
      return nil unless File.directory?(parent) && wanted =~ TIME_ZONE_SUFFIX

      stem = wanted.sub(TIME_ZONE_SUFFIX, '')
      Dir.entries(parent).sort.each do |entry|
        next if entry == wanted
        next unless entry =~ TIME_ZONE_SUFFIX && entry.sub(TIME_ZONE_SUFFIX, '') == stem
        candidate = File.join(parent, entry, entry.sub(/\.chatlog\z/, '.xml'))
        return candidate if File.exist?(candidate)
      end
      nil
    end
  end
end
@@ -0,0 +1,590 @@
1
+ # Contains the class BasicParser and its subclasses, HtmlLogParser and
2
+ # TextFileParser, which parse the file passed into it and return a LogFile
3
+ # object.
4
+ #
5
+ # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
6
+ # using these classes directly.
7
+ require 'parsedate'
8
+
9
+ require 'pidgin2adium/balance_tags'
10
+ require 'pidgin2adium/log_file'
11
+
12
+ module Pidgin2Adium
13
# Raised by BasicParser#pre_parse when the first line of a log does not
# contain the "Conversation with ... at ... on ... (...)" header. It carries
# no behaviour of its own.
class InvalidFirstLineError < StandardError
end
16
+
17
+ # BasicParser is a base class. Its subclasses are TextLogParser and
18
+ # HtmlLogParser.
19
+ #
20
+ # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
21
+ # using this class directly.
22
+ class BasicParser
23
+ include Pidgin2Adium
24
# Reads the log at _src_path_ and extracts the header information that
# parse needs.
#
# src_path::     path of the Pidgin log file.
# user_aliases:: comma-separated string of the user's aliases.
#
# If the first line of the file is not a valid header (which includes an
# empty file), an error is reported, @first_line_is_valid is set to false
# and the remaining set-up is skipped; parse then returns false.
def initialize(src_path, user_aliases)
  @src_path = src_path
  raw_aliases = user_aliases.split(',')
  # Whitespace is removed for easy matching later on.
  @user_aliases = raw_aliases.map { |x| x.downcase.gsub(/\s+/, '') }.uniq
  # @user_alias is set each time get_sender_by_alias is called. It is a
  # non-normalized alias.
  # Set an initial value just in case the first message doesn't give
  # us an alias.
  @user_alias = raw_aliases[0]

  @tz_offset = get_time_zone_offset()

  # The block form closes the file even if reading fails. gets returns nil
  # (instead of raising EOFError like readline) for an empty file, which
  # then fails the first-line check below like any other bad header.
  File.open(@src_path, 'r') do |file|
    @first_line = file.gets.to_s
    @file_content = file.read.to_s
  end

  # Time regexes must be set before pre_parse().
  # "4/18/2007 11:02:00 AM" => %w{4, 18, 2007, 11, 02, 00, AM}
  # ONLY used (if at all) in first line of chat ("Conversation with...at...")
  @time_regex_first_line = %r{(\d{1,2})/(\d{1,2})/(\d{4}) (\d{1,2}):(\d{2}):(\d{2}) ([AP]M)}
  # "2007-04-17 12:33:13" => %w{2007, 04, 17, 12, 33, 13}
  @time_regex = /(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})/
  # sometimes a line in a chat doesn't have a full timestamp
  # "04:22:05 AM" => %w{04 22 05 AM}
  @minimal_time_regex = /(\d{1,2}):(\d{2}):(\d{2})( [AP]M)?/

  # Whether or not the first line is parseable.
  @first_line_is_valid = true
  begin
    # @basic_time_info is for files that only have the full timestamp at
    # the top; we can use it to fill in the minimal per-line timestamps.
    # It has only 3 elements (year, month, dayofmonth) because you should
    # be able to fill everything else in. If you can't, something's wrong.
    # @adium_chat_time_start is when the chat started, in Adium's format.
    @service, @user_SN, @partner_SN, @basic_time_info, @adium_chat_time_start = pre_parse()
  rescue InvalidFirstLineError
    @first_line_is_valid = false
    error("Parsing of #{@src_path} failed (could not find valid first line).")
    return # stop processing
  end

  # @status_map, @lib_purple_events, and @events are used in
  # create_status_or_event_msg
  @status_map = {
    /(.+) logged in\.$/ => 'online',
    /(.+) logged out\.$/ => 'offline',
    /(.+) has signed on\.$/ => 'online',
    /(.+) has signed off\.$/ => 'offline',
    /(.+) has gone away\.$/ => 'away',
    /(.+) is no longer away\.$/ => 'available',
    /(.+) has become idle\.$/ => 'idle',
    /(.+) is no longer idle\.$/ => 'available'
  }

  # lib_purple_events are all of event_type libPurple
  @lib_purple_events = [
    # file transfer
    /Starting transfer of .+ from (.+)/,
    /^Offering to send .+ to (.+)$/,
    /(.+) is offering to send file/,
    /^Transfer of file .+ complete$/,
    # The verbs are grouped so that the alternation does not split the
    # whole pattern into "Error reading", "writing" and "accessing ...".
    /Error (?:reading|writing|accessing) .+: .+/,
    /You cancelled the transfer of/,
    /File transfer cancelled/,
    /(.+) cancelled the transfer of/,
    /(.+) cancelled the file transfer/,
    # Direct IM - actual (dis)connect events are their own types
    /^Attempting to connect to (.+) at .+ for Direct IM\./,
    /^Asking (.+) to connect to us at .+ for Direct IM\./,
    /^Attempting to connect via proxy server\.$/,
    /^Direct IM with (.+) failed/,
    # encryption
    /Received message encrypted with wrong key/,
    /^Requesting key\.\.\.$/,
    /^Outgoing message lost\.$/,
    /^Conflicting Key Received!$/,
    /^Error in decryption- asking for resend\.\.\.$/,
    /^Making new key pair\.\.\.$/,
    # file transfer
    /You canceled the transfer of/,
    /(.+?) canceled the transfer of/,
    # sending errors
    /^Last outgoing message not received properly- resetting$/,
    /Resending\.\.\./,
    # connection errors
    /Lost connection with the remote user:.+/,
    # chats
    /^.+ entered the room\.$/,
    /^.+ left the room\.$/
  ]

  # non-libpurple events
  # Each key maps to an event_type string. The keys will be matched against a line of chat
  # and the partner's alias will be in regex group 1, IF the alias is matched.
  @event_map = {
    # .+ is not an alias, it's a proxy server so no grouping
    /^Attempting to connect to .+\.$/ => 'direct-im-connect',
    # NB: pidgin doesn't track when Direct IM is disconnected, AFAIK
    /^Direct IM established$/ => 'directIMConnected',
    /Unable to send message/ => 'chat-error',
    /You missed .+ messages from (.+) because they were too large/ => 'chat-error',
    /User information not available/ => 'chat-error'
  }

  @ignore_events = [
    # Adium ignores SN/alias changes.
    # The trailing <br/> is optional: the status text captured from HTML
    # logs ends before the tag.
    /^.+? is now known as .+?\.(?:<br\/?>)?$/
  ]
end
139
+
140
# Converts the log body read by initialize into message objects.
#
# Returns a LogFile instance, or false if an error occurred: either the
# first line of the log was invalid, or a line matched neither @line_regex
# nor @line_regex_status.
def parse
  return false unless @first_line_is_valid

  messages = []
  cleanup(@file_content).split("\n").each do |line|
    # Skip blank lines, including completely empty ones.
    next if line.strip.empty?

    if line =~ @line_regex
      msg = create_msg($~.captures)
    elsif line =~ @line_regex_status
      msg = create_status_or_event_msg($~.captures)
    else
      error "Could not parse line:"
      p line
      # Only abort the whole process when debugging; otherwise report the
      # failure to the caller as documented.
      exit 1 if $DEBUG
      return false
    end
    # nil means the line is deliberately ignored.
    messages << msg unless msg.nil?
  end

  @file_content = messages
  LogFile.new(@file_content, @service, @user_SN, @partner_SN, @adium_chat_time_start)
end
159
+
160
+ #################
161
+ private
162
+ #################
163
+
164
# Extracts the time zone offset (e.g. "-0800") from the log's file name,
# where it sits directly before a three-letter zone name and the file
# extension ("...-0800PST.html").
# Returns '' if @src_path contains no such offset.
def get_time_zone_offset()
  tz_match = /([-\+]\d+)[A-Z]{3}\.(?:txt|htm|html)/.match(@src_path)
  # Test the match explicitly rather than rescuing whatever goes wrong.
  tz_match ? tz_match[1] : ''
end
169
+
170
#--
# Adium time format: YYYY-MM-DD\THH.MM.SS[+-]TZ_HRS like:
# 2008-10-05T22.26.20-0800
#++
# Converts a pidgin datestamp to an Adium one.
#
# time::          the timestamp text. Accepted forms, tried in this order:
#                 "2007-04-17 12:33:13", then (only if _is_first_line_)
#                 "4/18/2007 11:02:00 AM", then a bare "04:22:05" with an
#                 optional " AM"/" PM", whose date comes from
#                 @basic_time_info.
# is_first_line:: true when _time_ comes from the log's header line.
#
# Anything else is reported and handed to ParseDate.parsedate.
# The result always ends in @tz_offset.
def create_adium_time(time, is_first_line = false)
  if time =~ @time_regex
    year, month, day, hour, min, sec = $~.captures.map { |part| part.to_i }
  elsif is_first_line and time =~ @time_regex_first_line
    match = $~
    month, day, year, hour, min, sec = match.captures[0, 6].map { |part| part.to_i }
    hour = to_24_hour(hour, match[7])
  elsif time =~ @minimal_time_regex
    # "04:22:05" => %w{04 22 05}
    match = $~
    year, month, day = @basic_time_info
    hour = to_24_hour(match[1].to_i, match[4])
    min = match[2].to_i
    sec = match[3].to_i
  else
    error("You have found an odd timestamp. Please report it to the developer.")
    log_msg("The timestamp: #{time}")
    log_msg("Continuing...")
    year, month, day, hour, min, sec = ParseDate.parsedate(time)
  end
  Time.local(year, month, day, hour, min, sec).strftime("%Y-%m-%dT%H.%M.%S#{@tz_offset}")
end

# Converts a 12-hour clock _hour_ to 24-hour form. _meridian_ is "AM",
# "PM" or nil, and may carry surrounding whitespace (@minimal_time_regex
# captures it as " PM"). With no meridian the hour is returned unchanged.
def to_24_hour(hour, meridian)
  case meridian.to_s.strip
  when 'PM' then hour == 12 ? 12 : hour + 12
  when 'AM' then hour == 12 ? 0 : hour
  else hour
  end
end
212
+
213
# Extract required data from the file's first line. Called by initialize.
#
# The first line is special. It tells us (in order of regex groups):
# 1) who we're talking to
# 2) what time/date
# 3) what SN we used
# 4) what protocol (AIM, icq, jabber...)
#
# Returns [service, user_SN, partner_SN, basic_time_info,
# adium_chat_time_start], where basic_time_info is [year, month, day] (or
# nil if the line contains neither supported timestamp format).
# Raises InvalidFirstLineError if the line is not a conversation header.
def pre_parse
  first_line_match = /Conversation with (.+?) at (.+?) on (.+?) \((.+?)\)/.match(@first_line)
  raise InvalidFirstLineError if first_line_match.nil?

  partner_SN, pidgin_chat_time_start, raw_user_SN, service = first_line_match.captures
  # @user_SN is normalized to avoid "AIM.name" and "AIM.na me" folders
  user_SN = raw_user_SN.downcase.tr(' ', '')

  # "when ... then" is accepted by every Ruby version; the older
  # "when ...:" form is a syntax error from Ruby 1.9 on.
  basic_time_info = case @first_line
                    when @time_regex then [$1.to_i, $2.to_i, $3.to_i]
                    when @time_regex_first_line then [$3.to_i, $1.to_i, $2.to_i]
                    end

  adium_chat_time_start = create_adium_time(pidgin_chat_time_start, true)
  [service, user_SN, partner_SN, basic_time_info, adium_chat_time_start]
end
243
+
244
# Works out which screen name a message belongs to from the alias shown
# next to it. A leading "***" on the alias is ignored.
#
# Returns @user_SN if the alias (lower-cased, whitespace removed) is one of
# @user_aliases, and remembers the alias as written in @user_alias.
# Returns @partner_SN otherwise.
def get_sender_by_alias(alias_name)
  shown_alias = alias_name.sub(/^\*{3}/, '')
  normalized = shown_alias.downcase.gsub(/\s+/, '')
  return @partner_SN unless @user_aliases.include?(normalized)

  # Set the current alias being used of the ones in @user_aliases
  @user_alias = shown_alias
  @user_SN
end
254
+
255
#--
# create_msg builds a message object from the captures of a match against
# @line_regex. Both TextLogParser and HtmlLogParser can use it because
# their regexes put the data at the same indexes:
# 0: timestamp
# 1: alias
# 2: auto-reply marker, or nil
# 3: message body
#++
# Returns an AutoReplyMessage if the auto-reply marker is present,
# otherwise an XMLMessage.
def create_msg(matches)
  raw_time, buddy_alias, auto_reply_marker, body = matches[0], matches[1], matches[2], matches[3]
  time = create_adium_time(raw_time)
  sender = get_sender_by_alias(buddy_alias)
  message_class = auto_reply_marker ? AutoReplyMessage : XMLMessage
  message_class.new(sender, time, buddy_alias, body)
end
276
+
277
#--
# create_status_or_event_msg takes an array of +MatchData+ captures from
# matching against @line_regex_status and returns an Event or Status.
# matches, e.g. ["22:58:00", "BuddyName logged in."]:
# 0: time
# 1: status message or event
#++
# Returns:
# * nil if the text matches one of @ignore_events (the caller drops nil),
# * a StatusMessage if it matches a key of @status_map,
# * an Event (type 'libpurpleEvent') if it matches one of
#   @lib_purple_events, or
# * an Event with the mapped type if it matches a key of @event_map.
# If nothing matches, the problem is reported and the process exits.
def create_status_or_event_msg(matches)
  time = create_adium_time(matches[0])
  str = matches[1]
  # Return nil, which will get compact'ed out
  return nil if @ignore_events.detect { |ignored| str =~ ignored }

  status_regex, status = @status_map.detect { |candidate, _status| str =~ candidate }
  if status_regex and status
    # Status message
    buddy_alias = status_regex.match(str)[1]
    sender = get_sender_by_alias(buddy_alias)
    return StatusMessage.new(sender, time, buddy_alias, status)
  end

  # Test for event
  event_regex = @lib_purple_events.detect { |candidate| str =~ candidate }
  event_type = 'libpurpleEvent' if event_regex
  unless event_regex
    # not a libpurple event, try others. detect returns the matching
    # [regex, event_type] pair itself; the regex capture variables ($1, $2)
    # hold text from the line, not that pair.
    event_regex, event_type = @event_map.detect { |candidate, _type| str =~ candidate }
  end

  unless event_regex and event_type
    error("Could not match string to status or event!")
    error(sprintf("matches: %p", matches))
    error(sprintf("str: %p", str))
    # NOTE(review): exiting from library code aborts a whole batch run;
    # kept because callers may rely on it.
    exit 1
  end

  # Event message
  regex_matches = event_regex.match(str)
  if regex_matches.size == 1
    # No alias - this means it's the user
    buddy_alias = @user_alias
    sender = @user_SN
  else
    buddy_alias = regex_matches[1]
    sender = get_sender_by_alias(buddy_alias)
  end
  Event.new(sender, time, buddy_alias, str, event_type)
end
328
+ end
329
+
330
+ # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
331
+ # using this class directly.
332
+ class TextLogParser < BasicParser
333
# Sets up the regexes that recognise the lines of a plain-text log.
# See BasicParser#initialize for the arguments.
def initialize(src_path, user_aliases)
  super(src_path, user_aliases)
  @timestamp_rx = '\((\d{1,2}:\d{1,2}:\d{1,2})\)'

  # @line_regex matches a line in a TXT log file other than the first
  # @line_regex matchdata:
  # 0: timestamp
  # 1: screen name or alias, if alias set
  # 2: the auto-reply marker or nil
  # 3: message body
  # Lines are matched after cleanup has escaped "<" and ">", so the marker
  # normally arrives as "&lt;AUTO-REPLY&gt;"; the raw form is accepted too.
  @line_regex = /#{@timestamp_rx} (.*?) ?(&lt;AUTO-REPLY&gt;|<AUTO-REPLY>)?: (.*)/
  # @line_regex_status matches a status line
  # @line_regex_status matchdata:
  # 0: timestamp
  # 1: status message
  @line_regex_status = /#{@timestamp_rx} ([^:]+)/
end
350
+
351
+ #################
352
+ private
353
+ #################
354
+
355
# Prepares the body of a plain-text log for line-by-line parsing.
# Carriage returns are removed, XML special characters are escaped, and
# newlines inside a message become "<br/>".
#
# _text_ is modified in place and also returned.
def cleanup(text)
  text.tr!("\r", '')
  # Escape entities since this will be in XML
  text.gsub!('&', '&amp;') # escape '&' first
  text.gsub!('<', '&lt;')
  text.gsub!('>', '&gt;')
  text.gsub!('"', '&quot;')
  text.gsub!("'", '&apos;')
  # Replace newlines with "<br/>" unless they end a chat line.
  # This must come after the escaping above, otherwise the tag itself
  # would be turned into "&lt;br/&gt;".
  text.gsub!(/\n(?!#{@timestamp_rx}|\Z)/, '<br/>')
  text
end
367
+ end
368
+
369
+ # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead
370
+ # of using this class directly.
371
+ class HtmlLogParser < BasicParser
372
# Sets up the regexes that recognise the lines of an HTML log.
# See BasicParser#initialize for the arguments.
def initialize(src_path, user_aliases)
  super

  # A parenthesised timestamp: an optional "2008-11-17 " date, the time,
  # and an optional " AM"/" PM". Everything inside the parentheses is
  # captured as one group.
  @timestamp_rx = '\(((?:\d{4}-\d{2}-\d{2} )?\d{1,2}:\d{1,2}:\d{1,2}(?: [AP]M)?)\)'

  # @line_regex_status matches a status line
  # @line_regex_status match obj:
  # 0: timestamp
  # 1: status message
  @line_regex_status = /#{@timestamp_rx} ?<b> (.+)<\/b><br ?\/>/o

  # @line_regex matches a line in an HTML log file other than the first
  # @line_regex match obj:
  # 0: timestamp, extended or not
  # 1: screen name or alias, if alias set
  # 2: "&lt;AUTO-REPLY&gt;" or nil
  # 3: message body
  # The ":" is optional to allow for strings like "(17:12:21) <b>***Gabe B-W</b> is confused<br/>"
  @line_regex = /#{@timestamp_rx} ?<b>(.+?) ?(&lt;AUTO-REPLY&gt;)?:?<\/b> ?(.+)<br ?\/>/o
end
391
+
392
+ #################
393
+ private
394
+ #################
395
+
396
# Returns a cleaned string. _text_ is modified in place.
# Removes the following tags from _text_:
# * html
# * body
# * font
# * a with no innertext, e.g. <a href="blah"></a>
# And removes the following style declarations:
# * color: #000000 (just turns text black)
# * font-family
# * font-size
# * background
# Other declarations, such as font-weight, are kept. A span that is left
# without any declaration is removed, but the text inside it is preserved.
# <em> is changed to <span style="font-style: italic;">.
def cleanup(text)
  # Sometimes this is in there. I don't know why.
  text.gsub!(%r{&lt;/FONT HSPACE='\d'>}, '')
  # We can remove <font> safely since Pidgin and Adium both show bold
  # using <span style="font-weight: bold;"> except Pidgin uses single
  # quotes while Adium uses double quotes.
  text.gsub!(/<\/?(?:html|body|font)(?: .+?)?>/, '') # very important!

  text.tr!("\r", '')
  # Remove empty lines
  text.gsub!("\n\n", "\n")

  # Remove newlines that end the file, since they screw up the
  # newline -> <br/> conversion
  text.gsub!(/\n\Z/, '')

  # Replace newlines with "<br/>" unless they end a chat line.
  # This must go after we remove <font> tags.
  text.gsub!(/\n(?!#{@timestamp_rx})/, '<br/>')

  # These empty links are sometimes appended to every line in a chat,
  # for some weird reason. Remove them. The href may use either quote
  # character; it must not span tags, so that a link with text in front of
  # an empty link is never swallowed together with it.
  text.gsub!(%r{<a href=(['"])[^'"<>]*\1>\s*</a>}, '')

  # Replace single quotes inside tags with double quotes so we can
  # easily change single quotes to entities.
  # For spans, removes a space after the final declaration if it exists.
  text.gsub!(/<span style='([^']+?;) ?'>/, '<span style="\1">')
  # Covers every other attribute (href, src, alt, ...).
  text.gsub!(/([a-z]+=)'(.+?)'/, '\1"\2"')
  text.gsub!("'", '&apos;')

  text.gsub!(%r{<span style="(.+?)">(.*?)</span>}) do
    # style = style declaration
    # innertext = text inside <span>
    style, innertext = $1, $2
    # TODO: replace double quotes with "&quot;", but only outside tags; may still be tags inside spans

    if innertext == ''
      # Remove empty spans.
      ''
    else
      kept = style.split(/; ?/).map { |decl| filter_style_declaration(decl) }.compact
      if kept.empty?
        innertext
      else
        "<span style=\"#{kept.join('; ')};\">#{innertext}</span>"
      end
    end
  end

  # Pidgin uses <em>, Adium uses <span>
  if text.gsub!('<em>', '<span style="font-style: italic;">')
    text.gsub!('</em>', '</span>')
  end
  text
end

# Decides what happens to one style declaration (e.g. "font-size: small").
# Returns the declaration to keep, or nil if it should be dropped.
def filter_style_declaration(decl)
  case decl
  when /^color/
    # Regarding the bit with the ">", sometimes this happens:
    # <span style="color: #000000>today;">today was busy</span>
    # Then decl = "color: #000000>today"
    # Or it can end in ">;", with no text before the semicolon.
    # So keep only the colour itself. Black is the default and is dropped.
    return nil if decl =~ /color: #000000/
    decl =~ /(color: #[0-9a-fA-F]{6})(>.*)?/ ? $1 : nil
  when /^font-family/, /^font-size/, /^background/
    nil
  else
    # don't remove font-weight (or anything else we don't know about)
    decl
  end
end
495
+ end
496
+
497
# Base class for one line of a chat. It only stores who sent the line, when,
# and under which alias; it defines no to_s of its own. The subclasses
# (XMLMessage, AutoReplyMessage, StatusMessage, Event) each add a to_s that
# renders the line in the format used in an Adium log file.
class Message
  attr_accessor :sender, :time, :buddy_alias

  def initialize(sender, time, buddy_alias)
    @sender, @time, @buddy_alias = sender, time, buddy_alias
  end
end
510
+
511
# Basic message with body text (as opposed to pure status messages, which
# have no body).
class XMLMessage < Message
  include Pidgin2Adium

  attr_accessor :body

  # _body_ is normalized (see normalize_body!) before it is wrapped in the
  # styled markup that to_s prints. Note that _body_ and _buddy_alias_ are
  # modified in place during normalization.
  def initialize(sender, time, buddy_alias, body)
    super(sender, time, buddy_alias)
    @body = body
    normalize_body!()
    # Built after normalization, so that the escaped, tag-balanced body is
    # what ends up in the log.
    @styled_body = '<div><span style="font-family: Helvetica; font-size: 12pt;">%s</span></div>' % @body
  end

  # Returns the message as a <message> element followed by a newline.
  def to_s
    return sprintf('<message sender="%s" time="%s" alias="%s">%s</message>' << "\n",
                   @sender, @time, @buddy_alias, @styled_body)
  end

  #################
  private
  #################

  # Balances mismatched tags, normalizes body style, and fixes actions
  # so they are in Adium style (Pidgin uses "***Buddy waves at you", Adium uses
  # "*Buddy waves at you*").
  def normalize_body!
    normalize_body_entities!()
    # Fix mismatched tags. Yes, it's faster to do it per-message
    # than all at once.
    @body = balance_tags(@body)
    if @buddy_alias[0,3] == '***'
      # "***<alias>" is what pidgin sets as the alias for a /me action
      @buddy_alias.slice!(0,3)
      @body = '*' << @body << '*'
    end
  end

  # Escapes entities.
  def normalize_body_entities!
    # Convert '&' to '&amp;' unless it already starts an entity: one of the
    # five XML named entities or a numeric character reference.
    @body.gsub!(/&(?!(?:lt|gt|amp|quot|apos|#\d+|#x[0-9a-fA-F]+);)/, '&amp;')
  end
end
553
+
554
# An auto reply message. It is rendered like an XMLMessage, except that the
# <message> element also carries auto="true".
class AutoReplyMessage < XMLMessage
  # Returns the message as a <message auto="true"> element followed by a
  # newline.
  def to_s
    template = '<message sender="%s" time="%s" auto="true" alias="%s">%s</message>' + "\n"
    sprintf(template, @sender, @time, @buddy_alias, @styled_body)
  end
end
561
+
562
# A message saying e.g. "Blahblah has gone away." It has no body, only a
# status string such as the values of BasicParser's @status_map.
class StatusMessage < Message
  attr_accessor :status

  def initialize(sender, time, buddy_alias, status)
    super(sender, time, buddy_alias)
    @status = status
  end

  # Returns the message as a self-closing <status> element followed by a
  # newline.
  def to_s
    template = '<status type="%s" sender="%s" time="%s" alias="%s"/>' + "\n"
    sprintf(template, @status, @sender, @time, @buddy_alias)
  end
end
574
+
575
# Pidgin does not have Events, but Adium does. Pidgin mostly uses system
# messages to display what Adium calls events. These include sending a file,
# starting a Direct IM connection, or an error in chat.
class Event < XMLMessage
  attr_accessor :event_type

  # _event_type_ becomes the "type" attribute of the <event> element; the
  # other arguments are handled by XMLMessage.
  def initialize(sender, time, buddy_alias, body, event_type)
    super(sender, time, buddy_alias, body)
    @event_type = event_type
  end

  # Returns the event as an <event> element followed by a newline, like the
  # other message types. LogFile#write_out relies on every line ending in a
  # newline.
  def to_s
    return sprintf('<event type="%s" sender="%s" time="%s" alias="%s">%s</event>' << "\n",
                   @event_type, @sender, @time, @buddy_alias, @styled_body)
  end
end
590
+ end # end module