pidgin2adium 0.0.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -18,22 +18,22 @@ OptionParser.new do |opts|
18
18
  opts.on('-i IN_DIR', '--in IN_DIR', 'Specify directory where pidgin logs are stored') do |v|
19
19
  options[:in] = v
20
20
  end
21
- opts.on('-o OUT_DIR', '--out OUT_DIR', 'Specify directory where Adium logs will be stored (not the Adium directory in ~/Library') do |out|
21
+ opts.on('-o', '--out OUT_DIR', 'Specify directory where Adium logs will be stored (not the Adium directory in ~/Library)') do |out|
22
22
  options[:out] = out
23
23
  end
24
- opts.on('-l LIBRARY_DIR', '--libdir LIBRARY_DIR',
24
+ opts.on('-l', '--libdir LIBRARY_DIR',
25
25
  'Specify dirname where Adium logs are stored (eg "AIM.<username>" for',
26
- '~/Library/Application Support/Adium 2.0/Users/Default/Logs/AIM.<username>') do |ld|
26
+ '~/Library/Application Support/Adium 2.0/Users/Default/Logs/AIM.<username>)') do |ld|
27
27
  options[:libdir] = ld
28
28
  end
29
29
  opts.on('-d', '--debug', 'Turn debug on.') do |lf|
30
30
  options[:debug] = true
31
31
  end
32
- opts.on("--time-zone [TIME ZONE]",
32
+ opts.on('-t', "--time-zone [TIME ZONE]",
33
33
  "Set time zone like \"EST\". Defaults to local time zone: #{Time.now.zone}") do |tz|
34
34
  options[:timezone] = tz
35
35
  end
36
- opts.on('-a MY_ALIASES_AND_SNs', "--aliases MY_ALIASES_AND_SNs",
36
+ opts.on('-a', "--aliases MY_ALIASES_AND_SNs",
37
37
  "A comma-separated list of your aliases and screenname(s) so this script knows which person in a chat is you.",
38
38
  "Whitespace is removed and aliases are lowercased.") do |aliases|
39
39
  options[:aliases] = aliases.split(',')
@@ -56,12 +56,12 @@ required_opts.each do |short, long|
56
56
  end
57
57
  exit 1 if need_opts
58
58
 
59
- log_converter = Pidgin2Adium::Logs.new(src=options[:in],
60
- out = options[:out],
61
- aliases = options[:aliases],
62
- libdir = options[:libdir],
63
- tz = options[:timezone],
64
- debug = options[:debug]
59
+ log_converter = Pidgin2Adium::Logs.new(options[:in],
60
+ options[:out],
61
+ options[:aliases],
62
+ options[:libdir],
63
+ options[:timezone],
64
+ options[:debug]
65
65
  )
66
66
 
67
67
  log_converter.start
@@ -1,185 +1,53 @@
1
1
  # ADD DOCUMENTATION
2
2
  require 'pidgin2adium/balance-tags.rb'
3
- require 'hpricot'
4
3
 
5
4
  module Pidgin2Adium
6
- def Pidgin2Adium.normalizeBodyEntities!(body)
7
- # Convert '&' to '&amp;' only if it's not followed by an entity.
8
- body.gsub!(/&(?!lt|gt|amp|quot|apos)/, '&amp;')
9
- # replace single quotes with '&apos;' but only outside <span>s.
10
- parts = body.split(/(<\/?span.*?>)/)
11
- body = parts.map{ |part| part.match(/<\/?span/) ? part : part.gsub("'", '&apos;') }.join('')
12
- end
13
-
14
- def Pidgin2Adium.normalizeBody!(body, aliasStr)
15
- # Fix mismatched tags.
16
- body = Pidgin2Adium.balance_tags(body)
17
- normalizeBodyEntities!(body)
18
- if aliasStr[0,3] == '***'
19
- # "***<alias>" is what pidgin sets as the alias for a /me action
20
- aliasStr.slice!(0,3)
21
- body = '*' + body + '*'
22
- end
23
- body = '<div><span style="font-family: Helvetica; font-size: 12pt;">' +
24
- body +
25
- '</span></div>'
26
- end
27
-
28
5
  class ChatFileGenerator
29
- def initialize(service, mySN, otherPersonsSN, chatTimePidgin_start, tzOffset, masterAlias, destDirBase)
30
- # basicTimeInfo is for files that only have the full timestamp at
31
- # the top; we can use it to fill in the minimal per-line timestamps.
32
- # It has only 3 elements ([year, month, dayofmonth]) because
33
- # you should be able to fill everything else in.
34
- # If you can't, something's wrong.
35
- @basicTimeInfo = nil
36
- # @chatMessage is a 2D array composed of arrays like so (e.g.):
37
- # ['time'=>'2:23:48 PM', 'alias'=>'Me', 'status' => 'available', 'body'=>'abcdefg', auto-reply=true]
38
- @chatMessage=[]
39
- # chatTimeAdium_start format: YYYY-MM-DD\THH.MM.SS[+-]TZ_HRS like so:
40
- # 2008-10-05T22.26.20-0800
41
- @chatTimeAdium_start=nil
42
- @chatTimePidgin_start=chatTimePidgin_start
43
- @destDirBase=destDirBase
44
- @masterAlias=masterAlias
45
- @mySN=mySN
46
- @otherPersonsSN=otherPersonsSN
47
- @service=service
48
- @tzOffset=tzOffset
6
+ def initialize(service, userSN, partnerSN, adiumChatTimeStart, destDirBase)
7
+ @service = service
8
+ @userSN = userSN
9
+ @partnerSN = partnerSN
10
+ @adiumChatTimeStart = adiumChatTimeStart
11
+ @destDirBase = destDirBase
12
+
13
+ # @chatLines is an array of Message, Status, and Event objects
14
+ @chatLines = []
49
15
  # key is for Pidgin, value is for Adium
50
16
  # Just used for <service>.<screenname> in directory structure
51
- @SERVICE_NAME_MAP={'aim'=>'AIM',
52
- 'jabber'=>'jabber',
53
- 'gtalk'=>'GTalk',
17
+ @SERVICE_NAME_MAP = {'aim' => 'AIM',
18
+ 'jabber' =>'jabber',
19
+ 'gtalk'=> 'GTalk',
54
20
  'icq' => 'ICQ',
55
- 'qq'=>'QQ',
56
- 'msn'=>'MSN',
57
- 'yahoo'=>'Yahoo'}
58
- end
59
-
60
- def convert()
61
- initChatTime()
62
- return buildDomAndOutput()
21
+ 'qq' => 'QQ',
22
+ 'msn' => 'MSN',
23
+ 'yahoo' => 'Yahoo'}
63
24
  end
64
25
 
65
- def initChatTime()
66
- # ParseDate.parsedate "Tuesday, July 5th, 2007, 18:35:20 UTC"
67
- # # => [2007, 7, 5, 18, 35, 20, "UTC", 2]
68
- # [year, month, day of month, hour, minute, sec, timezone, day of week]
69
- # strtotime returns seconds since the epoch
70
- @chatTimeAdium_start = createAdiumDate(@chatTimePidgin_start)
71
- @basicTimeInfo = ParseDate.parsedate(@chatTimePidgin_start)[0..2]
72
- end
73
-
74
- # Add a line to @chatMessage.
75
- # It is its own method because attr_writer creates the method 'chatMessage=', which doesn't help for chatMessage.push
26
+ # Add a line to @chatLines.
27
+ # It is its own method because attr_writer creates the method
28
+ # 'chatLines=', which doesn't help for chatLines.push
76
29
  def appendLine(line)
77
- @chatMessage.push(line)
78
- end
79
-
80
- #
81
- def createAdiumDate(date)
82
- epochSecs = getEpochSeconds(date)
83
- if @tzOffset.nil?
84
- Pidgin2Adium.logMsg("@tzOffset is nil. This really shouldn't happen.", true)
85
- @tzOffset = "+0"
86
- end
87
- return Time.at(epochSecs).strftime("%Y-%m-%dT%H.%M.%S#{@tzOffset}")
88
- end
89
-
90
- def getEpochSeconds(timestr)
91
- parsed_date = ParseDate.parsedate(timestr)
92
- [0, 1, 2].each do |i|
93
- parsed_date[i] = @basicTimeInfo[i] if parsed_date[i].nil?
94
- end
95
- return Time.local(*parsed_date).tv_sec
30
+ @chatLines.push(line)
96
31
  end
97
32
 
98
- def getScreenNameByAlias(aliasStr)
99
- myAliasStr = aliasStr.clone
100
- myAliasStr.slice!(0,3) if myAliasStr[0,3] == '***'
101
- if aliasStr==""
102
- return ""
103
- else
104
- return @masterAlias.include?(myAliasStr.downcase.gsub(/\s*/, '')) ? @mySN : @otherPersonsSN
105
- end
106
- end
107
-
108
- # returns path of output file
109
- def buildDomAndOutput()
33
+ # Returns path of output file
34
+ def convert()
110
35
  serviceName = @SERVICE_NAME_MAP[@service.downcase]
111
- destDirReal = File.join(@destDirBase, "#{serviceName}.#{@mySN}", @otherPersonsSN, "#{@otherPersonsSN} (#{@chatTimeAdium_start}).chatlog")
36
+ destDirReal = File.join(@destDirBase, "#{serviceName}.#{@userSN}", @partnerSN, "#{@partnerSN} (#{@adiumChatTimeStart}).chatlog")
112
37
  FileUtils.mkdir_p(destDirReal)
113
- destFilePath = destDirReal + '/' + "#{@otherPersonsSN} (#{@chatTimeAdium_start}).xml"
38
+ destFilePath = destDirReal << '/' << "#{@partnerSN} (#{@adiumChatTimeStart}).xml"
114
39
  if File.exist?(destFilePath)
115
40
  return Pidgin2Adium::Logs::FILE_EXISTS
116
41
  end
117
42
 
118
- # no \n before </chat> because {body} has it already
119
- chatLogTemplate = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
120
- "<chat xmlns=\"http://purl.org/net/ulf/ns/0.4-02\" account=\"#{@mySN}\" service=\"#{serviceName}\">\n{body}</chat>"
121
-
122
43
  allMsgs = ""
123
- @chatMessage.each do |msg|
124
- # template is set to a copy of one of the three templates,
125
- # the {...} vars are subbed, and then it's added to allMsgs
126
- template = nil
127
- # Note:
128
- # away/auto message has both body and status set
129
- # pure status has status but not body set
130
- # pure message has body set but not status
131
- begin
132
- chatTimeAdium = createAdiumDate(msg['time'])
133
- rescue TypeError => bang
134
- puts '*' * 80
135
- @chatMessage.each { |m| p m }
136
- puts "Oops! Time error! on msg:"
137
- p msg
138
- puts "Rest of message is above, just below the stars."
139
- return false
140
- end
141
- sender = getScreenNameByAlias(msg['alias'])
142
- time = chatTimeAdium
143
- aliasStr = msg['alias']
144
- if msg['body']
145
- body = msg['body']
146
- if msg['status'].nil?
147
- # Body with no status
148
- if msg['auto-reply'] == true
149
- # auto-reply from away message
150
- template = AutoReplyMessage.new(sender, time, aliasStr, body)
151
- else
152
- # pure regular message
153
- template = XMLMessage.new(sender, time, aliasStr, body)
154
- end
155
- else
156
- # Body with status message
157
- template = AwayMessage.new(sender, time, aliasStr, body)
158
- end
159
- elsif msg['status']
160
- # Status message, no body
161
- template = StatusMessage.new(sender, time, aliasStr, msg['status'])
162
- else
163
- Pidgin2Adium.logMsg("msg has neither status nor body key set. Unsure what to do. msg is as follows:", true)
164
- Pidgin2Adium.logMsg(sprintf('%p', msg), true)
165
- return false
166
- end
167
- begin
168
- allMsgs += template.getOutput()
169
- rescue TypeError => bang
170
- Pidgin2Adium.logMsg "TypeError: #{bang.message}"
171
- Pidgin2Adium.logMsg "This is probably caused by an unrecognized status string."
172
- Pidgin2Adium.logMsg "Go to the file currently being worked on (displayed above) at time #{msg['time']}"
173
- Pidgin2Adium.logMsg "and add the status message there to one of the hashes in SrcHtmlFileParse.getAliasAndStatus."
174
- Pidgin2Adium.logMsg "**Debug info**"
175
- Pidgin2Adium.logMsg "msg: #{msg.inspect}"
176
- Pidgin2Adium.logMsg "--"
177
- Pidgin2Adium.logMsg "Exiting."
178
- return false
179
- end
180
- end
181
- ret = chatLogTemplate.sub("{body}", allMsgs)
182
- # xml is ok.
44
+ # TODO: inject?
45
+ @chatLines.each { |obj| allMsgs << obj.getOutput() }
46
+ # xml is done.
47
+
48
+ # no \n before </chat> because allMsgs has it already
49
+ ret = sprintf('<?xml version="1.0" encoding="UTF-8" ?>'<<"\n"+
50
+ '<chat xmlns="http://purl.org/net/ulf/ns/0.4-02" account="%s" service="%s">'<<"\n"<<'%s</chat>', @userSN, serviceName, allMsgs)
183
51
 
184
52
  # we already checked to see if the file previously existed.
185
53
  outfile = File.new(destFilePath, 'w')
@@ -187,57 +55,5 @@ module Pidgin2Adium
187
55
  outfile.close
188
56
  return destFilePath
189
57
  end
190
-
191
- # A holding object for each line of the chat.
192
- # It is subclassed as appropriate (eg AutoReplyMessage).
193
- # All Messages have senders, times, and aliases.
194
- class Message
195
- def initialize(sender, time, aliasStr)
196
- @sender = sender
197
- @time = time
198
- @aliasStr = aliasStr
199
- end
200
- end
201
-
202
- # Basic message with body text (as opposed to pure status messages which have no body).
203
- class XMLMessage < Message
204
- def initialize(sender, time, aliasStr, body)
205
- super(sender, time, aliasStr)
206
- @body = Pidgin2Adium.normalizeBody!(body, @aliasStr)
207
- end
208
-
209
- def getOutput
210
- return sprintf('<message sender="%s" time="%s" alias="%s">%s</message>' + "\n",
211
- @sender, @time, @aliasStr, @body)
212
- end
213
-
214
- end
215
-
216
- # An auto reply message, meaning it has a body.
217
- class AutoReplyMessage < XMLMessage
218
- def getOutput
219
- return sprintf('<message sender="%s" time="%s" alias="%s" auto="true">%s</message>' + "\n",
220
- @sender, @time, @aliasStr, @body)
221
- end
222
- end
223
-
224
- class AwayMessage < XMLMessage
225
- def getOutput
226
- return sprintf('<status type="away" sender="%s" time="%s" alias="%s">%s</status>' + "\n",
227
- @sender, @time, @aliasStr, @body)
228
- end
229
- end
230
-
231
- # A message saying e.g. "Blahblah has gone away."
232
- class StatusMessage < Message
233
- def initialize(sender, time, aliasStr, status)
234
- super(sender, time, aliasStr)
235
- @status = status
236
- end
237
- def getOutput
238
- return sprintf('<status type="%s" sender="%s" time="%s" alias="%s"/>' + "\n",
239
- @status, @sender, @time, @aliasStr)
240
- end
241
- end
242
58
  end
243
59
  end
@@ -1,40 +1,66 @@
1
1
  # =SrcFileParse
2
- # The class SrcFileParse has two subclasses, SrcTxtFileParse and SrcHtmlFileParse
2
+ # The class +SrcFileParse+ has 2 subclasses, +SrcTxtFileParse+ and +SrcHtmlFileParse+
3
3
  # It parses the file passed into it and extracts the following
4
4
  # from each line in the chat: time, alias, and message and/or status.
5
+
6
+ require 'parsedate'
7
+
5
8
  module Pidgin2Adium
6
- # The two subclasses of SrcFileParse,
7
- # SrcTxtFileParse and SrcHtmlFileParse, only differ
8
- # in that they have their own @line_regex, @line_regex_status,
9
- # and most importantly, createMsgData, which takes the
10
- # +MatchData+ objects from matching against @line_regex and
11
- # fits them into hashes.
9
+ # The two subclasses of +SrcFileParse+,
10
+ # +SrcTxtFileParse+ and +SrcHtmlFileParse+, only differ
11
+ # in that they have their own @lineRegex, @lineRegexStatus,
12
+ # and most importantly, createMsg and createStatusOrEventMsg, which take
13
+ # the +MatchData+ objects from matching against @lineRegex or
14
+ # @lineRegexStatus, respectively, and return object instances.
15
+ # +createMsg+ returns a +Message+ instance (or one of its subclasses).
16
+ # +createStatusOrEventMsg+ returns a +Status+ or +Event+ instance.
12
17
  class SrcFileParse
13
- def initialize(srcPath, destDirBase, masterAlias, userTZ, userTZOffset)
18
+ def initialize(srcPath, destDirBase, userAliases, userTZ, userTZOffset)
14
19
  @srcPath = srcPath
15
20
  # these two are to pass to chatFG in parseFile
16
21
  @destDirBase = destDirBase
17
- @masterAlias = masterAlias
22
+ @userAliases = userAliases
18
23
  @userTZ = userTZ
19
24
  @userTZOffset = userTZOffset
20
- # Automagically does grouping for you. Will be inserted in @line_regex{,_status}
21
- @timestamp_regex_str = '\(((?:\d{4}-\d{2}-\d{2} )?\d{1,2}:\d{1,2}:\d{1,2}(?: .{1,2})?)\)'
25
+ @tzOffset = getTimeZoneOffset()
26
+
27
+ # Used in @lineRegex{,Status}. Only one group: the entire timestamp.
28
+ @timestampRegexStr = '\(((?:\d{4}-\d{2}-\d{2} )?\d{1,2}:\d{1,2}:\d{1,2}(?: .{1,2})?)\)'
22
29
  # the first line is special: it tells us
23
30
  # 1) who we're talking to
24
31
  # 2) what time/date
25
32
  # 3) what SN we used
26
- # 4) what protocol (AIM, jabber...)
27
- @first_line_regex = /Conversation with (.*?) at (.*?) on (.*?) \((.*?)\)/s
28
- end
33
+ # 4) what protocol (AIM, icq, jabber...)
34
+ @firstLineRegex = /Conversation with (.+?) at (.+?) on (.+?) \((.+?)\)/
29
35
 
30
- # Takes the body of a line of a chat and returns the [username, status] as a 2-element array.
31
- # Example:
32
- # Pass in "Generic Screenname228 has signed off" and it returns <tt>["Generic Screenname228", "offline"]</tt>
33
- def getAliasAndStatus(str)
34
- alias_and_status = [nil, nil]
36
+ # Possible formats for timestamps:
37
+ # "2007-04-17 12:33:13" => %w{2007, 04, 17, 12, 33, 13}
38
+ @timeRegexOne = /(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})/
39
+ # "4/18/2007 11:02:00 AM" => %w{4, 18, 2007, 11, 02, 00, AM}
40
+ @timeRegexTwo = %r{(\d{1,2})/(\d{1,2})/(\d{4}) (\d{1,2}):(\d{2}):(\d{2}) ([AP]M)}
41
+ # sometimes a line in a chat doesn't have a full timestamp
42
+ # "04:22:05 AM" => %w{04 22 05 AM}
43
+ @minimalTimeRegex = /(\d{1,2}):(\d{2}):(\d{2}) ?([AP]M)?/
44
+
45
+ # {user,partner}SN set in parseFile() after reading the first line
46
+ @userSN = nil
47
+ @partnerSN = nil
48
+
49
+ # @basicTimeInfo is for files that only have the full timestamp at
50
+ # the top; we can use it to fill in the minimal per-line timestamps.
51
+ # It has only 3 elements (year, month, dayofmonth) because
52
+ # you should be able to fill everything else in.
53
+ # If you can't, something's wrong.
54
+ @basicTimeInfo = []
35
55
 
36
- # Screen name is in regex group 1.
37
- status_map = {
56
+ # @userAlias is updated whenever getSenderByAlias matches one of the user's aliases. Set an
57
+ # initial value just in case the first message doesn't give us an
58
+ # alias.
59
+ @userAlias = @userAliases[0]
60
+
61
+ # @statusMap, @libPurpleEvents, and @eventMap are used in
62
+ # createStatusOrEventMsg.
63
+ @statusMap = {
38
64
  /(.+) logged in\.$/ => 'online',
39
65
  /(.+) logged out\.$/ => 'offline',
40
66
  /(.+) has signed on\.$/ => 'online',
@@ -42,239 +68,418 @@ module Pidgin2Adium
42
68
  /(.+) has gone away\.$/ => 'away',
43
69
  /(.+) is no longer away\.$/ => 'available',
44
70
  /(.+) has become idle\.$/ => 'idle',
45
- /(.+) is no longer idle\.$/ => 'available',
46
- # file transfer
47
- /Starting transfer of .+ from (.+)/ => 'file-transfer-start',
48
- /^Offering to send .+ to (.+)$/ => 'fileTransferRequested',
49
- /(.+) is offering to send file/ => 'fileTransferRequested',
71
+ /(.+) is no longer idle\.$/ => 'available'
50
72
  }
51
73
 
52
- # statuses that come from my end. I totally made up these status names.
53
- my_status_map = {
74
+ # All @libPurpleEvents regexes produce events of eventType 'libpurpleEvent'
75
+ @libPurpleEvents = [
76
+ # file transfer
77
+ /Starting transfer of .+ from (.+)/,
78
+ /^Offering to send .+ to (.+)$/,
79
+ /(.+) is offering to send file/,
80
+ /^Transfer of file .+ complete$/,
81
+ /Error reading|writing|accessing .+: .+/,
82
+ /You cancelled the transfer of/,
83
+ /File transfer cancelled/,
84
+ /(.+) cancelled the transfer of/,
85
+ /(.+) cancelled the file transfer/,
86
+ # Direct IM - actual (dis)connect events are their own types
87
+ /^Attempting to connect to (.+) at .+ for Direct IM\./,
88
+ /^Asking (.+) to connect to us at .+ for Direct IM\./,
89
+ /^Attempting to connect via proxy server\.$/,
90
+ /^Direct IM with (.+) failed/,
54
91
  # encryption
55
- /^Received message encrypted with wrong key$/ => 'encrypt-error',
56
- /^Requesting key\.\.\.$/ => 'encrypt-error',
57
- /^Outgoing message lost\.$/ => 'encrypt-error',
58
- /^Conflicting Key Received!$/ => 'encrypt-error',
59
- /^Error in decryption- asking for resend\.\.\.$/ => 'encrypt-error',
60
- /^Making new key pair\.\.\.$/ => 'encrypt-key-create',
92
+ /Received message encrypted with wrong key/,
93
+ /^Requesting key\.\.\.$/,
94
+ /^Outgoing message lost\.$/,
95
+ /^Conflicting Key Received!$/,
96
+ /^Error in decryption- asking for resend\.\.\.$/,
97
+ /^Making new key pair\.\.\.$/,
61
98
  # file transfer - these are in this (non-used) list because you can't get the alias out of matchData[1]
62
- /^You canceled the transfer of .+$/ => 'file-transfer-cancel',
63
- /^Transfer of file .+ complete$/ => 'fileTransferCompleted',
99
+ /^You canceled the transfer of .+$/,
64
100
  # sending errors
65
- /^Last outgoing message not received properly- resetting$/ => 'sending-error',
66
- /^Resending\.\.\.$/ => 'sending-error',
101
+ /^Last outgoing message not received properly- resetting$/,
102
+ /'Resending\.\.\./,
67
103
  # connection errors
68
- /^Lost connection with the remote user:<br\/>Remote host closed connection\.$/ => 'lost-remote-conn',
69
- # direct IM stuff
70
- /^Attempting to connect to .+ at .+ for Direct IM\./ => 'direct-im-connect',
71
- /^Asking .+ to connect to us at .+ for Direct IM\./ => 'direct-im-ask',
72
- /^Direct IM with .+ failed/ => 'direct-im-failed',
73
- /^Attempting to connect to .+\.$/ => 'direct-im-connect',
74
- /^Attempting to connect via proxy server\.$/ => 'direct-im-proxy',
75
- /^Direct IM established$/ => 'direct-im-established',
76
- /^Lost connection with the remote user:<br\/>Windows socket error/ => 'direct-im-lost-conn',
104
+ /Lost connection with the remote user:.+/,
77
105
  # chats
78
- /^.+ entered the room\.$/ => 'chat-entered-room',
79
- /^.+ left the room\.$/ => 'chat-left-room'
80
- }
106
+ /^.+ entered the room\.$/,
107
+ /^.+ left the room\.$/
108
+ ]
81
109
 
82
- regex, status = status_map.detect{ |regex, status| regex.match(str) }
83
- if regex and status
84
- alias_and_status = [regex.match(str)[1], status]
85
- else
86
- # not one of the regular statuses, try my statuses.
87
- regex, status = my_status_map.detect{ |regex, status| regex.match(str) }
88
- alias_and_status = ['System Message', status]
89
- end
90
- return alias_and_status
110
+ # non-libpurple events
111
+ # Each key maps to an eventType string. The keys will be matched against a line of chat
112
+ # and the partner's alias will be in regex group 1, IF the alias is matched.
113
+ @eventMap = {
114
+ # .+ is not an alias, it's a proxy server so no grouping
115
+ /^Attempting to connect to .+\.$/ => 'direct-im-connect',
116
+ # NB: pidgin doesn't track when Direct IM is disconnected, AFAIK
117
+ /^Direct IM established$/ => 'directIMConnected',
118
+ /Unable to send message. The message is too large./ => 'chat-error',
119
+ /You missed .+ messages from (.+) because they were too large./ => 'chat-error'
120
+ }
91
121
  end
92
122
 
93
123
  def getTimeZoneOffset()
94
- tz_regex = /([-+]\d+)[A-Z]{3}\.(txt|html?)/
95
- tz_match = tz_regex.match(@srcPath)
96
- tz_offset = tz_match.nil? ? @userTZOffset : tz_match[1]
97
- return tz_offset
124
+ tzMatch = /([-\+]\d+)[A-Z]{3}\.txt|html?/.match(@srcPath)
125
+ tzOffset = tzMatch[1] rescue @userTZOffset
126
+ return tzOffset
127
+ end
128
+
129
+ # Adium time format: YYYY-MM-DD\THH.MM.SS[+-]TZ_HRS like:
130
+ # 2008-10-05T22.26.20-0800
131
+ def createAdiumTime(time)
132
+ # parsedDate = [year, month, day, hour, min, sec]
133
+ parsedDate = case time
134
+ when @timeRegexOne
135
+ [$~[1].to_i, # year
136
+ $~[2].to_i, # month
137
+ $~[3].to_i, # day
138
+ $~[4].to_i, # hour
139
+ $~[5].to_i, # minute
140
+ $~[6].to_i] # seconds
141
+ when @timeRegexTwo
142
+ hours = $~[4].to_i
143
+ if $~[7] == 'PM' and hours != 12
144
+ hours += 12
145
+ end
146
+ [$~[3].to_i, # year
147
+ $~[1].to_i, # month
148
+ $~[2].to_i, # day
149
+ hours,
150
+ $~[5].to_i, # minutes
151
+ $~[6].to_i] # seconds
152
+ when @minimalTimeRegex
153
+ # "04:22:05" => %w{04 22 05}
154
+ hours = $~[1].to_i
155
+ if $~[4] == 'PM' and hours != 12
156
+ hours += 12
157
+ end
158
+ @basicTimeInfo + # [year, month, day]
159
+ [hours,
160
+ $~[2].to_i, # minutes
161
+ $~[3].to_i] # seconds
162
+ else
163
+ Pidgin2Adium.logMsg("You have found an odd timestamp.", true)
164
+ Pidgin2Adium.logMsg("Please report it to the developer.")
165
+ Pidgin2Adium.logMsg("The timestamp: #{time}")
166
+ Pidgin2Adium.logMsg("Continuing...")
167
+
168
+ ParseDate.parsedate(time)
169
+ end
170
+ return Time.local(*parsedDate).strftime("%Y-%m-%dT%H.%M.%S#{@tzOffset}")
98
171
  end
99
172
 
100
173
  # parseFile slurps up @srcPath into one big string and runs
101
174
  # SrcHtmlFileParse.cleanup if it's an HTML file.
102
- # It then uses regexes to break up the string, uses createMsgData
175
+ # It then uses regexes to break up the string, uses createMsg and createStatusOrEventMsg
103
176
  # to turn the regex MatchData into data hashes, and feeds it to
104
177
  # ChatFileGenerator, which creates the XML data string.
105
178
  # This method returns a ChatFileGenerator object.
106
179
  def parseFile()
107
- fileContent = File.read(@srcPath) # one big string
108
- if self.class == SrcHtmlFileParse
109
- fileContent = self.cleanup(fileContent)
110
- end
180
+ file = File.new(@srcPath, 'r')
111
181
  # Deal with first line.
112
- first_line_match = @first_line_regex.match(fileContent)
113
-
114
- if first_line_match.nil?
115
- Pidgin2Adium.logMsg("Parsing of #{@srcPath} failed (could not find first line).", true)
182
+ firstLine = file.readline()
183
+ firstLineMatch = @firstLineRegex.match(firstLine)
184
+ if firstLineMatch.nil?
185
+ file.close()
186
+ Pidgin2Adium.logMsg("Parsing of #{@srcPath} failed (could not find valid first line).", true)
116
187
  return false
188
+ else
189
+ # one big string, without the first line
190
+ if self.class == SrcHtmlFileParse
191
+ fileContent = self.cleanup(file.read())
192
+ else
193
+ fileContent = file.read()
194
+ end
195
+ file.close()
117
196
  end
118
- service = first_line_match[4]
119
- # mySN is standardized to avoid "AIM.name" and "AIM.na me" folders
120
- mySN = first_line_match[3].downcase.sub(' ', '')
121
- otherPersonsSN = first_line_match[1]
122
- chatTimePidgin_start = first_line_match[2]
197
+
198
+ service = firstLineMatch[4]
199
+ # userSN is standardized to avoid "AIM.name" and "AIM.na me" folders
200
+ @userSN = firstLineMatch[3].downcase.gsub(' ', '')
201
+ @partnerSN = firstLineMatch[1]
202
+ pidginChatTimeStart = firstLineMatch[2]
203
+ @basicTimeInfo = case firstLine
204
+ when @timeRegexOne: [$1.to_i, $2.to_i, $3.to_i]
205
+ when @timeRegexTwo: [$3.to_i, $1.to_i, $2.to_i]
206
+ end
207
+
123
208
  chatFG = ChatFileGenerator.new(service,
124
- mySN,
125
- otherPersonsSN,
126
- chatTimePidgin_start,
127
- getTimeZoneOffset(),
128
- @masterAlias,
209
+ @userSN,
210
+ @partnerSN,
211
+ createAdiumTime(pidginChatTimeStart),
129
212
  @destDirBase)
130
- all_line_matches = fileContent.scan( Regexp.union(@line_regex, @line_regex_status) )
213
+ fileContent.each_line do |line|
214
+ case line
215
+ when @lineRegex
216
+ chatFG.appendLine( createMsg($~.captures) )
217
+ when @lineRegexStatus
218
+ msg = createStatusOrEventMsg($~.captures)
219
+ # msg is nil if we couldn't parse the status line
220
+ chatFG.appendLine(msg) unless msg.nil?
221
+ end
222
+ end
223
+ return chatFG
224
+ end
131
225
 
132
- # an empty chat window that got saved
133
- if all_line_matches.empty?
134
- return chatFG
226
+ def getSenderByAlias(aliasName)
227
+ if @userAliases.include? aliasName.downcase.sub(/^\*{3}/,'').gsub(/\s+/, '')
228
+ # Set the current alias being used of the ones in @userAliases
229
+ @userAlias = aliasName.sub(/^\*{3}/, '')
230
+ return @userSN
231
+ else
232
+ return @partnerSN
135
233
  end
234
+ end
136
235
 
137
- all_line_matches.each do |line|
138
- chatFG.appendLine( createMsgData(line) )
236
+ # createMsg takes an array of captures from matching against @lineRegex
237
+ # and returns a Message object or one of its subclasses.
238
+ # It can be used for SrcTxtFileParse and SrcHtmlFileParse because
239
+ # both of them return data in the same indexes in the matches array.
240
+ def createMsg(matches)
241
+ msg = nil
242
+ # Either a regular message line or an auto-reply/away message.
243
+ time = createAdiumTime(matches[0])
244
+ aliasStr = matches[1]
245
+ sender = getSenderByAlias(aliasStr)
246
+ body = matches[3]
247
+ if matches[2] # auto-reply
248
+ msg = AutoReplyMessage.new(sender, time, aliasStr, body)
249
+ else
250
+ # normal message
251
+ msg = XMLMessage.new(sender, time, aliasStr, body)
139
252
  end
140
- return chatFG
253
+ return msg
254
+ end
255
+
256
+ # createStatusOrEventMsg takes an array of +MatchData+ captures from
257
+ # matching against @lineRegexStatus and returns an Event or Status.
258
+ def createStatusOrEventMsg(matches)
259
+ # ["22:58:00", "BuddyName logged in."]
260
+ # 0: time
261
+ # 1: status message or event
262
+ msg = nil
263
+ time = createAdiumTime(matches[0])
264
+ str = matches[1]
265
+ regex, status = @statusMap.detect{|regex, status| str =~ regex}
266
+ if regex and status
267
+ # Status message
268
+ aliasStr = regex.match(str)[1]
269
+ sender = getSenderByAlias(aliasStr)
270
+ msg = StatusMessage.new(sender, time, aliasStr, status)
271
+ else
272
+ # Test for event
273
+ regex = @libPurpleEvents.detect{|regex| str =~ regex }
274
+ eventType = 'libpurpleEvent' if regex
275
+ unless regex and eventType
276
+ # not a libpurple event, try others
277
+ regexAndEventType = @eventMap.detect{|regex,eventType| str =~ regex}
278
+ if regexAndEventType.nil?
279
+ Pidgin2Adium.logMsg("You have found an odd status line. Please send this line to the developer.", true)
280
+ Pidgin2Adium.logMsg("The line is: #{str}", true)
281
+ return nil
282
+ else
283
+ regex = regexAndEventType[0]
284
+ eventType = regexAndEventType[1]
285
+ end
286
+ end
287
+ if regex and eventType
288
+ regexMatches = regex.match(str)
289
+ # Event message
290
+ if regexMatches.size == 1
291
+ # No alias - this means it's the user
292
+ aliasStr = @userAlias
293
+ sender = @userSN
294
+ else
295
+ aliasStr = regex.match(str)[1]
296
+ sender = getSenderByAlias(aliasStr)
297
+ end
298
+ msg = Event.new(sender, time, aliasStr, str, eventType)
299
+ end
300
+ end
301
+ return msg
141
302
  end
142
303
  end
143
304
 
144
305
  class SrcTxtFileParse < SrcFileParse
145
- def initialize(srcPath, destDirBase, masterAlias, userTZ, userTZOffset)
146
- super(srcPath, destDirBase, masterAlias, userTZ, userTZOffset)
147
- # @line_regex matches a line in an HTML log file other than the first
148
- # @line_regex matchdata:
306
+ def initialize(srcPath, destDirBase, userAliases, userTZ, userTZOffset)
307
+ super(srcPath, destDirBase, userAliases, userTZ, userTZOffset)
308
+ # @lineRegex matches a line in a TXT log file other than the first
309
+ # @lineRegex matchdata:
149
310
  # 0: timestamp
150
- # 1: screen name
311
+ # 1: screen name or alias, if alias set
151
312
  # 2: "<AUTO-REPLY>" or nil
152
- # 3: message
153
- @line_regex = /#{@timestamp_regex_str} (.*?) ?(<AUTO-REPLY>)?: (.*)$/
154
- # @line_regex_status matches a status line
155
- # @line_regex_status matchdata:
313
+ # 3: message body
314
+ @lineRegex = /#{@timestampRegexStr} (.*?) ?(<AUTO-REPLY>)?: (.*)$/o
315
+ # @lineRegexStatus matches a status line
316
+ # @lineRegexStatus matchdata:
156
317
  # 0: timestamp
157
- # 1: message
158
- @line_regex_status = /#{@timestamp_regex_str} ([^:]+?)[\r\n]{1,2}/
318
+ # 1: status message
319
+ @lineRegexStatus = /#{@timestampRegexStr} ([^:]+?)[\r\n]/o
159
320
  end
160
321
 
161
- # createMsgData takes a +MatchData+ object (from @line_regex or @line_regex_status) and returns a hash
162
- # with the following keys: time, alias, and message and/or status.
163
- def createMsgData(matchObj)
164
- msg_data_hash = { 'time' => nil, 'alias' => nil, 'status' => nil, 'body' => nil, 'auto-reply' => nil }
165
- if matchObj[4..5] == [nil, nil]
166
- # regular message
167
- # ["10:58:29", "BuddyName", "<AUTO-REPLY>", "hello!\r", nil, nil]
168
- msg_data_hash['time'] = matchObj[0]
169
- msg_data_hash['alias'] = matchObj[1]
170
- msg_data_hash['auto-reply'] = (matchObj[2] != nil)
171
- # strip() to remove "\r" from end
172
- msg_data_hash['body'] = matchObj[3].strip
173
- elsif matchObj[0..3] == [nil, nil, nil, nil]
174
- # status message
175
- # [nil, nil, nil, nil, "22:58:00", "BuddyName logged in."]
176
- alias_and_status = getAliasAndStatus(matchObj[5])
177
- msg_data_hash['time'] = matchObj[4]
178
- msg_data_hash['alias'] = alias_and_status[0]
179
- msg_data_hash['status'] = alias_and_status[1]
180
- end
181
- return msg_data_hash
182
- end
183
322
  end
184
323
 
185
324
  class SrcHtmlFileParse < SrcFileParse
186
- def initialize(srcPath, destDirBase, masterAlias, userTZ, userTZOffset)
187
- super(srcPath, destDirBase, masterAlias, userTZ, userTZOffset)
188
- # @line_regex matches a line in an HTML log file other than the first
325
def initialize(srcPath, destDirBase, userAliases, userTZ, userTZOffset)
  # Bare `super` forwards the five arguments unchanged to the parent
  # initializer.
  super
  # Both patterns interpolate @timestampRegexStr (presumably assigned by
  # the parent initializer -- it is not set in this method). The "o" flag
  # makes Ruby perform that interpolation only once per literal.
  #
  # @lineRegex matches any line of an HTML log file except the first.
  # The time matches on either "2008-11-17 14:12" or "14:12".
  # Match object:
  #   0: timestamp, extended or not
  #   1: screen name, or alias if an alias is set
  #   2: "&lt;AUTO-REPLY&gt;" or nil
  #   3: message body, e.g.
  #      <span style='color: #000000;'>test sms</span>
  @lineRegex = /#{@timestampRegexStr} ?<b>(.*?) ?(&lt;AUTO-REPLY&gt;)?:?<\/b> ?(.*)<br ?\/>/o
  # @lineRegexStatus matches a status line.
  # Match object:
  #   0: timestamp
  #   1: status message
  @lineRegexStatus = /#{@timestampRegexStr} ?<b> (.*?)<\/b><br ?\/>/o
end
240
342
 
241
- # Removes <font> tags, empty <a>s, spans with either no color
343
# Removes <html>/<body>/<font> tags, empty <a>s, and spans with either no
# style worth keeping or color information that just turns the text black.
# Spans that keep at least one declaration are re-emitted with a
# double-quoted style attribute.
#
# text - String of Pidgin HTML. It is modified in place (gsub!).
#
# Returns the same String object that was passed in.
def cleanup(text)
  # Pidgin and Adium both show bold using
  # <span style="font-weight: bold;">, except Pidgin uses single quotes
  # and Adium uses double quotes.
  text.gsub!(/<\/?(html|body|font).*?>/, '')
  # These empty links are sometimes appended to every line in a chat,
  # for some weird reason. Remove them.
  text.gsub!(%r{<a href='.+?'>\s*?</a>}, '')
  # NOTE(review): the leading ".+" and trailing "(.*)" are greedy, so a
  # line holding several spans is rewritten around a single span per
  # match -- confirm that is acceptable for real logs.
  text.gsub!(%r{(.*?)<span.+style='(.+?)'>(.*?)</span>(.*)}) do
    # before    = text before the span
    # style     = style declaration
    # innertext = text inside <span>
    # after     = text after the span
    before, style, innertext, after = *Regexp.last_match.captures
    # TODO: remove after from string then see what balanceTags does
    # Remove empty spans: `next` hands this value back to gsub! as the
    # replacement, dropping the span entirely.
    next before + after if innertext.empty?
    # Only allow some style declarations. We keep everything that is not
    # listed below, e.g.:
    #   font-weight: bold
    #   color (except #000000)
    #   text-decoration: underline
    kept = style.split(/; ?/).reject do |decl|
      case decl
      # split already consumed the ";", so compare without it.
      # Yes, sometimes there's a ">" before the ";".
      when 'color: #000000', 'color: #000000>' then true
      when /font-family/, /font-size/, /background/ then true
      else false
      end
    end
    unless kept.empty?
      innertext = "<span style=\"#{kept.join('; ')};\">#{innertext}</span>"
    end
    before + innertext + after
  end
  # Pidgin uses <em>, Adium uses <span>. gsub! returns nil when nothing
  # was replaced, so closing tags are only touched if an opener was found.
  if text.gsub!('<em>', '<span style="font-style: italic;">')
    text.gsub!('</em>', '</span>')
  end
  text
end
401
+ end
402
+
403
# Holder for a single line of the chat.
# It is subclassed as appropriate (eg AutoReplyMessage).
# Every Message records a sender, a time, and an alias.
class Message
  # Stores the three values as @sender, @time and @aliasStr without
  # copying or converting them.
  def initialize(sender, time, aliasStr)
    @sender, @time, @aliasStr = sender, time, aliasStr
  end
end
413
+
414
# Basic message with body text (as opposed to pure status messages, which
# have no body). The body is normalized once, at construction time.
class XMLMessage < Message
  # body - String of message markup. It is not modified; the normalized
  #        result is stored in @body.
  def initialize(sender, time, aliasStr, body)
    super(sender, time, aliasStr)
    @body = body
    normalizeBody!
  end

  # Returns the <message> element for this line as a String ending in "\n".
  def getOutput
    format('<message sender="%s" time="%s" alias="%s">%s</message>' + "\n",
           @sender, @time, @aliasStr, @body)
  end

  # Escapes entities, balances tags, handles /me actions and wraps the
  # body in the <div><span> wrapper. Returns the new @body.
  def normalizeBody!
    normalizeBodyEntities!
    # Fix mismatched tags. Yes, it's faster to do it per-message
    # than all at once.
    @body = Pidgin2Adium.balanceTags(@body)
    if @aliasStr[0, 3] == '***'
      # "***<alias>" is what pidgin sets as the alias for a /me action.
      # Build new strings instead of slicing/appending in place so the
      # caller's alias object (possibly frozen) is left alone.
      @aliasStr = @aliasStr[3..-1]
      @body = "*#{@body}*"
    end
    @body = '<div><span style="font-family: Helvetica; font-size: 12pt;">' +
            @body +
            '</span></div>'
  end

  # Escapes bare ampersands and single quotes in @body. Returns @body.
  def normalizeBodyEntities!
    # Convert '&' to '&amp;' only if it does not already start a complete
    # entity: one of the five predefined XML names or a numeric character
    # reference, terminated by ';'.
    escaped = @body.gsub(/&(?!(?:lt|gt|amp|quot|apos|#\d+|#x[0-9a-fA-F]+);)/, '&amp;')
    # Replace single quotes with '&apos;', but only outside <span>s.
    # The first alternative swallows a whole span (or any other tag, so
    # single-quoted attributes stay intact) and is put back unchanged;
    # only a quote matched by the second alternative is rewritten.
    @body = escaped.gsub(/(<span.*?>.*?<\/span>|<[^>]*>)|'/) do
      Regexp.last_match(1) || '&apos;'
    end
  end
end
455
+
456
# An auto-reply line. Like any XMLMessage it has a body; the only
# difference is the auto="true" attribute in its output.
class AutoReplyMessage < XMLMessage
  # Returns the <message auto="true"> element as a String ending in "\n".
  def getOutput
    template = '<message sender="%s" time="%s" auto="true" alias="%s">%s</message>' + "\n"
    format(template, @sender, @time, @aliasStr, @body)
  end
end
462
+
463
# A status line, e.g. "Blahblah has gone away."
# It carries a status value in addition to the base Message fields.
class StatusMessage < Message
  # status - stored as @status and emitted as the "type" attribute.
  def initialize(sender, time, aliasStr, status)
    super(sender, time, aliasStr)
    @status = status
  end

  # Returns the self-closing <status/> element as a String ending in "\n".
  def getOutput
    template = '<status type="%s" sender="%s" time="%s" alias="%s"/>' + "\n"
    format(template, @status, @sender, @time, @aliasStr)
  end
end
473
+
474
# An <event> line of the chat: an XMLMessage that also carries an event
# type.
class Event < XMLMessage
  # type - stored as @type; defaults to "libpurpleMessage".
  def initialize(sender, time, aliasStr, body, type="libpurpleMessage")
    super(sender, time, aliasStr, body)
    @type = type
  end

  # Returns the <event> element as a String.
  # NOTE(review): unlike the other getOutput methods in this file, no
  # trailing "\n" is appended here -- confirm whether that is intended.
  def getOutput
    template = '<event type="%s" sender="%s" time="%s" alias="%s">%s</event>'
    format(template, @type, @sender, @time, @aliasStr, @body)
  end
end
280
485
  end # end module