pidgin2adium 0.0.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -18,22 +18,22 @@ OptionParser.new do |opts|
18
18
  opts.on('-i IN_DIR', '--in IN_DIR', 'Specify directory where pidgin logs are stored') do |v|
19
19
  options[:in] = v
20
20
  end
21
- opts.on('-o OUT_DIR', '--out OUT_DIR', 'Specify directory where Adium logs will be stored (not the Adium directory in ~/Library') do |out|
21
+ opts.on('-o', '--out OUT_DIR', 'Specify directory where Adium logs will be stored (not the Adium directory in ~/Library)') do |out|
22
22
  options[:out] = out
23
23
  end
24
- opts.on('-l LIBRARY_DIR', '--libdir LIBRARY_DIR',
24
+ opts.on('-l', '--libdir LIBRARY_DIR',
25
25
  'Specify dirname where Adium logs are stored (eg "AIM.<username>" for',
26
- '~/Library/Application Support/Adium 2.0/Users/Default/Logs/AIM.<username>') do |ld|
26
+ '~/Library/Application Support/Adium 2.0/Users/Default/Logs/AIM.<username>)') do |ld|
27
27
  options[:libdir] = ld
28
28
  end
29
29
  opts.on('-d', '--debug', 'Turn debug on.') do |lf|
30
30
  options[:debug] = true
31
31
  end
32
- opts.on("--time-zone [TIME ZONE]",
32
+ opts.on('-t', "--time-zone [TIME ZONE]",
33
33
  "Set time zone like \"EST\". Defaults to local time zone: #{Time.now.zone}") do |tz|
34
34
  options[:timezone] = tz
35
35
  end
36
- opts.on('-a MY_ALIASES_AND_SNs', "--aliases MY_ALIASES_AND_SNs",
36
+ opts.on('-a', "--aliases MY_ALIASES_AND_SNs",
37
37
  "A comma-separated list of your aliases and screenname(s) so this script knows which person in a chat is you.",
38
38
  "Whitespace is removed and aliases are lowercased.") do |aliases|
39
39
  options[:aliases] = aliases.split(',')
@@ -56,12 +56,12 @@ required_opts.each do |short, long|
56
56
  end
57
57
  exit 1 if need_opts
58
58
 
59
- log_converter = Pidgin2Adium::Logs.new(src=options[:in],
60
- out = options[:out],
61
- aliases = options[:aliases],
62
- libdir = options[:libdir],
63
- tz = options[:timezone],
64
- debug = options[:debug]
59
+ log_converter = Pidgin2Adium::Logs.new(options[:in],
60
+ options[:out],
61
+ options[:aliases],
62
+ options[:libdir],
63
+ options[:timezone],
64
+ options[:debug]
65
65
  )
66
66
 
67
67
  log_converter.start
@@ -1,185 +1,53 @@
1
1
  # ADD DOCUMENTATION
2
2
  require 'pidgin2adium/balance-tags.rb'
3
- require 'hpricot'
4
3
 
5
4
  module Pidgin2Adium
6
- def Pidgin2Adium.normalizeBodyEntities!(body)
7
- # Convert '&' to '&amp;' only if it's not followed by an entity.
8
- body.gsub!(/&(?!lt|gt|amp|quot|apos)/, '&amp;')
9
- # replace single quotes with '&apos;' but only outside <span>s.
10
- parts = body.split(/(<\/?span.*?>)/)
11
- body = parts.map{ |part| part.match(/<\/?span/) ? part : part.gsub("'", '&apos;') }.join('')
12
- end
13
-
14
- def Pidgin2Adium.normalizeBody!(body, aliasStr)
15
- # Fix mismatched tags.
16
- body = Pidgin2Adium.balance_tags(body)
17
- normalizeBodyEntities!(body)
18
- if aliasStr[0,3] == '***'
19
- # "***<alias>" is what pidgin sets as the alias for a /me action
20
- aliasStr.slice!(0,3)
21
- body = '*' + body + '*'
22
- end
23
- body = '<div><span style="font-family: Helvetica; font-size: 12pt;">' +
24
- body +
25
- '</span></div>'
26
- end
27
-
28
5
  class ChatFileGenerator
29
- def initialize(service, mySN, otherPersonsSN, chatTimePidgin_start, tzOffset, masterAlias, destDirBase)
30
- # basicTimeInfo is for files that only have the full timestamp at
31
- # the top; we can use it to fill in the minimal per-line timestamps.
32
- # It has only 3 elements ([year, month, dayofmonth]) because
33
- # you should be able to fill everything else in.
34
- # If you can't, something's wrong.
35
- @basicTimeInfo = nil
36
- # @chatMessage is a 2D array composed of arrays like so (e.g.):
37
- # ['time'=>'2:23:48 PM', 'alias'=>'Me', 'status' => 'available', 'body'=>'abcdefg', auto-reply=true]
38
- @chatMessage=[]
39
- # chatTimeAdium_start format: YYYY-MM-DD\THH.MM.SS[+-]TZ_HRS like so:
40
- # 2008-10-05T22.26.20-0800
41
- @chatTimeAdium_start=nil
42
- @chatTimePidgin_start=chatTimePidgin_start
43
- @destDirBase=destDirBase
44
- @masterAlias=masterAlias
45
- @mySN=mySN
46
- @otherPersonsSN=otherPersonsSN
47
- @service=service
48
- @tzOffset=tzOffset
6
+ def initialize(service, userSN, partnerSN, adiumChatTimeStart, destDirBase)
7
+ @service = service
8
+ @userSN = userSN
9
+ @partnerSN = partnerSN
10
+ @adiumChatTimeStart = adiumChatTimeStart
11
+ @destDirBase = destDirBase
12
+
13
+ # @chatLines is an array of Message, Status, and Event objects
14
+ @chatLines = []
49
15
  # key is for Pidgin, value is for Adium
50
16
  # Just used for <service>.<screenname> in directory structure
51
- @SERVICE_NAME_MAP={'aim'=>'AIM',
52
- 'jabber'=>'jabber',
53
- 'gtalk'=>'GTalk',
17
+ @SERVICE_NAME_MAP = {'aim' => 'AIM',
18
+ 'jabber' =>'jabber',
19
+ 'gtalk'=> 'GTalk',
54
20
  'icq' => 'ICQ',
55
- 'qq'=>'QQ',
56
- 'msn'=>'MSN',
57
- 'yahoo'=>'Yahoo'}
58
- end
59
-
60
- def convert()
61
- initChatTime()
62
- return buildDomAndOutput()
21
+ 'qq' => 'QQ',
22
+ 'msn' => 'MSN',
23
+ 'yahoo' => 'Yahoo'}
63
24
  end
64
25
 
65
- def initChatTime()
66
- # ParseDate.parsedate "Tuesday, July 5th, 2007, 18:35:20 UTC"
67
- # # => [2007, 7, 5, 18, 35, 20, "UTC", 2]
68
- # [year, month, day of month, hour, minute, sec, timezone, day of week]
69
- # strtotime returns seconds since the epoch
70
- @chatTimeAdium_start = createAdiumDate(@chatTimePidgin_start)
71
- @basicTimeInfo = ParseDate.parsedate(@chatTimePidgin_start)[0..2]
72
- end
73
-
74
- # Add a line to @chatMessage.
75
- # It is its own method because attr_writer creates the method 'chatMessage=', which doesn't help for chatMessage.push
26
+ # Add a line to @chatLines.
27
+ # It is its own method because attr_writer would only create the method
28
+ # 'chatLines=', which doesn't help for @chatLines.push
76
29
  def appendLine(line)
77
- @chatMessage.push(line)
78
- end
79
-
80
- #
81
- def createAdiumDate(date)
82
- epochSecs = getEpochSeconds(date)
83
- if @tzOffset.nil?
84
- Pidgin2Adium.logMsg("@tzOffset is nil. This really shouldn't happen.", true)
85
- @tzOffset = "+0"
86
- end
87
- return Time.at(epochSecs).strftime("%Y-%m-%dT%H.%M.%S#{@tzOffset}")
88
- end
89
-
90
- def getEpochSeconds(timestr)
91
- parsed_date = ParseDate.parsedate(timestr)
92
- [0, 1, 2].each do |i|
93
- parsed_date[i] = @basicTimeInfo[i] if parsed_date[i].nil?
94
- end
95
- return Time.local(*parsed_date).tv_sec
30
+ @chatLines.push(line)
96
31
  end
97
32
 
98
- def getScreenNameByAlias(aliasStr)
99
- myAliasStr = aliasStr.clone
100
- myAliasStr.slice!(0,3) if myAliasStr[0,3] == '***'
101
- if aliasStr==""
102
- return ""
103
- else
104
- return @masterAlias.include?(myAliasStr.downcase.gsub(/\s*/, '')) ? @mySN : @otherPersonsSN
105
- end
106
- end
107
-
108
- # returns path of output file
109
- def buildDomAndOutput()
33
+ # Returns path of output file
34
+ def convert()
110
35
  serviceName = @SERVICE_NAME_MAP[@service.downcase]
111
- destDirReal = File.join(@destDirBase, "#{serviceName}.#{@mySN}", @otherPersonsSN, "#{@otherPersonsSN} (#{@chatTimeAdium_start}).chatlog")
36
+ destDirReal = File.join(@destDirBase, "#{serviceName}.#{@userSN}", @partnerSN, "#{@partnerSN} (#{@adiumChatTimeStart}).chatlog")
112
37
  FileUtils.mkdir_p(destDirReal)
113
- destFilePath = destDirReal + '/' + "#{@otherPersonsSN} (#{@chatTimeAdium_start}).xml"
38
+ destFilePath = destDirReal << '/' << "#{@partnerSN} (#{@adiumChatTimeStart}).xml"
114
39
  if File.exist?(destFilePath)
115
40
  return Pidgin2Adium::Logs::FILE_EXISTS
116
41
  end
117
42
 
118
- # no \n before </chat> because {body} has it already
119
- chatLogTemplate = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
120
- "<chat xmlns=\"http://purl.org/net/ulf/ns/0.4-02\" account=\"#{@mySN}\" service=\"#{serviceName}\">\n{body}</chat>"
121
-
122
43
  allMsgs = ""
123
- @chatMessage.each do |msg|
124
- # template is set to a copy of one of the three templates,
125
- # the {...} vars are subbed, and then it's added to allMsgs
126
- template = nil
127
- # Note:
128
- # away/auto message has both body and status set
129
- # pure status has status but not body set
130
- # pure message has body set but not status
131
- begin
132
- chatTimeAdium = createAdiumDate(msg['time'])
133
- rescue TypeError => bang
134
- puts '*' * 80
135
- @chatMessage.each { |m| p m }
136
- puts "Oops! Time error! on msg:"
137
- p msg
138
- puts "Rest of message is above, just below the stars."
139
- return false
140
- end
141
- sender = getScreenNameByAlias(msg['alias'])
142
- time = chatTimeAdium
143
- aliasStr = msg['alias']
144
- if msg['body']
145
- body = msg['body']
146
- if msg['status'].nil?
147
- # Body with no status
148
- if msg['auto-reply'] == true
149
- # auto-reply from away message
150
- template = AutoReplyMessage.new(sender, time, aliasStr, body)
151
- else
152
- # pure regular message
153
- template = XMLMessage.new(sender, time, aliasStr, body)
154
- end
155
- else
156
- # Body with status message
157
- template = AwayMessage.new(sender, time, aliasStr, body)
158
- end
159
- elsif msg['status']
160
- # Status message, no body
161
- template = StatusMessage.new(sender, time, aliasStr, msg['status'])
162
- else
163
- Pidgin2Adium.logMsg("msg has neither status nor body key set. Unsure what to do. msg is as follows:", true)
164
- Pidgin2Adium.logMsg(sprintf('%p', msg), true)
165
- return false
166
- end
167
- begin
168
- allMsgs += template.getOutput()
169
- rescue TypeError => bang
170
- Pidgin2Adium.logMsg "TypeError: #{bang.message}"
171
- Pidgin2Adium.logMsg "This is probably caused by an unrecognized status string."
172
- Pidgin2Adium.logMsg "Go to the file currently being worked on (displayed above) at time #{msg['time']}"
173
- Pidgin2Adium.logMsg "and add the status message there to one of the hashes in SrcHtmlFileParse.getAliasAndStatus."
174
- Pidgin2Adium.logMsg "**Debug info**"
175
- Pidgin2Adium.logMsg "msg: #{msg.inspect}"
176
- Pidgin2Adium.logMsg "--"
177
- Pidgin2Adium.logMsg "Exiting."
178
- return false
179
- end
180
- end
181
- ret = chatLogTemplate.sub("{body}", allMsgs)
182
- # xml is ok.
44
+ # TODO: inject?
45
+ @chatLines.each { |obj| allMsgs << obj.getOutput() }
46
+ # xml is done.
47
+
48
+ # no \n before </chat> because allMsgs has it already
49
+ ret = sprintf('<?xml version="1.0" encoding="UTF-8" ?>'<<"\n"+
50
+ '<chat xmlns="http://purl.org/net/ulf/ns/0.4-02" account="%s" service="%s">'<<"\n"<<'%s</chat>', @userSN, serviceName, allMsgs)
183
51
 
184
52
  # we already checked to see if the file previously existed.
185
53
  outfile = File.new(destFilePath, 'w')
@@ -187,57 +55,5 @@ module Pidgin2Adium
187
55
  outfile.close
188
56
  return destFilePath
189
57
  end
190
-
191
- # A holding object for each line of the chat.
192
- # It is subclassed as appropriate (eg AutoReplyMessage).
193
- # All Messages have senders, times, and aliases.
194
- class Message
195
- def initialize(sender, time, aliasStr)
196
- @sender = sender
197
- @time = time
198
- @aliasStr = aliasStr
199
- end
200
- end
201
-
202
- # Basic message with body text (as opposed to pure status messages which have no body).
203
- class XMLMessage < Message
204
- def initialize(sender, time, aliasStr, body)
205
- super(sender, time, aliasStr)
206
- @body = Pidgin2Adium.normalizeBody!(body, @aliasStr)
207
- end
208
-
209
- def getOutput
210
- return sprintf('<message sender="%s" time="%s" alias="%s">%s</message>' + "\n",
211
- @sender, @time, @aliasStr, @body)
212
- end
213
-
214
- end
215
-
216
- # An auto reply message, meaning it has a body.
217
- class AutoReplyMessage < XMLMessage
218
- def getOutput
219
- return sprintf('<message sender="%s" time="%s" alias="%s" auto="true">%s</message>' + "\n",
220
- @sender, @time, @aliasStr, @body)
221
- end
222
- end
223
-
224
- class AwayMessage < XMLMessage
225
- def getOutput
226
- return sprintf('<status type="away" sender="%s" time="%s" alias="%s">%s</status>' + "\n",
227
- @sender, @time, @aliasStr, @body)
228
- end
229
- end
230
-
231
- # A message saying e.g. "Blahblah has gone away."
232
- class StatusMessage < Message
233
- def initialize(sender, time, aliasStr, status)
234
- super(sender, time, aliasStr)
235
- @status = status
236
- end
237
- def getOutput
238
- return sprintf('<status type="%s" sender="%s" time="%s" alias="%s"/>' + "\n",
239
- @status, @sender, @time, @aliasStr)
240
- end
241
- end
242
58
  end
243
59
  end
@@ -1,40 +1,66 @@
1
1
  # =SrcFileParse
2
- # The class SrcFileParse has two subclasses, SrcTxtFileParse and SrcHtmlFileParse
2
+ # The class +SrcFileParse+ has 2 subclasses, +SrcTxtFileParse+ and +SrcHtmlFileParse+.
3
3
  # It parses the file passed into it and extracts the following
4
4
  # from each line in the chat: time, alias, and message and/or status.
5
+
6
+ require 'parsedate'
7
+
5
8
  module Pidgin2Adium
6
- # The two subclasses of SrcFileParse,
7
- # SrcTxtFileParse and SrcHtmlFileParse, only differ
8
- # in that they have their own @line_regex, @line_regex_status,
9
- # and most importantly, createMsgData, which takes the
10
- # +MatchData+ objects from matching against @line_regex and
11
- # fits them into hashes.
9
+ # The two subclasses of +SrcFileParse+,
10
+ # +SrcTxtFileParse+ and +SrcHtmlFileParse+, only differ
11
+ # in that they have their own @lineRegex, @lineRegexStatus,
12
+ # and most importantly, createMsg and createStatusOrEventMsg, which take
13
+ # the +MatchData+ captures from matching against @lineRegex or
14
+ # @lineRegexStatus, respectively, and return object instances.
15
+ # +createMsg+ returns a +Message+ instance (or one of its subclasses).
16
+ # +createStatusOrEventMsg+ returns a +Status+ or +Event+ instance.
12
17
  class SrcFileParse
13
- def initialize(srcPath, destDirBase, masterAlias, userTZ, userTZOffset)
18
+ def initialize(srcPath, destDirBase, userAliases, userTZ, userTZOffset)
14
19
  @srcPath = srcPath
15
20
  # these two are to pass to chatFG in parseFile
16
21
  @destDirBase = destDirBase
17
- @masterAlias = masterAlias
22
+ @userAliases = userAliases
18
23
  @userTZ = userTZ
19
24
  @userTZOffset = userTZOffset
20
- # Automagically does grouping for you. Will be inserted in @line_regex{,_status}
21
- @timestamp_regex_str = '\(((?:\d{4}-\d{2}-\d{2} )?\d{1,2}:\d{1,2}:\d{1,2}(?: .{1,2})?)\)'
25
+ @tzOffset = getTimeZoneOffset()
26
+
27
+ # Used in @lineRegex{,Status}. Only one group: the entire timestamp.
28
+ @timestampRegexStr = '\(((?:\d{4}-\d{2}-\d{2} )?\d{1,2}:\d{1,2}:\d{1,2}(?: .{1,2})?)\)'
22
29
  # the first line is special: it tells us
23
30
  # 1) who we're talking to
24
31
  # 2) what time/date
25
32
  # 3) what SN we used
26
- # 4) what protocol (AIM, jabber...)
27
- @first_line_regex = /Conversation with (.*?) at (.*?) on (.*?) \((.*?)\)/s
28
- end
33
+ # 4) what protocol (AIM, icq, jabber...)
34
+ @firstLineRegex = /Conversation with (.+?) at (.+?) on (.+?) \((.+?)\)/
29
35
 
30
- # Takes the body of a line of a chat and returns the [username, status] as a 2-element array.
31
- # Example:
32
- # Pass in "Generic Screenname228 has signed off" and it returns <tt>["Generic Screenname228", "offline"]</tt>
33
- def getAliasAndStatus(str)
34
- alias_and_status = [nil, nil]
36
+ # Possible formats for timestamps:
37
+ # "2007-04-17 12:33:13" => %w{2007, 04, 17, 12, 33, 13}
38
+ @timeRegexOne = /(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})/
39
+ # "4/18/2007 11:02:00 AM" => %w{4, 18, 2007, 11, 02, 00, AM}
40
+ @timeRegexTwo = %r{(\d{1,2})/(\d{1,2})/(\d{4}) (\d{1,2}):(\d{2}):(\d{2}) ([AP]M)}
41
+ # sometimes a line in a chat doesn't have a full timestamp
42
+ # "04:22:05 AM" => %w{04 22 05 AM}
43
+ @minimalTimeRegex = /(\d{1,2}):(\d{2}):(\d{2}) ?([AP]M)?/
44
+
45
+ # {user,partner}SN set in parseFile() after reading the first line
46
+ @userSN = nil
47
+ @partnerSN = nil
48
+
49
+ # @basicTimeInfo is for files that only have the full timestamp at
50
+ # the top; we can use it to fill in the minimal per-line timestamps.
51
+ # It has only 3 elements (year, month, dayofmonth) because
52
+ # you should be able to fill everything else in.
53
+ # If you can't, something's wrong.
54
+ @basicTimeInfo = []
35
55
 
36
- # Screen name is in regex group 1.
37
- status_map = {
56
+ # @userAlias is updated whenever getSenderByAlias sees one of the
57
+ # user's aliases. Set an initial value just in case the first message
58
+ # doesn't give us an alias.
59
+ @userAlias = @userAliases[0]
60
+
61
+ # @statusMap, @libPurpleEvents, and @eventMap are used in
62
+ # createStatusOrEventMsg.
63
+ @statusMap = {
38
64
  /(.+) logged in\.$/ => 'online',
39
65
  /(.+) logged out\.$/ => 'offline',
40
66
  /(.+) has signed on\.$/ => 'online',
@@ -42,239 +68,418 @@ module Pidgin2Adium
42
68
  /(.+) has gone away\.$/ => 'away',
43
69
  /(.+) is no longer away\.$/ => 'available',
44
70
  /(.+) has become idle\.$/ => 'idle',
45
- /(.+) is no longer idle\.$/ => 'available',
46
- # file transfer
47
- /Starting transfer of .+ from (.+)/ => 'file-transfer-start',
48
- /^Offering to send .+ to (.+)$/ => 'fileTransferRequested',
49
- /(.+) is offering to send file/ => 'fileTransferRequested',
71
+ /(.+) is no longer idle\.$/ => 'available'
50
72
  }
51
73
 
52
- # statuses that come from my end. I totally made up these status names.
53
- my_status_map = {
74
+ # Every regex in @libPurpleEvents is reported with eventType 'libpurpleEvent'
75
+ @libPurpleEvents = [
76
+ # file transfer
77
+ /Starting transfer of .+ from (.+)/,
78
+ /^Offering to send .+ to (.+)$/,
79
+ /(.+) is offering to send file/,
80
+ /^Transfer of file .+ complete$/,
81
+ /Error reading|writing|accessing .+: .+/,
82
+ /You cancelled the transfer of/,
83
+ /File transfer cancelled/,
84
+ /(.+) cancelled the transfer of/,
85
+ /(.+) cancelled the file transfer/,
86
+ # Direct IM - actual (dis)connect events are their own types
87
+ /^Attempting to connect to (.+) at .+ for Direct IM\./,
88
+ /^Asking (.+) to connect to us at .+ for Direct IM\./,
89
+ /^Attempting to connect via proxy server\.$/,
90
+ /^Direct IM with (.+) failed/,
54
91
  # encryption
55
- /^Received message encrypted with wrong key$/ => 'encrypt-error',
56
- /^Requesting key\.\.\.$/ => 'encrypt-error',
57
- /^Outgoing message lost\.$/ => 'encrypt-error',
58
- /^Conflicting Key Received!$/ => 'encrypt-error',
59
- /^Error in decryption- asking for resend\.\.\.$/ => 'encrypt-error',
60
- /^Making new key pair\.\.\.$/ => 'encrypt-key-create',
92
+ /Received message encrypted with wrong key/,
93
+ /^Requesting key\.\.\.$/,
94
+ /^Outgoing message lost\.$/,
95
+ /^Conflicting Key Received!$/,
96
+ /^Error in decryption- asking for resend\.\.\.$/,
97
+ /^Making new key pair\.\.\.$/,
61
98
  # file transfer - these are in this (non-used) list because you can't get the alias out of matchData[1]
62
- /^You canceled the transfer of .+$/ => 'file-transfer-cancel',
63
- /^Transfer of file .+ complete$/ => 'fileTransferCompleted',
99
+ /^You canceled the transfer of .+$/,
64
100
  # sending errors
65
- /^Last outgoing message not received properly- resetting$/ => 'sending-error',
66
- /^Resending\.\.\.$/ => 'sending-error',
101
+ /^Last outgoing message not received properly- resetting$/,
102
+ /'Resending\.\.\./,
67
103
  # connection errors
68
- /^Lost connection with the remote user:<br\/>Remote host closed connection\.$/ => 'lost-remote-conn',
69
- # direct IM stuff
70
- /^Attempting to connect to .+ at .+ for Direct IM\./ => 'direct-im-connect',
71
- /^Asking .+ to connect to us at .+ for Direct IM\./ => 'direct-im-ask',
72
- /^Direct IM with .+ failed/ => 'direct-im-failed',
73
- /^Attempting to connect to .+\.$/ => 'direct-im-connect',
74
- /^Attempting to connect via proxy server\.$/ => 'direct-im-proxy',
75
- /^Direct IM established$/ => 'direct-im-established',
76
- /^Lost connection with the remote user:<br\/>Windows socket error/ => 'direct-im-lost-conn',
104
+ /Lost connection with the remote user:.+/,
77
105
  # chats
78
- /^.+ entered the room\.$/ => 'chat-entered-room',
79
- /^.+ left the room\.$/ => 'chat-left-room'
80
- }
106
+ /^.+ entered the room\.$/,
107
+ /^.+ left the room\.$/
108
+ ]
81
109
 
82
- regex, status = status_map.detect{ |regex, status| regex.match(str) }
83
- if regex and status
84
- alias_and_status = [regex.match(str)[1], status]
85
- else
86
- # not one of the regular statuses, try my statuses.
87
- regex, status = my_status_map.detect{ |regex, status| regex.match(str) }
88
- alias_and_status = ['System Message', status]
89
- end
90
- return alias_and_status
110
+ # non-libpurple events
111
+ # Each key maps to an eventType string. The keys will be matched against a line of chat
112
+ # and the partner's alias will be in regex group 1, IF the alias is matched.
113
+ @eventMap = {
114
+ # .+ is not an alias, it's a proxy server so no grouping
115
+ /^Attempting to connect to .+\.$/ => 'direct-im-connect',
116
+ # NB: pidgin doesn't track when Direct IM is disconnected, AFAIK
117
+ /^Direct IM established$/ => 'directIMConnected',
118
+ /Unable to send message. The message is too large./ => 'chat-error',
119
+ /You missed .+ messages from (.+) because they were too large./ => 'chat-error'
120
+ }
91
121
  end
92
122
 
93
123
  def getTimeZoneOffset()
94
- tz_regex = /([-+]\d+)[A-Z]{3}\.(txt|html?)/
95
- tz_match = tz_regex.match(@srcPath)
96
- tz_offset = tz_match.nil? ? @userTZOffset : tz_match[1]
97
- return tz_offset
124
+ tzMatch = /([-\+]\d+)[A-Z]{3}\.txt|html?/.match(@srcPath)
125
+ tzOffset = tzMatch[1] rescue @userTZOffset
126
+ return tzOffset
127
+ end
128
+
129
+ # Adium time format: YYYY-MM-DD\THH.MM.SS[+-]TZ_HRS like:
130
+ # 2008-10-05T22.26.20-0800
131
+ def createAdiumTime(time)
132
+ # parsedDate = [year, month, day, hour, min, sec]
133
+ parsedDate = case time
134
+ when @timeRegexOne
135
+ [$~[1].to_i, # year
136
+ $~[2].to_i, # month
137
+ $~[3].to_i, # day
138
+ $~[4].to_i, # hour
139
+ $~[5].to_i, # minute
140
+ $~[6].to_i] # seconds
141
+ when @timeRegexTwo
142
+ hours = $~[4].to_i
143
+ if $~[7] == 'PM' and hours != 12
144
+ hours += 12
145
+ end
146
+ [$~[3].to_i, # year
147
+ $~[1].to_i, # month
148
+ $~[2].to_i, # day
149
+ hours,
150
+ $~[5].to_i, # minutes
151
+ $~[6].to_i] # seconds
152
+ when @minimalTimeRegex
153
+ # "04:22:05" => %w{04 22 05}
154
+ hours = $~[1].to_i
155
+ if $~[4] == 'PM' and hours != 12
156
+ hours += 12
157
+ end
158
+ @basicTimeInfo + # [year, month, day]
159
+ [hours,
160
+ $~[2].to_i, # minutes
161
+ $~[3].to_i] # seconds
162
+ else
163
+ Pidgin2Adium.logMsg("You have found an odd timestamp.", true)
164
+ Pidgin2Adium.logMsg("Please report it to the developer.")
165
+ Pidgin2Adium.logMsg("The timestamp: #{time}")
166
+ Pidgin2Adium.logMsg("Continuing...")
167
+
168
+ ParseDate.parsedate(time)
169
+ end
170
+ return Time.local(*parsedDate).strftime("%Y-%m-%dT%H.%M.%S#{@tzOffset}")
98
171
  end
99
172
 
100
173
  # parseFile slurps up @srcPath into one big string and runs
101
174
  # SrcHtmlFileParse.cleanup if it's an HTML file.
102
- # It then uses regexes to break up the string, uses createMsgData
175
+ # It then uses regexes to break up the string, uses createMsg and createStatusOrEventMsg
103
176
  # to turn the regex MatchData into data hashes, and feeds it to
104
177
  # ChatFileGenerator, which creates the XML data string.
105
178
  # This method returns a ChatFileGenerator object.
106
179
  def parseFile()
107
- fileContent = File.read(@srcPath) # one big string
108
- if self.class == SrcHtmlFileParse
109
- fileContent = self.cleanup(fileContent)
110
- end
180
+ file = File.new(@srcPath, 'r')
111
181
  # Deal with first line.
112
- first_line_match = @first_line_regex.match(fileContent)
113
-
114
- if first_line_match.nil?
115
- Pidgin2Adium.logMsg("Parsing of #{@srcPath} failed (could not find first line).", true)
182
+ firstLine = file.readline()
183
+ firstLineMatch = @firstLineRegex.match(firstLine)
184
+ if firstLineMatch.nil?
185
+ file.close()
186
+ Pidgin2Adium.logMsg("Parsing of #{@srcPath} failed (could not find valid first line).", true)
116
187
  return false
188
+ else
189
+ # one big string, without the first line
190
+ if self.class == SrcHtmlFileParse
191
+ fileContent = self.cleanup(file.read())
192
+ else
193
+ fileContent = file.read()
194
+ end
195
+ file.close()
117
196
  end
118
- service = first_line_match[4]
119
- # mySN is standardized to avoid "AIM.name" and "AIM.na me" folders
120
- mySN = first_line_match[3].downcase.sub(' ', '')
121
- otherPersonsSN = first_line_match[1]
122
- chatTimePidgin_start = first_line_match[2]
197
+
198
+ service = firstLineMatch[4]
199
+ # userSN is standardized to avoid "AIM.name" and "AIM.na me" folders
200
+ @userSN = firstLineMatch[3].downcase.gsub(' ', '')
201
+ @partnerSN = firstLineMatch[1]
202
+ pidginChatTimeStart = firstLineMatch[2]
203
+ @basicTimeInfo = case firstLine
204
+ when @timeRegexOne: [$1.to_i, $2.to_i, $3.to_i]
205
+ when @timeRegexTwo: [$3.to_i, $1.to_i, $2.to_i]
206
+ end
207
+
123
208
  chatFG = ChatFileGenerator.new(service,
124
- mySN,
125
- otherPersonsSN,
126
- chatTimePidgin_start,
127
- getTimeZoneOffset(),
128
- @masterAlias,
209
+ @userSN,
210
+ @partnerSN,
211
+ createAdiumTime(pidginChatTimeStart),
129
212
  @destDirBase)
130
- all_line_matches = fileContent.scan( Regexp.union(@line_regex, @line_regex_status) )
213
+ fileContent.each_line do |line|
214
+ case line
215
+ when @lineRegex
216
+ chatFG.appendLine( createMsg($~.captures) )
217
+ when @lineRegexStatus
218
+ msg = createStatusOrEventMsg($~.captures)
219
+ # msg is nil if we couldn't parse the status line
220
+ chatFG.appendLine(msg) unless msg.nil?
221
+ end
222
+ end
223
+ return chatFG
224
+ end
131
225
 
132
- # an empty chat window that got saved
133
- if all_line_matches.empty?
134
- return chatFG
226
+ def getSenderByAlias(aliasName)
227
+ if @userAliases.include? aliasName.downcase.sub(/^\*{3}/,'').gsub(/\s+/, '')
228
+ # Set the current alias being used of the ones in @userAliases
229
+ @userAlias = aliasName.sub(/^\*{3}/, '')
230
+ return @userSN
231
+ else
232
+ return @partnerSN
135
233
  end
234
+ end
136
235
 
137
- all_line_matches.each do |line|
138
- chatFG.appendLine( createMsgData(line) )
236
+ # createMsg takes an array of captures from matching against @lineRegex
237
+ # and returns a Message object or one of its subclasses.
238
+ # It can be used for SrcTxtFileParse and SrcHtmlFileParse because
239
+ # both of them return data in the same indexes in the matches array.
240
+ def createMsg(matches)
241
+ msg = nil
242
+ # Either a regular message line or an auto-reply/away message.
243
+ time = createAdiumTime(matches[0])
244
+ aliasStr = matches[1]
245
+ sender = getSenderByAlias(aliasStr)
246
+ body = matches[3]
247
+ if matches[2] # auto-reply
248
+ msg = AutoReplyMessage.new(sender, time, aliasStr, body)
249
+ else
250
+ # normal message
251
+ msg = XMLMessage.new(sender, time, aliasStr, body)
139
252
  end
140
- return chatFG
253
+ return msg
254
+ end
255
+
256
+ # createStatusOrEventMsg takes an array of +MatchData+ captures from
257
+ # matching against @lineRegexStatus and returns an Event or Status.
258
+ def createStatusOrEventMsg(matches)
259
+ # ["22:58:00", "BuddyName logged in."]
260
+ # 0: time
261
+ # 1: status message or event
262
+ msg = nil
263
+ time = createAdiumTime(matches[0])
264
+ str = matches[1]
265
+ regex, status = @statusMap.detect{|regex, status| str =~ regex}
266
+ if regex and status
267
+ # Status message
268
+ aliasStr = regex.match(str)[1]
269
+ sender = getSenderByAlias(aliasStr)
270
+ msg = StatusMessage.new(sender, time, aliasStr, status)
271
+ else
272
+ # Test for event
273
+ regex = @libPurpleEvents.detect{|regex| str =~ regex }
274
+ eventType = 'libpurpleEvent' if regex
275
+ unless regex and eventType
276
+ # not a libpurple event, try others
277
+ regexAndEventType = @eventMap.detect{|regex,eventType| str =~ regex}
278
+ if regexAndEventType.nil?
279
+ Pidgin2Adium.logMsg("You have found an odd status line. Please send this line to the developer.", true)
280
+ Pidgin2Adium.logMsg("The line is: #{str}", true)
281
+ return nil
282
+ else
283
+ regex = regexAndEventType[0]
284
+ eventType = regexAndEventType[1]
285
+ end
286
+ end
287
+ if regex and eventType
288
+ regexMatches = regex.match(str)
289
+ # Event message
290
+ if regexMatches.size == 1
291
+ # No alias - this means it's the user
292
+ aliasStr = @userAlias
293
+ sender = @userSN
294
+ else
295
+ aliasStr = regex.match(str)[1]
296
+ sender = getSenderByAlias(aliasStr)
297
+ end
298
+ msg = Event.new(sender, time, aliasStr, str, eventType)
299
+ end
300
+ end
301
+ return msg
141
302
  end
142
303
  end
143
304
 
144
305
  class SrcTxtFileParse < SrcFileParse
145
- def initialize(srcPath, destDirBase, masterAlias, userTZ, userTZOffset)
146
- super(srcPath, destDirBase, masterAlias, userTZ, userTZOffset)
147
- # @line_regex matches a line in an HTML log file other than the first
148
- # @line_regex matchdata:
306
+ def initialize(srcPath, destDirBase, userAliases, userTZ, userTZOffset)
307
+ super(srcPath, destDirBase, userAliases, userTZ, userTZOffset)
308
+ # @lineRegex matches a line in a TXT log file other than the first
309
+ # @lineRegex matchdata:
149
310
  # 0: timestamp
150
- # 1: screen name
311
+ # 1: screen name or alias, if alias set
151
312
  # 2: "<AUTO-REPLY>" or nil
152
- # 3: message
153
- @line_regex = /#{@timestamp_regex_str} (.*?) ?(<AUTO-REPLY>)?: (.*)$/
154
- # @line_regex_status matches a status line
155
- # @line_regex_status matchdata:
313
+ # 3: message body
314
+ @lineRegex = /#{@timestampRegexStr} (.*?) ?(<AUTO-REPLY>)?: (.*)$/o
315
+ # @lineRegexStatus matches a status line
316
+ # @lineRegexStatus matchdata:
156
317
  # 0: timestamp
157
- # 1: message
158
- @line_regex_status = /#{@timestamp_regex_str} ([^:]+?)[\r\n]{1,2}/
318
+ # 1: status message
319
+ @lineRegexStatus = /#{@timestampRegexStr} ([^:]+?)[\r\n]/o
159
320
  end
160
321
 
161
- # createMsgData takes a +MatchData+ object (from @line_regex or @line_regex_status) and returns a hash
162
- # with the following keys: time, alias, and message and/or status.
163
- def createMsgData(matchObj)
164
- msg_data_hash = { 'time' => nil, 'alias' => nil, 'status' => nil, 'body' => nil, 'auto-reply' => nil }
165
- if matchObj[4..5] == [nil, nil]
166
- # regular message
167
- # ["10:58:29", "BuddyName", "<AUTO-REPLY>", "hello!\r", nil, nil]
168
- msg_data_hash['time'] = matchObj[0]
169
- msg_data_hash['alias'] = matchObj[1]
170
- msg_data_hash['auto-reply'] = (matchObj[2] != nil)
171
- # strip() to remove "\r" from end
172
- msg_data_hash['body'] = matchObj[3].strip
173
- elsif matchObj[0..3] == [nil, nil, nil, nil]
174
- # status message
175
- # [nil, nil, nil, nil, "22:58:00", "BuddyName logged in."]
176
- alias_and_status = getAliasAndStatus(matchObj[5])
177
- msg_data_hash['time'] = matchObj[4]
178
- msg_data_hash['alias'] = alias_and_status[0]
179
- msg_data_hash['status'] = alias_and_status[1]
180
- end
181
- return msg_data_hash
182
- end
183
322
  end
184
323
 
185
324
  class SrcHtmlFileParse < SrcFileParse
186
- def initialize(srcPath, destDirBase, masterAlias, userTZ, userTZOffset)
187
- super(srcPath, destDirBase, masterAlias, userTZ, userTZOffset)
188
- # @line_regex matches a line in an HTML log file other than the first
325
+ def initialize(srcPath, destDirBase, userAliases, userTZ, userTZOffset)
326
+ super(srcPath, destDirBase, userAliases, userTZ, userTZOffset)
327
+ # @lineRegex matches a line in an HTML log file other than the first
189
328
  # time matches on either "2008-11-17 14:12" or "14:12"
190
- # @line_regex match obj:
329
+ # @lineRegex match obj:
191
330
  # 0: timestamp, extended or not
192
- # 1: alias
331
+ # 1: screen name or alias, if alias set
193
332
  # 2: "&lt;AUTO-REPLY&gt;" or nil
194
333
  # 3: message body
195
334
  # <span style='color: #000000;'>test sms</span>
196
- @line_regex = /#{@timestamp_regex_str} ?<b>(.*?) ?(&lt;AUTO-REPLY&gt;)?:?<\/b> ?(.*)<br ?\/>/ #(?:[\n\r]{1,2}<(?:font|\/body))/s
197
- # @line_regex_status matches a status line
198
- # @line_regex_status match obj:
335
+ @lineRegex = /#{@timestampRegexStr} ?<b>(.*?) ?(&lt;AUTO-REPLY&gt;)?:?<\/b> ?(.*)<br ?\/>/o
336
+ # @lineRegexStatus matches a status line
337
+ # @lineRegexStatus match obj:
199
338
  # 0: timestamp
200
339
  # 1: status message
201
- @line_regex_status = /#{@timestamp_regex_str} ?<b> (.*?)<\/b><br\/>/
202
- end
203
-
204
- # createMsgData takes a +MatchData+ object (from @line_regex or @line_regex_status) and returns a hash
205
- # with the following keys: time, alias, and message and/or status.
206
- def createMsgData(matchObj)
207
- msg_data_hash = { 'time' => nil,
208
- 'alias' => nil,
209
- 'auto-reply' => nil,
210
- 'body' => nil,
211
- 'status' => nil}
212
- # the Regexp.union leaves nil where one of the regexes didn't match.
213
- # (Is there any way to have it not do this?)
214
- # ie
215
- # the first one matches: ['foo', 'bar', 'baz', 'bash', nil, nil]
216
- # second one matches: [nil, nil, nil, nil, 'bim', 'bam']
217
- if matchObj[0..3] == [nil, nil, nil, nil]
218
- # This is a status message.
219
- # slice off results from other Regexp
220
- # becomes: ["11:27:53", "Generic Screenname228 logged in."]
221
- matchObj = matchObj[4..5]
222
- alias_and_status = getAliasAndStatus(matchObj[1])
223
- msg_data_hash['time'] = matchObj[0]
224
- msg_data_hash['alias'] = alias_and_status[0]
225
- msg_data_hash['status'] = alias_and_status[1]
226
- elsif matchObj[4..5] == [nil, nil]
227
- # Either a regular message line or an auto-reply/away message.
228
- # slice off results from other Regexp
229
- matchObj = matchObj[0..3]
230
- msg_data_hash['time'] = matchObj[0]
231
- msg_data_hash['alias'] = matchObj[1]
232
- msg_data_hash['body'] = matchObj[3]
233
- if not matchObj[2].nil?
234
- # an auto-reply message
235
- msg_data_hash['auto-reply'] = true
236
- end
237
- end
238
- return msg_data_hash
340
+ @lineRegexStatus = /#{@timestampRegexStr} ?<b> (.*?)<\/b><br ?\/>/o
239
341
  end
240
342
 
241
- # Removes <font> tags, empty <a>s, spans with either no color
343
+ # Removes <font> tags, empty <a>s, and spans with either no color
242
344
  # information or color information that just turns the text black.
243
345
  # Returns a string.
244
346
  def cleanup(text)
245
- color_regex = /.*(color: ?#[[:alnum:]]{6}; ?).*/
246
- # For some reason, Hpricot doesn't work well with
247
- # elem.swap(elem.innerHTML) when the elements are nested
248
- # (eg doc.search('font') only returns the outside <font> tags,
249
- # not "font font") and also it appears that it doesn't reinterpret
250
- # the doc when outside tags are swapped with their innerHTML (so
251
- # when <html> tags are replaced with their innerHTML, then
252
- # a search for <font> tags in the new HTML fails).
253
- # Long story short, we use gsub.
347
+ # Pidgin and Adium both show bold using
348
+ # <span style="font-weight: bold;"> except Pidgin uses single quotes
349
+ # and Adium uses double quotes
254
350
  text.gsub!(/<\/?(html|body|font).*?>/, '')
255
- doc = Hpricot(text)
256
- # These empty links sometimes are appended to every line in a chat,
351
+ # These empty links are sometimes appended to every line in a chat,
257
352
  # for some weird reason. Remove them.
258
- doc.search("a[text()='']").remove
259
- spans = doc.search('span')
260
- spans.each do |span|
261
- if span.empty?
262
- Hpricot::Elements[span].remove
263
- else
264
- # No need to check for the span.attributes.key?('style')
265
- if span[:style] =~ color_regex
266
- # Remove black-text spans after other processing because
267
- # the processing can reduce spans to that
268
- span[:style] = span[:style].gsub(color_regex, '\1').
269
- gsub(/color: ?#000000; ?/,'')
270
- # Remove span but keep its contents
271
- span.swap(span.innerHTML) if span[:style] == ''
353
+ text.gsub!(%r{<a href='.+?'>\s*?</a>}, '')
354
+ text.gsub!(%r{(.*?)<span.+style='(.+?)'>(.*?)</span>(.*)}) do |s|
355
+ # before = text before match
356
+ # style = style declaration
357
+ # innertext = text inside <span>
358
+ # after = text after match
359
+ before, style, innertext, after = *($~[1..4])
360
+ # TODO: remove after from string then see what balanceTags does
361
+ # Remove empty spans.
362
+ nil if innertext == ''
363
+ # Only allow some style declarations
364
+ # We keep:
365
+ # font-weight: bold
366
+ # color (except #000000)
367
+ # text-decoration: underline
368
+ styleparts = style.split(/; ?/)
369
+ styleparts.map! do |p|
370
+ # Short-circuit for common declaration
371
+ # Yes, sometimes there's a ">" before the ";".
372
+ if p == 'color: #000000;' or p == 'color: #000000>;'
373
+ nil
272
374
  else
273
- span.swap(span.innerHTML)
375
+ case p
376
+ when /font-family/: nil
377
+ when /font-size/: nil
378
+ when /background/: nil
379
+ end
274
380
  end
275
381
  end
382
+ styleparts.compact!
383
+ if styleparts.empty?
384
+ style = ''
385
+ elsif styleparts.size == 1
386
+ style = styleparts[0] << ';'
387
+ else
388
+ style = styleparts.join('; ') << ';'
389
+ end
390
+ if style != ''
391
+ innertext = "<span style=\"#{style}\">#{innertext}</span>"
392
+ end
393
+ before + innertext + after
394
+ end
395
+ # Pidgin uses <em>, Adium uses <span>
396
+ if text.gsub!('<em>', '<span style="italic">')
397
+ text.gsub!('</em>', '</span>')
276
398
  end
277
- return doc.to_html
399
+ return text
400
+ end
401
+ end
402
+
403
+ # A holding object for each line of the chat.
404
+ # It is subclassed as appropriate (eg AutoReplyMessage).
405
+ # All Messages have senders, times, and aliases.
406
+ class Message
407
+ def initialize(sender, time, aliasStr)
408
+ @sender = sender
409
+ @time = time
410
+ @aliasStr = aliasStr
411
+ end
412
+ end
413
+
414
+ # Basic message with body text (as opposed to pure status messages, which
415
+ # have no body).
416
+ class XMLMessage < Message
417
+ def initialize(sender, time, aliasStr, body)
418
+ super(sender, time, aliasStr)
419
+ @body = body
420
+ normalizeBody!()
421
+ end
422
+
423
+ def getOutput
424
+ return sprintf('<message sender="%s" time="%s" alias="%s">%s</message>' << "\n",
425
+ @sender, @time, @aliasStr, @body)
426
+ end
427
+
428
+ def normalizeBody!
429
+ normalizeBodyEntities!()
430
+ # Fix mismatched tags. Yes, it's faster to do it per-message
431
+ # than all at once.
432
+ @body = Pidgin2Adium.balanceTags(@body)
433
+ if @aliasStr[0,3] == '***'
434
+ # "***<alias>" is what pidgin sets as the alias for a /me action
435
+ @aliasStr.slice!(0,3)
436
+ @body = '*' << @body << '*'
437
+ end
438
+ @body = '<div><span style="font-family: Helvetica; font-size: 12pt;">' <<
439
+ @body <<
440
+ '</span></div>'
441
+ end
442
+
443
+ def normalizeBodyEntities!
444
+ # Convert '&' to '&amp;' only if it's not followed by an entity.
445
+ @body.gsub!(/&(?!lt|gt|amp|quot|apos)/, '&amp;')
446
+ # replace single quotes with '&apos;' but only outside <span>s.
447
+ @body.gsub!(/(.*?)(<span.*?>.*?<\/span>)(.*?)/) do
448
+ before, span, after = $1, ($2||''), $3||''
449
+ before.gsub("'", '&aquot;') <<
450
+ span <<
451
+ after.gsub("'", '&aquot;')
452
+ end
453
+ end
454
+ end
455
+
456
+ # An auto reply message, meaning it has a body.
457
+ class AutoReplyMessage < XMLMessage
458
+ def getOutput
459
+ return sprintf('<message sender="%s" time="%s" auto="true" alias="%s">%s</message>' << "\n", @sender, @time, @aliasStr, @body)
460
+ end
461
+ end
462
+
463
+ # A message saying e.g. "Blahblah has gone away."
464
+ class StatusMessage < Message
465
+ def initialize(sender, time, aliasStr, status)
466
+ super(sender, time, aliasStr)
467
+ @status = status
468
+ end
469
+ def getOutput
470
+ return sprintf('<status type="%s" sender="%s" time="%s" alias="%s"/>' << "\n", @status, @sender, @time, @aliasStr)
471
+ end
472
+ end
473
+
474
+ # An <event> line of the chat
475
+ class Event < XMLMessage
476
+ def initialize(sender, time, aliasStr, body, type="libpurpleMessage")
477
+ super(sender, time, aliasStr, body)
478
+ @type = type
479
+ end
480
+
481
+ def getOutput
482
+ return sprintf('<event type="%s" sender="%s" time="%s" alias="%s">%s</event>', @type, @sender, @time, @aliasStr, @body)
278
483
  end
279
484
  end
280
485
  end # end module