pidgin2adium 1.0.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +6 -0
- data/Manifest.txt +10 -0
- data/README.rdoc +106 -0
- data/Rakefile.rb +26 -0
- data/bin/pidgin2adium +72 -0
- data/lib/pidgin2adium.rb +120 -0
- data/lib/pidgin2adium/{balance-tags.rb → balance_tags.rb} +32 -29
- data/lib/pidgin2adium/log_converter.rb +68 -0
- data/lib/pidgin2adium/log_file.rb +101 -0
- data/lib/pidgin2adium/log_parser.rb +590 -0
- metadata +39 -19
- data/bin/pidgin2adium_logs +0 -67
- data/bin/pidgin2adium_status +0 -15
- data/lib/pidgin2adium/ChatFileGenerator.rb +0 -59
- data/lib/pidgin2adium/SrcFileParse.rb +0 -485
- data/lib/pidgin2adium/logs.rb +0 -250
- data/lib/pidgin2adium/status.rb +0 -113
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'pidgin2adium'
|
2
|
+
|
3
|
+
module Pidgin2Adium
  # An easy way to batch-process a directory. Used by the pidgin2adium
  # command-line script.
  class LogConverter
    include Pidgin2Adium

    # pidgin_log_dir:: directory that is searched (recursively) for logs.
    # aliases:: handed unchanged to parse_and_generate for every file.
    # opts:: handed unchanged to parse_and_generate.
    #
    # You can add options using the _opts_ hash, which can have the
    # following keys, all of which are optional:
    # * *overwrite*: If true, then overwrite even if log is found.
    #   Defaults to false.
    # * *output_dir*: The top-level dir to put the logs in.
    #   Logs under output_dir are still each in their own folders, etc.
    #   Defaults to Pidgin2Adium::ADIUM_LOG_DIR
    #
    # Raises Errno::ENOENT if pidgin_log_dir is not an existing directory.
    def initialize(pidgin_log_dir, aliases, opts = {})
      # parse_and_generate will process it for us
      @opts = opts

      @pidgin_log_dir = File.expand_path(pidgin_log_dir)
      @my_aliases = aliases

      unless File.directory?(@pidgin_log_dir)
        puts "Source directory #{@pidgin_log_dir} does not exist or is not a directory."
        # Carry the offending path in the exception instead of a bare errno.
        raise Errno::ENOENT, @pidgin_log_dir
      end
    end

    # Runs parse_and_generate on every log file found under the directory
    # given to new, then calls delete_search_indexes (per the original
    # author, to force Adium to rescan logs on startup).
    #
    # Re-raises Errno::EACCES if the directory listing is not permitted.
    def start
      log_msg "Begin converting."
      begin
        files_path = get_all_chat_files(@pidgin_log_dir)
      rescue Errno::EACCES => bang
        error("Sorry, permission denied for getting Pidgin chat files from #{@pidgin_log_dir}.")
        error("Details: #{bang.message}")
        # Bare raise re-raises the rescued exception, keeping its message
        # and backtrace (the class callers see is still Errno::EACCES).
        raise
      end

      total_files = files_path.size
      total_successes = 0
      log_msg("#{total_files} files to convert.")
      files_path.each_with_index do |fname, i|
        log_msg(sprintf("[%d/%d] Converting %s...", i + 1, total_files, fname))
        result = parse_and_generate(fname, @my_aliases, @opts)
        # Only a literal true counts as a conversion; any other return
        # value (truthy or not) is not counted.
        total_successes += 1 if result == true
      end

      delete_search_indexes()

      log_msg "Finished converting! Converted #{total_successes} files of #{total_files} total."
    end

    ###########
    private
    ###########

    # Returns the paths of all .htm, .html and .txt files anywhere below
    # _dir_, or [] when _dir_ itself is named ".system".
    def get_all_chat_files(dir)
      return [] if File.basename(dir) == ".system"
      # "**" makes the glob recurse into each subdir. The search is rooted
      # at the _dir_ argument (it used to ignore it and always search
      # @pidgin_log_dir).
      # NOTE(review): BAD_DIRS is defined outside this file; subtracting it
      # only removes entries that equal a globbed path exactly -- confirm
      # that is the intended filter.
      Dir.glob("#{dir}/**/*.{htm,html,txt}") - BAD_DIRS
    end
  end
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
# ADD DOCUMENTATION
|
2
|
+
|
3
|
+
require 'fileutils'
|
4
|
+
|
5
|
+
module Pidgin2Adium
  # A holding object for the result of LogParser.parse. It makes the
  # instance variable @chat_lines available, which is an array of objects
  # which each have at least the instance variables _sender_, _time_, and
  # _buddy_alias_ available. Some objects in @chat_lines have more variables
  # available, specifically:
  # * XMLMessage, AutoReplyMessage, and Event:: _body_
  # * Event:: _event_type_
  # * StatusMessage:: _status_
  class LogFile
    include Pidgin2Adium

    # key is for Pidgin, value is for Adium
    # Just used for <service>.<screenname> in directory structure
    SERVICE_NAME_MAP = {
      'aim'    => 'AIM',
      'jabber' => 'jabber',
      'gtalk'  => 'GTalk',
      'icq'    => 'ICQ',
      'qq'     => 'QQ',
      'msn'    => 'MSN',
      'yahoo'  => 'Yahoo'
    }.freeze

    # printf template for the written file: account, service, chat body.
    # no \n before </chat> because the chat body (from to_s) has it already
    XML_TEMPLATE = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n" \
                   "<chat xmlns=\"http://purl.org/net/ulf/ns/0.4-02\" account=\"%s\" service=\"%s\">\n" \
                   "%s</chat>"

    attr_reader :chat_lines, :service, :user_SN, :partner_SN, :adium_chat_time_start

    # chat_lines:: array of objects whose to_s output is concatenated by to_s.
    # service:: Pidgin service name; matched case-insensitively against
    #           SERVICE_NAME_MAP. A name that is not in the map is kept
    #           as given (it used to become nil).
    # user_SN, partner_SN, adium_chat_time_start:: strings used to build the
    #           output path in write_out.
    def initialize(chat_lines, service, user_SN, partner_SN, adium_chat_time_start)
      @chat_lines = chat_lines
      @user_SN = user_SN
      @partner_SN = partner_SN
      @adium_chat_time_start = adium_chat_time_start

      # @chat_str is generated when to_s is called
      @chat_str = nil

      @service = SERVICE_NAME_MAP[service.downcase] || service
    end

    # Returns contents of log file (memoized after the first call).
    def to_s
      @chat_str ||= @chat_lines.map { |line| line.to_s }.join
    end

    # Yields each chat line in order.
    def each(&blk)
      @chat_lines.each(&blk)
    end

    # Set overwrite=true to create a logfile even if logfile already exists.
    # Returns one of:
    # * false (if an error occurred),
    # * Pidgin2Adium::FILE_EXISTS if the file to be generated already exists and overwrite=false, or
    # * the path to the new Adium log file.
    def write_out(overwrite = false, output_dir_base = ADIUM_LOG_DIR)
      # "buddyname (2009-08-04T18.38.50-0700)"
      log_name = "#{@partner_SN} (#{@adium_chat_time_start})"
      partner_dir = File.join(output_dir_base, "#{@service}.#{@user_SN}", @partner_SN)
      # partner_dir + "/buddyname (2009-08-04T18.38.50-0700).chatlog"
      output_dir = File.join(partner_dir, "#{log_name}.chatlog")
      # output_dir + "/buddyname (2009-08-04T18.38.50-0700).xml"
      output_path = output_dir + '/' + "#{log_name}.xml"

      if overwrite
        unless File.exist?(output_path)
          # File doesn't exist, but maybe it does with a different
          # time zone. Check for a file that differs only in time
          # zone and, if found, target it instead. This runs before
          # mkdir_p so no empty .chatlog directory is left behind.
          other = find_log_in_other_time_zone(partner_dir, log_name)
          if other
            output_path = other
            output_dir = File.dirname(other)
          end
        end
      elsif File.exist?(output_path)
        return FILE_EXISTS
      end

      begin
        FileUtils.mkdir_p(output_dir)
      rescue => bang
        error "Could not create destination directory for log file. (Details: #{bang.class}: #{bang.message})"
        return false
      end

      begin
        outfile = File.new(output_path, 'w')
      rescue => bang
        error "Could not open log file for writing. (Details: #{bang.class}: #{bang.message})"
        return false
      end

      begin
        outfile.printf(XML_TEMPLATE, @user_SN, @service, self.to_s)
      ensure
        # Close the handle even when the write raises.
        outfile.close
      end

      return output_path
    end

    ###########
    private
    ###########

    # Looks in _partner_dir_ for a "<name> (<time><+/-HHMM>).chatlog"
    # directory whose name equals _log_name_ except for the 4-digit offset,
    # and returns the path of the first .xml file inside it (sorted order),
    # or nil when there is none or _log_name_ carries no offset.
    def find_log_in_other_time_zone(partner_dir, log_name)
      stem = log_name.sub(/[-+]\d{4}\)\z/, '')
      return nil if stem == log_name

      pattern = File.join(escape_glob(partner_dir),
                          "#{escape_glob(stem)}[+-][0-9][0-9][0-9][0-9]).chatlog",
                          '*.xml')
      Dir.glob(pattern).sort.first
    end

    # Backslash-escapes glob metacharacters so _str_ is matched literally.
    def escape_glob(str)
      str.gsub(/[\\*?\[\]{}]/) { |char| "\\#{char}" }
    end
  end
end
|
@@ -0,0 +1,590 @@
|
|
1
|
+
# Contains the class BasicParser and its subclasses, HtmlLogParser and
|
2
|
+
# TextLogParser, which parse the file passed into it and return a LogFile
|
3
|
+
# object.
|
4
|
+
#
|
5
|
+
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
|
6
|
+
# using these classes directly.
|
7
|
+
require 'parsedate'
|
8
|
+
|
9
|
+
require 'pidgin2adium/balance_tags'
|
10
|
+
require 'pidgin2adium/log_file'
|
11
|
+
|
12
|
+
module Pidgin2Adium
|
13
|
+
# Empty class. Raise'd by LogParser if the first line of a log is not
|
14
|
+
# parseable.
|
15
|
+
# Marker exception with no behavior of its own; it only needs to be
# distinguishable from other StandardErrors when rescued.
class InvalidFirstLineError < StandardError
end
|
16
|
+
|
17
|
+
# BasicParser is a base class. Its subclasses are TextLogParser and
|
18
|
+
# HtmlLogParser.
|
19
|
+
#
|
20
|
+
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
|
21
|
+
# using this class directly.
|
22
|
+
class BasicParser
  include Pidgin2Adium

  # src_path:: path of the Pidgin log file; it is read in full here.
  # user_aliases:: comma-separated string of the user's own aliases.
  #
  # If the first line of the file cannot be parsed (this now includes an
  # empty file), an error is reported and #parse will return false.
  def initialize(src_path, user_aliases)
    @src_path = src_path
    alias_list = user_aliases.split(',')
    # Whitespace is removed for easy matching later on.
    @user_aliases = alias_list.map { |x| x.downcase.gsub(/\s+/, '') }.uniq
    # @user_alias is set each time get_sender_by_alias is called. It is a
    # non-normalized alias.
    # Set an initial value just in case the first message doesn't give
    # us an alias.
    @user_alias = alias_list[0]

    @tz_offset = get_time_zone_offset()

    # Block form closes the file even if reading raises. gets returns nil
    # on an empty file; to_s turns that into "" so it is handled as an
    # invalid first line instead of raising EOFError.
    File.open(@src_path, 'r') do |file|
      @first_line = file.gets.to_s
      @file_content = file.read
    end

    # Time regexes must be set before pre_parse().
    # "4/18/2007 11:02:00 AM" => %w{4, 18, 2007, 11, 02, 00, AM}
    # ONLY used (if at all) in first line of chat ("Conversation with...at...")
    @time_regex_first_line = %r{(\d{1,2})/(\d{1,2})/(\d{4}) (\d{1,2}):(\d{2}):(\d{2}) ([AP]M)}
    # "2007-04-17 12:33:13" => %w{2007, 04, 17, 12, 33, 13}
    @time_regex = /(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})/
    # sometimes a line in a chat doesn't have a full timestamp
    # "04:22:05 AM" => %w{04 22 05 AM}
    @minimal_time_regex = /(\d{1,2}):(\d{2}):(\d{2})( [AP]M)?/

    # Whether or not the first line is parseable.
    @first_line_is_valid = true
    begin
      # @basic_time_info is for files that only have the full
      # timestamp at the top; we can use it to fill in the minimal
      # per-line timestamps. It has only 3 elements (year, month,
      # dayofmonth) because you should be able to fill everything
      # else in. If you can't, something's wrong.
      # @adium_chat_time_start is when the chat started, in Adium's format.
      @service, @user_SN, @partner_SN, @basic_time_info, @adium_chat_time_start = pre_parse()
    rescue InvalidFirstLineError
      @first_line_is_valid = false
      error("Parsing of #{@src_path} failed (could not find valid first line).")
      return # stop processing
    end

    # @status_map, @lib_purple_events, and @event_map are used in
    # create_status_or_event_msg
    @status_map = {
      /(.+) logged in\.$/ => 'online',
      /(.+) logged out\.$/ => 'offline',
      /(.+) has signed on\.$/ => 'online',
      /(.+) has signed off\.$/ => 'offline',
      /(.+) has gone away\.$/ => 'away',
      /(.+) is no longer away\.$/ => 'available',
      /(.+) has become idle\.$/ => 'idle',
      /(.+) is no longer idle\.$/ => 'available'
    }

    # lib_purple_events are all of event_type libPurple
    @lib_purple_events = [
      # file transfer
      /Starting transfer of .+ from (.+)/,
      /^Offering to send .+ to (.+)$/,
      /(.+) is offering to send file/,
      /^Transfer of file .+ complete$/,
      # The alternation is grouped so "Error" applies to all three verbs
      # (ungrouped, a bare "writing" anywhere in the line matched).
      /Error (?:reading|writing|accessing) .+: .+/,
      /You cancelled the transfer of/,
      /File transfer cancelled/,
      /(.+) cancelled the transfer of/,
      /(.+) cancelled the file transfer/,
      # Direct IM - actual (dis)connect events are their own types
      /^Attempting to connect to (.+) at .+ for Direct IM\./,
      /^Asking (.+) to connect to us at .+ for Direct IM\./,
      /^Attempting to connect via proxy server\.$/,
      /^Direct IM with (.+) failed/,
      # encryption
      /Received message encrypted with wrong key/,
      /^Requesting key\.\.\.$/,
      /^Outgoing message lost\.$/,
      /^Conflicting Key Received!$/,
      /^Error in decryption- asking for resend\.\.\.$/,
      /^Making new key pair\.\.\.$/,
      # file transfer
      /You canceled the transfer of/,
      /(.+?) canceled the transfer of/,
      # sending errors
      /^Last outgoing message not received properly- resetting$/,
      /Resending\.\.\./,
      # connection errors
      /Lost connection with the remote user:.+/,
      # chats
      /^.+ entered the room\.$/,
      /^.+ left the room\.$/
    ]

    # non-libpurple events
    # Each key maps to an event_type string. The keys will be matched against a line of chat
    # and the partner's alias will be in regex group 1, IF the alias is matched.
    @event_map = {
      # .+ is not an alias, it's a proxy server so no grouping
      /^Attempting to connect to .+\.$/ => 'direct-im-connect',
      # NB: pidgin doesn't track when Direct IM is disconnected, AFAIK
      /^Direct IM established$/ => 'directIMConnected',
      /Unable to send message/ => 'chat-error',
      /You missed .+ messages from (.+) because they were too large/ => 'chat-error',
      /User information not available/ => 'chat-error'
    }

    @ignore_events = [
      # Adium ignores SN/alias changes.
      # The trailing <br/> is optional: the status text captured by the
      # subclasses' @line_regex_status does not include it.
      /^.+? is now known as .+?\.(?:<br\/?>)?$/
    ]
  end

  # This method returns a LogFile instance, or false if an error occurred.
  #
  # Relies on the subclass to provide cleanup, @line_regex and
  # @line_regex_status. Replaces @file_content with the parsed line objects.
  def parse
    return false unless @first_line_is_valid
    @file_content = cleanup(@file_content).split("\n")

    @file_content.map! do |line|
      # Skip blank lines, including completely empty ones.
      next if line =~ /\A\s*\z/
      if line =~ @line_regex
        create_msg($~.captures)
      elsif line =~ @line_regex_status
        create_status_or_event_msg($~.captures)
      else
        error "Could not parse line:"
        p line
        exit 1 # if $DEBUG FIXME
      end
    end
    # Drop the nils left by skipped lines and ignored events.
    @file_content.compact!
    return LogFile.new(@file_content, @service, @user_SN, @partner_SN, @adium_chat_time_start)
  end

  #################
  private
  #################

  # Returns the signed numeric offset found just before a three-letter zone
  # name and the extension in @src_path (e.g. "-0700" from
  # "...-0700PDT.html"), or '' when the path has no such part.
  def get_time_zone_offset()
    tz_match = /([-\+]\d+)[A-Z]{3}\.(?:txt|htm|html)/.match(@src_path)
    tz_match ? tz_match[1] : ''
  end

  # Converts a 12-hour clock hour to 24-hour using _meridiem_ ("AM"/"PM",
  # surrounding whitespace ignored). Any other meridiem, including nil,
  # leaves _hour_ unchanged.
  def to_24_hour(hour, meridiem)
    case meridiem.to_s.strip
    when 'PM' then hour == 12 ? 12 : hour + 12
    when 'AM' then hour == 12 ? 0 : hour
    else hour
    end
  end

  #--
  # Adium time format: YYYY-MM-DD\THH.MM.SS[+-]TZ_HRS like:
  # 2008-10-05T22.26.20-0800
  #++
  # Converts a pidgin datestamp to an Adium one.
  #
  # _is_first_line_ additionally allows the "4/18/2007 11:02:00 AM" format.
  # Timestamps without a date take it from @basic_time_info.
  def create_adium_time(time, is_first_line = false)
    if (m = @time_regex.match(time))
      year, month, day, hour, min, sec = m.captures.map { |c| c.to_i }
    elsif is_first_line && (m = @time_regex_first_line.match(time))
      month, day, year, hour, min, sec = m.captures[0, 6].map { |c| c.to_i }
      hour = to_24_hour(hour, m[7])
    elsif (m = @minimal_time_regex.match(time))
      # "04:22:05" => %w{04 22 05}
      hour, min, sec = m.captures[0, 3].map { |c| c.to_i }
      # Group 4 is " AM"/" PM" with a leading space, or nil.
      hour = to_24_hour(hour, m[4])
      year, month, day = @basic_time_info
    else
      error("You have found an odd timestamp. Please report it to the developer.")
      log_msg("The timestamp: #{time}")
      log_msg("Continuing...")
      # NOTE(review): ParseDate comes from the 'parsedate' library, which
      # was dropped from the standard library in Ruby 1.9 -- confirm the
      # target Ruby version or replace this fallback.
      year, month, day, hour, min, sec = ParseDate.parsedate(time)
    end
    return Time.local(year, month, day, hour, min, sec).strftime("%Y-%m-%dT%H.%M.%S#{@tz_offset}")
  end

  # Extract required data from the file. Run by initialize.
  #
  # Returns [service, user_SN, partner_SN, basic_time_info,
  # adium_chat_time_start]; raises InvalidFirstLineError when @first_line
  # does not look like "Conversation with X at TIME on SN (PROTOCOL)".
  def pre_parse
    # the first line is special. It tells us (in order of regex groups):
    # 1) who we're talking to
    # 2) what time/date
    # 3) what SN we used
    # 4) what protocol (AIM, icq, jabber...)
    first_line_match = /Conversation with (.+?) at (.+?) on (.+?) \((.+?)\)/.match(@first_line)
    raise InvalidFirstLineError if first_line_match.nil?

    service = first_line_match[4]
    # user_SN is normalized to avoid "AIM.name" and "AIM.na me" folders
    user_SN = first_line_match[3].downcase.tr(' ', '')
    partner_SN = first_line_match[1]
    pidgin_chat_time_start = first_line_match[2]
    # [year, month, day]; nil if the line contains neither date format.
    basic_time_info = case @first_line
                      when @time_regex then [$1.to_i, $2.to_i, $3.to_i]
                      when @time_regex_first_line then [$3.to_i, $1.to_i, $2.to_i]
                      end
    adium_chat_time_start = create_adium_time(pidgin_chat_time_start, true)
    return [service,
            user_SN,
            partner_SN,
            basic_time_info,
            adium_chat_time_start]
  end

  # Returns @user_SN if _alias_name_ (minus a leading "***", compared
  # case- and whitespace-insensitively) is one of the user's aliases, and
  # remembers it in @user_alias; otherwise returns @partner_SN.
  def get_sender_by_alias(alias_name)
    no_action = alias_name.sub(/^\*{3}/, '')
    if @user_aliases.include? no_action.downcase.gsub(/\s+/, '')
      # Set the current alias being used of the ones in @user_aliases
      @user_alias = no_action
      return @user_SN
    else
      return @partner_SN
    end
  end

  #--
  # create_msg takes an array of captures from matching against
  # @line_regex and returns a Message object or one of its subclasses.
  # It can be used for TextLogParser and HtmlLogParser because both of
  # them return data in the same indexes in the matches array.
  #++
  def create_msg(matches)
    # Either a regular message line or an auto-reply/away message.
    time = create_adium_time(matches[0])
    buddy_alias = matches[1]
    sender = get_sender_by_alias(buddy_alias)
    body = matches[3]
    if matches[2] # auto-reply
      AutoReplyMessage.new(sender, time, buddy_alias, body)
    else
      # normal message
      XMLMessage.new(sender, time, buddy_alias, body)
    end
  end

  #--
  # create_status_or_event_msg takes an array of +MatchData+ captures from
  # matching against @line_regex_status and returns an Event or Status.
  #++
  # Returns nil for lines matched by @ignore_events. Exits the process
  # when the text matches no known status or event.
  def create_status_or_event_msg(matches)
    # ["22:58:00", "BuddyName logged in."]
    # 0: time
    # 1: status message or event
    time = create_adium_time(matches[0])
    str = matches[1]
    # Return nil, which will get compact'ed out
    return nil if @ignore_events.detect { |rx| str =~ rx }

    regex, status = @status_map.detect { |rx, _status| str =~ rx }
    if regex && status
      # Status message
      buddy_alias = regex.match(str)[1]
      sender = get_sender_by_alias(buddy_alias)
      return StatusMessage.new(sender, time, buddy_alias, status)
    end

    # Test for event
    regex = @lib_purple_events.detect { |rx| str =~ rx }
    event_type = 'libpurpleEvent' if regex
    unless regex
      # not a libpurple event, try others. detect returns the matching
      # [regex, event_type] pair (or nil).
      regex, event_type = @event_map.detect { |rx, _type| str =~ rx }
      unless regex
        error("Could not match string to status or event!")
        error(sprintf("matches: %p", matches))
        error(sprintf("str: %p", str))
        exit 1
      end
    end

    regex_matches = regex.match(str)
    # Event message
    if regex_matches.size == 1
      # No alias - this means it's the user
      buddy_alias = @user_alias
      sender = @user_SN
    else
      buddy_alias = regex_matches[1]
      sender = get_sender_by_alias(buddy_alias)
    end
    return Event.new(sender, time, buddy_alias, str, event_type)
  end
end
|
329
|
+
|
330
|
+
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
|
331
|
+
# using this class directly.
|
332
|
+
class TextLogParser < BasicParser
  # See BasicParser#initialize for the arguments. Sets up the regexes that
  # BasicParser#parse matches against each (already cleaned-up) line.
  def initialize(src_path, user_aliases)
    super(src_path, user_aliases)
    @timestamp_rx = '\((\d{1,2}:\d{1,2}:\d{1,2})\)'

    # @line_regex matches a line in a TXT log file other than the first
    # @line_regex matchdata:
    # 0: timestamp
    # 1: screen name or alias, if alias set
    # 2: "&lt;AUTO-REPLY&gt;" or nil
    # 3: message body
    # The marker is matched in its escaped form because parse runs cleanup
    # (which escapes "<" and ">") before it applies this regex.
    @line_regex = /#{@timestamp_rx} (.*?) ?(&lt;AUTO-REPLY&gt;)?: (.*)/o
    # @line_regex_status matches a status line
    # @line_regex_status matchdata:
    # 0: timestamp
    # 1: status message
    @line_regex_status = /#{@timestamp_rx} ([^:]+)/o
  end

  #################
  private
  #################

  # Prepares the raw text of a log for line-by-line parsing. Modifies
  # _text_ in place and returns it.
  def cleanup(text)
    text.delete!("\r")
    # Escape entities since this will be in XML
    text.gsub!('&', '&amp;') # escape '&' first
    text.gsub!('<', '&lt;')
    text.gsub!('>', '&gt;')
    text.gsub!('"', '&quot;')
    text.gsub!("'", '&apos;')
    # Replace newlines with "<br/>" unless they end a chat line.
    # This is done after escaping so the inserted tag is not escaped too.
    text.gsub!(/\n(?!#{@timestamp_rx}|\Z)/, '<br/>')
    return text
  end
end
|
368
|
+
|
369
|
+
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead
|
370
|
+
# of using this class directly.
|
371
|
+
class HtmlLogParser < BasicParser
  # See BasicParser#initialize for the arguments. Sets up the regexes that
  # BasicParser#parse matches against each (already cleaned-up) line.
  def initialize(src_path, user_aliases)
    super(src_path, user_aliases)
    @timestamp_rx = '\(((?:\d{4}-\d{2}-\d{2} )?\d{1,2}:\d{1,2}:\d{1,2}(?: [AP]M)?)\)'

    # @line_regex matches a line in an HTML log file other than the
    # first time matches on either "2008-11-17 14:12" or "14:12"
    # @line_regex match obj:
    # 0: timestamp, extended or not
    # 1: screen name or alias, if alias set
    # 2: the auto-reply marker or nil
    # 3: message body
    # The ":" is optional to allow for strings like "(17:12:21) <b>***Gabe B-W</b> is confused<br/>"
    # The marker is accepted both raw ("<AUTO-REPLY>") and entity-escaped
    # ("&lt;AUTO-REPLY&gt;").
    # NOTE(review): confirm which form Pidgin actually writes.
    @line_regex = /#{@timestamp_rx} ?<b>(.+?) ?((?:<|&lt;)AUTO-REPLY(?:>|&gt;))?:?<\/b> ?(.+)<br ?\/>/o
    # @line_regex_status matches a status line
    # @line_regex_status match obj:
    # 0: timestamp
    # 1: status message
    @line_regex_status = /#{@timestamp_rx} ?<b> (.+)<\/b><br ?\/>/o
  end

  #################
  private
  #################

  # Returns a cleaned string (_text_ is modified in place).
  # Removes the following tags from _text_:
  # * html
  # * body
  # * font
  # * a with no innertext, e.g. <a href="blah"></a>
  # And removes the following style declarations:
  # * color: #000000 (just turns text black)
  # * font-family
  # * font-size
  # * background
  # Also changes <em> to <span style="font-style: italic;">.
  # Spans left with no style declarations are removed (but the text
  # inside them is preserved); empty spans are removed entirely.
  def cleanup(text)
    # Sometimes this is in there. I don't know why.
    text.gsub!(%r{</FONT HSPACE='\d'>}, '')
    # We can remove <font> safely since Pidgin and Adium both show bold
    # using <span style="font-weight: bold;"> except Pidgin uses single
    # quotes while Adium uses double quotes.
    text.gsub!(/<\/?(?:html|body|font)(?: .+?)?>/, '') # very important!

    text.delete!("\r")
    # Remove empty lines (any run of newlines, not just pairs).
    text.gsub!(/\n{2,}/, "\n")

    # Remove newlines that end the file, since they screw up the
    # newline -> <br/> conversion
    text.gsub!(/\n\Z/, '')

    # Replace newlines with "<br/>" unless they end a chat line.
    # This must go after we remove <font> tags.
    text.gsub!(/\n(?!#{@timestamp_rx})/, '<br/>')

    # These empty links are sometimes appended to every line in a chat,
    # for some weird reason. Remove them. The href may use either kind of
    # quote; \1 requires the closing quote to be the same kind.
    text.gsub!(%r{<a href=(['"]).+?\1>\s*?</a>}, '')

    # Replace single quotes inside tags with double quotes so we can
    # easily change single quotes to entities.
    # For spans, removes a space after the final declaration if it exists.
    text.gsub!(/<span style='([^']+?;) ?'>/, '<span style="\1">')
    text.gsub!(/([a-z]+=)'(.+?)'/, '\1"\2"')
    text.gsub!("'", '&apos;')

    text.gsub!(%r{<span style="(.+?)">(.*?)</span>}) do
      # style = style declaration
      # innertext = text inside <span>
      style, innertext = $1, $2
      # Remove empty spans.
      next '' if innertext == ''
      # TODO: replace double quotes with "&quot;", but only outside tags; may still be tags inside spans

      kept = style.split(/; ?/).map { |part| filter_style_declaration(part) }.compact
      if kept.empty?
        innertext
      else
        "<span style=\"#{kept.join('; ')};\">#{innertext}</span>"
      end
    end

    # Pidgin uses <em>, Adium uses <span>
    if text.gsub!('<em>', '<span style="font-style: italic;">')
      text.gsub!('</em>', '</span>')
    end
    return text
  end

  # Returns the style declaration to keep for _part_, or nil to drop it.
  def filter_style_declaration(part)
    if part =~ /^color/
      # Regarding the bit with the ">", sometimes this happens:
      # <span style="color: #000000>today;">today was busy</span>
      # Then part = "color: #000000>today"
      # Or it can end in ">;", with no text before the semicolon.
      # So remove the ">" and anything following it.
      # Use regex instead of string, to account for funky ">" stuff
      return nil if part =~ /color: #000000/
      # Keep the color but remove the bit after it
      return $1 if part =~ /(color: #[0-9a-fA-F]{6})(>.*)?/
      nil
    elsif part =~ /^(?:font-family|font-size|background)/
      nil
    else
      # don't remove font-weight (or any other declaration not listed above)
      part
    end
  end
end
|
496
|
+
|
497
|
+
# A holding object for each line of the chat. It is subclassed as
|
498
|
+
# appropriate (eg AutoReplyMessage). Each subclass (but not Message
|
499
|
+
# itself) has its own to_s which prints out its information in a format
|
500
|
+
# appropriate for putting in an Adium log file.
|
501
|
+
# Subclasses: XMLMessage, AutoReplyMessage, StatusMessage, Event.
|
502
|
+
class Message
  # All three fields are readable and writable.
  attr_accessor :sender, :time, :buddy_alias

  # Stores the three values exactly as given.
  def initialize(sender, time, buddy_alias)
    @sender, @time, @buddy_alias = sender, time, buddy_alias
  end
end
|
510
|
+
|
511
|
+
# Basic message with body text (as opposed to pure status messages, which
|
512
|
+
# have no body).
|
513
|
+
class XMLMessage < Message
  include Pidgin2Adium

  attr_accessor :body

  # Stores _body_, normalizes it (and _buddy_alias_), then wraps the
  # normalized body in the markup that to_s emits.
  def initialize(sender, time, buddy_alias, body)
    super(sender, time, buddy_alias)
    @body = body
    normalize_body!()
    # Built after normalize_body! so the normalized text is what ends up in
    # the output (built before it, the normalization never reached to_s).
    @styled_body = '<div><span style="font-family: Helvetica; font-size: 12pt;">%s</span></div>' % @body
  end

  # Returns the <message> element for this line, newline-terminated.
  def to_s
    return sprintf('<message sender="%s" time="%s" alias="%s">%s</message>' << "\n",
                   @sender, @time, @buddy_alias, @styled_body)
  end

  #################
  private
  #################

  # Balances mismatched tags, normalizes body style, and fixes actions
  # so they are in Adium style (Pidgin uses "***Buddy waves at you", Adium uses
  # "*Buddy waves at you*").
  def normalize_body!
    normalize_body_entities!()
    # Fix mismatched tags. Yes, it's faster to do it per-message
    # than all at once.
    @body = balance_tags(@body)
    if @buddy_alias[0,3] == '***'
      # "***<alias>" is what pidgin sets as the alias for a /me action
      # New strings are assigned instead of slicing/appending in place, so
      # the strings passed in by the caller are left untouched.
      @buddy_alias = @buddy_alias[3..-1]
      @body = "*#{@body}*"
    end
  end

  # Escapes entities.
  def normalize_body_entities!
    # Convert '&' to '&amp;' only if it's not followed by an entity.
    @body = @body.gsub(/&(?!lt|gt|amp|quot|apos)/, '&amp;')
  end
end
|
553
|
+
|
554
|
+
# An auto reply message.
|
555
|
+
class AutoReplyMessage < XMLMessage
  # Same element as XMLMessage#to_s, with an added auto="true" attribute.
  def to_s
    format("<message sender=\"%s\" time=\"%s\" auto=\"true\" alias=\"%s\">%s</message>\n",
           @sender, @time, @buddy_alias, @styled_body)
  end
end
|
561
|
+
|
562
|
+
# A message saying e.g. "Blahblah has gone away."
|
563
|
+
class StatusMessage < Message
  attr_accessor :status

  # _status_ is stored as given; the other arguments go to Message.
  def initialize(sender, time, buddy_alias, status)
    super(sender, time, buddy_alias)
    @status = status
  end

  # Returns the self-closing <status> element, newline-terminated.
  def to_s
    format("<status type=\"%s\" sender=\"%s\" time=\"%s\" alias=\"%s\"/>\n",
           @status, @sender, @time, @buddy_alias)
  end
end
|
574
|
+
|
575
|
+
# Pidgin does not have Events, but Adium does. Pidgin mostly uses system
|
576
|
+
# messages to display what Adium calls events. These include sending a file,
|
577
|
+
# starting a Direct IM connection, or an error in chat.
|
578
|
+
class Event < XMLMessage
  attr_accessor :event_type

  # _event_type_ is stored as given; the other arguments go to XMLMessage.
  def initialize(sender, time, buddy_alias, body, event_type)
    super(sender, time, buddy_alias, body)
    @event_type = event_type
  end

  # Returns the <event> element for this line. It is newline-terminated
  # like the other line types, since LogFile#to_s joins lines without a
  # separator and write_out relies on the trailing newline.
  def to_s
    return sprintf('<event type="%s" sender="%s" time="%s" alias="%s">%s</event>' << "\n",
                   @event_type, @sender, @time, @buddy_alias, @styled_body)
  end
end
|
590
|
+
end # end module
|