pidgin2adium 3.0.1 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +22 -0
- data/.gitignore +7 -0
- data/{History.txt → ChangeLog} +11 -0
- data/Gemfile +1 -9
- data/README.rdoc +38 -39
- data/Rakefile +4 -2
- data/VERSION +1 -1
- data/bin/pidgin2adium +63 -54
- data/ext/balance_tags_c/balance_tags_c.c +161 -161
- data/lib/pidgin2adium.rb +97 -97
- data/lib/pidgin2adium/balance_tags.rb +2 -2
- data/lib/pidgin2adium/basic_parser.rb +412 -0
- data/lib/pidgin2adium/html_log_parser.rb +125 -0
- data/lib/pidgin2adium/log_converter.rb +12 -13
- data/lib/pidgin2adium/log_file.rb +1 -1
- data/lib/pidgin2adium/log_parser.rb +3 -618
- data/lib/pidgin2adium/message.rb +97 -0
- data/lib/pidgin2adium/text_log_parser.rb +39 -0
- data/pidgin2adium.gemspec +31 -9
- data/spec/balance_tags_c_extn_spec.rb +47 -0
- data/spec/basic_parser_spec.rb +217 -0
- data/spec/html_log_parser_spec.rb +150 -0
- data/spec/log_converter_spec.rb +48 -0
- data/spec/log_file_spec.rb +168 -0
- data/spec/logfiles/2006-12-21.223606.txt +3 -0
- data/spec/logfiles/2008-01-15.071445-0500PST.htm +5 -0
- data/spec/logfiles/2008-01-15.071445-0500PST.html +5 -0
- data/spec/pidgin2adium_spec.rb +248 -3
- data/spec/spec_helper.rb +69 -16
- data/spec/test-output/README.md +1 -0
- data/spec/test-output/html_log_output.xml +6 -0
- data/spec/test-output/text_log_output.xml +4 -0
- data/spec/text_log_parser_spec.rb +42 -0
- data/tasks/extconf/balance_tags_c.rake +5 -1
- metadata +40 -26
- data/bin/pidgin2adium_profiler +0 -1
- data/tasks/build_profiler.rake +0 -49
@@ -0,0 +1,125 @@
|
|
1
|
+
# HtmlLogParser class, a subclass of BasicParser.
|
2
|
+
# Used for parse()ing HTML logs.
|
3
|
+
|
4
|
+
require 'balance_tags_c'
|
5
|
+
|
6
|
+
module Pidgin2Adium
|
7
|
+
class HtmlLogParser < BasicParser
|
8
|
+
def initialize(src_path, user_aliases)
|
9
|
+
super(src_path, user_aliases)
|
10
|
+
@timestamp_rx = '\(((?:\d{4}-\d{2}-\d{2} )?\d{1,2}:\d{1,2}:\d{1,2}(?: [AP]M)?)\)'
|
11
|
+
|
12
|
+
# @line_regex matches a line in an HTML log file other than the
|
13
|
+
# first time matches on either "2008-11-17 14:12" or "14:12"
|
14
|
+
# @line_regex match obj:
|
15
|
+
# 0: timestamp, extended or not
|
16
|
+
# 1: screen name or alias, if alias set
|
17
|
+
# 2: "<AUTO-REPLY>" or nil
|
18
|
+
# 3: message body
|
19
|
+
# The ":" is optional to allow for strings like "(17:12:21) <b>***Gabe B-W</b> is confused<br/>"
|
20
|
+
@line_regex = /#{@timestamp_rx} ?<b>(.+?) ?(<AUTO-REPLY>)?:?<\/b> ?(.+)<br ?\/>/o
|
21
|
+
# @line_regex_status matches a status line
|
22
|
+
# @line_regex_status match obj:
|
23
|
+
# 0: timestamp
|
24
|
+
# 1: status message
|
25
|
+
@line_regex_status = /#{@timestamp_rx} ?<b> (.+)<\/b><br ?\/>/o
|
26
|
+
end
|
27
|
+
|
28
|
+
# Returns a cleaned string.
|
29
|
+
# Removes the following tags from _text_:
|
30
|
+
# * html
|
31
|
+
# * body
|
32
|
+
# * font
|
33
|
+
# * a with no innertext, e.g. <a href="blah"></a>
|
34
|
+
# And removes the following style declarations:
|
35
|
+
# * color: #000000 (just turns text black)
|
36
|
+
# * font-family
|
37
|
+
# * font-size
|
38
|
+
# * background
|
39
|
+
# * em (really it's changed to <span style="font-style: italic;">)
|
40
|
+
# Since each <span> has only one style declaration, spans with these
|
41
|
+
# declarations are removed (but the text inside them is preserved).
|
42
|
+
def cleanup(text)
|
43
|
+
# Sometimes this is in there. I don't know why.
|
44
|
+
text.gsub!(%r{</FONT HSPACE='\d'>}, '')
|
45
|
+
# We can remove <font> safely since Pidgin and Adium both show bold
|
46
|
+
# using <span style="font-weight: bold;"> except Pidgin uses single
|
47
|
+
# quotes while Adium uses double quotes.
|
48
|
+
text.gsub!(/<\/?(?:html|body|font)(?: .+?)?>/, '') # very important!
|
49
|
+
|
50
|
+
text.tr!("\r", '')
|
51
|
+
# Remove empty lines
|
52
|
+
text.gsub!("\n\n", "\n")
|
53
|
+
|
54
|
+
# Remove newlines that end the file, since they screw up the
|
55
|
+
# newline -> <br/> conversion
|
56
|
+
text.gsub!(/\n\Z/, '')
|
57
|
+
|
58
|
+
# Replace newlines with "<br/>" unless they end a chat line.
|
59
|
+
# This must go after we remove <font> tags.
|
60
|
+
text.gsub!(/\n(?!#{@timestamp_rx})/, '<br/>')
|
61
|
+
|
62
|
+
# These empty links are sometimes appended to every line in a chat,
|
63
|
+
# for some weird reason. Remove them.
|
64
|
+
text.gsub!(%r{<a href=['"].+?['"]>\s*?</a>}, '')
|
65
|
+
|
66
|
+
# Replace single quotes inside tags with double quotes so we can
|
67
|
+
# easily change single quotes to entities.
|
68
|
+
# For spans, removes a space after the final declaration if it exists.
|
69
|
+
text.gsub!(/<span style='([^']+?;) ?'>/, '<span style="\1">')
|
70
|
+
text.gsub!(/([a-z]+=)'(.+?)'/, '\1"\2"')
|
71
|
+
=begin
|
72
|
+
text.gsub!(/<a href='(.+?)'>/, '<a href="\1">')
|
73
|
+
text.gsub!(/<img src='([^']+?)'/, '<img src="\1"')
|
74
|
+
text.gsub!(/ alt='([^']+?)'/, ' alt="\1"')
|
75
|
+
=end
|
76
|
+
text.gsub!("'", '&apos;')
|
77
|
+
|
78
|
+
# This actually does match stuff, but doesn't group it correctly. :(
|
79
|
+
# text.gsub!(%r{<span style="((?:.+?;)+)">(.*?)</span>}) do |s|
|
80
|
+
text.gsub!(%r{<span style="(.+?)">(.*?)</span>}) do |s|
|
81
|
+
# Remove empty spans.
|
82
|
+
next if $2 == ''
|
83
|
+
|
84
|
+
# style = style declaration
|
85
|
+
# innertext = text inside <span>
|
86
|
+
style, innertext = $1, $2
|
87
|
+
# TODO: replace double quotes with "&quot;", but only outside tags; may still be tags inside spans
|
88
|
+
# innertext.gsub!("")
|
89
|
+
|
90
|
+
styleparts = style.split(/; ?/)
|
91
|
+
styleparts.map! do |p|
|
92
|
+
if p[0,5] == 'color'
|
93
|
+
if p.include?('color: #000000')
|
94
|
+
next
|
95
|
+
elsif p =~ /(color: #[0-9a-fA-F]{6})(>.*)?/
|
96
|
+
# Regarding the bit with the ">", sometimes this happens:
|
97
|
+
# <span style="color: #000000>today;">today was busy</span>
|
98
|
+
# Then p = "color: #000000>today"
|
99
|
+
# Or it can end in ">;", with no text before the semicolon.
|
100
|
+
# So keep the color but remove the ">" and anything following it.
|
101
|
+
next($1)
|
102
|
+
end
|
103
|
+
else
|
104
|
+
# don't remove font-weight
|
105
|
+
case p
|
106
|
+
when /^font-family/ then next
|
107
|
+
when /^font-size/ then next
|
108
|
+
when /^background/ then next
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end.compact!
|
112
|
+
unless styleparts.empty?
|
113
|
+
style = styleparts.join('; ')
|
114
|
+
innertext = "<span style=\"#{style};\">#{innertext}</span>"
|
115
|
+
end
|
116
|
+
innertext
|
117
|
+
end
|
118
|
+
# Pidgin uses <em>, Adium uses <span>
|
119
|
+
if text.gsub!('<em>', '<span style="font-style: italic;">')
|
120
|
+
text.gsub!('</em>', '</span>')
|
121
|
+
end
|
122
|
+
return text
|
123
|
+
end
|
124
|
+
end # END HtmlLogParser class
|
125
|
+
end
|
@@ -20,8 +20,12 @@ module Pidgin2Adium
|
|
20
20
|
@my_aliases = aliases
|
21
21
|
|
22
22
|
unless File.directory?(@pidgin_log_dir)
|
23
|
-
|
24
|
-
|
23
|
+
msg = "Source directory #{@pidgin_log_dir} does not exist or is not a directory."
|
24
|
+
error(msg)
|
25
|
+
|
26
|
+
# ENOENT automatically prepends "No such file or directory - " to
|
27
|
+
# its initializer's arguments
|
28
|
+
raise Errno::ENOENT.new("source directory #{@pidgin_log_dir}")
|
25
29
|
end
|
26
30
|
end
|
27
31
|
|
@@ -31,11 +35,11 @@ module Pidgin2Adium
|
|
31
35
|
def start
|
32
36
|
log_msg "Begin converting."
|
33
37
|
begin
|
34
|
-
files_path = get_all_chat_files(
|
38
|
+
files_path = get_all_chat_files()
|
35
39
|
rescue Errno::EACCES => bang
|
36
40
|
error("Sorry, permission denied for getting Pidgin chat files from #{@pidgin_log_dir}.")
|
37
41
|
error("Details: #{bang.message}")
|
38
|
-
raise
|
42
|
+
raise bang
|
39
43
|
end
|
40
44
|
|
41
45
|
total_files = files_path.size
|
@@ -52,21 +56,16 @@ module Pidgin2Adium
|
|
52
56
|
|
53
57
|
delete_search_indexes()
|
54
58
|
|
55
|
-
log_msg "Finished converting! Converted #{total_successes} files of #{total_files} total."
|
59
|
+
Pidgin2Adium.log_msg "Finished converting! Converted #{total_successes} files of #{total_files} total."
|
56
60
|
puts "Minor error messages:"
|
57
61
|
puts @@oops_messages.join("\n")
|
58
62
|
puts "Major error messages:"
|
59
63
|
puts @@error_messages.join("\n")
|
60
64
|
end
|
61
65
|
|
62
|
-
|
63
|
-
private
|
64
|
-
###########
|
65
|
-
|
66
|
-
def get_all_chat_files(dir)
|
67
|
-
return [] if File.basename(dir) == ".system"
|
66
|
+
def get_all_chat_files
|
68
67
|
# recurse into each subdir
|
69
|
-
|
68
|
+
Dir.glob("#{@pidgin_log_dir}/**/*.{htm,html,txt}") - BAD_DIRS
|
70
69
|
end
|
71
|
-
end
|
70
|
+
end
|
72
71
|
end
|
@@ -1,618 +1,3 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
#
|
5
|
-
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
|
6
|
-
# using these classes directly.
|
7
|
-
require 'parsedate'
|
8
|
-
require 'time' # for Time.zone_offset
|
9
|
-
|
10
|
-
require 'balance_tags_c'
|
11
|
-
require 'pidgin2adium/log_file'
|
12
|
-
|
13
|
-
module Pidgin2Adium
|
14
|
-
# Empty class. Raise'd by LogParser if the first line of a log is not
|
15
|
-
# parseable.
|
16
|
-
class InvalidFirstLineError < StandardError; end
|
17
|
-
|
18
|
-
# BasicParser is a base class. Its subclasses are TextLogParser and
|
19
|
-
# HtmlLogParser.
|
20
|
-
#
|
21
|
-
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
|
22
|
-
# using this class directly.
|
23
|
-
class BasicParser
|
24
|
-
include Pidgin2Adium
|
25
|
-
def initialize(src_path, user_aliases)
|
26
|
-
@src_path = src_path
|
27
|
-
# Whitespace is removed for easy matching later on.
|
28
|
-
@user_aliases = user_aliases.split(',').map!{|x| x.downcase.gsub(/\s+/,'') }.uniq
|
29
|
-
# @user_alias is set each time get_sender_by_alias is called. It is a non-normalized
|
30
|
-
# alias.
|
31
|
-
# Set an initial value just in case the first message doesn't give
|
32
|
-
# us an alias.
|
33
|
-
@user_alias = user_aliases.split(',')[0]
|
34
|
-
|
35
|
-
@tz_offset = get_time_zone_offset()
|
36
|
-
|
37
|
-
file = File.new(@src_path, 'r')
|
38
|
-
@first_line = file.readline
|
39
|
-
@file_content = file.read
|
40
|
-
file.close
|
41
|
-
|
42
|
-
# Time regexes must be set before pre_parse().
|
43
|
-
# "4/18/2007 11:02:00 AM" => %w{4, 18, 2007, 11, 02, 00, AM}
|
44
|
-
# ONLY used (if at all) in first line of chat ("Conversation with...at...")
|
45
|
-
@time_regex_first_line = %r{^(\d{1,2})/(\d{1,2})/(\d{4}) (\d{1,2}):(\d{2}):(\d{2}) ([AP]M)$}
|
46
|
-
# "2007-04-17 12:33:13" => %w{2007, 04, 17, 12, 33, 13}
|
47
|
-
@time_regex = /^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})$/
|
48
|
-
# sometimes a line in a chat doesn't have a full timestamp
|
49
|
-
# "04:22:05 AM" => %w{04 22 05 AM}
|
50
|
-
@minimal_time_regex = /^(\d{1,2}):(\d{2}):(\d{2})( [AP]M)?$/
|
51
|
-
|
52
|
-
# Whether or not the first line is parseable.
|
53
|
-
@first_line_is_valid = true
|
54
|
-
begin
|
55
|
-
@service,
|
56
|
-
@user_SN,
|
57
|
-
@partner_SN,
|
58
|
-
# @basic_time_info is for files that only have the full
|
59
|
-
# timestamp at the top; we can use it to fill in the minimal
|
60
|
-
# per-line timestamps. It has only 3 elements (year, month,
|
61
|
-
# dayofmonth) because you should be able to fill everything
|
62
|
-
# else in. If you can't, something's wrong.
|
63
|
-
@basic_time_info,
|
64
|
-
# When the chat started, in Adium's format
|
65
|
-
@adium_chat_time_start = pre_parse()
|
66
|
-
rescue InvalidFirstLineError
|
67
|
-
@first_line_is_valid = false
|
68
|
-
error("Failed to parse, invalid first line: #{@src_path}")
|
69
|
-
return # stop processing
|
70
|
-
end
|
71
|
-
|
72
|
-
# @status_map, @lib_purple_events, and @events are used in
|
73
|
-
# create_status_or_event_msg
|
74
|
-
@status_map = {
|
75
|
-
/(.+) logged in\.$/ => 'online',
|
76
|
-
/(.+) logged out\.$/ => 'offline',
|
77
|
-
/(.+) has signed on\.$/ => 'online',
|
78
|
-
/(.+) has signed off\.$/ => 'offline',
|
79
|
-
/(.+) has gone away\.$/ => 'away',
|
80
|
-
/(.+) is no longer away\.$/ => 'available',
|
81
|
-
/(.+) has become idle\.$/ => 'idle',
|
82
|
-
/(.+) is no longer idle\.$/ => 'available'
|
83
|
-
}
|
84
|
-
|
85
|
-
# lib_purple_events are all of event_type libPurple
|
86
|
-
@lib_purple_events = [
|
87
|
-
# file transfer
|
88
|
-
/Starting transfer of .+ from (.+)/,
|
89
|
-
/^Offering to send .+ to (.+)$/,
|
90
|
-
/(.+) is offering to send file/,
|
91
|
-
/^Transfer of file .+ complete$/,
|
92
|
-
/Error reading|writing|accessing .+: .+/,
|
93
|
-
/You cancell?ed the transfer of/,
|
94
|
-
/File transfer cancelled/,
|
95
|
-
/(.+?) cancell?ed the transfer of/,
|
96
|
-
/(.+?) cancelled the file transfer/,
|
97
|
-
# Direct IM - actual (dis)connect events are their own types
|
98
|
-
/^Attempting to connect to (.+) at .+ for Direct IM\./,
|
99
|
-
/^Asking (.+) to connect to us at .+ for Direct IM\./,
|
100
|
-
/^Attempting to connect via proxy server\.$/,
|
101
|
-
/^Direct IM with (.+) failed/,
|
102
|
-
# encryption
|
103
|
-
/Received message encrypted with wrong key/,
|
104
|
-
/^Requesting key\.\.\.$/,
|
105
|
-
/^Outgoing message lost\.$/,
|
106
|
-
/^Conflicting Key Received!$/,
|
107
|
-
/^Error in decryption- asking for resend\.\.\.$/,
|
108
|
-
/^Making new key pair\.\.\.$/,
|
109
|
-
# sending errors
|
110
|
-
/^Last outgoing message not received properly- resetting$/,
|
111
|
-
/Resending\.\.\./,
|
112
|
-
# connection errors
|
113
|
-
/Lost connection with the remote user:.+/,
|
114
|
-
# chats
|
115
|
-
/^.+ entered the room\.$/,
|
116
|
-
/^.+ left the room\.$/
|
117
|
-
]
|
118
|
-
|
119
|
-
# non-libpurple events
|
120
|
-
# Each key maps to an event_type string. The keys will be matched against a line of chat
|
121
|
-
# and the partner's alias will be in regex group 1, IF the alias is matched.
|
122
|
-
@event_map = {
|
123
|
-
# .+ is not an alias, it's a proxy server so no grouping
|
124
|
-
/^Attempting to connect to .+\.$/ => 'direct-im-connect',
|
125
|
-
# NB: pidgin doesn't track when Direct IM is disconnected, AFAIK
|
126
|
-
/^Direct IM established$/ => 'directIMConnected',
|
127
|
-
/Unable to send message/ => 'chat-error',
|
128
|
-
/You missed .+ messages from (.+) because they were too large/ => 'chat-error',
|
129
|
-
/User information not available/ => 'chat-error'
|
130
|
-
}
|
131
|
-
|
132
|
-
@ignore_events = [
|
133
|
-
# Adium ignores SN/alias changes.
|
134
|
-
/^.+? is now known as .+?\.<br\/?>$/
|
135
|
-
]
|
136
|
-
end
|
137
|
-
|
138
|
-
# This method returns a LogFile instance, or false if an error occurred.
|
139
|
-
def parse
|
140
|
-
return false unless @first_line_is_valid
|
141
|
-
@file_content = cleanup(@file_content).split("\n")
|
142
|
-
|
143
|
-
@file_content.map! do |line|
|
144
|
-
# "next" returns nil which is removed by compact
|
145
|
-
next if line =~ /^\s+$/
|
146
|
-
if line =~ @line_regex
|
147
|
-
create_msg($~.captures)
|
148
|
-
elsif line =~ @line_regex_status
|
149
|
-
msg = create_status_or_event_msg($~.captures)
|
150
|
-
# Error occurred while parsing
|
151
|
-
return false if msg == false
|
152
|
-
else
|
153
|
-
error "Could not parse line:"
|
154
|
-
p line
|
155
|
-
return false
|
156
|
-
end
|
157
|
-
end
|
158
|
-
@file_content.compact!
|
159
|
-
return LogFile.new(@file_content, @service, @user_SN, @partner_SN, @adium_chat_time_start)
|
160
|
-
end
|
161
|
-
# Prevent parse from being called directly from BasicParser, since
|
162
|
-
# it uses subclassing magic.
|
163
|
-
protected :parse
|
164
|
-
|
165
|
-
#################
|
166
|
-
private
|
167
|
-
#################
|
168
|
-
|
169
|
-
def get_time_zone_offset()
|
170
|
-
# We must have a tz_offset or else the Adium Chat Log viewer
|
171
|
-
# doesn't read the date correctly and then:
|
172
|
-
# 1) the log has an empty start date column in the viewer
|
173
|
-
# 2) The timestamps are all the same for the whole log
|
174
|
-
tz_match = /([-\+]\d+)[A-Z]{3}\.(?:txt|htm|html)/.match(@src_path)
|
175
|
-
if tz_match and tz_match[1]
|
176
|
-
tz_offset = tz_match[1]
|
177
|
-
else
|
178
|
-
# "-0500" (3d rather than 2d to allow for "+")
|
179
|
-
tz_offset = sprintf('%+03d00', Time.zone_offset(Time.now.zone) / 3600)
|
180
|
-
end
|
181
|
-
return tz_offset
|
182
|
-
end
|
183
|
-
|
184
|
-
#--
|
185
|
-
# Adium time format: YYYY-MM-DD\THH:MM:SS[+-]TZ_HRS like:
|
186
|
-
# 2008-10-05T22:26:20-0800
|
187
|
-
# HOWEVER:
|
188
|
-
# If it's the first line, then return it like this (note periods):
|
189
|
-
# 2008-10-05T22.26.20-0800
|
190
|
-
# because it will be used in the filename.
|
191
|
-
#++
|
192
|
-
# Converts a pidgin datestamp to an Adium one.
|
193
|
-
def create_adium_time(time, is_first_line = false)
|
194
|
-
# parsed_date = [year, month, day, hour, min, sec]
|
195
|
-
if time =~ @time_regex
|
196
|
-
year, month, day, hour, min, sec = $1.to_i,
|
197
|
-
$2.to_i,
|
198
|
-
$3.to_i,
|
199
|
-
$4.to_i,
|
200
|
-
$5.to_i,
|
201
|
-
$6.to_i
|
202
|
-
elsif is_first_line and time =~ @time_regex_first_line
|
203
|
-
hour = $4.to_i
|
204
|
-
if $7 == 'PM' and hour != 12
|
205
|
-
hour += 12
|
206
|
-
end
|
207
|
-
year, month, day, min, sec = $3.to_i, # year
|
208
|
-
$1.to_i, # month
|
209
|
-
$2.to_i, # day
|
210
|
-
# already did hour
|
211
|
-
$5.to_i, # minutes
|
212
|
-
$6.to_i # seconds
|
213
|
-
elsif time =~ @minimal_time_regex
|
214
|
-
# "04:22:05" => %w{04 22 05}
|
215
|
-
hour = $1.to_i
|
216
|
-
if $4 == 'PM' and hour != 12
|
217
|
-
hour += 12
|
218
|
-
end
|
219
|
-
year, month, day = @basic_time_info
|
220
|
-
min = $2.to_i
|
221
|
-
sec = $3.to_i
|
222
|
-
else
|
223
|
-
error("You have found an odd timestamp. Please report it to the developer.")
|
224
|
-
log_msg("The timestamp: #{time}")
|
225
|
-
log_msg("Continuing...")
|
226
|
-
year,month,day,hour,min,sec = ParseDate.parsedate(time)
|
227
|
-
end
|
228
|
-
if is_first_line
|
229
|
-
adium_time = Time.local(year,month,day,hour,min,sec).strftime("%Y-%m-%dT%H.%M.%S#{@tz_offset}")
|
230
|
-
else
|
231
|
-
adium_time = Time.local(year,month,day,hour,min,sec).strftime("%Y-%m-%dT%H:%M:%S#{@tz_offset}")
|
232
|
-
end
|
233
|
-
return adium_time
|
234
|
-
end
|
235
|
-
|
236
|
-
# Extract required data from the file. Run by parse.
|
237
|
-
def pre_parse
|
238
|
-
# Deal with first line.
|
239
|
-
|
240
|
-
# the first line is special. It tells us (in order of regex groups):
|
241
|
-
# 1) who we're talking to
|
242
|
-
# 2) what time/date
|
243
|
-
# 3) what SN we used
|
244
|
-
# 4) what protocol (AIM, icq, jabber...)
|
245
|
-
first_line_match = /Conversation with (.+?) at (.+?) on (.+?) \((.+?)\)/.match(@first_line)
|
246
|
-
if first_line_match.nil?
|
247
|
-
raise InvalidFirstLineError
|
248
|
-
else
|
249
|
-
service = first_line_match[4]
|
250
|
-
# @user_SN is normalized to avoid "AIM.name" and "AIM.na me" folders
|
251
|
-
user_SN = first_line_match[3].downcase.tr(' ', '')
|
252
|
-
partner_SN = first_line_match[1]
|
253
|
-
pidgin_chat_time_start = first_line_match[2]
|
254
|
-
basic_time_info = case pidgin_chat_time_start
|
255
|
-
when @time_regex then [$1.to_i, $2.to_i, $3.to_i]
|
256
|
-
when @time_regex_first_line then [$3.to_i, $1.to_i, $2.to_i]
|
257
|
-
end
|
258
|
-
adium_chat_time_start = create_adium_time(pidgin_chat_time_start, true)
|
259
|
-
return [service,
|
260
|
-
user_SN,
|
261
|
-
partner_SN,
|
262
|
-
basic_time_info,
|
263
|
-
adium_chat_time_start]
|
264
|
-
end
|
265
|
-
end
|
266
|
-
|
267
|
-
def get_sender_by_alias(alias_name)
|
268
|
-
no_action = alias_name.sub(/^\*{3}/, '')
|
269
|
-
if @user_aliases.include? no_action.downcase.gsub(/\s+/, '')
|
270
|
-
# Set the current alias being used of the ones in @user_aliases
|
271
|
-
@user_alias = no_action
|
272
|
-
return @user_SN
|
273
|
-
else
|
274
|
-
return @partner_SN
|
275
|
-
end
|
276
|
-
end
|
277
|
-
|
278
|
-
#--
|
279
|
-
# create_msg takes an array of captures from matching against
|
280
|
-
# @line_regex and returns a Message object or one of its subclasses.
|
281
|
-
# It can be used for TextLogParser and HtmlLogParser because both of
|
282
|
-
# them return data in the same indexes in the matches array.
|
283
|
-
#++
|
284
|
-
def create_msg(matches)
|
285
|
-
msg = nil
|
286
|
-
# Either a regular message line or an auto-reply/away message.
|
287
|
-
time = create_adium_time(matches[0])
|
288
|
-
buddy_alias = matches[1]
|
289
|
-
sender = get_sender_by_alias(buddy_alias)
|
290
|
-
body = matches[3]
|
291
|
-
if matches[2] # auto-reply
|
292
|
-
msg = AutoReplyMessage.new(sender, time, buddy_alias, body)
|
293
|
-
else
|
294
|
-
# normal message
|
295
|
-
msg = XMLMessage.new(sender, time, buddy_alias, body)
|
296
|
-
end
|
297
|
-
return msg
|
298
|
-
end
|
299
|
-
|
300
|
-
#--
|
301
|
-
# create_status_or_event_msg takes an array of +MatchData+ captures from
|
302
|
-
# matching against @line_regex_status and returns an Event or Status.
|
303
|
-
# Returns nil if it's a message that should be ignored, or false if an
|
304
|
-
# error occurred.
|
305
|
-
#++
|
306
|
-
def create_status_or_event_msg(matches)
|
307
|
-
# ["22:58:00", "BuddyName logged in."]
|
308
|
-
# 0: time
|
309
|
-
# 1: status message or event
|
310
|
-
msg = nil
|
311
|
-
time = create_adium_time(matches[0])
|
312
|
-
str = matches[1]
|
313
|
-
# Return nil, which will get compact'ed out
|
314
|
-
return nil if @ignore_events.detect{|regex| str =~ regex }
|
315
|
-
|
316
|
-
regex, status = @status_map.detect{|regex, status| str =~ regex}
|
317
|
-
if regex and status
|
318
|
-
# Status message
|
319
|
-
buddy_alias = regex.match(str)[1]
|
320
|
-
sender = get_sender_by_alias(buddy_alias)
|
321
|
-
msg = StatusMessage.new(sender, time, buddy_alias, status)
|
322
|
-
else
|
323
|
-
# Test for event
|
324
|
-
regex = @lib_purple_events.detect{|regex| str =~ regex }
|
325
|
-
event_type = 'libpurpleEvent' if regex
|
326
|
-
unless regex and event_type
|
327
|
-
# not a libpurple event, try others
|
328
|
-
if @event_map.detect{|regex,event_type| str =~ regex}
|
329
|
-
regex, event_type = $1, $2
|
330
|
-
else
|
331
|
-
error(sprintf("Error parsing status or event message, no status or event found: %p", str))
|
332
|
-
return false
|
333
|
-
end
|
334
|
-
end
|
335
|
-
if regex and event_type
|
336
|
-
regex_matches = regex.match(str)
|
337
|
-
# Event message
|
338
|
-
if regex_matches.size == 1
|
339
|
-
# No alias - this means it's the user
|
340
|
-
buddy_alias = @user_alias
|
341
|
-
sender = @user_SN
|
342
|
-
else
|
343
|
-
buddy_alias = regex_matches[1]
|
344
|
-
sender = get_sender_by_alias(buddy_alias)
|
345
|
-
end
|
346
|
-
msg = Event.new(sender, time, buddy_alias, str, event_type)
|
347
|
-
end
|
348
|
-
end
|
349
|
-
return msg
|
350
|
-
end
|
351
|
-
end # END BasicParser class
|
352
|
-
|
353
|
-
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
|
354
|
-
# using this class directly.
|
355
|
-
class TextLogParser < BasicParser
|
356
|
-
def initialize(src_path, user_aliases)
|
357
|
-
super(src_path, user_aliases)
|
358
|
-
@timestamp_rx = '\((\d{1,2}:\d{1,2}:\d{1,2})\)'
|
359
|
-
|
360
|
-
# @line_regex matches a line in a TXT log file other than the first
|
361
|
-
# @line_regex matchdata:
|
362
|
-
# 0: timestamp
|
363
|
-
# 1: screen name or alias, if alias set
|
364
|
-
# 2: "<AUTO-REPLY>" or nil
|
365
|
-
# 3: message body
|
366
|
-
@line_regex = /#{@timestamp_rx} (.*?) ?(<AUTO-REPLY>)?: (.*)/o
|
367
|
-
|
368
|
-
# @line_regex_status matches a status line
|
369
|
-
# @line_regex_status matchdata:
|
370
|
-
# 0: timestamp
|
371
|
-
# 1: status message
|
372
|
-
@line_regex_status = /#{@timestamp_rx} ([^:]+)/o
|
373
|
-
end
|
374
|
-
|
375
|
-
public :parse
|
376
|
-
|
377
|
-
#################
|
378
|
-
private
|
379
|
-
#################
|
380
|
-
|
381
|
-
def cleanup(text)
|
382
|
-
text.tr!("\r", '')
|
383
|
-
# Replace newlines with "<br/>" unless they end a chat line.
|
384
|
-
text.gsub!(/\n(?!#{@timestamp_rx}|\Z)/, '<br/>')
|
385
|
-
# Escape entities since this will be in XML
|
386
|
-
text.gsub!('&', '&amp;') # escape '&' first
|
387
|
-
text.gsub!('<', '&lt;')
|
388
|
-
text.gsub!('>', '&gt;')
|
389
|
-
text.gsub!('"', '&quot;')
|
390
|
-
text.gsub!("'", '&apos;')
|
391
|
-
return text
|
392
|
-
end
|
393
|
-
end
|
394
|
-
|
395
|
-
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead
|
396
|
-
# of using this class directly.
|
397
|
-
class HtmlLogParser < BasicParser
|
398
|
-
def initialize(src_path, user_aliases)
|
399
|
-
super(src_path, user_aliases)
|
400
|
-
@timestamp_rx = '\(((?:\d{4}-\d{2}-\d{2} )?\d{1,2}:\d{1,2}:\d{1,2}(?: [AP]M)?)\)'
|
401
|
-
|
402
|
-
# @line_regex matches a line in an HTML log file other than the
|
403
|
-
# first time matches on either "2008-11-17 14:12" or "14:12"
|
404
|
-
# @line_regex match obj:
|
405
|
-
# 0: timestamp, extended or not
|
406
|
-
# 1: screen name or alias, if alias set
|
407
|
-
# 2: "<AUTO-REPLY>" or nil
|
408
|
-
# 3: message body
|
409
|
-
# The ":" is optional to allow for strings like "(17:12:21) <b>***Gabe B-W</b> is confused<br/>"
|
410
|
-
@line_regex = /#{@timestamp_rx} ?<b>(.+?) ?(<AUTO-REPLY>)?:?<\/b> ?(.+)<br ?\/>/o
|
411
|
-
# @line_regex_status matches a status line
|
412
|
-
# @line_regex_status match obj:
|
413
|
-
# 0: timestamp
|
414
|
-
# 1: status message
|
415
|
-
@line_regex_status = /#{@timestamp_rx} ?<b> (.+)<\/b><br ?\/>/o
|
416
|
-
end
|
417
|
-
|
418
|
-
public :parse
|
419
|
-
|
420
|
-
#################
|
421
|
-
private
|
422
|
-
#################
|
423
|
-
|
424
|
-
# Returns a cleaned string.
|
425
|
-
# Removes the following tags from _text_:
|
426
|
-
# * html
|
427
|
-
# * body
|
428
|
-
# * font
|
429
|
-
# * a with no innertext, e.g. <a href="blah"></a>
|
430
|
-
# And removes the following style declarations:
|
431
|
-
# * color: #000000 (just turns text black)
|
432
|
-
# * font-family
|
433
|
-
# * font-size
|
434
|
-
# * background
|
435
|
-
# * em (really it's changed to <span style="font-style: italic;">)
|
436
|
-
# Since each <span> has only one style declaration, spans with these
|
437
|
-
# declarations are removed (but the text inside them is preserved).
|
438
|
-
def cleanup(text)
|
439
|
-
# Sometimes this is in there. I don't know why.
|
440
|
-
text.gsub!(%r{</FONT HSPACE='\d'>}, '')
|
441
|
-
# We can remove <font> safely since Pidgin and Adium both show bold
|
442
|
-
# using <span style="font-weight: bold;"> except Pidgin uses single
|
443
|
-
# quotes while Adium uses double quotes.
|
444
|
-
text.gsub!(/<\/?(?:html|body|font)(?: .+?)?>/, '') # very important!
|
445
|
-
|
446
|
-
text.tr!("\r", '')
|
447
|
-
# Remove empty lines
|
448
|
-
text.gsub!("\n\n", "\n")
|
449
|
-
|
450
|
-
# Remove newlines that end the file, since they screw up the
|
451
|
-
# newline -> <br/> conversion
|
452
|
-
text.gsub!(/\n\Z/, '')
|
453
|
-
|
454
|
-
# Replace newlines with "<br/>" unless they end a chat line.
|
455
|
-
# This must go after we remove <font> tags.
|
456
|
-
text.gsub!(/\n(?!#{@timestamp_rx})/, '<br/>')
|
457
|
-
|
458
|
-
# These empty links are sometimes appended to every line in a chat,
|
459
|
-
# for some weird reason. Remove them.
|
460
|
-
text.gsub!(%r{<a href=('").+?\1>\s*?</a>}, '')
|
461
|
-
|
462
|
-
# Replace single quotes inside tags with double quotes so we can
|
463
|
-
# easily change single quotes to entities.
|
464
|
-
# For spans, removes a space after the final declaration if it exists.
|
465
|
-
text.gsub!(/<span style='([^']+?;) ?'>/, '<span style="\1">')
|
466
|
-
text.gsub!(/([a-z]+=)'(.+?)'/, '\1"\2"')
|
467
|
-
=begin
|
468
|
-
text.gsub!(/<a href='(.+?)'>/, '<a href="\1">')
|
469
|
-
text.gsub!(/<img src='([^']+?)'/, '<img src="\1"')
|
470
|
-
text.gsub!(/ alt='([^']+?)'/, ' alt="\1"')
|
471
|
-
=end
|
472
|
-
text.gsub!("'", '&apos;')
|
473
|
-
|
474
|
-
# This actually does match stuff, but doesn't group it correctly. :(
|
475
|
-
# text.gsub!(%r{<span style="((?:.+?;)+)">(.*?)</span>}) do |s|
|
476
|
-
text.gsub!(%r{<span style="(.+?)">(.*?)</span>}) do |s|
|
477
|
-
# Remove empty spans.
|
478
|
-
next if $2 == ''
|
479
|
-
|
480
|
-
# style = style declaration
|
481
|
-
# innertext = text inside <span>
|
482
|
-
style, innertext = $1, $2
|
483
|
-
# TODO: replace double quotes with "&quot;", but only outside tags; may still be tags inside spans
|
484
|
-
# innertext.gsub!("")
|
485
|
-
|
486
|
-
styleparts = style.split(/; ?/)
|
487
|
-
styleparts.map! do |p|
|
488
|
-
if p[0,5] == 'color'
|
489
|
-
if p.include?('color: #000000')
|
490
|
-
next
|
491
|
-
elsif p =~ /(color: #[0-9a-fA-F]{6})(>.*)?/
|
492
|
-
# Regarding the bit with the ">", sometimes this happens:
|
493
|
-
# <span style="color: #000000>today;">today was busy</span>
|
494
|
-
# Then p = "color: #000000>today"
|
495
|
-
# Or it can end in ">;", with no text before the semicolon.
|
496
|
-
# So keep the color but remove the ">" and anything following it.
|
497
|
-
next($1)
|
498
|
-
end
|
499
|
-
else
|
500
|
-
# don't remove font-weight
|
501
|
-
case p
|
502
|
-
when /^font-family/ then next
|
503
|
-
when /^font-size/ then next
|
504
|
-
when /^background/ then next
|
505
|
-
end
|
506
|
-
end
|
507
|
-
end.compact!
|
508
|
-
unless styleparts.empty?
|
509
|
-
style = styleparts.join('; ')
|
510
|
-
innertext = "<span style=\"#{style};\">#{innertext}</span>"
|
511
|
-
end
|
512
|
-
innertext
|
513
|
-
end
|
514
|
-
# Pidgin uses <em>, Adium uses <span>
|
515
|
-
if text.gsub!('<em>', '<span style="font-style: italic;">')
|
516
|
-
text.gsub!('</em>', '</span>')
|
517
|
-
end
|
518
|
-
return text
|
519
|
-
end
|
520
|
-
end # END HtmlLogParser class
|
521
|
-
|
522
|
-
# A holding object for each line of the chat. It is subclassed as
|
523
|
-
# appropriate (eg AutoReplyMessage). Each subclass (but not Message
|
524
|
-
# itself) has its own to_s which prints out its information in a format
|
525
|
-
# appropriate for putting in an Adium log file.
|
526
|
-
# Subclasses: XMLMessage, AutoReplyMessage, StatusMessage, Event.
|
527
|
-
class Message
|
528
|
-
def initialize(sender, time, buddy_alias)
|
529
|
-
# The sender's screen name
|
530
|
-
@sender = sender
|
531
|
-
# The time the message was sent, in Adium format (e.g.
|
532
|
-
# "2008-10-05T22:26:20-0800")
|
533
|
-
@time = time
|
534
|
-
# The receiver's alias (NOT screen name)
|
535
|
-
@buddy_alias = buddy_alias
|
536
|
-
end
|
537
|
-
attr_accessor :sender, :time, :buddy_alias
|
538
|
-
end
|
539
|
-
|
540
|
-
# Basic message with body text (as opposed to pure status messages, which
|
541
|
-
# have no body).
|
542
|
-
class XMLMessage < Message
|
543
|
-
def initialize(sender, time, buddy_alias, body)
|
544
|
-
super(sender, time, buddy_alias)
|
545
|
-
@body = body
|
546
|
-
@styled_body = '<div><span style="font-family: Helvetica; font-size: 12pt;">%s</span></div>' % @body
|
547
|
-
normalize_body!()
|
548
|
-
end
|
549
|
-
attr_accessor :body
|
550
|
-
|
551
|
-
def to_s
|
552
|
-
return sprintf('<message sender="%s" time="%s" alias="%s">%s</message>' << "\n",
|
553
|
-
@sender, @time, @buddy_alias, @styled_body)
|
554
|
-
end
|
555
|
-
|
556
|
-
#################
|
557
|
-
private
|
558
|
-
#################
|
559
|
-
|
560
|
-
# Balances mismatched tags, normalizes body style, and fixes actions
|
561
|
-
# so they are in Adium style (Pidgin uses "***Buddy waves at you", Adium uses
|
562
|
-
# "*Buddy waves at you*").
|
563
|
-
def normalize_body!
|
564
|
-
normalize_body_entities!()
|
565
|
-
# Fix mismatched tags. Yes, it's faster to do it per-message
|
566
|
-
# than all at once.
|
567
|
-
@body = Pidgin2Adium.balance_tags_c(@body)
|
568
|
-
if @buddy_alias[0,3] == '***'
|
569
|
-
# "***<alias>" is what pidgin sets as the alias for a /me action
|
570
|
-
@buddy_alias.slice!(0,3)
|
571
|
-
@body = '*' << @body << '*'
|
572
|
-
end
|
573
|
-
end
|
574
|
-
|
575
|
-
# Escapes entities.
|
576
|
-
def normalize_body_entities!
|
577
|
-
# Convert '&' to '&amp;' only if it's not followed by an entity.
|
578
|
-
@body.gsub!(/&(?!lt|gt|amp|quot|apos)/, '&amp;')
|
579
|
-
end
|
580
|
-
end # END XMLMessage
|
581
|
-
|
582
|
-
# An auto reply message.
|
583
|
-
class AutoReplyMessage < XMLMessage
|
584
|
-
def to_s
|
585
|
-
return sprintf('<message sender="%s" time="%s" auto="true" alias="%s">%s</message>' << "\n",
|
586
|
-
@sender, @time, @buddy_alias, @styled_body)
|
587
|
-
end
|
588
|
-
end
|
589
|
-
|
590
|
-
# A message saying e.g. "Blahblah has gone away."
|
591
|
-
class StatusMessage < Message
|
592
|
-
def initialize(sender, time, buddy_alias, status)
|
593
|
-
super(sender, time, buddy_alias)
|
594
|
-
@status = status
|
595
|
-
end
|
596
|
-
attr_accessor :status
|
597
|
-
|
598
|
-
def to_s
|
599
|
-
return sprintf('<status type="%s" sender="%s" time="%s" alias="%s"/>' << "\n", @status, @sender, @time, @buddy_alias)
|
600
|
-
end
|
601
|
-
end
|
602
|
-
|
603
|
-
# Pidgin does not have Events, but Adium does. Pidgin mostly uses system
|
604
|
-
# messages to display what Adium calls events. These include sending a file,
|
605
|
-
# starting a Direct IM connection, or an error in chat.
|
606
|
-
class Event < XMLMessage
|
607
|
-
def initialize(sender, time, buddy_alias, body, event_type)
|
608
|
-
super(sender, time, buddy_alias, body)
|
609
|
-
@event_type = event_type
|
610
|
-
end
|
611
|
-
attr_accessor :event_type
|
612
|
-
|
613
|
-
def to_s
|
614
|
-
return sprintf('<event type="%s" sender="%s" time="%s" alias="%s">%s</event>',
|
615
|
-
@event_type, @sender, @time, @buddy_alias, @styled_body)
|
616
|
-
end
|
617
|
-
end
|
618
|
-
end # end module
|
1
|
+
require 'pidgin2adium/basic_parser'
|
2
|
+
require 'pidgin2adium/text_log_parser'
|
3
|
+
require 'pidgin2adium/html_log_parser'
|