pidgin2adium 3.0.1 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +22 -0
- data/.gitignore +7 -0
- data/{History.txt → ChangeLog} +11 -0
- data/Gemfile +1 -9
- data/README.rdoc +38 -39
- data/Rakefile +4 -2
- data/VERSION +1 -1
- data/bin/pidgin2adium +63 -54
- data/ext/balance_tags_c/balance_tags_c.c +161 -161
- data/lib/pidgin2adium.rb +97 -97
- data/lib/pidgin2adium/balance_tags.rb +2 -2
- data/lib/pidgin2adium/basic_parser.rb +412 -0
- data/lib/pidgin2adium/html_log_parser.rb +125 -0
- data/lib/pidgin2adium/log_converter.rb +12 -13
- data/lib/pidgin2adium/log_file.rb +1 -1
- data/lib/pidgin2adium/log_parser.rb +3 -618
- data/lib/pidgin2adium/message.rb +97 -0
- data/lib/pidgin2adium/text_log_parser.rb +39 -0
- data/pidgin2adium.gemspec +31 -9
- data/spec/balance_tags_c_extn_spec.rb +47 -0
- data/spec/basic_parser_spec.rb +217 -0
- data/spec/html_log_parser_spec.rb +150 -0
- data/spec/log_converter_spec.rb +48 -0
- data/spec/log_file_spec.rb +168 -0
- data/spec/logfiles/2006-12-21.223606.txt +3 -0
- data/spec/logfiles/2008-01-15.071445-0500PST.htm +5 -0
- data/spec/logfiles/2008-01-15.071445-0500PST.html +5 -0
- data/spec/pidgin2adium_spec.rb +248 -3
- data/spec/spec_helper.rb +69 -16
- data/spec/test-output/README.md +1 -0
- data/spec/test-output/html_log_output.xml +6 -0
- data/spec/test-output/text_log_output.xml +4 -0
- data/spec/text_log_parser_spec.rb +42 -0
- data/tasks/extconf/balance_tags_c.rake +5 -1
- metadata +40 -26
- data/bin/pidgin2adium_profiler +0 -1
- data/tasks/build_profiler.rake +0 -49
@@ -0,0 +1,125 @@
|
|
1
|
+
# HtmlLogParser class, a subclass of BasicParser.
|
2
|
+
# Used for parse()ing HTML logs.
|
3
|
+
|
4
|
+
require 'balance_tags_c'
|
5
|
+
|
6
|
+
module Pidgin2Adium
|
7
|
+
class HtmlLogParser < BasicParser
|
8
|
+
# Builds the parser for one HTML log file and prepares the regexes that are
# used to recognise lines in it.
#
# src_path and user_aliases are handed unchanged to the superclass
# initializer.
def initialize(src_path, user_aliases)
  # Bare super forwards src_path and user_aliases exactly as received.
  super

  # Source for a parenthesised timestamp. The date part and the AM/PM
  # suffix are both optional, so it matches "(2008-11-17 14:12:00)" as well
  # as "(14:12:00)" and "(2:12:00 PM)". It is kept as a String so it can be
  # interpolated into the regexes below and into the one used by cleanup.
  @timestamp_rx = '\(((?:\d{4}-\d{2}-\d{2} )?\d{1,2}:\d{1,2}:\d{1,2}(?: [AP]M)?)\)'

  # @line_regex matches a message line in an HTML log file (i.e. any line
  # other than the first). Captures:
  #   0: timestamp, extended or not
  #   1: screen name or alias, if alias set
  #   2: "<AUTO-REPLY>" or nil
  #   3: message body
  # The ":" is optional to allow for action lines such as
  # "(17:12:21) <b>***Gabe B-W</b> is confused<br/>".
  @line_regex = /#{@timestamp_rx} ?<b>(.+?) ?(<AUTO-REPLY>)?:?<\/b> ?(.+)<br ?\/>/o

  # @line_regex_status matches a status line. Captures:
  #   0: timestamp
  #   1: status message
  @line_regex_status = /#{@timestamp_rx} ?<b> (.+)<\/b><br ?\/>/o
end
|
27
|
+
|
28
|
+
# Returns a cleaned string. _text_ is modified in place and the same String
# is returned.
#
# Removes the following tags from _text_:
# * html
# * body
# * font
# * a with no innertext, e.g. <a href="blah"></a>
# And removes the following style declarations:
# * color: #000000 (just turns text black)
# * font-family
# * font-size
# * background
# * em (really it's changed to <span style="font-style: italic;">)
# Any other declaration (e.g. font-weight) is kept. A <span> whose
# declarations are all removed is dropped, but the text inside it is
# preserved. Single quotes that remain outside attribute values are
# replaced with the &apos; entity.
#
# Reads @timestamp_rx (regex source String) to tell a newline that ends a
# chat line from a newline inside a message.
def cleanup(text)
  # Sometimes this is in there. I don't know why.
  text.gsub!(%r{</FONT HSPACE='\d'>}, '')
  # We can remove <font> safely since Pidgin and Adium both show bold
  # using <span style="font-weight: bold;"> except Pidgin uses single
  # quotes while Adium uses double quotes.
  text.gsub!(/<\/?(?:html|body|font)(?: .+?)?>/, '') # very important!

  text.tr!("\r", '')
  # Remove empty lines. Collapse runs of any length: replacing only "\n\n"
  # leaves an empty line behind whenever three or more newlines are
  # adjacent, and that leftover would be turned into a stray <br/> below.
  text.gsub!(/\n{2,}/, "\n")

  # Remove newlines that end the file, since they screw up the
  # newline -> <br/> conversion
  text.gsub!(/\n\Z/, '')

  # Replace newlines with "<br/>" unless they end a chat line (i.e. unless
  # the next line starts with a timestamp).
  # This must go after we remove <font> tags.
  text.gsub!(/\n(?!#{@timestamp_rx})/, '<br/>')

  # These empty links are sometimes appended to every line in a chat,
  # for some weird reason. Remove them.
  text.gsub!(%r{<a href=['"].+?['"]>\s*?</a>}, '')

  # Replace single quotes inside tags with double quotes so we can
  # easily change single quotes to entities.
  # For spans, removes a space after the final declaration if it exists.
  text.gsub!(/<span style='([^']+?;) ?'>/, '<span style="\1">')
  text.gsub!(/([a-z]+=)'(.+?)'/, '\1"\2"')
  # Every single quote still present is message text; escape it.
  text.gsub!("'", '&apos;')

  text.gsub!(%r{<span style="(.+?)">(.*?)</span>}) do
    # Grab the captures first: the matching done below overwrites the
    # last-match data.
    style = Regexp.last_match(1)
    innertext = Regexp.last_match(2)

    # Remove empty spans.
    next '' if innertext.empty?

    kept = style.split(/; ?/).map do |decl|
      if decl[0, 5] == 'color'
        # Black is the default text colour, so the declaration is noise.
        next if decl.include?('color: #000000')

        # Sometimes this happens:
        #   <span style="color: #ff0000>today;">today was busy</span>
        # Then decl = "color: #ff0000>today". Keep just the colour and
        # throw away whatever trails it. Colour declarations that are not
        # six-digit hex yield nil here and are dropped.
        decl[/color: #[0-9a-fA-F]{6}/]
      elsif decl.start_with?('font-family', 'font-size', 'background')
        nil
      else
        # Everything else (notably font-weight) must survive.
        decl
      end
    end.compact

    if kept.empty?
      innertext
    else
      "<span style=\"#{kept.join('; ')};\">#{innertext}</span>"
    end
  end

  # Pidgin uses <em>, Adium uses <span>. gsub! returns nil when nothing was
  # replaced, so closing tags are only rewritten if an opening tag was seen.
  if text.gsub!('<em>', '<span style="font-style: italic;">')
    text.gsub!('</em>', '</span>')
  end
  text
end
|
124
|
+
end # END HtmlLogParser class
|
125
|
+
end
|
@@ -20,8 +20,12 @@ module Pidgin2Adium
|
|
20
20
|
@my_aliases = aliases
|
21
21
|
|
22
22
|
unless File.directory?(@pidgin_log_dir)
|
23
|
-
|
24
|
-
|
23
|
+
msg = "Source directory #{@pidgin_log_dir} does not exist or is not a directory."
|
24
|
+
error(msg)
|
25
|
+
|
26
|
+
# ENOENT automatically prepends "No such file or directory - " to
|
27
|
+
# its initializer's arguments
|
28
|
+
raise Errno::ENOENT.new("source directory #{@pidgin_log_dir}")
|
25
29
|
end
|
26
30
|
end
|
27
31
|
|
@@ -31,11 +35,11 @@ module Pidgin2Adium
|
|
31
35
|
def start
|
32
36
|
log_msg "Begin converting."
|
33
37
|
begin
|
34
|
-
files_path = get_all_chat_files(
|
38
|
+
files_path = get_all_chat_files()
|
35
39
|
rescue Errno::EACCES => bang
|
36
40
|
error("Sorry, permission denied for getting Pidgin chat files from #{@pidgin_log_dir}.")
|
37
41
|
error("Details: #{bang.message}")
|
38
|
-
raise
|
42
|
+
raise bang
|
39
43
|
end
|
40
44
|
|
41
45
|
total_files = files_path.size
|
@@ -52,21 +56,16 @@ module Pidgin2Adium
|
|
52
56
|
|
53
57
|
delete_search_indexes()
|
54
58
|
|
55
|
-
log_msg "Finished converting! Converted #{total_successes} files of #{total_files} total."
|
59
|
+
Pidgin2Adium.log_msg "Finished converting! Converted #{total_successes} files of #{total_files} total."
|
56
60
|
puts "Minor error messages:"
|
57
61
|
puts @@oops_messages.join("\n")
|
58
62
|
puts "Major error messages:"
|
59
63
|
puts @@error_messages.join("\n")
|
60
64
|
end
|
61
65
|
|
62
|
-
|
63
|
-
private
|
64
|
-
###########
|
65
|
-
|
66
|
-
def get_all_chat_files(dir)
|
67
|
-
return [] if File.basename(dir) == ".system"
|
66
|
+
def get_all_chat_files
|
68
67
|
# recurse into each subdir
|
69
|
-
|
68
|
+
Dir.glob("#{@pidgin_log_dir}/**/*.{htm,html,txt}") - BAD_DIRS
|
70
69
|
end
|
71
|
-
end
|
70
|
+
end
|
72
71
|
end
|
@@ -1,618 +1,3 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
#
|
5
|
-
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
|
6
|
-
# using these classes directly.
|
7
|
-
require 'parsedate'
|
8
|
-
require 'time' # for Time.zone_offset
|
9
|
-
|
10
|
-
require 'balance_tags_c'
|
11
|
-
require 'pidgin2adium/log_file'
|
12
|
-
|
13
|
-
module Pidgin2Adium
|
14
|
-
# Empty class. Raise'd by LogParser if the first line of a log is not
|
15
|
-
# parseable.
|
16
|
-
class InvalidFirstLineError < StandardError; end
|
17
|
-
|
18
|
-
# BasicParser is a base class. Its subclasses are TextLogParser and
|
19
|
-
# HtmlLogParser.
|
20
|
-
#
|
21
|
-
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
|
22
|
-
# using this class directly.
|
23
|
-
class BasicParser
|
24
|
-
include Pidgin2Adium
|
25
|
-
def initialize(src_path, user_aliases)
|
26
|
-
@src_path = src_path
|
27
|
-
# Whitespace is removed for easy matching later on.
|
28
|
-
@user_aliases = user_aliases.split(',').map!{|x| x.downcase.gsub(/\s+/,'') }.uniq
|
29
|
-
# @user_alias is set each time get_sender_by_alias is called. It is a non-normalized
|
30
|
-
# alias.
|
31
|
-
# Set an initial value just in case the first message doesn't give
|
32
|
-
# us an alias.
|
33
|
-
@user_alias = user_aliases.split(',')[0]
|
34
|
-
|
35
|
-
@tz_offset = get_time_zone_offset()
|
36
|
-
|
37
|
-
file = File.new(@src_path, 'r')
|
38
|
-
@first_line = file.readline
|
39
|
-
@file_content = file.read
|
40
|
-
file.close
|
41
|
-
|
42
|
-
# Time regexes must be set before pre_parse().
|
43
|
-
# "4/18/2007 11:02:00 AM" => %w{4, 18, 2007, 11, 02, 00, AM}
|
44
|
-
# ONLY used (if at all) in first line of chat ("Conversation with...at...")
|
45
|
-
@time_regex_first_line = %r{^(\d{1,2})/(\d{1,2})/(\d{4}) (\d{1,2}):(\d{2}):(\d{2}) ([AP]M)$}
|
46
|
-
# "2007-04-17 12:33:13" => %w{2007, 04, 17, 12, 33, 13}
|
47
|
-
@time_regex = /^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})$/
|
48
|
-
# sometimes a line in a chat doesn't have a full timestamp
|
49
|
-
# "04:22:05 AM" => %w{04 22 05 AM}
|
50
|
-
@minimal_time_regex = /^(\d{1,2}):(\d{2}):(\d{2})( [AP]M)?$/
|
51
|
-
|
52
|
-
# Whether or not the first line is parseable.
|
53
|
-
@first_line_is_valid = true
|
54
|
-
begin
|
55
|
-
@service,
|
56
|
-
@user_SN,
|
57
|
-
@partner_SN,
|
58
|
-
# @basic_time_info is for files that only have the full
|
59
|
-
# timestamp at the top; we can use it to fill in the minimal
|
60
|
-
# per-line timestamps. It has only 3 elements (year, month,
|
61
|
-
# dayofmonth) because you should be able to fill everything
|
62
|
-
# else in. If you can't, something's wrong.
|
63
|
-
@basic_time_info,
|
64
|
-
# When the chat started, in Adium's format
|
65
|
-
@adium_chat_time_start = pre_parse()
|
66
|
-
rescue InvalidFirstLineError
|
67
|
-
@first_line_is_valid = false
|
68
|
-
error("Failed to parse, invalid first line: #{@src_path}")
|
69
|
-
return # stop processing
|
70
|
-
end
|
71
|
-
|
72
|
-
# @status_map, @lib_purple_events, and @events are used in
|
73
|
-
# create_status_or_event_msg
|
74
|
-
@status_map = {
|
75
|
-
/(.+) logged in\.$/ => 'online',
|
76
|
-
/(.+) logged out\.$/ => 'offline',
|
77
|
-
/(.+) has signed on\.$/ => 'online',
|
78
|
-
/(.+) has signed off\.$/ => 'offline',
|
79
|
-
/(.+) has gone away\.$/ => 'away',
|
80
|
-
/(.+) is no longer away\.$/ => 'available',
|
81
|
-
/(.+) has become idle\.$/ => 'idle',
|
82
|
-
/(.+) is no longer idle\.$/ => 'available'
|
83
|
-
}
|
84
|
-
|
85
|
-
# lib_purple_events are all of event_type libPurple
|
86
|
-
@lib_purple_events = [
|
87
|
-
# file transfer
|
88
|
-
/Starting transfer of .+ from (.+)/,
|
89
|
-
/^Offering to send .+ to (.+)$/,
|
90
|
-
/(.+) is offering to send file/,
|
91
|
-
/^Transfer of file .+ complete$/,
|
92
|
-
/Error reading|writing|accessing .+: .+/,
|
93
|
-
/You cancell?ed the transfer of/,
|
94
|
-
/File transfer cancelled/,
|
95
|
-
/(.+?) cancell?ed the transfer of/,
|
96
|
-
/(.+?) cancelled the file transfer/,
|
97
|
-
# Direct IM - actual (dis)connect events are their own types
|
98
|
-
/^Attempting to connect to (.+) at .+ for Direct IM\./,
|
99
|
-
/^Asking (.+) to connect to us at .+ for Direct IM\./,
|
100
|
-
/^Attempting to connect via proxy server\.$/,
|
101
|
-
/^Direct IM with (.+) failed/,
|
102
|
-
# encryption
|
103
|
-
/Received message encrypted with wrong key/,
|
104
|
-
/^Requesting key\.\.\.$/,
|
105
|
-
/^Outgoing message lost\.$/,
|
106
|
-
/^Conflicting Key Received!$/,
|
107
|
-
/^Error in decryption- asking for resend\.\.\.$/,
|
108
|
-
/^Making new key pair\.\.\.$/,
|
109
|
-
# sending errors
|
110
|
-
/^Last outgoing message not received properly- resetting$/,
|
111
|
-
/Resending\.\.\./,
|
112
|
-
# connection errors
|
113
|
-
/Lost connection with the remote user:.+/,
|
114
|
-
# chats
|
115
|
-
/^.+ entered the room\.$/,
|
116
|
-
/^.+ left the room\.$/
|
117
|
-
]
|
118
|
-
|
119
|
-
# non-libpurple events
|
120
|
-
# Each key maps to an event_type string. The keys will be matched against a line of chat
|
121
|
-
# and the partner's alias will be in regex group 1, IF the alias is matched.
|
122
|
-
@event_map = {
|
123
|
-
# .+ is not an alias, it's a proxy server so no grouping
|
124
|
-
/^Attempting to connect to .+\.$/ => 'direct-im-connect',
|
125
|
-
# NB: pidgin doesn't track when Direct IM is disconnected, AFAIK
|
126
|
-
/^Direct IM established$/ => 'directIMConnected',
|
127
|
-
/Unable to send message/ => 'chat-error',
|
128
|
-
/You missed .+ messages from (.+) because they were too large/ => 'chat-error',
|
129
|
-
/User information not available/ => 'chat-error'
|
130
|
-
}
|
131
|
-
|
132
|
-
@ignore_events = [
|
133
|
-
# Adium ignores SN/alias changes.
|
134
|
-
/^.+? is now known as .+?\.<br\/?>$/
|
135
|
-
]
|
136
|
-
end
|
137
|
-
|
138
|
-
# This method returns a LogFile instance, or false if an error occurred.
|
139
|
-
def parse
|
140
|
-
return false unless @first_line_is_valid
|
141
|
-
@file_content = cleanup(@file_content).split("\n")
|
142
|
-
|
143
|
-
@file_content.map! do |line|
|
144
|
-
# "next" returns nil which is removed by compact
|
145
|
-
next if line =~ /^\s+$/
|
146
|
-
if line =~ @line_regex
|
147
|
-
create_msg($~.captures)
|
148
|
-
elsif line =~ @line_regex_status
|
149
|
-
msg = create_status_or_event_msg($~.captures)
|
150
|
-
# Error occurred while parsing
|
151
|
-
return false if msg == false
|
152
|
-
else
|
153
|
-
error "Could not parse line:"
|
154
|
-
p line
|
155
|
-
return false
|
156
|
-
end
|
157
|
-
end
|
158
|
-
@file_content.compact!
|
159
|
-
return LogFile.new(@file_content, @service, @user_SN, @partner_SN, @adium_chat_time_start)
|
160
|
-
end
|
161
|
-
# Prevent parse from being called directly from BasicParser, since
|
162
|
-
# it uses subclassing magic.
|
163
|
-
protected :parse
|
164
|
-
|
165
|
-
#################
|
166
|
-
private
|
167
|
-
#################
|
168
|
-
|
169
|
-
def get_time_zone_offset()
|
170
|
-
# We must have a tz_offset or else the Adium Chat Log viewer
|
171
|
-
# doesn't read the date correctly and then:
|
172
|
-
# 1) the log has an empty start date column in the viewer
|
173
|
-
# 2) The timestamps are all the same for the whole log
|
174
|
-
tz_match = /([-\+]\d+)[A-Z]{3}\.(?:txt|htm|html)/.match(@src_path)
|
175
|
-
if tz_match and tz_match[1]
|
176
|
-
tz_offset = tz_match[1]
|
177
|
-
else
|
178
|
-
# "-0500" (3d rather than 2d to allow for "+")
|
179
|
-
tz_offset = sprintf('%+03d00', Time.zone_offset(Time.now.zone) / 3600)
|
180
|
-
end
|
181
|
-
return tz_offset
|
182
|
-
end
|
183
|
-
|
184
|
-
#--
|
185
|
-
# Adium time format: YYYY-MM-DD\THH:MM:SS[+-]TZ_HRS like:
|
186
|
-
# 2008-10-05T22:26:20-0800
|
187
|
-
# HOWEVER:
|
188
|
-
# If it's the first line, then return it like this (note periods):
|
189
|
-
# 2008-10-05T22.26.20-0800
|
190
|
-
# because it will be used in the filename.
|
191
|
-
#++
|
192
|
-
# Converts a pidgin datestamp to an Adium one.
|
193
|
-
def create_adium_time(time, is_first_line = false)
|
194
|
-
# parsed_date = [year, month, day, hour, min, sec]
|
195
|
-
if time =~ @time_regex
|
196
|
-
year, month, day, hour, min, sec = $1.to_i,
|
197
|
-
$2.to_i,
|
198
|
-
$3.to_i,
|
199
|
-
$4.to_i,
|
200
|
-
$5.to_i,
|
201
|
-
$6.to_i
|
202
|
-
elsif is_first_line and time =~ @time_regex_first_line
|
203
|
-
hour = $4.to_i
|
204
|
-
if $7 == 'PM' and hour != 12
|
205
|
-
hour += 12
|
206
|
-
end
|
207
|
-
year, month, day, min, sec = $3.to_i, # year
|
208
|
-
$1.to_i, # month
|
209
|
-
$2.to_i, # day
|
210
|
-
# already did hour
|
211
|
-
$5.to_i, # minutes
|
212
|
-
$6.to_i # seconds
|
213
|
-
elsif time =~ @minimal_time_regex
|
214
|
-
# "04:22:05" => %w{04 22 05}
|
215
|
-
hour = $1.to_i
|
216
|
-
if $4 == 'PM' and hour != 12
|
217
|
-
hour += 12
|
218
|
-
end
|
219
|
-
year, month, day = @basic_time_info
|
220
|
-
min = $2.to_i
|
221
|
-
sec = $3.to_i
|
222
|
-
else
|
223
|
-
error("You have found an odd timestamp. Please report it to the developer.")
|
224
|
-
log_msg("The timestamp: #{time}")
|
225
|
-
log_msg("Continuing...")
|
226
|
-
year,month,day,hour,min,sec = ParseDate.parsedate(time)
|
227
|
-
end
|
228
|
-
if is_first_line
|
229
|
-
adium_time = Time.local(year,month,day,hour,min,sec).strftime("%Y-%m-%dT%H.%M.%S#{@tz_offset}")
|
230
|
-
else
|
231
|
-
adium_time = Time.local(year,month,day,hour,min,sec).strftime("%Y-%m-%dT%H:%M:%S#{@tz_offset}")
|
232
|
-
end
|
233
|
-
return adium_time
|
234
|
-
end
|
235
|
-
|
236
|
-
# Extract required data from the file. Run by parse.
|
237
|
-
def pre_parse
|
238
|
-
# Deal with first line.
|
239
|
-
|
240
|
-
# the first line is special. It tells us (in order of regex groups):
|
241
|
-
# 1) who we're talking to
|
242
|
-
# 2) what time/date
|
243
|
-
# 3) what SN we used
|
244
|
-
# 4) what protocol (AIM, icq, jabber...)
|
245
|
-
first_line_match = /Conversation with (.+?) at (.+?) on (.+?) \((.+?)\)/.match(@first_line)
|
246
|
-
if first_line_match.nil?
|
247
|
-
raise InvalidFirstLineError
|
248
|
-
else
|
249
|
-
service = first_line_match[4]
|
250
|
-
# @user_SN is normalized to avoid "AIM.name" and "AIM.na me" folders
|
251
|
-
user_SN = first_line_match[3].downcase.tr(' ', '')
|
252
|
-
partner_SN = first_line_match[1]
|
253
|
-
pidgin_chat_time_start = first_line_match[2]
|
254
|
-
basic_time_info = case pidgin_chat_time_start
|
255
|
-
when @time_regex then [$1.to_i, $2.to_i, $3.to_i]
|
256
|
-
when @time_regex_first_line then [$3.to_i, $1.to_i, $2.to_i]
|
257
|
-
end
|
258
|
-
adium_chat_time_start = create_adium_time(pidgin_chat_time_start, true)
|
259
|
-
return [service,
|
260
|
-
user_SN,
|
261
|
-
partner_SN,
|
262
|
-
basic_time_info,
|
263
|
-
adium_chat_time_start]
|
264
|
-
end
|
265
|
-
end
|
266
|
-
|
267
|
-
def get_sender_by_alias(alias_name)
|
268
|
-
no_action = alias_name.sub(/^\*{3}/, '')
|
269
|
-
if @user_aliases.include? no_action.downcase.gsub(/\s+/, '')
|
270
|
-
# Set the current alias being used of the ones in @user_aliases
|
271
|
-
@user_alias = no_action
|
272
|
-
return @user_SN
|
273
|
-
else
|
274
|
-
return @partner_SN
|
275
|
-
end
|
276
|
-
end
|
277
|
-
|
278
|
-
#--
|
279
|
-
# create_msg takes an array of captures from matching against
|
280
|
-
# @line_regex and returns a Message object or one of its subclasses.
|
281
|
-
# It can be used for TextLogParser and HtmlLogParser because both of
|
282
|
-
# them return data in the same indexes in the matches array.
|
283
|
-
#++
|
284
|
-
def create_msg(matches)
|
285
|
-
msg = nil
|
286
|
-
# Either a regular message line or an auto-reply/away message.
|
287
|
-
time = create_adium_time(matches[0])
|
288
|
-
buddy_alias = matches[1]
|
289
|
-
sender = get_sender_by_alias(buddy_alias)
|
290
|
-
body = matches[3]
|
291
|
-
if matches[2] # auto-reply
|
292
|
-
msg = AutoReplyMessage.new(sender, time, buddy_alias, body)
|
293
|
-
else
|
294
|
-
# normal message
|
295
|
-
msg = XMLMessage.new(sender, time, buddy_alias, body)
|
296
|
-
end
|
297
|
-
return msg
|
298
|
-
end
|
299
|
-
|
300
|
-
#--
|
301
|
-
# create_status_or_event_msg takes an array of +MatchData+ captures from
|
302
|
-
# matching against @line_regex_status and returns an Event or Status.
|
303
|
-
# Returns nil if it's a message that should be ignored, or false if an
|
304
|
-
# error occurred.
|
305
|
-
#++
|
306
|
-
def create_status_or_event_msg(matches)
|
307
|
-
# ["22:58:00", "BuddyName logged in."]
|
308
|
-
# 0: time
|
309
|
-
# 1: status message or event
|
310
|
-
msg = nil
|
311
|
-
time = create_adium_time(matches[0])
|
312
|
-
str = matches[1]
|
313
|
-
# Return nil, which will get compact'ed out
|
314
|
-
return nil if @ignore_events.detect{|regex| str =~ regex }
|
315
|
-
|
316
|
-
regex, status = @status_map.detect{|regex, status| str =~ regex}
|
317
|
-
if regex and status
|
318
|
-
# Status message
|
319
|
-
buddy_alias = regex.match(str)[1]
|
320
|
-
sender = get_sender_by_alias(buddy_alias)
|
321
|
-
msg = StatusMessage.new(sender, time, buddy_alias, status)
|
322
|
-
else
|
323
|
-
# Test for event
|
324
|
-
regex = @lib_purple_events.detect{|regex| str =~ regex }
|
325
|
-
event_type = 'libpurpleEvent' if regex
|
326
|
-
unless regex and event_type
|
327
|
-
# not a libpurple event, try others
|
328
|
-
if @event_map.detect{|regex,event_type| str =~ regex}
|
329
|
-
regex, event_type = $1, $2
|
330
|
-
else
|
331
|
-
error(sprintf("Error parsing status or event message, no status or event found: %p", str))
|
332
|
-
return false
|
333
|
-
end
|
334
|
-
end
|
335
|
-
if regex and event_type
|
336
|
-
regex_matches = regex.match(str)
|
337
|
-
# Event message
|
338
|
-
if regex_matches.size == 1
|
339
|
-
# No alias - this means it's the user
|
340
|
-
buddy_alias = @user_alias
|
341
|
-
sender = @user_SN
|
342
|
-
else
|
343
|
-
buddy_alias = regex_matches[1]
|
344
|
-
sender = get_sender_by_alias(buddy_alias)
|
345
|
-
end
|
346
|
-
msg = Event.new(sender, time, buddy_alias, str, event_type)
|
347
|
-
end
|
348
|
-
end
|
349
|
-
return msg
|
350
|
-
end
|
351
|
-
end # END BasicParser class
|
352
|
-
|
353
|
-
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
|
354
|
-
# using this class directly.
|
355
|
-
class TextLogParser < BasicParser
|
356
|
-
def initialize(src_path, user_aliases)
|
357
|
-
super(src_path, user_aliases)
|
358
|
-
@timestamp_rx = '\((\d{1,2}:\d{1,2}:\d{1,2})\)'
|
359
|
-
|
360
|
-
# @line_regex matches a line in a TXT log file other than the first
|
361
|
-
# @line_regex matchdata:
|
362
|
-
# 0: timestamp
|
363
|
-
# 1: screen name or alias, if alias set
|
364
|
-
# 2: "<AUTO-REPLY>" or nil
|
365
|
-
# 3: message body
|
366
|
-
@line_regex = /#{@timestamp_rx} (.*?) ?(<AUTO-REPLY>)?: (.*)/o
|
367
|
-
|
368
|
-
# @line_regex_status matches a status line
|
369
|
-
# @line_regex_status matchdata:
|
370
|
-
# 0: timestamp
|
371
|
-
# 1: status message
|
372
|
-
@line_regex_status = /#{@timestamp_rx} ([^:]+)/o
|
373
|
-
end
|
374
|
-
|
375
|
-
public :parse
|
376
|
-
|
377
|
-
#################
|
378
|
-
private
|
379
|
-
#################
|
380
|
-
|
381
|
-
def cleanup(text)
|
382
|
-
text.tr!("\r", '')
|
383
|
-
# Replace newlines with "<br/>" unless they end a chat line.
|
384
|
-
text.gsub!(/\n(?!#{@timestamp_rx}|\Z)/, '<br/>')
|
385
|
-
# Escape entities since this will be in XML
|
386
|
-
text.gsub!('&', '&') # escape '&' first
|
387
|
-
text.gsub!('<', '<')
|
388
|
-
text.gsub!('>', '>')
|
389
|
-
text.gsub!('"', '"')
|
390
|
-
text.gsub!("'", ''')
|
391
|
-
return text
|
392
|
-
end
|
393
|
-
end
|
394
|
-
|
395
|
-
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead
|
396
|
-
# of using this class directly.
|
397
|
-
class HtmlLogParser < BasicParser
|
398
|
-
def initialize(src_path, user_aliases)
|
399
|
-
super(src_path, user_aliases)
|
400
|
-
@timestamp_rx = '\(((?:\d{4}-\d{2}-\d{2} )?\d{1,2}:\d{1,2}:\d{1,2}(?: [AP]M)?)\)'
|
401
|
-
|
402
|
-
# @line_regex matches a line in an HTML log file other than the
|
403
|
-
# first time matches on either "2008-11-17 14:12" or "14:12"
|
404
|
-
# @line_regex match obj:
|
405
|
-
# 0: timestamp, extended or not
|
406
|
-
# 1: screen name or alias, if alias set
|
407
|
-
# 2: "<AUTO-REPLY>" or nil
|
408
|
-
# 3: message body
|
409
|
-
# The ":" is optional to allow for strings like "(17:12:21) <b>***Gabe B-W</b> is confused<br/>"
|
410
|
-
@line_regex = /#{@timestamp_rx} ?<b>(.+?) ?(<AUTO-REPLY>)?:?<\/b> ?(.+)<br ?\/>/o
|
411
|
-
# @line_regex_status matches a status line
|
412
|
-
# @line_regex_status match obj:
|
413
|
-
# 0: timestamp
|
414
|
-
# 1: status message
|
415
|
-
@line_regex_status = /#{@timestamp_rx} ?<b> (.+)<\/b><br ?\/>/o
|
416
|
-
end
|
417
|
-
|
418
|
-
public :parse
|
419
|
-
|
420
|
-
#################
|
421
|
-
private
|
422
|
-
#################
|
423
|
-
|
424
|
-
# Returns a cleaned string.
|
425
|
-
# Removes the following tags from _text_:
|
426
|
-
# * html
|
427
|
-
# * body
|
428
|
-
# * font
|
429
|
-
# * a with no innertext, e.g. <a href="blah"></a>
|
430
|
-
# And removes the following style declarations:
|
431
|
-
# * color: #000000 (just turns text black)
|
432
|
-
# * font-family
|
433
|
-
# * font-size
|
434
|
-
# * background
|
435
|
-
# * em (really it's changed to <span style="font-style: italic;">)
|
436
|
-
# Since each <span> has only one style declaration, spans with these
|
437
|
-
# declarations are removed (but the text inside them is preserved).
|
438
|
-
def cleanup(text)
|
439
|
-
# Sometimes this is in there. I don't know why.
|
440
|
-
text.gsub!(%r{</FONT HSPACE='\d'>}, '')
|
441
|
-
# We can remove <font> safely since Pidgin and Adium both show bold
|
442
|
-
# using <span style="font-weight: bold;"> except Pidgin uses single
|
443
|
-
# quotes while Adium uses double quotes.
|
444
|
-
text.gsub!(/<\/?(?:html|body|font)(?: .+?)?>/, '') # very important!
|
445
|
-
|
446
|
-
text.tr!("\r", '')
|
447
|
-
# Remove empty lines
|
448
|
-
text.gsub!("\n\n", "\n")
|
449
|
-
|
450
|
-
# Remove newlines that end the file, since they screw up the
|
451
|
-
# newline -> <br/> conversion
|
452
|
-
text.gsub!(/\n\Z/, '')
|
453
|
-
|
454
|
-
# Replace newlines with "<br/>" unless they end a chat line.
|
455
|
-
# This must go after we remove <font> tags.
|
456
|
-
text.gsub!(/\n(?!#{@timestamp_rx})/, '<br/>')
|
457
|
-
|
458
|
-
# These empty links are sometimes appended to every line in a chat,
|
459
|
-
# for some weird reason. Remove them.
|
460
|
-
text.gsub!(%r{<a href=('").+?\1>\s*?</a>}, '')
|
461
|
-
|
462
|
-
# Replace single quotes inside tags with double quotes so we can
|
463
|
-
# easily change single quotes to entities.
|
464
|
-
# For spans, removes a space after the final declaration if it exists.
|
465
|
-
text.gsub!(/<span style='([^']+?;) ?'>/, '<span style="\1">')
|
466
|
-
text.gsub!(/([a-z]+=)'(.+?)'/, '\1"\2"')
|
467
|
-
=begin
|
468
|
-
text.gsub!(/<a href='(.+?)'>/, '<a href="\1">')
|
469
|
-
text.gsub!(/<img src='([^']+?)'/, '<img src="\1"')
|
470
|
-
text.gsub!(/ alt='([^']+?)'/, ' alt="\1"')
|
471
|
-
=end
|
472
|
-
text.gsub!("'", ''')
|
473
|
-
|
474
|
-
# This actually does match stuff, but doesn't group it correctly. :(
|
475
|
-
# text.gsub!(%r{<span style="((?:.+?;)+)">(.*?)</span>}) do |s|
|
476
|
-
text.gsub!(%r{<span style="(.+?)">(.*?)</span>}) do |s|
|
477
|
-
# Remove empty spans.
|
478
|
-
next if $2 == ''
|
479
|
-
|
480
|
-
# style = style declaration
|
481
|
-
# innertext = text inside <span>
|
482
|
-
style, innertext = $1, $2
|
483
|
-
# TODO: replace double quotes with """, but only outside tags; may still be tags inside spans
|
484
|
-
# innertext.gsub!("")
|
485
|
-
|
486
|
-
styleparts = style.split(/; ?/)
|
487
|
-
styleparts.map! do |p|
|
488
|
-
if p[0,5] == 'color'
|
489
|
-
if p.include?('color: #000000')
|
490
|
-
next
|
491
|
-
elsif p =~ /(color: #[0-9a-fA-F]{6})(>.*)?/
|
492
|
-
# Regarding the bit with the ">", sometimes this happens:
|
493
|
-
# <span style="color: #000000>today;">today was busy</span>
|
494
|
-
# Then p = "color: #000000>today"
|
495
|
-
# Or it can end in ">;", with no text before the semicolon.
|
496
|
-
# So keep the color but remove the ">" and anything following it.
|
497
|
-
next($1)
|
498
|
-
end
|
499
|
-
else
|
500
|
-
# don't remove font-weight
|
501
|
-
case p
|
502
|
-
when /^font-family/ then next
|
503
|
-
when /^font-size/ then next
|
504
|
-
when /^background/ then next
|
505
|
-
end
|
506
|
-
end
|
507
|
-
end.compact!
|
508
|
-
unless styleparts.empty?
|
509
|
-
style = styleparts.join('; ')
|
510
|
-
innertext = "<span style=\"#{style};\">#{innertext}</span>"
|
511
|
-
end
|
512
|
-
innertext
|
513
|
-
end
|
514
|
-
# Pidgin uses <em>, Adium uses <span>
|
515
|
-
if text.gsub!('<em>', '<span style="font-style: italic;">')
|
516
|
-
text.gsub!('</em>', '</span>')
|
517
|
-
end
|
518
|
-
return text
|
519
|
-
end
|
520
|
-
end # END HtmlLogParser class
|
521
|
-
|
522
|
-
# A holding object for each line of the chat. It is subclassed as
|
523
|
-
# appropriate (eg AutoReplyMessage). Each subclass (but not Message
|
524
|
-
# itself) has its own to_s which prints out its information in a format
|
525
|
-
# appropriate for putting in an Adium log file.
|
526
|
-
# Subclasses: XMLMessage, AutoReplyMessage, StatusMessage, Event.
|
527
|
-
class Message
|
528
|
-
def initialize(sender, time, buddy_alias)
|
529
|
-
# The sender's screen name
|
530
|
-
@sender = sender
|
531
|
-
# The time the message was sent, in Adium format (e.g.
|
532
|
-
# "2008-10-05T22:26:20-0800")
|
533
|
-
@time = time
|
534
|
-
# The receiver's alias (NOT screen name)
|
535
|
-
@buddy_alias = buddy_alias
|
536
|
-
end
|
537
|
-
attr_accessor :sender, :time, :buddy_alias
|
538
|
-
end
|
539
|
-
|
540
|
-
# Basic message with body text (as opposed to pure status messages, which
|
541
|
-
# have no body).
|
542
|
-
class XMLMessage < Message
|
543
|
-
def initialize(sender, time, buddy_alias, body)
|
544
|
-
super(sender, time, buddy_alias)
|
545
|
-
@body = body
|
546
|
-
@styled_body = '<div><span style="font-family: Helvetica; font-size: 12pt;">%s</span></div>' % @body
|
547
|
-
normalize_body!()
|
548
|
-
end
|
549
|
-
attr_accessor :body
|
550
|
-
|
551
|
-
def to_s
|
552
|
-
return sprintf('<message sender="%s" time="%s" alias="%s">%s</message>' << "\n",
|
553
|
-
@sender, @time, @buddy_alias, @styled_body)
|
554
|
-
end
|
555
|
-
|
556
|
-
#################
|
557
|
-
private
|
558
|
-
#################
|
559
|
-
|
560
|
-
# Balances mismatched tags, normalizes body style, and fixes actions
|
561
|
-
# so they are in Adium style (Pidgin uses "***Buddy waves at you", Adium uses
|
562
|
-
# "*Buddy waves at you*").
|
563
|
-
def normalize_body!
|
564
|
-
normalize_body_entities!()
|
565
|
-
# Fix mismatched tags. Yes, it's faster to do it per-message
|
566
|
-
# than all at once.
|
567
|
-
@body = Pidgin2Adium.balance_tags_c(@body)
|
568
|
-
if @buddy_alias[0,3] == '***'
|
569
|
-
# "***<alias>" is what pidgin sets as the alias for a /me action
|
570
|
-
@buddy_alias.slice!(0,3)
|
571
|
-
@body = '*' << @body << '*'
|
572
|
-
end
|
573
|
-
end
|
574
|
-
|
575
|
-
# Escapes entities.
|
576
|
-
def normalize_body_entities!
|
577
|
-
# Convert '&' to '&' only if it's not followed by an entity.
|
578
|
-
@body.gsub!(/&(?!lt|gt|amp|quot|apos)/, '&')
|
579
|
-
end
|
580
|
-
end # END XMLMessage
|
581
|
-
|
582
|
-
# An auto reply message.
|
583
|
-
class AutoReplyMessage < XMLMessage
|
584
|
-
def to_s
|
585
|
-
return sprintf('<message sender="%s" time="%s" auto="true" alias="%s">%s</message>' << "\n",
|
586
|
-
@sender, @time, @buddy_alias, @styled_body)
|
587
|
-
end
|
588
|
-
end
|
589
|
-
|
590
|
-
# A message saying e.g. "Blahblah has gone away."
|
591
|
-
class StatusMessage < Message
|
592
|
-
def initialize(sender, time, buddy_alias, status)
|
593
|
-
super(sender, time, buddy_alias)
|
594
|
-
@status = status
|
595
|
-
end
|
596
|
-
attr_accessor :status
|
597
|
-
|
598
|
-
def to_s
|
599
|
-
return sprintf('<status type="%s" sender="%s" time="%s" alias="%s"/>' << "\n", @status, @sender, @time, @buddy_alias)
|
600
|
-
end
|
601
|
-
end
|
602
|
-
|
603
|
-
# Pidgin does not have Events, but Adium does. Pidgin mostly uses system
|
604
|
-
# messages to display what Adium calls events. These include sending a file,
|
605
|
-
# starting a Direct IM connection, or an error in chat.
|
606
|
-
class Event < XMLMessage
|
607
|
-
def initialize(sender, time, buddy_alias, body, event_type)
|
608
|
-
super(sender, time, buddy_alias, body)
|
609
|
-
@event_type = event_type
|
610
|
-
end
|
611
|
-
attr_accessor :event_type
|
612
|
-
|
613
|
-
def to_s
|
614
|
-
return sprintf('<event type="%s" sender="%s" time="%s" alias="%s">%s</event>',
|
615
|
-
@event_type, @sender, @time, @buddy_alias, @styled_body)
|
616
|
-
end
|
617
|
-
end
|
618
|
-
end # end module
|
1
|
+
require 'pidgin2adium/basic_parser'
|
2
|
+
require 'pidgin2adium/text_log_parser'
|
3
|
+
require 'pidgin2adium/html_log_parser'
|