pidgin2adium 3.0.0 → 3.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.autotest +0 -3
- data/.gitignore +22 -0
- data/.rspec +1 -0
- data/Gemfile +11 -0
- data/History.txt +8 -0
- data/LICENSE +20 -0
- data/README.rdoc +15 -25
- data/Rakefile +68 -0
- data/VERSION +1 -0
- data/bin/pidgin2adium_profiler +1 -0
- data/config/website.yml +2 -0
- data/ext/balance_tags_c/balance_tags_c.c +7 -7
- data/lib/pidgin2adium.rb +108 -108
- data/lib/pidgin2adium/balance_tags.rb +118 -0
- data/lib/pidgin2adium/log_converter.rb +59 -59
- data/lib/pidgin2adium/log_file.rb +91 -91
- data/lib/pidgin2adium/log_parser.rb +590 -589
- data/pidgin2adium.gemspec +79 -0
- data/spec/pidgin2adium_spec.rb +7 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +19 -0
- data/tasks/build_profiler.rake +49 -0
- data/tasks/extconf.rake +0 -5
- metadata +72 -41
- data/Rakefile.rb +0 -41
- data/script/console +0 -10
- data/script/destroy +0 -14
- data/script/generate +0 -14
- data/test/test_balance_tags_c_extn.rb +0 -10
@@ -0,0 +1,118 @@
|
|
1
|
+
module Pidgin2Adium
  # Balances the HTML tags of +text+ using a modified stack and returns the
  # balanced string: unclosed tags are closed, stray closing tags are
  # dropped, and known single-entity tags (<br>, <hr>, ...) are made
  # self-closing.
  #
  # NOTE: the two escaping substitutions at the top are applied to the
  # string passed in (gsub!), so the caller's string may be modified.
  # Use text = balance_tags(text).
  #
  # From Wordpress's formatting.php; rewritten in Ruby by Gabe
  # Berke-Williams, 2009.
  # Author:: Leonard Lin <leonard@acm.org>
  # License:: GPL v2.0
  # Copyright:: November 4, 2001
  def Pidgin2Adium.balance_tags( text )
    tagstack = []        # names of the currently open tags, innermost last
    tagqueue = ''.dup    # markup held back until the next iteration
    newtext = ''.dup
    single_tags = %w{br hr img input meta} # Known single-entity/self-closing tags
    nestable_tags = %w{blockquote div span font} # Tags that can be immediately nested within themselves
    # 1: tagname, with possible leading "/"
    # 2: attributes
    tag_regex = /<(\/?\w*)\s*([^>]*)>/

    # WP bug fix for comments - in case you REALLY meant to type '< !--'.
    # The literal is widened here and narrowed again at the end, so it
    # survives the comment repair below.
    text.gsub!('< !--', '<    !--')

    # WP bug fix for LOVE <3 (and other situations with '<' before a number):
    # escape the '<' so it is not mistaken for the start of a tag.
    text.gsub!(/<([0-9]{1})/, '&lt;\1')

    while (match = tag_regex.match(text))
      newtext << tagqueue
      tag = match[1].downcase
      attributes = match[2]

      # clear the shifter
      tagqueue = ''.dup

      # Pop or Push
      if tag[0, 1] == '/' # End Tag
        tag = tag[1..-1]
        if tagstack.empty?
          # too many closing tags: drop this one
          tag = ''
        elsif tagstack.last == tag
          # closes the innermost open tag: pop it
          tagstack.pop
          tag = "</#{tag}>"
        else
          # closing tag not at top: if it is open further down, close
          # everything above it (and itself) in order; otherwise drop it
          depth = tagstack.rindex(tag)
          if depth
            (tagstack.size - depth).times { tagqueue << "</#{tagstack.pop}>" }
          end
          tag = ''
        end
      else # Begin Tag
        if attributes[-1, 1] == '/' || tag == ''
          # self-closing or '' (e.g. a comment): nothing to track
        elsif single_tags.include?(tag)
          # known single-entity tag that doesn't close itself: do so
          attributes << '/'
        else
          # If the top of the stack is the same non-nestable tag we want
          # to push, close the previous one first
          if !nestable_tags.include?(tag) && tagstack.last == tag
            tagqueue = "</#{tagstack.pop}>"
          end
          tagstack.push(tag)
        end

        attributes = " #{attributes}" unless attributes == ''
        tag = "<#{tag}#{attributes}>"
        # If already queuing a close tag, then put this tag on, too.
        # (An empty string is truthy in Ruby, so test emptiness explicitly.)
        unless tagqueue.empty?
          tagqueue << tag
          tag = ''
        end
      end
      newtext << match.pre_match << tag
      text = match.post_match
    end

    # Clear Tag Queue, then add remaining text
    newtext << tagqueue << text

    # Empty Stack: close whatever is still open, innermost first
    tagstack.reverse_each { |open_tag| newtext << "</#{open_tag}>" }

    # WP fix for the bug with HTML comments: comments come out of the loop
    # as '< !--'; restore them, then narrow the escaped literal back.
    newtext.gsub!('< !--', '<!--')
    newtext.gsub!('<    !--', '< !--')

    return newtext
  end
end
|
@@ -1,72 +1,72 @@
|
|
1
1
|
require 'pidgin2adium'
|
2
2
|
|
3
3
|
module Pidgin2Adium
  # An easy way to batch-process a directory. Used by the pidgin2adium
  # command-line script.
  class LogConverter
    include Pidgin2Adium

    # pidgin_log_dir is the directory to search for Pidgin logs (it is
    # expanded with File.expand_path); aliases is passed through to
    # parse_and_generate unchanged.
    #
    # You can add options using the _opts_ hash, which can have the
    # following keys, all of which are optional:
    # * *overwrite*: If true, then overwrite even if log is found.
    #   Defaults to false.
    # * *output_dir*: The top-level dir to put the logs in.
    #   Logs under output_dir are still each in their own folders, etc.
    #   Defaults to Pidgin2Adium::ADIUM_LOG_DIR
    #
    # Raises Errno::ENOENT if pidgin_log_dir is not a directory.
    def initialize(pidgin_log_dir, aliases, opts = {})
      # parse_and_generate will process it for us
      @opts = opts

      @pidgin_log_dir = File.expand_path(pidgin_log_dir)
      @my_aliases = aliases

      unless File.directory?(@pidgin_log_dir)
        puts "Source directory #{@pidgin_log_dir} does not exist or is not a directory."
        # Carry the offending path in the exception as well.
        raise Errno::ENOENT, @pidgin_log_dir
      end
    end

    # Runs Pidgin2Adium::parse_and_generate on every log file in directory
    # provided in new, then deletes Adium's search indexes to force
    # it to rescan logs on startup. Prints the collected minor and major
    # error messages at the end.
    #
    # Re-raises Errno::EACCES if the log directory cannot be read.
    def start
      log_msg "Begin converting."
      begin
        files_path = get_all_chat_files(@pidgin_log_dir)
      rescue Errno::EACCES => bang
        error("Sorry, permission denied for getting Pidgin chat files from #{@pidgin_log_dir}.")
        error("Details: #{bang.message}")
        # Bare raise re-raises the rescued exception, keeping its message
        # and backtrace (still an Errno::EACCES for callers).
        raise
      end

      total_files = files_path.size
      total_successes = 0
      log_msg("#{total_files} files to convert.")
      files_path.each_with_index do |fname, i|
        log_msg(sprintf("[%d/%d] Converting %s...", (i + 1), total_files, fname))
        result = parse_and_generate(fname, @my_aliases, @opts)
        # Only a literal true counts as a success.
        total_successes += 1 if result == true
      end

      delete_search_indexes()

      log_msg "Finished converting! Converted #{total_successes} files of #{total_files} total."
      puts "Minor error messages:"
      puts @@oops_messages.join("\n")
      puts "Major error messages:"
      puts @@error_messages.join("\n")
    end

    ###########
    private
    ###########

    # Returns the paths of all .htm, .html and .txt files anywhere below
    # dir, minus the entries of BAD_DIRS. Returns [] when dir itself is a
    # ".system" directory.
    def get_all_chat_files(dir)
      return [] if File.basename(dir) == ".system"
      # recurse into each subdir (search the directory that was asked for)
      return (Dir.glob("#{dir}/**/*.{htm,html,txt}") - BAD_DIRS)
    end
  end # END LogConverter class
end
|
@@ -1,102 +1,102 @@
|
|
1
1
|
require 'fileutils'
|
2
2
|
|
3
3
|
module Pidgin2Adium
  # A holding object for the result of LogParser.parse. It makes the
  # instance variable @chat_lines available, which is an array of Message
  # subclass instances (XMLMessage, Event, etc.)
  # Here is a list of the instance variables for each class in @chat_lines:
  #
  # <b>All of these variables are read/write.</b>
  # All::              sender, time, buddy_alias
  # XMLMessage::       body
  # AutoReplyMessage:: body
  # Event::            body, event_type
  # StatusMessage::    status
  class LogFile
    include Pidgin2Adium

    attr_reader :chat_lines, :service, :user_SN, :partner_SN, :adium_chat_time_start

    # service is the Pidgin service name (case-insensitive); it is stored
    # as the matching Adium name, or nil if the service is not in the map.
    def initialize(chat_lines, service, user_SN, partner_SN, adium_chat_time_start)
      @chat_lines = chat_lines
      @user_SN = user_SN
      @partner_SN = partner_SN
      @adium_chat_time_start = adium_chat_time_start

      # @chat_str is generated when to_s is called
      @chat_str = nil

      # key is for Pidgin, value is for Adium
      # Just used for <service>.<screenname> in directory structure
      service_name_map = {'aim' => 'AIM',
                          'jabber' => 'Jabber',
                          'gtalk' => 'GTalk',
                          'icq' => 'ICQ',
                          'qq' => 'QQ',
                          'msn' => 'MSN',
                          'yahoo' => 'Yahoo!'}

      @service = service_name_map[service.downcase]
    end

    # Returns contents of log file: every chat line's to_s, concatenated.
    # The result is computed once and cached.
    def to_s
      # map + join is faster than inject() or each()
      @chat_str ||= @chat_lines.map { |l| l.to_s }.join
    end

    # Yields each chat line in order. The block is forwarded directly, so
    # calling without a block returns an enumerator instead of failing.
    def each(&blk)
      @chat_lines.each(&blk)
    end

    # Set overwrite=true to create a logfile even if logfile already exists.
    # Returns one of:
    # * false (if an error occurred),
    # * Pidgin2Adium::FILE_EXISTS if the file to be generated already exists and overwrite=false, or
    # * the path to the new Adium log file.
    def write_out(overwrite = false, output_dir_base = ADIUM_LOG_DIR)
      chatlog_name = "#{@partner_SN} (#{@adium_chat_time_start})"
      # output_dir_base + "/buddyname (2009-08-04T18.38.50-0700).chatlog"
      output_dir = File.join(output_dir_base, "#{@service}.#{@user_SN}", @partner_SN, "#{chatlog_name}.chatlog")
      # output_dir + "/buddyname (2009-08-04T18.38.50-0700).chatlog/buddyname (2009-08-04T18.38.50-0700).xml"
      output_path = output_dir + '/' + "#{chatlog_name}.xml"
      begin
        FileUtils.mkdir_p(output_dir)
      rescue => bang
        error "Could not create destination directory for log file. (Details: #{bang.class}: #{bang.message})"
        return false
      end

      if overwrite
        unless File.exist?(output_path)
          # File doesn't exist, but maybe it does with a different
          # time zone. Check for a file that differs only in time
          # zone and, if found, change output_path to target it.
          # NOTE(review): the basename ends in ".xml", so the ".chatlog"
          # pattern below looks like it never matches, and the glob is
          # rooted at output_dir_base rather than the buddy's folder --
          # confirm the intended path before relying on this lookup.
          stem = File.basename(output_path).sub(/-\d{4}\)\.chatlog$/, '')
          maybe_matches = Dir.glob("#{output_dir_base}/#{stem}/*")
          output_path = maybe_matches[0] unless maybe_matches.empty?
        end
      elsif File.exist?(output_path)
        return FILE_EXISTS
      end

      begin
        outfile = File.new(output_path, 'w')
      rescue => bang
        error "Could not open log file for writing. (Details: #{bang.class}: #{bang.message})"
        return false
      end

      begin
        # no \n before </chat> because @chat_str (from to_s) has it already
        outfile.printf('<?xml version="1.0" encoding="UTF-8" ?>' + "\n" +
                       '<chat xmlns="http://purl.org/net/ulf/ns/0.4-02" account="%s" service="%s">' + "\n" +
                       '%s</chat>',
                       @user_SN, @service, self.to_s)
      ensure
        # Always release the handle, even if writing fails.
        outfile.close
      end

      return output_path
    end
  end # END LogFile class
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# Contains the class BasicParser and its subclasses, HtmlLogParser and
|
2
2
|
# TextLogParser, which parse the file passed into them and return a LogFile
|
3
|
-
# object.
|
3
|
+
# object.
|
4
4
|
#
|
5
5
|
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
|
6
6
|
# using these classes directly.
|
@@ -11,607 +11,608 @@ require 'balance_tags_c'
|
|
11
11
|
require 'pidgin2adium/log_file'
|
12
12
|
|
13
13
|
module Pidgin2Adium
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
end
|
137
|
-
|
138
|
-
# This method returns a LogFile instance, or false if an error occurred.
|
139
|
-
def parse
|
140
|
-
return false unless @first_line_is_valid
|
141
|
-
@file_content = cleanup(@file_content).split("\n")
|
142
|
-
|
143
|
-
@file_content.map! do |line|
|
144
|
-
# "next" returns nil which is removed by compact
|
145
|
-
next if line =~ /^\s+$/
|
146
|
-
if line =~ @line_regex
|
147
|
-
create_msg($~.captures)
|
148
|
-
elsif line =~ @line_regex_status
|
149
|
-
msg = create_status_or_event_msg($~.captures)
|
150
|
-
# Error occurred while parsing
|
151
|
-
return false if msg == false
|
152
|
-
else
|
153
|
-
error "Could not parse line:"
|
154
|
-
p line
|
155
|
-
return false
|
156
|
-
end
|
157
|
-
end
|
158
|
-
@file_content.compact!
|
159
|
-
return LogFile.new(@file_content, @service, @user_SN, @partner_SN, @adium_chat_time_start)
|
160
|
-
end
|
161
|
-
# Prevent parse from being called directly from BasicParser, since
|
162
|
-
# it uses subclassing magic.
|
163
|
-
protected :parse
|
164
|
-
|
165
|
-
#################
|
166
|
-
private
|
167
|
-
#################
|
168
|
-
|
169
|
-
def get_time_zone_offset()
|
170
|
-
# We must have a tz_offset or else the Adium Chat Log viewer
|
171
|
-
# doesn't read the date correctly and then:
|
172
|
-
# 1) the log has an empty start date column in the viewer
|
173
|
-
# 2) The timestamps are all the same for the whole log
|
174
|
-
tz_match = /([-\+]\d+)[A-Z]{3}\.(?:txt|htm|html)/.match(@src_path)
|
175
|
-
if tz_match and tz_match[1]
|
176
|
-
tz_offset = tz_match[1]
|
177
|
-
else
|
178
|
-
# "-0500" (3d rather than 2d to allow for "+")
|
179
|
-
tz_offset = sprintf('%+03d00', Time.zone_offset(Time.now.zone) / 3600)
|
180
|
-
end
|
181
|
-
return tz_offset
|
182
|
-
end
|
183
|
-
|
184
|
-
#--
|
185
|
-
# Adium time format: YYYY-MM-DD\THH:MM:SS[+-]TZ_HRS like:
|
186
|
-
# 2008-10-05T22:26:20-0800
|
187
|
-
# HOWEVER:
|
188
|
-
# If it's the first line, then return it like this (note periods):
|
189
|
-
# 2008-10-05T22.26.20-0800
|
190
|
-
# because it will be used in the filename.
|
191
|
-
#++
|
192
|
-
# Converts a pidgin datestamp to an Adium one.
|
193
|
-
def create_adium_time(time, is_first_line = false)
|
194
|
-
# parsed_date = [year, month, day, hour, min, sec]
|
195
|
-
if time =~ @time_regex
|
196
|
-
year, month, day, hour, min, sec = $1.to_i,
|
197
|
-
$2.to_i,
|
198
|
-
$3.to_i,
|
199
|
-
$4.to_i,
|
200
|
-
$5.to_i,
|
201
|
-
$6.to_i
|
202
|
-
elsif is_first_line and time =~ @time_regex_first_line
|
203
|
-
hour = $4.to_i
|
204
|
-
if $7 == 'PM' and hour != 12
|
205
|
-
hour += 12
|
206
|
-
end
|
207
|
-
year, month, day, min, sec = $3.to_i, # year
|
208
|
-
$1.to_i, # month
|
209
|
-
$2.to_i, # day
|
210
|
-
# already did hour
|
211
|
-
$5.to_i, # minutes
|
212
|
-
$6.to_i # seconds
|
213
|
-
elsif time =~ @minimal_time_regex
|
214
|
-
# "04:22:05" => %w{04 22 05}
|
215
|
-
hour = $1.to_i
|
216
|
-
if $4 == 'PM' and hour != 12
|
217
|
-
hour += 12
|
218
|
-
end
|
219
|
-
year, month, day = @basic_time_info
|
220
|
-
min = $2.to_i
|
221
|
-
sec = $3.to_i
|
222
|
-
else
|
223
|
-
error("You have found an odd timestamp. Please report it to the developer.")
|
224
|
-
log_msg("The timestamp: #{time}")
|
225
|
-
log_msg("Continuing...")
|
226
|
-
year,month,day,hour,min,sec = ParseDate.parsedate(time)
|
227
|
-
end
|
228
|
-
if is_first_line
|
229
|
-
adium_time = Time.local(year,month,day,hour,min,sec).strftime("%Y-%m-%dT%H.%M.%S#{@tz_offset}")
|
230
|
-
else
|
231
|
-
adium_time = Time.local(year,month,day,hour,min,sec).strftime("%Y-%m-%dT%H:%M:%S#{@tz_offset}")
|
232
|
-
end
|
233
|
-
return adium_time
|
234
|
-
end
|
235
|
-
|
236
|
-
# Extract required data from the file. Run by parse.
|
237
|
-
# Extracts the header data from @first_line. Returns
# [service, user_SN, partner_SN, basic_time_info, adium_chat_time_start].
# Raises InvalidFirstLineError if the first line is not of the form
# "Conversation with X at TIME on SN (service)".
def pre_parse
  # Deal with first line.

  # the first line is special. It tells us (in order of regex groups):
  # 1) who we're talking to
  # 2) what time/date
  # 3) what SN we used
  # 4) what protocol (AIM, icq, jabber...)
  first_line_match = /Conversation with (.+?) at (.+?) on (.+?) \((.+?)\)/.match(@first_line)
  raise InvalidFirstLineError if first_line_match.nil?

  service = first_line_match[4]
  # @user_SN is normalized to avoid "AIM.name" and "AIM.na me" folders
  user_SN = first_line_match[3].downcase.tr(' ', '')
  partner_SN = first_line_match[1]
  pidgin_chat_time_start = first_line_match[2]
  # [year, month, day] of the chat start; nil if neither format matches.
  # ("when ... then" replaces the colon form, which only Ruby 1.8 parses.)
  basic_time_info = case pidgin_chat_time_start
                    when @time_regex then [$1.to_i, $2.to_i, $3.to_i]
                    when @time_regex_first_line then [$3.to_i, $1.to_i, $2.to_i]
                    end
  adium_chat_time_start = create_adium_time(pidgin_chat_time_start, true)
  return [service,
          user_SN,
          partner_SN,
          basic_time_info,
          adium_chat_time_start]
end
|
266
|
-
|
267
|
-
# Maps a displayed alias to a screen name. A leading "***" is removed
# first; if the lower-cased, whitespace-free alias is one of
# @user_aliases, the alias is remembered in @user_alias and @user_SN is
# returned. Otherwise @partner_SN is returned.
def get_sender_by_alias(alias_name)
  display_alias = alias_name.sub(/^\*{3}/, '')
  normalized = display_alias.downcase.gsub(/\s+/, '')
  return @partner_SN unless @user_aliases.include?(normalized)

  # Set the current alias being used of the ones in @user_aliases
  @user_alias = display_alias
  @user_SN
end
|
277
|
-
|
278
|
-
#--
|
279
|
-
# create_msg takes an array of captures from matching against
|
280
|
-
# @line_regex and returns a Message object or one of its subclasses.
|
281
|
-
# It can be used for TextLogParser and HtmlLogParser because both of
|
282
|
-
# them return data in the same indexes in the matches array.
|
283
|
-
#++
|
284
|
-
# Builds the message object for one regular chat line. matches holds
# [timestamp, alias, auto-reply marker (or nil), body]; the result is an
# AutoReplyMessage when the marker is present and an XMLMessage otherwise.
def create_msg(matches)
  stamp, buddy_alias, auto_reply, body = matches[0], matches[1], matches[2], matches[3]
  time = create_adium_time(stamp)
  sender = get_sender_by_alias(buddy_alias)
  # Either a regular message line or an auto-reply/away message.
  message_class = auto_reply ? AutoReplyMessage : XMLMessage
  message_class.new(sender, time, buddy_alias, body)
end
|
299
|
-
|
300
|
-
#--
|
301
|
-
# create_status_or_event_msg takes an array of +MatchData+ captures from
|
302
|
-
# matching against @line_regex_status and returns an Event or Status.
|
303
|
-
# Returns nil if it's a message that should be ignored, or false if an
|
304
|
-
# error occurred.
|
305
|
-
#++
|
306
|
-
# Builds a StatusMessage or Event from the captures of a status line.
# matches is [time, text], e.g. ["22:58:00", "BuddyName logged in."].
#
# Returns nil if the text matches @ignore_events (nil gets compact'ed out
# by the caller), or false (after reporting an error) if the text matches
# no known status or event.
def create_status_or_event_msg(matches)
  time = create_adium_time(matches[0])
  str = matches[1]
  return nil if @ignore_events.detect { |ignored| str =~ ignored }

  # Status message
  status_regex, status = @status_map.detect { |candidate, _status| str =~ candidate }
  if status_regex and status
    buddy_alias = status_regex.match(str)[1]
    sender = get_sender_by_alias(buddy_alias)
    return StatusMessage.new(sender, time, buddy_alias, status)
  end

  # Event: libpurple events first, then the others
  event_regex = @lib_purple_events.detect { |candidate| str =~ candidate }
  if event_regex
    event_type = 'libpurpleEvent'
  else
    # Take the matching [regex, event_type] pair itself; the regex capture
    # groups ($1, $2) are not the pair.
    event_regex, event_type = @event_map.detect { |candidate, _type| str =~ candidate }
  end

  unless event_regex and event_type
    error(sprintf("Error parsing status or event message, no status or event found: %p", str))
    return false
  end

  event_match = event_regex.match(str)
  if event_match.size == 1
    # No alias group in the regex - this means it's the user
    buddy_alias = @user_alias
    sender = @user_SN
  else
    buddy_alias = event_match[1]
    sender = get_sender_by_alias(buddy_alias)
  end
  return Event.new(sender, time, buddy_alias, str, event_type)
end
|
14
|
+
# Empty class. Raise'd by LogParser if the first line of a log is not
|
15
|
+
# parseable.
|
16
|
+
# Marker exception with no behavior of its own; BasicParser's initialize
# rescues it to flag a log whose first line could not be parsed.
class InvalidFirstLineError < StandardError
end
|
17
|
+
|
18
|
+
# BasicParser is a base class. Its subclasses are TextLogParser and
|
19
|
+
# HtmlLogParser.
|
20
|
+
#
|
21
|
+
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
|
22
|
+
# using this class directly.
|
23
|
+
class BasicParser
|
24
|
+
include Pidgin2Adium
|
25
|
+
# Reads the log at src_path and prepares everything parse needs.
# user_aliases is a comma-separated string of the user's aliases.
#
# If the first line of the log is invalid, @first_line_is_valid is set to
# false, an error is reported, and the status/event tables below are left
# unset.
def initialize(src_path, user_aliases)
  @src_path = src_path
  raw_aliases = user_aliases.split(',')
  # Whitespace is removed for easy matching later on.
  @user_aliases = raw_aliases.map { |x| x.downcase.gsub(/\s+/, '') }.uniq
  # @user_alias is set each time get_sender_by_alias is called. It is a non-normalized
  # alias.
  # Set an initial value just in case the first message doesn't give
  # us an alias.
  @user_alias = raw_aliases[0]

  @tz_offset = get_time_zone_offset()

  # Block form closes the file even if reading raises (e.g. readline on
  # an empty file).
  File.open(@src_path, 'r') do |file|
    @first_line = file.readline
    @file_content = file.read
  end

  # Time regexes must be set before pre_parse().
  # "4/18/2007 11:02:00 AM" => %w{4, 18, 2007, 11, 02, 00, AM}
  # ONLY used (if at all) in first line of chat ("Conversation with...at...")
  @time_regex_first_line = %r{^(\d{1,2})/(\d{1,2})/(\d{4}) (\d{1,2}):(\d{2}):(\d{2}) ([AP]M)$}
  # "2007-04-17 12:33:13" => %w{2007, 04, 17, 12, 33, 13}
  @time_regex = /^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})$/
  # sometimes a line in a chat doesn't have a full timestamp
  # "04:22:05 AM" => %w{04 22 05 AM}
  @minimal_time_regex = /^(\d{1,2}):(\d{2}):(\d{2})( [AP]M)?$/

  # Whether or not the first line is parseable.
  @first_line_is_valid = true
  begin
    # @basic_time_info is for files that only have the full
    # timestamp at the top; we can use it to fill in the minimal
    # per-line timestamps. It has only 3 elements (year, month,
    # dayofmonth) because you should be able to fill everything
    # else in. If you can't, something's wrong.
    # @adium_chat_time_start is when the chat started, in Adium's format.
    @service,
      @user_SN,
      @partner_SN,
      @basic_time_info,
      @adium_chat_time_start = pre_parse()
  rescue InvalidFirstLineError
    @first_line_is_valid = false
    error("Failed to parse, invalid first line: #{@src_path}")
    return # stop processing
  end

  # @status_map, @lib_purple_events, and @event_map are used in
  # create_status_or_event_msg
  @status_map = {
    /(.+) logged in\.$/ => 'online',
    /(.+) logged out\.$/ => 'offline',
    /(.+) has signed on\.$/ => 'online',
    /(.+) has signed off\.$/ => 'offline',
    /(.+) has gone away\.$/ => 'away',
    /(.+) is no longer away\.$/ => 'available',
    /(.+) has become idle\.$/ => 'idle',
    /(.+) is no longer idle\.$/ => 'available'
  }

  # lib_purple_events are all of event_type libPurple
  @lib_purple_events = [
    # file transfer
    /Starting transfer of .+ from (.+)/,
    /^Offering to send .+ to (.+)$/,
    /(.+) is offering to send file/,
    /^Transfer of file .+ complete$/,
    # The alternation is grouped so that "Error" and the ": ..." tail
    # apply to all three verbs, not just the first and last.
    /Error (?:reading|writing|accessing) .+: .+/,
    /You cancell?ed the transfer of/,
    /File transfer cancelled/,
    /(.+?) cancell?ed the transfer of/,
    /(.+?) cancelled the file transfer/,
    # Direct IM - actual (dis)connect events are their own types
    /^Attempting to connect to (.+) at .+ for Direct IM\./,
    /^Asking (.+) to connect to us at .+ for Direct IM\./,
    /^Attempting to connect via proxy server\.$/,
    /^Direct IM with (.+) failed/,
    # encryption
    /Received message encrypted with wrong key/,
    /^Requesting key\.\.\.$/,
    /^Outgoing message lost\.$/,
    /^Conflicting Key Received!$/,
    /^Error in decryption- asking for resend\.\.\.$/,
    /^Making new key pair\.\.\.$/,
    # sending errors
    /^Last outgoing message not received properly- resetting$/,
    /Resending\.\.\./,
    # connection errors
    /Lost connection with the remote user:.+/,
    # chats
    /^.+ entered the room\.$/,
    /^.+ left the room\.$/
  ]

  # non-libpurple events
  # Each key maps to an event_type string. The keys will be matched against a line of chat
  # and the partner's alias will be in regex group 1, IF the alias is matched.
  @event_map = {
    # .+ is not an alias, it's a proxy server so no grouping
    /^Attempting to connect to .+\.$/ => 'direct-im-connect',
    # NB: pidgin doesn't track when Direct IM is disconnected, AFAIK
    /^Direct IM established$/ => 'directIMConnected',
    /Unable to send message/ => 'chat-error',
    /You missed .+ messages from (.+) because they were too large/ => 'chat-error',
    /User information not available/ => 'chat-error'
  }

  @ignore_events = [
    # Adium ignores SN/alias changes.
    /^.+? is now known as .+?\.<br\/?>$/
  ]
end
|
352
137
|
|
353
|
-
#
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
138
|
+
# This method returns a LogFile instance, or false if an error occurred.
|
139
|
+
# Converts @file_content into message objects and wraps them in a LogFile.
# Returns false if the first line was invalid, if a status/event line
# could not be classified, or if a line matches neither line regex.
#
# @line_regex and @line_regex_status are not assigned in this part of the
# file; presumably the subclasses set them -- confirm.
def parse
  return false unless @first_line_is_valid
  @file_content = cleanup(@file_content).split("\n")

  @file_content.map! do |line|
    # "next" returns nil which is removed by compact. Empty lines are
    # skipped along with whitespace-only ones.
    next if line =~ /\A\s*\z/
    if (captures = @line_regex.match(line))
      create_msg(captures.captures)
    elsif (captures = @line_regex_status.match(line))
      msg = create_status_or_event_msg(captures.captures)
      # Error occurred while parsing
      return false if msg == false
      # Hand the message (or nil, for ignored events) back to map!; the
      # modifier-if above evaluates to nil and would discard it.
      msg
    else
      error "Could not parse line:"
      p line
      return false
    end
  end
  @file_content.compact!
  return LogFile.new(@file_content, @service, @user_SN, @partner_SN, @adium_chat_time_start)
end
|
161
|
+
# Prevent parse from being called directly from BasicParser, since
|
162
|
+
# it uses subclassing magic.
|
163
|
+
protected :parse
|
164
|
+
|
165
|
+
#################
|
166
|
+
private
|
167
|
+
#################
|
168
|
+
|
169
|
+
# Returns the UTC offset to stamp on converted times, e.g. "-0500".
# The offset embedded in the log's file name (such as
# "...-0500EST.txt") is preferred; otherwise the current local offset is
# used.
def get_time_zone_offset()
  # We must have a tz_offset or else the Adium Chat Log viewer
  # doesn't read the date correctly and then:
  # 1) the log has an empty start date column in the viewer
  # 2) The timestamps are all the same for the whole log
  #
  # Zone abbreviations are not always three letters (e.g. "CEST").
  tz_match = /([-\+]\d+)[A-Z]{1,5}\.(?:txt|htm|html)/.match(@src_path)
  return tz_match[1] if tz_match

  # "-0500". %z works for every local zone and keeps half-hour offsets,
  # unlike looking the zone abbreviation up in a table.
  return Time.now.strftime('%z')
end
|
183
|
+
|
184
|
+
#--
# Adium time format: YYYY-MM-DD\THH:MM:SS[+-]TZ_HRS like:
#    2008-10-05T22:26:20-0800
# HOWEVER:
# If it's the first line, then return it like this (note periods):
#    2008-10-05T22.26.20-0800
# because it will be used in the filename.
#++
# Converts a pidgin datestamp to an Adium one.
#
# time::          the timestamp text taken from the log
# is_first_line:: when true, the clock fields are separated by periods and
#                 the @time_regex_first_line format is also accepted
#
# Three formats are tried in order: @time_regex (full date and time),
# @time_regex_first_line (month, day, year, 12-hour clock) and
# @minimal_time_regex (time only; the date comes from @basic_time_info).
# Anything else is reported and handed to ParseDate.
def create_adium_time(time, is_first_line = false)
  if time =~ @time_regex
    year, month, day, hour, min, sec = $1.to_i, $2.to_i, $3.to_i, $4.to_i, $5.to_i, $6.to_i
  elsif is_first_line and time =~ @time_regex_first_line
    year, month, day = $3.to_i, $1.to_i, $2.to_i
    min, sec = $5.to_i, $6.to_i
    hour = to_24_hour_clock($4.to_i, $7)
  elsif time =~ @minimal_time_regex
    # "04:22:05" => 4, 22, 5
    min, sec = $2.to_i, $3.to_i
    hour = to_24_hour_clock($1.to_i, $4)
    year, month, day = @basic_time_info
  else
    error("You have found an odd timestamp. Please report it to the developer.")
    log_msg("The timestamp: #{time}")
    log_msg("Continuing...")
    # NOTE(review): ParseDate only ships with Ruby 1.8 - confirm the target
    # Ruby before relying on this fallback.
    year,month,day,hour,min,sec = ParseDate.parsedate(time)
  end

  clock_format = is_first_line ? '%H.%M.%S' : '%H:%M:%S'
  Time.local(year, month, day, hour, min, sec).strftime("%Y-%m-%dT#{clock_format}#{@tz_offset}")
end

# Converts a 12-hour clock hour plus its 'AM'/'PM' marker to a 24-hour hour.
# Any other marker (including nil) leaves the hour untouched.
def to_24_hour_clock(hour, meridian)
  return hour + 12 if meridian == 'PM' and hour != 12
  return 0 if meridian == 'AM' and hour == 12
  hour
end
|
235
|
+
|
236
|
+
# Extract required data from the file. Run by parse.
#
# The first line of a log is special. It reads
#   Conversation with <partner> at <time/date> on <our SN> (<protocol>)
# and is matched against @first_line.
#
# Returns [service, user_SN, partner_SN, basic_time_info,
# adium_chat_time_start], where basic_time_info is [year, month, day] (or nil
# when the start time matches neither known format).
# Raises InvalidFirstLineError when @first_line does not have that shape.
def pre_parse
  header = /Conversation with (.+?) at (.+?) on (.+?) \((.+?)\)/.match(@first_line)
  raise InvalidFirstLineError if header.nil?

  partner_SN, pidgin_chat_time_start, raw_user_SN, service = header.captures
  # The user's SN is normalized to avoid "AIM.name" and "AIM.na me" folders
  user_SN = raw_user_SN.downcase.tr(' ', '')

  basic_time_info = case pidgin_chat_time_start
                    when @time_regex then [$1.to_i, $2.to_i, $3.to_i]
                    when @time_regex_first_line then [$3.to_i, $1.to_i, $2.to_i]
                    end
  adium_chat_time_start = create_adium_time(pidgin_chat_time_start, true)

  [service, user_SN, partner_SN, basic_time_info, adium_chat_time_start]
end
|
266
|
+
|
267
|
+
# Maps the alias shown on a log line to a screen name.
#
# A leading "***" is dropped from alias_name first. If the remainder,
# downcased and with whitespace removed, is listed in @user_aliases, it is
# remembered in @user_alias as the alias currently in use and @user_SN is
# returned. Otherwise @partner_SN is returned.
def get_sender_by_alias(alias_name)
  display_name = alias_name.sub(/^\*{3}/, '')
  lookup_key = display_name.downcase.gsub(/\s+/, '')
  return @partner_SN unless @user_aliases.include?(lookup_key)

  # Set the current alias being used of the ones in @user_aliases
  @user_alias = display_name
  @user_SN
end
|
277
|
+
|
278
|
+
#--
# create_msg takes an array of captures from matching against
# @line_regex and returns a Message object or one of its subclasses.
# It can be used for TextLogParser and HtmlLogParser because both of
# them return data in the same indexes in the matches array:
#   0: timestamp
#   1: screen name or alias
#   2: auto-reply marker, or nil
#   3: message body
#++
# Builds an AutoReplyMessage when the auto-reply marker is present and an
# XMLMessage otherwise.
def create_msg(matches)
  timestamp, buddy_alias, auto_reply, body = matches[0], matches[1], matches[2], matches[3]
  time = create_adium_time(timestamp)
  sender = get_sender_by_alias(buddy_alias)
  message_class = auto_reply ? AutoReplyMessage : XMLMessage
  message_class.new(sender, time, buddy_alias, body)
end
|
299
|
+
|
300
|
+
#--
# create_status_or_event_msg takes an array of +MatchData+ captures from
# matching against @line_regex_status and returns an Event or Status.
# Returns nil if it's a message that should be ignored, or false if an
# error occurred.
#
# matches looks like ["22:58:00", "BuddyName logged in."]
#   0: time
#   1: status message or event
#++
# Lookup order: @ignore_events, then @status_map (regex, status pairs), then
# @lib_purple_events, then @event_map (regex, event type pairs).
def create_status_or_event_msg(matches)
  time = create_adium_time(matches[0])
  str = matches[1]
  # Return nil, which will get compact'ed out
  return nil if @ignore_events.detect { |ignored| str =~ ignored }

  status_regex, status = @status_map.detect { |candidate, _status| str =~ candidate }
  if status_regex and status
    # Status message
    buddy_alias = status_regex.match(str)[1]
    sender = get_sender_by_alias(buddy_alias)
    return StatusMessage.new(sender, time, buddy_alias, status)
  end

  # Test for event
  event_regex = @lib_purple_events.detect { |candidate| str =~ candidate }
  if event_regex
    event_type = 'libpurpleEvent'
  else
    # Not a libpurple event, try others. Take the pair that detect found;
    # $1/$2 would be captured text from the line, not the regex and type.
    event_regex, event_type = @event_map.detect { |candidate, _type| str =~ candidate }
  end
  unless event_regex and event_type
    error(sprintf("Error parsing status or event message, no status or event found: %p", str))
    return false
  end

  # Event message
  regex_matches = event_regex.match(str)
  if regex_matches.size == 1
    # No alias - this means it's the user
    buddy_alias = @user_alias
    sender = @user_SN
  else
    buddy_alias = regex_matches[1]
    sender = get_sender_by_alias(buddy_alias)
  end
  Event.new(sender, time, buddy_alias, str, event_type)
end
|
351
|
+
end # END BasicParser class
|
352
|
+
|
353
|
+
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
|
354
|
+
# using this class directly.
|
355
|
+
class TextLogParser < BasicParser
|
356
|
+
# Sets up the regexes used to read plain-text logs.
#
# src_path and user_aliases are passed straight to BasicParser#initialize.
def initialize(src_path, user_aliases)
  super(src_path, user_aliases)
  # Matches "(hh:mm:ss)"; the time without parentheses is captured.
  @timestamp_rx = '\((\d{1,2}:\d{1,2}:\d{1,2})\)'

  # @line_regex matches a line in a TXT log file other than the first
  # @line_regex matchdata:
  # 0: timestamp
  # 1: screen name or alias, if alias set
  # 2: the auto-reply marker or nil
  # 3: message body
  # The marker is accepted both literally and entity-encoded: cleanup
  # escapes "<" and ">" before lines are matched, so only the encoded
  # spelling can appear in cleaned text.
  @line_regex = /#{@timestamp_rx} (.*?) ?(<AUTO-REPLY>|&lt;AUTO-REPLY&gt;)?: (.*)/o

  # @line_regex_status matches a status line
  # @line_regex_status matchdata:
  # 0: timestamp
  # 1: status message
  @line_regex_status = /#{@timestamp_rx} ([^:]+)/o
end
|
393
374
|
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
375
|
+
public :parse
|
376
|
+
|
377
|
+
#################
|
378
|
+
private
|
379
|
+
#################
|
380
|
+
|
381
|
+
# Prepares the raw text of a plain-text log for line-by-line parsing.
#
# Carriage returns are removed, XML special characters are escaped, and every
# newline that does not end a chat line (i.e. is not followed by a timestamp
# or the end of the text) becomes "<br/>".
#
# text is modified in place and also returned.
def cleanup(text)
  text.tr!("\r", '')
  # Escape entities since this will be in XML
  text.gsub!('&', '&amp;') # escape '&' first
  text.gsub!('<', '&lt;')
  text.gsub!('>', '&gt;')
  text.gsub!('"', '&quot;')
  text.gsub!("'", '&apos;')
  # Replace newlines with "<br/>" unless they end a chat line. This has to
  # come after the escaping above, otherwise the tag itself gets escaped.
  text.gsub!(/\n(?!#{@timestamp_rx}|\Z)/, '<br/>')
  return text
end
|
393
|
+
end
|
394
|
+
|
395
|
+
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead
|
396
|
+
# of using this class directly.
|
397
|
+
class HtmlLogParser < BasicParser
|
398
|
+
# Sets up the regexes used to read HTML logs.
#
# src_path and user_aliases are passed straight to BasicParser#initialize.
def initialize(src_path, user_aliases)
  super(src_path, user_aliases)
  # Matches "(14:12:21)", "(2008-11-17 14:12:21)" and either form followed by
  # " AM"/" PM"; everything inside the parentheses is captured.
  @timestamp_rx = '\(((?:\d{4}-\d{2}-\d{2} )?\d{1,2}:\d{1,2}:\d{1,2}(?: [AP]M)?)\)'

  # @line_regex matches a line in an HTML log file other than the
  # first; time matches on either "2008-11-17 14:12" or "14:12"
  # @line_regex match obj:
  # 0: timestamp, extended or not
  # 1: screen name or alias, if alias set
  # 2: the auto-reply marker or nil
  # 3: message body
  # The ":" is optional to allow for strings like "(17:12:21) <b>***Gabe B-W</b> is confused<br/>"
  # The marker is accepted literally and entity-encoded, since text inside
  # an HTML log may spell "<" and ">" as entities.
  @line_regex = /#{@timestamp_rx} ?<b>(.+?) ?(<AUTO-REPLY>|&lt;AUTO-REPLY&gt;)?:?<\/b> ?(.+)<br ?\/>/o
  # @line_regex_status matches a status line
  # @line_regex_status match obj:
  # 0: timestamp
  # 1: status message
  @line_regex_status = /#{@timestamp_rx} ?<b> (.+)<\/b><br ?\/>/o
end
|
417
|
+
|
418
|
+
public :parse
|
419
|
+
|
420
|
+
#################
|
421
|
+
private
|
422
|
+
#################
|
423
|
+
|
424
|
+
# Returns a cleaned string (text is also modified in place).
# Removes the following tags from _text_:
# * html
# * body
# * font
# * a with no innertext, e.g. <a href="blah"></a>
# And removes the following style declarations:
# * color: #000000 (just turns text black)
# * font-family
# * font-size
# * background
# Any other declaration (e.g. font-weight) is kept. A <span> left with no
# declarations is removed, but the text inside it is preserved.
# <em> is changed to <span style="font-style: italic;">.
def cleanup(text)
  # Sometimes this is in there. I don't know why.
  text.gsub!(%r{</FONT HSPACE='\d'>}, '')
  # We can remove <font> safely since Pidgin and Adium both show bold
  # using <span style="font-weight: bold;"> except Pidgin uses single
  # quotes while Adium uses double quotes.
  text.gsub!(/<\/?(?:html|body|font)(?: .+?)?>/, '') # very important!

  text.tr!("\r", '')
  # Remove empty lines, however many newlines run together
  text.gsub!(/\n{2,}/, "\n")

  # Remove newlines that end the file, since they screw up the
  # newline -> <br/> conversion
  text.gsub!(/\n\Z/, '')

  # Replace newlines with "<br/>" unless they end a chat line.
  # This must go after we remove <font> tags.
  text.gsub!(/\n(?!#{@timestamp_rx})/, '<br/>')

  # These empty links are sometimes appended to every line in a chat,
  # for some weird reason. Remove them. Either quote style may wrap the
  # href here, and the href may not run past the end of its own tag.
  text.gsub!(%r{<a href=(['"])[^>]*?\1>\s*</a>}, '')

  # Replace single quotes inside tags with double quotes so we can
  # easily change single quotes to entities.
  # For spans, removes a space after the final declaration if it exists.
  text.gsub!(/<span style='([^']+?;) ?'>/, '<span style="\1">')
  text.gsub!(/([a-z]+=)'(.+?)'/, '\1"\2"')
  # Disabled per-attribute variants of the line above:
  #   text.gsub!(/<a href='(.+?)'>/, '<a href="\1">')
  #   text.gsub!(/<img src='([^']+?)'/, '<img src="\1"')
  #   text.gsub!(/ alt='([^']+?)'/, ' alt="\1"')
  text.gsub!("'", '&apos;')

  # This actually does match stuff, but doesn't group it correctly. :(
  # text.gsub!(%r{<span style="((?:.+?;)+)">(.*?)</span>}) do |s|
  text.gsub!(%r{<span style="(.+?)">(.*?)</span>}) do
    # style = style declaration
    # innertext = text inside <span>
    # Read both before any other regex match overwrites $1/$2.
    style, innertext = $1, $2
    # Remove empty spans.
    next '' if innertext == ''
    # TODO: replace double quotes with "&quot;", but only outside tags; may still be tags inside spans

    kept = style.split(/; ?/).map do |declaration|
      if declaration[0, 5] == 'color'
        if declaration.include?('color: #000000')
          nil
        elsif declaration =~ /(color: #[0-9a-fA-F]{6})(>.*)?/
          # Regarding the bit with the ">", sometimes this happens:
          # <span style="color: #000000>today;">today was busy</span>
          # Then declaration = "color: #000000>today"
          # Or it can end in ">;", with no text before the semicolon.
          # So keep the color but remove the ">" and anything following it.
          $1
        else
          declaration
        end
      elsif declaration =~ /^(?:font-family|font-size|background)/
        nil
      else
        # Everything else (e.g. font-weight) is kept.
        declaration
      end
    end
    kept.compact!

    unless kept.empty?
      innertext = "<span style=\"#{kept.join('; ')};\">#{innertext}</span>"
    end
    innertext
  end
  # Pidgin uses <em>, Adium uses <span>
  if text.gsub!('<em>', '<span style="font-style: italic;">')
    text.gsub!('</em>', '</span>')
  end
  return text
end
|
520
|
+
end # END HtmlLogParser class
|
521
|
+
|
522
|
+
# A holding object for each line of the chat. It is subclassed as
# appropriate (eg AutoReplyMessage). Each subclass (but not Message
# itself) has its own to_s which prints out its information in a format
# appropriate for putting in an Adium log file.
# Subclasses: XMLMessage, AutoReplyMessage, StatusMessage, Event.
class Message
  # sender::      the sender's screen name
  # time::        the time the message was sent, in Adium format (e.g.
  #               "2008-10-05T22:26:20-0800")
  # buddy_alias:: the alias shown in the log for whoever sent this line
  #               (NOT a screen name)
  attr_accessor :sender, :time, :buddy_alias

  def initialize(sender, time, buddy_alias)
    @sender, @time, @buddy_alias = sender, time, buddy_alias
  end
end
|
539
|
+
|
540
|
+
# Basic message with body text (as opposed to pure status messages, which
|
541
|
+
# have no body).
|
542
|
+
class XMLMessage < Message
|
543
|
+
# sender, time and buddy_alias are handed to Message#initialize; body is the
# text of the message.
#
# The body is normalized first and only then wrapped in the styled
# <div>/<span> markup that to_s prints, so the normalization is part of the
# output.
def initialize(sender, time, buddy_alias, body)
  super(sender, time, buddy_alias)
  @body = body
  normalize_body!()
  @styled_body = '<div><span style="font-family: Helvetica; font-size: 12pt;">%s</span></div>' % @body
end
|
549
|
+
attr_accessor :body
|
550
|
+
|
551
|
+
# Renders the message as one <message> element followed by a newline.
def to_s
  template = '<message sender="%s" time="%s" alias="%s">%s</message>' + "\n"
  template % [@sender, @time, @buddy_alias, @styled_body]
end
|
555
|
+
|
556
|
+
#################
|
557
|
+
private
|
558
|
+
#################
|
559
|
+
|
560
|
+
# Escapes stray entities in the body, balances mismatched tags, and fixes
# actions so they are in Adium style (Pidgin uses "***Buddy waves at you",
# Adium uses "*Buddy waves at you*").
def normalize_body!
  normalize_body_entities!()
  # Fix mismatched tags. Yes, it's faster to do it per-message
  # than all at once.
  @body = Pidgin2Adium.balance_tags_c(@body)
  return unless @buddy_alias[0, 3] == '***'

  # "***<alias>" is what pidgin sets as the alias for a /me action
  @buddy_alias.slice!(0, 3)
  @body = "*#{@body}*"
end
|
520
574
|
|
521
|
-
#
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
# Subclasses: XMLMessage, AutoReplyMessage, StatusMessage, Event.
|
526
|
-
class Message
|
527
|
-
def initialize(sender, time, buddy_alias)
|
528
|
-
# The sender's screen name
|
529
|
-
@sender = sender
|
530
|
-
# The time the message was sent, in Adium format (e.g.
|
531
|
-
# "2008-10-05T22:26:20-0800")
|
532
|
-
@time = time
|
533
|
-
# The receiver's alias (NOT screen name)
|
534
|
-
@buddy_alias = buddy_alias
|
535
|
-
end
|
536
|
-
attr_accessor :sender, :time, :buddy_alias
|
575
|
+
# Escapes bare ampersands in @body, in place.
#
# An "&" is left alone only when it starts one of the five predefined XML
# entities (lt, gt, amp, quot, apos) or a numeric character reference, and
# is terminated by ";". Every other "&" becomes "&amp;".
def normalize_body_entities!
  @body.gsub!(/&(?!(?:lt|gt|amp|quot|apos|#\d+|#x[0-9a-fA-F]+);)/, '&amp;')
end
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
@styled_body = '<div><span style="font-family: Helvetica; font-size: 12pt;">%s</span></div>' % @body
|
546
|
-
normalize_body!()
|
547
|
-
end
|
548
|
-
attr_accessor :body
|
549
|
-
|
550
|
-
def to_s
|
551
|
-
return sprintf('<message sender="%s" time="%s" alias="%s">%s</message>' << "\n",
|
552
|
-
@sender, @time, @buddy_alias, @styled_body)
|
553
|
-
end
|
554
|
-
|
555
|
-
#################
|
556
|
-
private
|
557
|
-
#################
|
558
|
-
|
559
|
-
# Balances mismatched tags, normalizes body style, and fixes actions
|
560
|
-
# so they are in Adium style (Pidgin uses "***Buddy waves at you", Adium uses
|
561
|
-
# "*Buddy waves at you*").
|
562
|
-
def normalize_body!
|
563
|
-
normalize_body_entities!()
|
564
|
-
# Fix mismatched tags. Yes, it's faster to do it per-message
|
565
|
-
# than all at once.
|
566
|
-
@body = Pidgin2Adium.balance_tags_c(@body)
|
567
|
-
if @buddy_alias[0,3] == '***'
|
568
|
-
# "***<alias>" is what pidgin sets as the alias for a /me action
|
569
|
-
@buddy_alias.slice!(0,3)
|
570
|
-
@body = '*' << @body << '*'
|
571
|
-
end
|
572
|
-
end
|
573
|
-
|
574
|
-
# Escapes entities.
|
575
|
-
def normalize_body_entities!
|
576
|
-
# Convert '&' to '&' only if it's not followed by an entity.
|
577
|
-
@body.gsub!(/&(?!lt|gt|amp|quot|apos)/, '&')
|
578
|
-
end
|
580
|
+
end # END XMLMessage
|
581
|
+
|
582
|
+
# An auto reply message.
|
583
|
+
class AutoReplyMessage < XMLMessage
|
584
|
+
# Renders the auto reply as one <message auto="true"> element followed by a
# newline.
def to_s
  template = '<message sender="%s" time="%s" auto="true" alias="%s">%s</message>' + "\n"
  template % [@sender, @time, @buddy_alias, @styled_body]
end
|
588
|
+
end
|
580
589
|
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
end
|
590
|
+
# A message saying e.g. "Blahblah has gone away."
|
591
|
+
class StatusMessage < Message
|
592
|
+
# sender, time and buddy_alias are handed to Message#initialize; status is
# the value printed as the type attribute by to_s.
def initialize(sender, time, buddy_alias, status)
  @status = status
  super(sender, time, buddy_alias)
end
|
596
|
+
attr_accessor :status
|
588
597
|
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
598
|
+
# Renders the status change as one self-closing <status> element followed by
# a newline.
def to_s
  template = '<status type="%s" sender="%s" time="%s" alias="%s"/>' + "\n"
  template % [@status, @sender, @time, @buddy_alias]
end
|
601
|
+
end
|
602
|
+
|
603
|
+
# Pidgin does not have Events, but Adium does. Pidgin mostly uses system
|
604
|
+
# messages to display what Adium calls events. These include sending a file,
|
605
|
+
# starting a Direct IM connection, or an error in chat.
|
606
|
+
class Event < XMLMessage
|
607
|
+
# sender, time, buddy_alias and body are handed to XMLMessage#initialize;
# event_type is the value printed as the type attribute by to_s.
def initialize(sender, time, buddy_alias, body, event_type)
  @event_type = event_type
  super(sender, time, buddy_alias, body)
end
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
def initialize(sender, time, buddy_alias, body, event_type)
|
607
|
-
super(sender, time, buddy_alias, body)
|
608
|
-
@event_type = event_type
|
609
|
-
end
|
610
|
-
attr_accessor :event_type
|
611
|
-
|
612
|
-
def to_s
|
613
|
-
return sprintf('<event type="%s" sender="%s" time="%s" alias="%s">%s</event>',
|
614
|
-
@event_type, @sender, @time, @buddy_alias, @styled_body)
|
615
|
-
end
|
611
|
+
attr_accessor :event_type
|
612
|
+
|
613
|
+
# Renders the event as one <event> element (no trailing newline).
def to_s
  template = '<event type="%s" sender="%s" time="%s" alias="%s">%s</event>'
  template % [@event_type, @sender, @time, @buddy_alias, @styled_body]
end
|
617
|
+
end
|
617
618
|
end # end module
|