pidgin2adium 3.0.0 → 3.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.autotest +0 -3
- data/.gitignore +22 -0
- data/.rspec +1 -0
- data/Gemfile +11 -0
- data/History.txt +8 -0
- data/LICENSE +20 -0
- data/README.rdoc +15 -25
- data/Rakefile +68 -0
- data/VERSION +1 -0
- data/bin/pidgin2adium_profiler +1 -0
- data/config/website.yml +2 -0
- data/ext/balance_tags_c/balance_tags_c.c +7 -7
- data/lib/pidgin2adium.rb +108 -108
- data/lib/pidgin2adium/balance_tags.rb +118 -0
- data/lib/pidgin2adium/log_converter.rb +59 -59
- data/lib/pidgin2adium/log_file.rb +91 -91
- data/lib/pidgin2adium/log_parser.rb +590 -589
- data/pidgin2adium.gemspec +79 -0
- data/spec/pidgin2adium_spec.rb +7 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +19 -0
- data/tasks/build_profiler.rake +49 -0
- data/tasks/extconf.rake +0 -5
- metadata +72 -41
- data/Rakefile.rb +0 -41
- data/script/console +0 -10
- data/script/destroy +0 -14
- data/script/generate +0 -14
- data/test/test_balance_tags_c_extn.rb +0 -10
@@ -0,0 +1,118 @@
|
|
1
|
+
module Pidgin2Adium
  # Balances the HTML tags of +text+ using a modified stack and returns the
  # balanced string. The argument itself is left untouched (a copy is
  # processed), so frozen strings are accepted; the historical calling
  # convention <tt>text = balance_tags(text)</tt> keeps working.
  #
  # What it does:
  # * unclosed tags are closed at the end of the string (or as soon as an
  #   enclosing tag is closed);
  # * closing tags without a matching opener are dropped;
  # * known single tags (br, hr, img, input, meta) are made self-closing;
  # * a tag opened twice in a row is closed before being reopened, unless it
  #   is one of the nestable tags (blockquote, div, span, font);
  # * "<" directly followed by a digit (as in "<3") is escaped to "&lt;" so
  #   that it is not mistaken for a tag.
  #
  # From Wordpress's formatting.php; rewritten in Ruby by Gabe
  # Berke-Williams, 2009.
  # Author:: Leonard Lin <leonard@acm.org>
  # License:: GPL v2.0
  # Copyright:: November 4, 2001
  def Pidgin2Adium.balance_tags( text )
    # Work on a copy so the caller's string is never modified.
    text = text.dup
    tagstack = []      # names of the currently open tags, innermost last
    tagqueue = ''.dup  # markup held back until the next loop iteration
    newtext = ''.dup   # the balanced output
    single_tags = %w{br hr img input meta} # Known single-entity/self-closing tags
    nestable_tags = %w{blockquote div span font} # Tags that can be immediately nested within themselves
    # 1: tagname, with possible leading "/"
    # 2: attributes
    tag_regex = /<(\/?\w*)\s*([^>]*)>/

    # WP bug fix for comments - in case you REALLY meant to type '< !--'.
    # A literal '< !--' is parked under a placeholder with extra spaces so
    # that the comment repair at the bottom does not turn it into '<!--'.
    text.gsub!('< !--', '<    !--')

    # WP bug fix for LOVE <3 (and other situations with '<' before a number)
    text.gsub!(/<([0-9]{1})/, '&lt;\1')

    while ( match = tag_regex.match(text) )
      newtext << tagqueue
      tag = match[1].downcase
      attributes = match[2]

      # clear the shifter
      tagqueue = ''.dup

      # Pop or Push
      if tag[0,1] == '/' # End Tag
        tag = tag[1..-1]
        if tagstack.empty?
          # too many closing tags: drop this one
          tag = ''
        elsif tagstack.last == tag
          # stack top matches: emit the closing tag and pop
          tag = "</#{tagstack.pop}>"
        else
          # closing tag not at top: if it is open further down, close
          # everything above it (and itself), innermost first
          open_at = tagstack.rindex(tag)
          if open_at
            (tagstack.size - open_at).times do
              tagqueue << '</' << tagstack.pop << '>'
            end
          end
          tag = ''
        end
      else
        # Begin Tag

        # Tag Cleaning
        if (attributes[-1,1] == '/') || (tag == '')
          # self-closing or '' (e.g. a comment): don't touch the stack
        elsif single_tags.include?(tag)
          # known single-entity tag that doesn't close itself: do so
          attributes << '/'
        else
          # If the top of the stack is the same as the tag we want to
          # push, close the previous tag first
          if !nestable_tags.include?(tag) && (tagstack.last == tag)
            tagqueue = "</#{tagstack.pop}>"
          end
          tagstack.push(tag)
        end

        # Attributes
        attributes = ' ' + attributes unless attributes.empty?
        tag = "<#{tag}#{attributes}>"
        # If already queuing a close tag, then put this tag on, too.
        # (An empty queue must be tested explicitly: '' is truthy in Ruby.)
        unless tagqueue.empty?
          tagqueue << tag
          tag = ''
        end
      end
      newtext << match.pre_match << tag
      text = match.post_match
    end

    # Clear Tag Queue
    newtext << tagqueue

    # Add Remaining text
    newtext << text

    # Empty Stack: close whatever is still open, innermost first
    tagstack.reverse_each do |t|
      newtext << '</' << t << '>'
    end

    # WP fix for the bug with HTML comments: the loop above rewrites '<!--'
    # as '< !--', so undo that, then restore the parked literal '< !--'.
    newtext.gsub!('< !--', '<!--')
    newtext.gsub!('<    !--', '< !--')

    return newtext
  end
end
|
@@ -1,72 +1,72 @@
|
|
1
1
|
require 'pidgin2adium'
|
2
2
|
|
3
3
|
module Pidgin2Adium
  # An easy way to batch-process a directory. Used by the pidgin2adium
  # command-line script.
  class LogConverter
    include Pidgin2Adium

    # pidgin_log_dir:: directory containing the Pidgin logs; it is expanded
    #                  to an absolute path.
    # aliases::        passed through unchanged to parse_and_generate.
    #
    # You can add options using the _opts_ hash, which can have the
    # following keys, all of which are optional:
    # * *overwrite*: If true, then overwrite even if log is found.
    #   Defaults to false.
    # * *output_dir*: The top-level dir to put the logs in.
    #   Logs under output_dir are still each in their own folders, etc.
    #   Defaults to Pidgin2Adium::ADIUM_LOG_DIR
    #
    # Raises Errno::ENOENT (carrying the offending path) if pidgin_log_dir
    # is not an existing directory.
    def initialize(pidgin_log_dir, aliases, opts = {})
      # parse_and_generate will process it for us
      @opts = opts

      @pidgin_log_dir = File.expand_path(pidgin_log_dir)
      @my_aliases = aliases

      unless File.directory?(@pidgin_log_dir)
        puts "Source directory #{@pidgin_log_dir} does not exist or is not a directory."
        raise Errno::ENOENT, @pidgin_log_dir
      end
    end

    # Runs Pidgin2Adium::parse_and_generate on every log file in directory
    # provided in new, then deletes Adium's search indexes to force
    # it to rescan logs on startup.
    #
    # Re-raises Errno::EACCES (after reporting it) if the log directory
    # cannot be read.
    def start
      log_msg "Begin converting."
      begin
        files_path = get_all_chat_files(@pidgin_log_dir)
      rescue Errno::EACCES => bang
        error("Sorry, permission denied for getting Pidgin chat files from #{@pidgin_log_dir}.")
        error("Details: #{bang.message}")
        # Bare raise re-raises the rescued exception itself, keeping its
        # message and backtrace.
        raise
      end

      total_files = files_path.size
      total_successes = 0
      log_msg("#{total_files} files to convert.")
      files_path.each_with_index do |fname, i|
        log_msg(
          sprintf("[%d/%d] Converting %s...",
                  (i+1), total_files, fname)
        )
        result = parse_and_generate(fname, @my_aliases, @opts)
        # Only an exact true counts as a success.
        total_successes += 1 if result == true
      end

      delete_search_indexes()

      log_msg "Finished converting! Converted #{total_successes} files of #{total_files} total."
      # NOTE(review): @@oops_messages and @@error_messages are not defined
      # in this file; presumably they are filled in by the Pidgin2Adium
      # module's logging helpers - confirm.
      puts "Minor error messages:"
      puts @@oops_messages.join("\n")
      puts "Major error messages:"
      puts @@error_messages.join("\n")
    end

    ###########
    private
    ###########

    # Returns the paths of all .htm, .html and .txt files found anywhere
    # below _dir_, minus any entries listed in BAD_DIRS. Returns an empty
    # array when _dir_ itself is named ".system".
    def get_all_chat_files(dir)
      return [] if File.basename(dir) == ".system"
      # recurse into each subdir of the directory we were asked about
      return (Dir.glob("#{dir}/**/*.{htm,html,txt}") - BAD_DIRS)
    end
  end # END LogConverter class
end
|
@@ -1,102 +1,102 @@
|
|
1
1
|
require 'fileutils'
|
2
2
|
|
3
3
|
module Pidgin2Adium
  # A holding object for the result of LogParser.parse. It makes the
  # instance variable @chat_lines available, which is an array of Message
  # subclass instances (XMLMessage, Event, etc.)
  # Here is a list of the instance variables for each class in @chat_lines:
  #
  # <b>All of these variables are read/write.</b>
  # All::              sender, time, buddy_alias
  # XMLMessage::       body
  # AutoReplyMessage:: body
  # Event::            body, event_type
  # StatusMessage::    status
  class LogFile
    include Pidgin2Adium

    # chat_lines::            objects whose to_s output makes up the log body
    # service::               Pidgin's protocol name (e.g. "aim"); it is
    #                         mapped case-insensitively to Adium's spelling
    # user_SN, partner_SN::   screen names of the local user and the buddy
    # adium_chat_time_start:: start time, already formatted for use in the
    #                         output file name
    def initialize(chat_lines, service, user_SN, partner_SN, adium_chat_time_start)
      @chat_lines = chat_lines
      @user_SN = user_SN
      @partner_SN = partner_SN
      @adium_chat_time_start = adium_chat_time_start

      # @chat_str is generated when to_s is called
      @chat_str = nil

      # key is for Pidgin, value is for Adium
      # Just used for <service>.<screenname> in directory structure
      service_name_map = {'aim' => 'AIM',
                          'jabber' => 'Jabber',
                          'gtalk' => 'GTalk',
                          'icq' => 'ICQ',
                          'qq' => 'QQ',
                          'msn' => 'MSN',
                          'yahoo' => 'Yahoo!'}

      # nil for a service that is not in the map
      @service = service_name_map[service.downcase]
    end

    attr_reader :chat_lines, :service, :user_SN, :partner_SN, :adium_chat_time_start

    # Returns contents of log file. The string is built on the first call
    # and cached afterwards.
    def to_s
      if @chat_str.nil?
        # Faster than inject() or each()
        @chat_str = @chat_lines.map{|l| l.to_s }.join
      end
      return @chat_str
    end

    # Yields each chat line in order.
    def each(&blk)
      @chat_lines.each{|l| yield l }
    end

    # Set overwrite=true to create a logfile even if logfile already exists.
    # Returns one of:
    # * false (if the destination directory or file could not be created),
    # * Pidgin2Adium::FILE_EXISTS if the file to be generated already exists and overwrite=false, or
    # * the path to the new Adium log file.
    def write_out(overwrite = false, output_dir_base = ADIUM_LOG_DIR)
      chatlog_name = "#{@partner_SN} (#{@adium_chat_time_start})"
      # output_dir_base + "/buddyname (2009-08-04T18.38.50-0700).chatlog"
      output_dir = File.join(output_dir_base, "#{@service}.#{@user_SN}", @partner_SN, "#{chatlog_name}.chatlog")
      # output_dir + "/buddyname (2009-08-04T18.38.50-0700).chatlog/buddyname (2009-08-04T18.38.50-0700).xml"
      output_path = output_dir + '/' + "#{chatlog_name}.xml"
      begin
        FileUtils.mkdir_p(output_dir)
      rescue => bang
        error "Could not create destination directory for log file. (Details: #{bang.class}: #{bang.message})"
        return false
      end
      if overwrite
        unless File.exist?(output_path)
          # File doesn't exist, but maybe it does with a different
          # time zone. Check for a file that differs only in time
          # zone and, if found, change output_path to target it.
          # NOTE(review): output_path ends in ".xml", so the ".chatlog"
          # substitution below never applies and the glob is rooted at
          # output_dir_base rather than the buddy's folder; this lookup
          # looks like it can never find anything - confirm the intent
          # before changing it.
          maybe_matches = Dir.glob(output_dir_base + '/' << File.basename(output_path).sub(/-\d{4}\)\.chatlog$/, '') << '/*')
          unless maybe_matches.empty?
            output_path = maybe_matches[0]
          end
        end
      else
        if File.exist?(output_path)
          return FILE_EXISTS
        end
      end

      # Build the whole document before touching the destination, so that a
      # failure while rendering the chat lines cannot leave a truncated file.
      # no \n before </chat> because @chat_str (from to_s) has it already
      document = '<?xml version="1.0" encoding="UTF-8" ?>' + "\n" +
        %(<chat xmlns="http://purl.org/net/ulf/ns/0.4-02" account="#{@user_SN}" service="#{@service}">) + "\n" +
        "#{self.to_s}</chat>"

      begin
        outfile = File.new(output_path, 'w')
      rescue => bang
        error "Could not open log file for writing. (Details: #{bang.class}: #{bang.message})"
        return false
      end

      begin
        outfile.write(document)
      ensure
        # Always release the handle, even if the write raises.
        outfile.close
      end

      return output_path
    end
  end # END LogFile class
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# Contains the class BasicParser and its subclasses, HtmlLogParser and
|
2
2
|
# TextFileParser, which parse the file passed into it and return a LogFile
|
3
|
-
# object.
|
3
|
+
# object.
|
4
4
|
#
|
5
5
|
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
|
6
6
|
# using these classes directly.
|
@@ -11,607 +11,608 @@ require 'balance_tags_c'
|
|
11
11
|
require 'pidgin2adium/log_file'
|
12
12
|
|
13
13
|
module Pidgin2Adium
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
end
|
137
|
-
|
138
|
-
# This method returns a LogFile instance, or false if an error occurred.
|
139
|
-
def parse
|
140
|
-
return false unless @first_line_is_valid
|
141
|
-
@file_content = cleanup(@file_content).split("\n")
|
142
|
-
|
143
|
-
@file_content.map! do |line|
|
144
|
-
# "next" returns nil which is removed by compact
|
145
|
-
next if line =~ /^\s+$/
|
146
|
-
if line =~ @line_regex
|
147
|
-
create_msg($~.captures)
|
148
|
-
elsif line =~ @line_regex_status
|
149
|
-
msg = create_status_or_event_msg($~.captures)
|
150
|
-
# Error occurred while parsing
|
151
|
-
return false if msg == false
|
152
|
-
else
|
153
|
-
error "Could not parse line:"
|
154
|
-
p line
|
155
|
-
return false
|
156
|
-
end
|
157
|
-
end
|
158
|
-
@file_content.compact!
|
159
|
-
return LogFile.new(@file_content, @service, @user_SN, @partner_SN, @adium_chat_time_start)
|
160
|
-
end
|
161
|
-
# Prevent parse from being called directly from BasicParser, since
|
162
|
-
# it uses subclassing magic.
|
163
|
-
protected :parse
|
164
|
-
|
165
|
-
#################
|
166
|
-
private
|
167
|
-
#################
|
168
|
-
|
169
|
-
def get_time_zone_offset()
|
170
|
-
# We must have a tz_offset or else the Adium Chat Log viewer
|
171
|
-
# doesn't read the date correctly and then:
|
172
|
-
# 1) the log has an empty start date column in the viewer
|
173
|
-
# 2) The timestamps are all the same for the whole log
|
174
|
-
tz_match = /([-\+]\d+)[A-Z]{3}\.(?:txt|htm|html)/.match(@src_path)
|
175
|
-
if tz_match and tz_match[1]
|
176
|
-
tz_offset = tz_match[1]
|
177
|
-
else
|
178
|
-
# "-0500" (3d rather than 2d to allow for "+")
|
179
|
-
tz_offset = sprintf('%+03d00', Time.zone_offset(Time.now.zone) / 3600)
|
180
|
-
end
|
181
|
-
return tz_offset
|
182
|
-
end
|
183
|
-
|
184
|
-
#--
|
185
|
-
# Adium time format: YYYY-MM-DD\THH:MM:SS[+-]TZ_HRS like:
|
186
|
-
# 2008-10-05T22:26:20-0800
|
187
|
-
# HOWEVER:
|
188
|
-
# If it's the first line, then return it like this (note periods):
|
189
|
-
# 2008-10-05T22.26.20-0800
|
190
|
-
# because it will be used in the filename.
|
191
|
-
#++
|
192
|
-
# Converts a pidgin datestamp to an Adium one.
|
193
|
-
def create_adium_time(time, is_first_line = false)
|
194
|
-
# parsed_date = [year, month, day, hour, min, sec]
|
195
|
-
if time =~ @time_regex
|
196
|
-
year, month, day, hour, min, sec = $1.to_i,
|
197
|
-
$2.to_i,
|
198
|
-
$3.to_i,
|
199
|
-
$4.to_i,
|
200
|
-
$5.to_i,
|
201
|
-
$6.to_i
|
202
|
-
elsif is_first_line and time =~ @time_regex_first_line
|
203
|
-
hour = $4.to_i
|
204
|
-
if $7 == 'PM' and hour != 12
|
205
|
-
hour += 12
|
206
|
-
end
|
207
|
-
year, month, day, min, sec = $3.to_i, # year
|
208
|
-
$1.to_i, # month
|
209
|
-
$2.to_i, # day
|
210
|
-
# already did hour
|
211
|
-
$5.to_i, # minutes
|
212
|
-
$6.to_i # seconds
|
213
|
-
elsif time =~ @minimal_time_regex
|
214
|
-
# "04:22:05" => %w{04 22 05}
|
215
|
-
hour = $1.to_i
|
216
|
-
if $4 == 'PM' and hour != 12
|
217
|
-
hour += 12
|
218
|
-
end
|
219
|
-
year, month, day = @basic_time_info
|
220
|
-
min = $2.to_i
|
221
|
-
sec = $3.to_i
|
222
|
-
else
|
223
|
-
error("You have found an odd timestamp. Please report it to the developer.")
|
224
|
-
log_msg("The timestamp: #{time}")
|
225
|
-
log_msg("Continuing...")
|
226
|
-
year,month,day,hour,min,sec = ParseDate.parsedate(time)
|
227
|
-
end
|
228
|
-
if is_first_line
|
229
|
-
adium_time = Time.local(year,month,day,hour,min,sec).strftime("%Y-%m-%dT%H.%M.%S#{@tz_offset}")
|
230
|
-
else
|
231
|
-
adium_time = Time.local(year,month,day,hour,min,sec).strftime("%Y-%m-%dT%H:%M:%S#{@tz_offset}")
|
232
|
-
end
|
233
|
-
return adium_time
|
234
|
-
end
|
235
|
-
|
236
|
-
# Extract required data from the file. Run by parse.
|
237
|
-
def pre_parse
|
238
|
-
# Deal with first line.
|
239
|
-
|
240
|
-
# the first line is special. It tells us (in order of regex groups):
|
241
|
-
# 1) who we're talking to
|
242
|
-
# 2) what time/date
|
243
|
-
# 3) what SN we used
|
244
|
-
# 4) what protocol (AIM, icq, jabber...)
|
245
|
-
first_line_match = /Conversation with (.+?) at (.+?) on (.+?) \((.+?)\)/.match(@first_line)
|
246
|
-
if first_line_match.nil?
|
247
|
-
raise InvalidFirstLineError
|
248
|
-
else
|
249
|
-
service = first_line_match[4]
|
250
|
-
# @user_SN is normalized to avoid "AIM.name" and "AIM.na me" folders
|
251
|
-
user_SN = first_line_match[3].downcase.tr(' ', '')
|
252
|
-
partner_SN = first_line_match[1]
|
253
|
-
pidgin_chat_time_start = first_line_match[2]
|
254
|
-
basic_time_info = case pidgin_chat_time_start
|
255
|
-
when @time_regex: [$1.to_i, $2.to_i, $3.to_i]
|
256
|
-
when @time_regex_first_line: [$3.to_i, $1.to_i, $2.to_i]
|
257
|
-
end
|
258
|
-
adium_chat_time_start = create_adium_time(pidgin_chat_time_start, true)
|
259
|
-
return [service,
|
260
|
-
user_SN,
|
261
|
-
partner_SN,
|
262
|
-
basic_time_info,
|
263
|
-
adium_chat_time_start]
|
264
|
-
end
|
265
|
-
end
|
266
|
-
|
267
|
-
def get_sender_by_alias(alias_name)
|
268
|
-
no_action = alias_name.sub(/^\*{3}/, '')
|
269
|
-
if @user_aliases.include? no_action.downcase.gsub(/\s+/, '')
|
270
|
-
# Set the current alias being used of the ones in @user_aliases
|
271
|
-
@user_alias = no_action
|
272
|
-
return @user_SN
|
273
|
-
else
|
274
|
-
return @partner_SN
|
275
|
-
end
|
276
|
-
end
|
277
|
-
|
278
|
-
#--
|
279
|
-
# create_msg takes an array of captures from matching against
|
280
|
-
# @line_regex and returns a Message object or one of its subclasses.
|
281
|
-
# It can be used for TextLogParser and HtmlLogParser because both of
|
282
|
-
# them return data in the same indexes in the matches array.
|
283
|
-
#++
|
284
|
-
def create_msg(matches)
|
285
|
-
msg = nil
|
286
|
-
# Either a regular message line or an auto-reply/away message.
|
287
|
-
time = create_adium_time(matches[0])
|
288
|
-
buddy_alias = matches[1]
|
289
|
-
sender = get_sender_by_alias(buddy_alias)
|
290
|
-
body = matches[3]
|
291
|
-
if matches[2] # auto-reply
|
292
|
-
msg = AutoReplyMessage.new(sender, time, buddy_alias, body)
|
293
|
-
else
|
294
|
-
# normal message
|
295
|
-
msg = XMLMessage.new(sender, time, buddy_alias, body)
|
296
|
-
end
|
297
|
-
return msg
|
298
|
-
end
|
299
|
-
|
300
|
-
#--
|
301
|
-
# create_status_or_event_msg takes an array of +MatchData+ captures from
|
302
|
-
# matching against @line_regex_status and returns an Event or Status.
|
303
|
-
# Returns nil if it's a message that should be ignored, or false if an
|
304
|
-
# error occurred.
|
305
|
-
#++
|
306
|
-
def create_status_or_event_msg(matches)
|
307
|
-
# ["22:58:00", "BuddyName logged in."]
|
308
|
-
# 0: time
|
309
|
-
# 1: status message or event
|
310
|
-
msg = nil
|
311
|
-
time = create_adium_time(matches[0])
|
312
|
-
str = matches[1]
|
313
|
-
# Return nil, which will get compact'ed out
|
314
|
-
return nil if @ignore_events.detect{|regex| str =~ regex }
|
315
|
-
|
316
|
-
regex, status = @status_map.detect{|regex, status| str =~ regex}
|
317
|
-
if regex and status
|
318
|
-
# Status message
|
319
|
-
buddy_alias = regex.match(str)[1]
|
320
|
-
sender = get_sender_by_alias(buddy_alias)
|
321
|
-
msg = StatusMessage.new(sender, time, buddy_alias, status)
|
322
|
-
else
|
323
|
-
# Test for event
|
324
|
-
regex = @lib_purple_events.detect{|regex| str =~ regex }
|
325
|
-
event_type = 'libpurpleEvent' if regex
|
326
|
-
unless regex and event_type
|
327
|
-
# not a libpurple event, try others
|
328
|
-
if @event_map.detect{|regex,event_type| str =~ regex}
|
329
|
-
regex, event_type = $1, $2
|
330
|
-
else
|
331
|
-
error(sprintf("Error parsing status or event message, no status or event found: %p", str))
|
332
|
-
return false
|
333
|
-
end
|
334
|
-
end
|
335
|
-
if regex and event_type
|
336
|
-
regex_matches = regex.match(str)
|
337
|
-
# Event message
|
338
|
-
if regex_matches.size == 1
|
339
|
-
# No alias - this means it's the user
|
340
|
-
buddy_alias = @user_alias
|
341
|
-
sender = @user_SN
|
342
|
-
else
|
343
|
-
buddy_alias = regex_matches[1]
|
344
|
-
sender = get_sender_by_alias(buddy_alias)
|
345
|
-
end
|
346
|
-
msg = Event.new(sender, time, buddy_alias, str, event_type)
|
347
|
-
end
|
348
|
-
end
|
349
|
-
return msg
|
350
|
-
end
|
14
|
+
# Empty class. Raise'd by LogParser if the first line of a log is not
|
15
|
+
# parseable.
|
16
|
+
class InvalidFirstLineError < StandardError
  # Marker exception only: adds no state or behaviour to StandardError.
end
|
17
|
+
|
18
|
+
# BasicParser is a base class. Its subclasses are TextLogParser and
|
19
|
+
# HtmlLogParser.
|
20
|
+
#
|
21
|
+
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
|
22
|
+
# using this class directly.
|
23
|
+
class BasicParser
|
24
|
+
include Pidgin2Adium
|
25
|
+
def initialize(src_path, user_aliases)
  # src_path::     path of the Pidgin log file to read.
  # user_aliases:: comma-separated list of the aliases the local user
  #                appears under in the log.
  #
  # Reads the whole file, parses its first line via pre_parse and sets up
  # the lookup tables used by create_status_or_event_msg. If the first line
  # is not parseable, @first_line_is_valid is set to false, the problem is
  # reported through error(), and the tables are left unset.
  @src_path = src_path
  # Whitespace is removed for easy matching later on.
  @user_aliases = user_aliases.split(',').map!{|x| x.downcase.gsub(/\s+/,'') }.uniq
  # @user_alias is set each time get_sender_by_alias is called. It is a non-normalized
  # alias.
  # Set an initial value just in case the first message doesn't give
  # us an alias.
  @user_alias = user_aliases.split(',')[0]

  @tz_offset = get_time_zone_offset()

  # Block form so the handle is closed even if reading raises (readline
  # raises EOFError on an empty file).
  File.open(@src_path, 'r') do |file|
    @first_line = file.readline
    @file_content = file.read
  end

  # Time regexes must be set before pre_parse().
  # "4/18/2007 11:02:00 AM" => %w{4, 18, 2007, 11, 02, 00, AM}
  # ONLY used (if at all) in first line of chat ("Conversation with...at...")
  @time_regex_first_line = %r{^(\d{1,2})/(\d{1,2})/(\d{4}) (\d{1,2}):(\d{2}):(\d{2}) ([AP]M)$}
  # "2007-04-17 12:33:13" => %w{2007, 04, 17, 12, 33, 13}
  @time_regex = /^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})$/
  # sometimes a line in a chat doesn't have a full timestamp
  # "04:22:05 AM" => %w{04 22 05 AM}
  # The separating space is kept out of the capture so that group 4 is
  # exactly "AM"/"PM" (or nil when absent).
  @minimal_time_regex = /^(\d{1,2}):(\d{2}):(\d{2})(?: ([AP]M))?$/

  # Whether or not the first line is parseable.
  @first_line_is_valid = true
  begin
    @service,
      @user_SN,
      @partner_SN,
      # @basic_time_info is for files that only have the full
      # timestamp at the top; we can use it to fill in the minimal
      # per-line timestamps. It has only 3 elements (year, month,
      # dayofmonth) because you should be able to fill everything
      # else in. If you can't, something's wrong.
      @basic_time_info,
      # When the chat started, in Adium's format
      @adium_chat_time_start = pre_parse()
  rescue InvalidFirstLineError
    @first_line_is_valid = false
    error("Failed to parse, invalid first line: #{@src_path}")
    return # stop processing
  end

  # @status_map, @lib_purple_events, and @events are used in
  # create_status_or_event_msg
  @status_map = {
    /(.+) logged in\.$/ => 'online',
    /(.+) logged out\.$/ => 'offline',
    /(.+) has signed on\.$/ => 'online',
    /(.+) has signed off\.$/ => 'offline',
    /(.+) has gone away\.$/ => 'away',
    /(.+) is no longer away\.$/ => 'available',
    /(.+) has become idle\.$/ => 'idle',
    /(.+) is no longer idle\.$/ => 'available'
  }

  # lib_purple_events are all of event_type libPurple
  @lib_purple_events = [
    # file transfer
    /Starting transfer of .+ from (.+)/,
    /^Offering to send .+ to (.+)$/,
    /(.+) is offering to send file/,
    /^Transfer of file .+ complete$/,
    # The alternation is grouped so that "Error " applies to all three
    # verbs; ungrouped, any line containing "writing" would match.
    /Error (?:reading|writing|accessing) .+: .+/,
    /You cancell?ed the transfer of/,
    /File transfer cancelled/,
    /(.+?) cancell?ed the transfer of/,
    /(.+?) cancelled the file transfer/,
    # Direct IM - actual (dis)connect events are their own types
    /^Attempting to connect to (.+) at .+ for Direct IM\./,
    /^Asking (.+) to connect to us at .+ for Direct IM\./,
    /^Attempting to connect via proxy server\.$/,
    /^Direct IM with (.+) failed/,
    # encryption
    /Received message encrypted with wrong key/,
    /^Requesting key\.\.\.$/,
    /^Outgoing message lost\.$/,
    /^Conflicting Key Received!$/,
    /^Error in decryption- asking for resend\.\.\.$/,
    /^Making new key pair\.\.\.$/,
    # sending errors
    /^Last outgoing message not received properly- resetting$/,
    /Resending\.\.\./,
    # connection errors
    /Lost connection with the remote user:.+/,
    # chats
    /^.+ entered the room\.$/,
    /^.+ left the room\.$/
  ]

  # non-libpurple events
  # Each key maps to an event_type string. The keys will be matched against a line of chat
  # and the partner's alias will be in regex group 1, IF the alias is matched.
  @event_map = {
    # .+ is not an alias, it's a proxy server so no grouping
    /^Attempting to connect to .+\.$/ => 'direct-im-connect',
    # NB: pidgin doesn't track when Direct IM is disconnected, AFAIK
    /^Direct IM established$/ => 'directIMConnected',
    /Unable to send message/ => 'chat-error',
    /You missed .+ messages from (.+) because they were too large/ => 'chat-error',
    /User information not available/ => 'chat-error'
  }

  @ignore_events = [
    # Adium ignores SN/alias changes.
    /^.+? is now known as .+?\.<br\/?>$/
  ]
end
|
352
137
|
|
353
|
-
#
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
138
|
+
# This method returns a LogFile instance, or false if an error occurred
# (invalid first line, an unparseable line, or a status/event line that
# create_status_or_event_msg reports as an error).
#
# Whitespace-only lines and ignored events are left out of the result.
def parse
  return false unless @first_line_is_valid
  @file_content = cleanup(@file_content).split("\n")

  @file_content.map! do |line|
    # "next" returns nil which is removed by compact
    next if line =~ /^\s+$/
    if line =~ @line_regex
      create_msg($~.captures)
    elsif line =~ @line_regex_status
      msg = create_status_or_event_msg($~.captures)
      # Error occurred while parsing
      return false if msg == false
      # The message must be the value of this branch; the guard above
      # evaluates to nil, which would make compact! throw away every
      # status and event line.
      msg
    else
      error "Could not parse line:"
      p line
      return false
    end
  end
  @file_content.compact!
  return LogFile.new(@file_content, @service, @user_SN, @partner_SN, @adium_chat_time_start)
end
|
161
|
+
# Prevent parse from being called directly from BasicParser, since
|
162
|
+
# it uses subclassing magic.
|
163
|
+
protected :parse
|
164
|
+
|
165
|
+
#################
|
166
|
+
private
|
167
|
+
#################
|
168
|
+
|
169
|
+
def get_time_zone_offset()
  # Returns the UTC offset to stamp on converted times, as a string such as
  # "-0500".
  #
  # We must have a tz_offset or else the Adium Chat Log viewer
  # doesn't read the date correctly and then:
  # 1) the log has an empty start date column in the viewer
  # 2) The timestamps are all the same for the whole log
  #
  # The offset embedded in the source file name (sign and digits directly
  # before a three-letter zone abbreviation and the extension) wins.
  tz_match = /([-\+]\d+)[A-Z]{3}\.(?:txt|htm|html)/.match(@src_path)
  return tz_match[1] if tz_match

  # Otherwise fall back to this machine's current offset. %z yields the
  # signed "+hhmm" form directly, works for every zone (not only the few
  # abbreviations Time.zone_offset knows) and keeps half-hour offsets.
  return Time.now.strftime('%z')
end
|
183
|
+
|
184
|
+
#--
|
185
|
+
# Adium time format: YYYY-MM-DD\THH:MM:SS[+-]TZ_HRS like:
|
186
|
+
# 2008-10-05T22:26:20-0800
|
187
|
+
# HOWEVER:
|
188
|
+
# If it's the first line, then return it like this (note periods):
|
189
|
+
# 2008-10-05T22.26.20-0800
|
190
|
+
# because it will be used in the filename.
|
191
|
+
#++
|
192
|
+
# Converts a pidgin datestamp to an Adium one.
|
193
|
+
# Converts the Pidgin timestamp string +time+ into Adium's format and
# returns it as a String with @tz_offset appended.
#
# time::          timestamp text; tried against @time_regex, then (only
#                 when +is_first_line+) @time_regex_first_line, then
#                 @minimal_time_regex.
# is_first_line:: when true the clock fields are separated by periods
#                 instead of colons, because the result is used in a
#                 file name.
#
# A time-only stamp takes its year/month/day from @basic_time_info.
def create_adium_time(time, is_first_line = false)
  if time =~ @time_regex
    year, month, day = $1.to_i, $2.to_i, $3.to_i
    hour, min, sec   = $4.to_i, $5.to_i, $6.to_i
  elsif is_first_line and time =~ @time_regex_first_line
    # Groups arrive as month, day, year, hour, min, sec, AM/PM.
    month, day, year = $1.to_i, $2.to_i, $3.to_i
    hour, min, sec, meridiem = $4.to_i, $5.to_i, $6.to_i, $7
    hour = to_24_hour(hour, meridiem)
  elsif time =~ @minimal_time_regex
    # "04:22:05" => %w{04 22 05}, with an optional AM/PM in group 4.
    hour, min, sec, meridiem = $1.to_i, $2.to_i, $3.to_i, $4
    hour = to_24_hour(hour, meridiem)
    year, month, day = @basic_time_info
  else
    error("You have found an odd timestamp. Please report it to the developer.")
    log_msg("The timestamp: #{time}")
    log_msg("Continuing...")
    # NOTE(review): ParseDate is not part of the standard library on
    # Ruby 1.9 and later - confirm which Ruby versions must be supported.
    year,month,day,hour,min,sec = ParseDate.parsedate(time)
  end

  # Periods for the file-name form, colons for the in-log form.
  separator = is_first_line ? '.' : ':'
  return Time.local(year,month,day,hour,min,sec).strftime("%Y-%m-%dT%H#{separator}%M#{separator}%S#{@tz_offset}")
end

# Converts a 12-hour clock +hour+ to 24-hour form. +meridiem+ is "AM",
# "PM" or nil; with nil the hour is returned unchanged. 12 AM becomes 0
# and 12 PM stays 12.
def to_24_hour(hour, meridiem)
  if meridiem == 'PM' and hour != 12
    hour + 12
  elsif meridiem == 'AM' and hour == 12
    0
  else
    hour
  end
end
|
235
|
+
|
236
|
+
# Extract required data from the file. Run by parse.
|
237
|
+
# Pulls the conversation metadata out of @first_line.
#
# Returns [service, user_SN, partner_SN, basic_time_info,
# adium_chat_time_start], where basic_time_info is [year, month, day]
# (or nil when the start time matches neither known timestamp regex).
# Raises InvalidFirstLineError when @first_line is not a Pidgin
# "Conversation with ..." header.
def pre_parse
  # Capture groups, in order: partner, start time, our screen name,
  # protocol (AIM, icq, jabber...).
  header = /Conversation with (.+?) at (.+?) on (.+?) \((.+?)\)/.match(@first_line)
  raise InvalidFirstLineError if header.nil?

  partner = header[1]
  started_at = header[2]
  # Normalized to avoid separate "AIM.name" and "AIM.na me" folders.
  own_name = header[3].downcase.tr(' ', '')
  protocol = header[4]

  date_parts = case started_at
               when @time_regex then [$1.to_i, $2.to_i, $3.to_i]
               when @time_regex_first_line then [$3.to_i, $1.to_i, $2.to_i]
               end

  [protocol, own_name, partner, date_parts, create_adium_time(started_at, true)]
end
|
266
|
+
|
267
|
+
# Maps a displayed alias to a screen name.
#
# A leading "***" is stripped first. If the remainder, lowercased with
# all whitespace removed, is one of @user_aliases, it is remembered in
# @user_alias and @user_SN is returned; otherwise @partner_SN is
# returned.
def get_sender_by_alias(alias_name)
  bare_alias = alias_name.sub(/^\*{3}/, '')
  lookup_key = bare_alias.downcase.gsub(/\s+/, '')
  return @partner_SN unless @user_aliases.include?(lookup_key)

  # Track which of the user's aliases is currently in use.
  @user_alias = bare_alias
  @user_SN
end
|
277
|
+
|
278
|
+
#--
|
279
|
+
# create_msg takes an array of captures from matching against
|
280
|
+
# @line_regex and returns a Message object or one of its subclasses.
|
281
|
+
# It can be used for TextLogParser and HtmlLogParser because both of
|
282
|
+
# them return data in the same indexes in the matches array.
|
283
|
+
#++
|
284
|
+
# Builds a message object from the captures of a @line_regex match.
#
# matches:: [timestamp, alias, auto-reply marker or nil, body]
#
# Returns an AutoReplyMessage when the auto-reply marker is present and
# an XMLMessage otherwise.
def create_msg(matches)
  stamp, buddy_alias, auto_reply_marker, body = matches[0], matches[1], matches[2], matches[3]
  time = create_adium_time(stamp)
  sender = get_sender_by_alias(buddy_alias)
  message_class = auto_reply_marker ? AutoReplyMessage : XMLMessage
  message_class.new(sender, time, buddy_alias, body)
end
|
299
|
+
|
300
|
+
#--
|
301
|
+
# create_status_or_event_msg takes an array of +MatchData+ captures from
|
302
|
+
# matching against @line_regex_status and returns an Event or Status.
|
303
|
+
# Returns nil if it's a message that should be ignored, or false if an
|
304
|
+
# error occurred.
|
305
|
+
#++
|
306
|
+
# Builds a StatusMessage or Event from the captures of a
# @line_regex_status match.
#
# matches:: [time, status-or-event text],
#           e.g. ["22:58:00", "BuddyName logged in."]
#
# Returns nil when the text matches one of @ignore_events (the caller
# compacts nils away), false when the text matches no known status or
# event, and otherwise the new StatusMessage or Event.
def create_status_or_event_msg(matches)
  time = create_adium_time(matches[0])
  str = matches[1]
  return nil if @ignore_events.detect{|rx| str =~ rx }

  # Block parameters are deliberately named differently from the locals
  # they feed, so they can never clobber them.
  regex, status = @status_map.detect{|rx, _status| str =~ rx }
  if regex and status
    # Status message
    buddy_alias = regex.match(str)[1]
    sender = get_sender_by_alias(buddy_alias)
    return StatusMessage.new(sender, time, buddy_alias, status)
  end

  # Test for event
  regex = @lib_purple_events.detect{|rx| str =~ rx }
  if regex
    event_type = 'libpurpleEvent'
  else
    # Not a libpurple event, try others. detect returns the matching
    # [regex, event_type] pair itself; $1/$2 would only be capture groups
    # of the last match.
    regex, event_type = @event_map.detect{|rx, _type| str =~ rx }
    unless regex and event_type
      error(sprintf("Error parsing status or event message, no status or event found: %p", str))
      return false
    end
  end

  regex_matches = regex.match(str)
  if regex_matches.size == 1
    # No alias captured - this means it's the user
    buddy_alias = @user_alias
    sender = @user_SN
  else
    buddy_alias = regex_matches[1]
    sender = get_sender_by_alias(buddy_alias)
  end
  return Event.new(sender, time, buddy_alias, str, event_type)
end
|
351
|
+
end # END BasicParser class
|
352
|
+
|
353
|
+
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
|
354
|
+
# using this class directly.
|
355
|
+
# Parser for Pidgin plain-text (.txt) logs.
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
# using this class directly.
class TextLogParser < BasicParser
  def initialize(src_path, user_aliases)
    super(src_path, user_aliases)
    @timestamp_rx = '\((\d{1,2}:\d{1,2}:\d{1,2})\)'

    # @line_regex matches a line in a TXT log file other than the first.
    # It runs on text that cleanup has already entity-escaped, so the
    # auto-reply marker is matched in its escaped form.
    # @line_regex matchdata:
    # 0: timestamp
    # 1: screen name or alias, if alias set
    # 2: "&lt;AUTO-REPLY&gt;" or nil
    # 3: message body
    @line_regex = /#{@timestamp_rx} (.*?) ?(&lt;AUTO-REPLY&gt;)?: (.*)/o

    # @line_regex_status matches a status line
    # @line_regex_status matchdata:
    # 0: timestamp
    # 1: status message
    @line_regex_status = /#{@timestamp_rx} ([^:]+)/o
  end

  public :parse

  #################
  private
  #################

  # Prepares raw log text for line-by-line parsing and returns it.
  # Modifies +text+ in place: strips carriage returns, escapes XML
  # entities, and joins the continuation lines of multi-line messages
  # with "<br/>".
  def cleanup(text)
    text.tr!("\r", '')
    # Escape entities since this will be in XML
    text.gsub!('&', '&amp;') # escape '&' first
    text.gsub!('<', '&lt;')
    text.gsub!('>', '&gt;')
    text.gsub!('"', '&quot;')
    text.gsub!("'", '&apos;')
    # Replace newlines with "<br/>" unless they end a chat line. This
    # must come after the escaping above so the tag itself is not
    # escaped.
    text.gsub!(/\n(?!#{@timestamp_rx}|\Z)/, '<br/>')
    return text
  end
end
|
394
|
+
|
395
|
+
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead
|
396
|
+
# of using this class directly.
|
397
|
+
# Parser for Pidgin HTML (.htm/.html) logs.
# Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead
# of using this class directly.
class HtmlLogParser < BasicParser
  def initialize(src_path, user_aliases)
    super(src_path, user_aliases)
    @timestamp_rx = '\(((?:\d{4}-\d{2}-\d{2} )?\d{1,2}:\d{1,2}:\d{1,2}(?: [AP]M)?)\)'

    # @line_regex matches a line in an HTML log file other than the
    # first. The time matches on either "2008-11-17 14:12" or "14:12".
    # @line_regex match obj:
    # 0: timestamp, extended or not
    # 1: screen name or alias, if alias set
    # 2: "&lt;AUTO-REPLY&gt;" or nil
    # 3: message body
    # The ":" is optional to allow for strings like "(17:12:21) <b>***Gabe B-W</b> is confused<br/>"
    # NOTE(review): assumes the auto-reply marker is entity-escaped in
    # HTML logs - confirm against a sample log.
    @line_regex = /#{@timestamp_rx} ?<b>(.+?) ?(&lt;AUTO-REPLY&gt;)?:?<\/b> ?(.+)<br ?\/>/o
    # @line_regex_status matches a status line
    # @line_regex_status match obj:
    # 0: timestamp
    # 1: status message
    @line_regex_status = /#{@timestamp_rx} ?<b> (.+)<\/b><br ?\/>/o
  end

  public :parse

  #################
  private
  #################

  # Returns a cleaned string (+text+ is modified in place).
  # Removes the following tags from _text_:
  # * html
  # * body
  # * font
  # * a with no innertext, e.g. <a href="blah"></a>
  # And removes the following style declarations:
  # * color: #000000 (just turns text black)
  # * font-family
  # * font-size
  # * background
  # Other declarations (e.g. font-weight) are kept. A span left with no
  # declarations is removed, but the text inside it is preserved.
  # <em> is changed to <span style="font-style: italic;">.
  def cleanup(text)
    # Sometimes this is in there. I don't know why.
    text.gsub!(%r{</FONT HSPACE='\d'>}, '')
    # We can remove <font> safely since Pidgin and Adium both show bold
    # using <span style="font-weight: bold;"> except Pidgin uses single
    # quotes while Adium uses double quotes.
    text.gsub!(/<\/?(?:html|body|font)(?: .+?)?>/, '') # very important!

    text.tr!("\r", '')
    # Remove empty lines, however many occur in a row.
    text.gsub!(/\n{2,}/, "\n")

    # Remove newlines that end the file, since they screw up the
    # newline -> <br/> conversion
    text.gsub!(/\n\Z/, '')

    # Replace newlines with "<br/>" unless they end a chat line.
    # This must go after we remove <font> tags.
    text.gsub!(/\n(?!#{@timestamp_rx})/, '<br/>')

    # These empty links are sometimes appended to every line in a chat,
    # for some weird reason. Remove them. The href value may use either
    # quote style; [^<>] keeps the match inside a single tag so a real
    # link earlier on the line is never swallowed.
    text.gsub!(%r{<a href=(['"])[^<>]*?\1>\s*</a>}, '')

    # Replace single quotes inside tags with double quotes so we can
    # easily change single quotes to entities.
    # For spans, removes a space after the final declaration if it exists.
    text.gsub!(/<span style='([^']+?;) ?'>/, '<span style="\1">')
    # Generic attribute rule; covers href, src and alt as well.
    text.gsub!(/([a-z]+=)'(.+?)'/, '\1"\2"')
    text.gsub!("'", '&apos;')

    text.gsub!(%r{<span style="(.+?)">(.*?)</span>}) do |s|
      # style = style declaration
      # innertext = text inside <span>
      style, innertext = $1, $2
      # Remove empty spans (a nil block result is substituted as "").
      next if innertext == ''
      # TODO: replace double quotes with "&quot;", but only outside tags; may still be tags inside spans

      kept = style.split(/; ?/).map { |decl| filter_style_declaration(decl) }.compact
      if kept.empty?
        innertext
      else
        "<span style=\"#{kept.join('; ')};\">#{innertext}</span>"
      end
    end
    # Pidgin uses <em>, Adium uses <span>
    if text.gsub!('<em>', '<span style="font-style: italic;">')
      text.gsub!('</em>', '</span>')
    end
    return text
  end

  # Decides the fate of one CSS declaration taken from a <span style>.
  # Returns the declaration to keep (possibly trimmed), or nil to drop it.
  def filter_style_declaration(decl)
    if decl[0,5] == 'color'
      # Black is the default text color, so it carries no information.
      return nil if decl.include?('color: #000000')
      # Sometimes this happens:
      #   <span style="color: #0000ff>today;">today was busy</span>
      # Then decl = "color: #0000ff>today". Or it can end in ">;", with
      # no text before the semicolon. So keep the color but remove the
      # ">" and anything following it.
      return decl =~ /(color: #[0-9a-fA-F]{6})(>.*)?/ ? $1 : nil
    end

    case decl
    when /^font-family/, /^font-size/, /^background/ then nil
    else decl # don't remove font-weight and friends
    end
  end
end # END HtmlLogParser class
|
521
|
+
|
522
|
+
# A holding object for each line of the chat. It is subclassed as
|
523
|
+
# appropriate (eg AutoReplyMessage). Each subclass (but not Message
|
524
|
+
# itself) has its own to_s which prints out its information in a format
|
525
|
+
# appropriate for putting in an Adium log file.
|
526
|
+
# Subclasses: XMLMessage, AutoReplyMessage, StatusMessage, Event.
|
527
|
+
# Base holder for one line of a chat. Subclasses add the content and a
# to_s that renders the line for an Adium log file.
class Message
  # sender::      the sender's screen name
  # time::        when the message was sent, in Adium format
  #               (e.g. "2008-10-05T22:26:20-0800")
  # buddy_alias:: the receiver's alias (NOT screen name)
  attr_accessor :sender, :time, :buddy_alias

  def initialize(sender, time, buddy_alias)
    @sender, @time, @buddy_alias = sender, time, buddy_alias
  end
end
|
539
|
+
|
540
|
+
# Basic message with body text (as opposed to pure status messages, which
|
541
|
+
# have no body).
|
542
|
+
# Basic message with body text (as opposed to pure status messages, which
# have no body).
class XMLMessage < Message
  attr_accessor :body

  # body:: the message text. It is normalized (see normalize_body!)
  #        before the styled markup used by to_s is built from it.
  def initialize(sender, time, buddy_alias, body)
    super(sender, time, buddy_alias)
    @body = body
    normalize_body!()
    # Built after normalization so escaped entities, balanced tags and
    # the Adium-style action markers actually reach the output.
    @styled_body = '<div><span style="font-family: Helvetica; font-size: 12pt;">%s</span></div>' % @body
  end

  # Renders the message as an Adium <message> element followed by a
  # newline.
  def to_s
    return sprintf('<message sender="%s" time="%s" alias="%s">%s</message>' << "\n",
                   @sender, @time, @buddy_alias, @styled_body)
  end

  #################
  private
  #################

  # Balances mismatched tags, normalizes body style, and fixes actions
  # so they are in Adium style (Pidgin uses "***Buddy waves at you", Adium uses
  # "*Buddy waves at you*").
  def normalize_body!
    normalize_body_entities!()
    # Fix mismatched tags. Yes, it's faster to do it per-message
    # than all at once.
    @body = Pidgin2Adium.balance_tags_c(@body)
    if @buddy_alias[0,3] == '***'
      # "***<alias>" is what pidgin sets as the alias for a /me action.
      # Take a new string rather than slicing in place, so the caller's
      # alias string is left untouched.
      @buddy_alias = @buddy_alias[3..-1]
      @body = '*' << @body << '*'
    end
  end

  # Escapes entities.
  def normalize_body_entities!
    # Convert '&' to '&amp;' only if it's not followed by an entity.
    # Non-mutating gsub keeps the caller's body string intact.
    @body = @body.gsub(/&(?!lt|gt|amp|quot|apos)/, '&amp;')
  end
end # END XMLMessage
|
581
|
+
|
582
|
+
# An auto reply message.
|
583
|
+
# An auto reply message. Rendered like an XMLMessage, with an extra
# auto="true" attribute.
class AutoReplyMessage < XMLMessage
  # Returns the Adium <message> element for this auto-reply, followed
  # by a newline.
  def to_s
    template = '<message sender="%s" time="%s" auto="true" alias="%s">%s</message>' + "\n"
    template % [@sender, @time, @buddy_alias, @styled_body]
  end
end
|
580
589
|
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
end
|
590
|
+
# A message saying e.g. "Blahblah has gone away."
|
591
|
+
# A message saying e.g. "Blahblah has gone away." It carries a status
# type instead of body text.
class StatusMessage < Message
  attr_accessor :status

  # status:: the value written to the "type" attribute by to_s.
  def initialize(sender, time, buddy_alias, status)
    super(sender, time, buddy_alias)
    @status = status
  end

  # Returns the self-closing Adium <status> element, followed by a
  # newline.
  def to_s
    template = '<status type="%s" sender="%s" time="%s" alias="%s"/>' + "\n"
    template % [@status, @sender, @time, @buddy_alias]
  end
end
|
602
|
+
|
603
|
+
# Pidgin does not have Events, but Adium does. Pidgin mostly uses system
|
604
|
+
# messages to display what Adium calls events. These include sending a file,
|
605
|
+
# starting a Direct IM connection, or an error in chat.
|
606
|
+
# Pidgin does not have Events, but Adium does. Pidgin mostly uses system
# messages to display what Adium calls events. These include sending a file,
# starting a Direct IM connection, or an error in chat.
class Event < XMLMessage
  attr_accessor :event_type

  # event_type:: the value written to the "type" attribute by to_s.
  def initialize(sender, time, buddy_alias, body, event_type)
    super(sender, time, buddy_alias, body)
    @event_type = event_type
  end

  # Returns the Adium <event> element followed by a newline, matching
  # the other message classes so consecutive entries never share a line.
  def to_s
    return sprintf('<event type="%s" sender="%s" time="%s" alias="%s">%s</event>' << "\n",
                   @event_type, @sender, @time, @buddy_alias, @styled_body)
  end
end
|
617
618
|
end # end module
|