pidgin2adium 3.0.0 → 3.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,118 @@
1
module Pidgin2Adium
  # Balances the HTML-ish tags of a string using a modified stack and returns
  # the balanced string. The argument itself is left untouched (a copy is
  # processed), so frozen strings are accepted.
  # Use text = balance_tags(text).
  #
  # What it does:
  # * closes tags that are still open at the end of the text,
  # * drops closing tags that have no matching opening tag,
  # * closes intermediate tags when a closing tag matches a tag deeper in
  #   the stack,
  # * self-closes known single tags (<br> becomes <br />),
  # * lower-cases tag names,
  # * escapes a "<" that is directly followed by a digit (e.g. "<3").
  #
  # From Wordpress's formatting.php; rewritten in Ruby by Gabe
  # Berke-Williams, 2009.
  # Author:: Leonard Lin <leonard@acm.org>
  # License:: GPL v2.0
  # Copyright:: November 4, 2001
  def Pidgin2Adium.balance_tags( text )
    # Work on a copy so the caller's string is never half-modified.
    text = text.dup
    tagstack = []   # names of currently open tags, innermost last
    tagqueue = ''   # tags whose output is deferred to the next flush
    newtext = ''
    single_tags = %w{br hr img input meta} # Known single-entity/self-closing tags
    nestable_tags = %w{blockquote div span font} # Tags that can be immediately nested within themselves
    # 1: tagname, with possible leading "/"
    # 2: attributes
    tag_regex = /<(\/?\w*)\s*([^>]*)>/

    # WP bug fix for comments - in case you REALLY meant to type '< !--'.
    # Real comments are rebuilt below as '< !--' and turned back into '<!--'
    # at the end, so a literal '< !--' is parked on a four-space placeholder.
    text.gsub!('< !--', '<    !--')

    # WP bug fix for LOVE <3 (and other situations with '<' before a number)
    text.gsub!(/<([0-9]{1})/, '&lt;\1')

    while ( match = tag_regex.match(text) )
      # Flush whatever the previous iteration queued, then reset the queue.
      newtext << tagqueue
      tagqueue = ''

      tag = match[1].downcase
      attributes = match[2]

      if tag[0,1] == '/'
        # End tag
        tag = tag[1..-1]
        if tagstack.empty?
          # Too many closing tags: drop this one.
          tag = ''
        elsif tagstack.last == tag
          # Closes the innermost open tag: pop it and emit the close tag.
          tagstack.pop
          tag = "</#{tag}>"
        else
          # Closing tag not at top: if it is open further down, close
          # everything above it and the tag itself; otherwise drop it.
          open_at = tagstack.rindex(tag)
          if open_at
            (tagstack.size - open_at).times do
              tagqueue << "</#{tagstack.pop}>"
            end
          end
          tag = ''
        end
      else
        # Begin tag
        if attributes[-1,1] == '/' || tag == ''
          # Already self-closing, or not a real tag (e.g. a comment):
          # nothing to track.
        elsif single_tags.include?(tag)
          # Known single-entity tag that does not close itself: do so.
          attributes << '/'
        else
          # A non-nestable tag opened directly inside itself closes the
          # previous one first.
          if !nestable_tags.include?(tag) && tagstack.last == tag
            tagqueue = "</#{tagstack.pop}>"
          end
          tagstack.push(tag)
        end

        attributes = ' ' + attributes unless attributes.empty?
        tag = "<#{tag}#{attributes}>"
        # If a close tag is already queued, this tag must follow it.
        unless tagqueue.empty?
          tagqueue << tag
          tag = ''
        end
      end

      newtext << match.pre_match << tag
      text = match.post_match
    end

    # Clear Tag Queue
    newtext << tagqueue

    # Add Remaining text
    newtext << text

    # Empty Stack: close everything still open, innermost first.
    tagstack.reverse_each do |t|
      newtext << "</#{t}>"
    end

    # WP fix for the bug with HTML comments: restore real comments, then
    # turn the placeholder back into the literal '< !--'.
    newtext.gsub!("< !--", "<!--")
    newtext.gsub!("<    !--", "< !--")

    return newtext
  end
end
@@ -1,72 +1,72 @@
1
1
  require 'pidgin2adium'
2
2
 
3
3
module Pidgin2Adium
  # An easy way to batch-process a directory. Used by the pidgin2adium
  # command-line script.
  class LogConverter
    include Pidgin2Adium

    # pidgin_log_dir is the directory to search for Pidgin logs and aliases
    # is passed through unchanged to parse_and_generate.
    #
    # You can add options using the _opts_ hash, which can have the
    # following keys, all of which are optional:
    # * *overwrite*: If true, then overwrite even if log is found.
    #   Defaults to false.
    # * *output_dir*: The top-level dir to put the logs in.
    #   Logs under output_dir are still each in their own folders, etc.
    #   Defaults to Pidgin2Adium::ADIUM_LOG_DIR
    #
    # Raises Errno::ENOENT if pidgin_log_dir is not an existing directory.
    def initialize(pidgin_log_dir, aliases, opts = {})
      # parse_and_generate will process it for us
      @opts = opts

      @pidgin_log_dir = File.expand_path(pidgin_log_dir)
      @my_aliases = aliases

      unless File.directory?(@pidgin_log_dir)
        puts "Source directory #{@pidgin_log_dir} does not exist or is not a directory."
        # Carry the offending path in the exception itself as well.
        raise Errno::ENOENT, @pidgin_log_dir
      end
    end

    # Runs Pidgin2Adium::parse_and_generate on every log file in directory
    # provided in new, then deletes Adium's search indexes to force
    # it to rescan logs on startup. Finally prints the collected minor and
    # major error messages.
    #
    # Re-raises Errno::EACCES if the log directory cannot be read.
    def start
      log_msg "Begin converting."
      begin
        files_path = get_all_chat_files(@pidgin_log_dir)
      rescue Errno::EACCES => bang
        error("Sorry, permission denied for getting Pidgin chat files from #{@pidgin_log_dir}.")
        error("Details: #{bang.message}")
        # Bare raise keeps the original message and backtrace.
        raise
      end

      total_files = files_path.size
      total_successes = 0
      log_msg("#{total_files} files to convert.")
      files_path.each_with_index do |fname, i|
        log_msg(
          sprintf("[%d/%d] Converting %s...",
                  (i+1), total_files, fname)
        )
        result = parse_and_generate(fname, @my_aliases, @opts)
        # Only a literal true counts as a success.
        total_successes += 1 if result == true
      end

      delete_search_indexes()

      log_msg "Finished converting! Converted #{total_successes} files of #{total_files} total."
      puts "Minor error messages:"
      puts @@oops_messages.join("\n")
      puts "Major error messages:"
      puts @@error_messages.join("\n")
    end

    ###########
    private
    ###########

    # Returns the paths of all .htm, .html and .txt files found anywhere
    # below dir, minus the entries of BAD_DIRS. Returns [] when dir itself
    # is named ".system".
    # NOTE(review): BAD_DIRS is defined elsewhere; presumably it holds paths
    # to exclude - confirm it contains entries comparable to file paths.
    def get_all_chat_files(dir)
      return [] if File.basename(dir) == ".system"
      # recurse into each subdir
      return (Dir.glob("#{dir}/**/*.{htm,html,txt}") - BAD_DIRS)
    end
  end # END LogConverter class
end
@@ -1,102 +1,102 @@
1
1
  require 'fileutils'
2
2
 
3
3
module Pidgin2Adium
  # A holding object for the result of LogParser.parse. It makes the
  # instance variable @chat_lines available, which is an array of Message
  # subclass instances (XMLMessage, Event, etc.)
  # Here is a list of the instance variables for each class in @chat_lines:
  #
  # <b>All of these variables are read/write.</b>
  # All:: sender, time, buddy_alias
  # XMLMessage:: body
  # AutoReplyMessage:: body
  # Event:: body, event_type
  # StatusMessage:: status
  class LogFile
    include Pidgin2Adium

    # chat_lines:: array of objects responding to to_s, one per chat line
    # service:: Pidgin's service name (case-insensitive), e.g. "aim"
    # user_SN:: screen name of the local user
    # partner_SN:: screen name of the chat partner
    # adium_chat_time_start:: chat start time, used in the output file names
    def initialize(chat_lines, service, user_SN, partner_SN, adium_chat_time_start)
      @chat_lines = chat_lines
      @user_SN = user_SN
      @partner_SN = partner_SN
      @adium_chat_time_start = adium_chat_time_start

      # @chat_str is generated when to_s is called
      @chat_str = nil

      # key is for Pidgin, value is for Adium
      # Just used for <service>.<screenname> in directory structure
      service_name_map = {'aim' => 'AIM',
                          'jabber' => 'Jabber',
                          'gtalk' => 'GTalk',
                          'icq' => 'ICQ',
                          'qq' => 'QQ',
                          'msn' => 'MSN',
                          'yahoo' => 'Yahoo!'}

      # NOTE(review): a service missing from the map leaves @service nil.
      @service = service_name_map[service.downcase]
    end

    attr_reader :chat_lines, :service, :user_SN, :partner_SN, :adium_chat_time_start

    # Returns contents of log file: every chat line's to_s, concatenated.
    # The result is built once and cached.
    def to_s
      if @chat_str.nil?
        # Faster than inject() or each()
        @chat_str = @chat_lines.map{|l| l.to_s }.join
      end
      return @chat_str
    end

    # Yields each chat line in order. Without a block, returns whatever
    # Array#each returns without one instead of failing on yield.
    def each(&blk)
      @chat_lines.each(&blk)
    end

    # Set overwrite=true to create a logfile even if logfile already exists.
    # Returns one of:
    # * false (if an error occurred),
    # * Pidgin2Adium::FILE_EXISTS if the file to be generated already exists and overwrite=false, or
    # * the path to the new Adium log file.
    def write_out(overwrite = false, output_dir_base = ADIUM_LOG_DIR)
      log_name = "#{@partner_SN} (#{@adium_chat_time_start})"
      # output_dir_base + "/buddyname (2009-08-04T18.38.50-0700).chatlog"
      output_dir = File.join(output_dir_base, "#{@service}.#{@user_SN}", @partner_SN, "#{log_name}.chatlog")
      # output_dir + "/buddyname (2009-08-04T18.38.50-0700).chatlog/buddyname (2009-08-04T18.38.50-0700).xml"
      output_path = output_dir + '/' + "#{log_name}.xml"
      begin
        FileUtils.mkdir_p(output_dir)
      rescue => bang
        error "Could not create destination directory for log file. (Details: #{bang.class}: #{bang.message})"
        return false
      end

      if overwrite
        unless File.exist?(output_path)
          # File doesn't exist, but maybe it does with a different
          # time zone. Check for a file that differs only in time
          # zone and, if found, change output_path to target it.
          # NOTE(review): output_path ends in ".xml", so the ".chatlog"
          # pattern below never matches, and the glob is rooted at
          # output_dir_base rather than the partner's directory; this
          # fallback looks like it can never find anything - confirm
          # the intended layout before changing it.
          maybe_matches = Dir.glob(output_dir_base + '/' << File.basename(output_path).sub(/-\d{4}\)\.chatlog$/, '') << '/*')
          unless maybe_matches.empty?
            output_path = maybe_matches[0]
          end
        end
      elsif File.exist?(output_path)
        return FILE_EXISTS
      end

      begin
        outfile = File.new(output_path, 'w')
      rescue => bang
        error "Could not open log file for writing. (Details: #{bang.class}: #{bang.message})"
        return false
      end

      begin
        # no \n before </chat> because @chat_str (from to_s) has it already
        outfile.printf('<?xml version="1.0" encoding="UTF-8" ?>'<<"\n"+
                       '<chat xmlns="http://purl.org/net/ulf/ns/0.4-02" account="%s" service="%s">'<<"\n"<<'%s</chat>',
                       @user_SN, @service, self.to_s)
      ensure
        # Release the handle even if writing fails.
        outfile.close
      end

      return output_path
    end
  end # END LogFile class
end
@@ -1,6 +1,6 @@
1
1
  # Contains the class BasicParser and its subclasses, HtmlLogParser and
2
2
  # TextFileParser, which parse the file passed into it and return a LogFile
3
- # object.
3
+ # object.
4
4
  #
5
5
  # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
6
6
  # using these classes directly.
@@ -11,607 +11,608 @@ require 'balance_tags_c'
11
11
  require 'pidgin2adium/log_file'
12
12
 
13
13
  module Pidgin2Adium
14
- # Empty class. Raise'd by LogParser if the first line of a log is not
15
- # parseable.
16
- class InvalidFirstLineError < StandardError; end
17
-
18
- # BasicParser is a base class. Its subclasses are TextLogParser and
19
- # HtmlLogParser.
20
- #
21
- # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
22
- # using this class directly.
23
- class BasicParser
24
- include Pidgin2Adium
25
- def initialize(src_path, user_aliases)
26
- @src_path = src_path
27
- # Whitespace is removed for easy matching later on.
28
- @user_aliases = user_aliases.split(',').map!{|x| x.downcase.gsub(/\s+/,'') }.uniq
29
- # @user_alias is set each time get_sender_by_alias is called. It is a non-normalized
30
- # alias.
31
- # Set an initial value just in case the first message doesn't give
32
- # us an alias.
33
- @user_alias = user_aliases.split(',')[0]
34
-
35
- @tz_offset = get_time_zone_offset()
36
-
37
- file = File.new(@src_path, 'r')
38
- @first_line = file.readline
39
- @file_content = file.read
40
- file.close
41
-
42
- # Time regexes must be set before pre_parse().
43
- # "4/18/2007 11:02:00 AM" => %w{4, 18, 2007, 11, 02, 00, AM}
44
- # ONLY used (if at all) in first line of chat ("Conversation with...at...")
45
- @time_regex_first_line = %r{^(\d{1,2})/(\d{1,2})/(\d{4}) (\d{1,2}):(\d{2}):(\d{2}) ([AP]M)$}
46
- # "2007-04-17 12:33:13" => %w{2007, 04, 17, 12, 33, 13}
47
- @time_regex = /^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})$/
48
- # sometimes a line in a chat doesn't have a full timestamp
49
- # "04:22:05 AM" => %w{04 22 05 AM}
50
- @minimal_time_regex = /^(\d{1,2}):(\d{2}):(\d{2})( [AP]M)?$/
51
-
52
- # Whether or not the first line is parseable.
53
- @first_line_is_valid = true
54
- begin
55
- @service,
56
- @user_SN,
57
- @partner_SN,
58
- # @basic_time_info is for files that only have the full
59
- # timestamp at the top; we can use it to fill in the minimal
60
- # per-line timestamps. It has only 3 elements (year, month,
61
- # dayofmonth) because you should be able to fill everything
62
- # else in. If you can't, something's wrong.
63
- @basic_time_info,
64
- # When the chat started, in Adium's format
65
- @adium_chat_time_start = pre_parse()
66
- rescue InvalidFirstLineError
67
- @first_line_is_valid = false
68
- error("Failed to parse, invalid first line: #{@src_path}")
69
- return # stop processing
70
- end
71
-
72
- # @status_map, @lib_purple_events, and @events are used in
73
- # create_status_or_event_msg
74
- @status_map = {
75
- /(.+) logged in\.$/ => 'online',
76
- /(.+) logged out\.$/ => 'offline',
77
- /(.+) has signed on\.$/ => 'online',
78
- /(.+) has signed off\.$/ => 'offline',
79
- /(.+) has gone away\.$/ => 'away',
80
- /(.+) is no longer away\.$/ => 'available',
81
- /(.+) has become idle\.$/ => 'idle',
82
- /(.+) is no longer idle\.$/ => 'available'
83
- }
84
-
85
- # lib_purple_events are all of event_type libPurple
86
- @lib_purple_events = [
87
- # file transfer
88
- /Starting transfer of .+ from (.+)/,
89
- /^Offering to send .+ to (.+)$/,
90
- /(.+) is offering to send file/,
91
- /^Transfer of file .+ complete$/,
92
- /Error reading|writing|accessing .+: .+/,
93
- /You cancell?ed the transfer of/,
94
- /File transfer cancelled/,
95
- /(.+?) cancell?ed the transfer of/,
96
- /(.+?) cancelled the file transfer/,
97
- # Direct IM - actual (dis)connect events are their own types
98
- /^Attempting to connect to (.+) at .+ for Direct IM\./,
99
- /^Asking (.+) to connect to us at .+ for Direct IM\./,
100
- /^Attempting to connect via proxy server\.$/,
101
- /^Direct IM with (.+) failed/,
102
- # encryption
103
- /Received message encrypted with wrong key/,
104
- /^Requesting key\.\.\.$/,
105
- /^Outgoing message lost\.$/,
106
- /^Conflicting Key Received!$/,
107
- /^Error in decryption- asking for resend\.\.\.$/,
108
- /^Making new key pair\.\.\.$/,
109
- # sending errors
110
- /^Last outgoing message not received properly- resetting$/,
111
- /Resending\.\.\./,
112
- # connection errors
113
- /Lost connection with the remote user:.+/,
114
- # chats
115
- /^.+ entered the room\.$/,
116
- /^.+ left the room\.$/
117
- ]
118
-
119
- # non-libpurple events
120
- # Each key maps to an event_type string. The keys will be matched against a line of chat
121
- # and the partner's alias will be in regex group 1, IF the alias is matched.
122
- @event_map = {
123
- # .+ is not an alias, it's a proxy server so no grouping
124
- /^Attempting to connect to .+\.$/ => 'direct-im-connect',
125
- # NB: pidgin doesn't track when Direct IM is disconnected, AFAIK
126
- /^Direct IM established$/ => 'directIMConnected',
127
- /Unable to send message/ => 'chat-error',
128
- /You missed .+ messages from (.+) because they were too large/ => 'chat-error',
129
- /User information not available/ => 'chat-error'
130
- }
131
-
132
- @ignore_events = [
133
- # Adium ignores SN/alias changes.
134
- /^.+? is now known as .+?\.<br\/?>$/
135
- ]
136
- end
137
-
138
- # This method returns a LogFile instance, or false if an error occurred.
139
- def parse
140
- return false unless @first_line_is_valid
141
- @file_content = cleanup(@file_content).split("\n")
142
-
143
- @file_content.map! do |line|
144
- # "next" returns nil which is removed by compact
145
- next if line =~ /^\s+$/
146
- if line =~ @line_regex
147
- create_msg($~.captures)
148
- elsif line =~ @line_regex_status
149
- msg = create_status_or_event_msg($~.captures)
150
- # Error occurred while parsing
151
- return false if msg == false
152
- else
153
- error "Could not parse line:"
154
- p line
155
- return false
156
- end
157
- end
158
- @file_content.compact!
159
- return LogFile.new(@file_content, @service, @user_SN, @partner_SN, @adium_chat_time_start)
160
- end
161
- # Prevent parse from being called directly from BasicParser, since
162
- # it uses subclassing magic.
163
- protected :parse
164
-
165
- #################
166
- private
167
- #################
168
-
169
- def get_time_zone_offset()
170
- # We must have a tz_offset or else the Adium Chat Log viewer
171
- # doesn't read the date correctly and then:
172
- # 1) the log has an empty start date column in the viewer
173
- # 2) The timestamps are all the same for the whole log
174
- tz_match = /([-\+]\d+)[A-Z]{3}\.(?:txt|htm|html)/.match(@src_path)
175
- if tz_match and tz_match[1]
176
- tz_offset = tz_match[1]
177
- else
178
- # "-0500" (3d rather than 2d to allow for "+")
179
- tz_offset = sprintf('%+03d00', Time.zone_offset(Time.now.zone) / 3600)
180
- end
181
- return tz_offset
182
- end
183
-
184
- #--
185
- # Adium time format: YYYY-MM-DD\THH:MM:SS[+-]TZ_HRS like:
186
- # 2008-10-05T22:26:20-0800
187
- # HOWEVER:
188
- # If it's the first line, then return it like this (note periods):
189
- # 2008-10-05T22.26.20-0800
190
- # because it will be used in the filename.
191
- #++
192
- # Converts a pidgin datestamp to an Adium one.
193
- def create_adium_time(time, is_first_line = false)
194
- # parsed_date = [year, month, day, hour, min, sec]
195
- if time =~ @time_regex
196
- year, month, day, hour, min, sec = $1.to_i,
197
- $2.to_i,
198
- $3.to_i,
199
- $4.to_i,
200
- $5.to_i,
201
- $6.to_i
202
- elsif is_first_line and time =~ @time_regex_first_line
203
- hour = $4.to_i
204
- if $7 == 'PM' and hour != 12
205
- hour += 12
206
- end
207
- year, month, day, min, sec = $3.to_i, # year
208
- $1.to_i, # month
209
- $2.to_i, # day
210
- # already did hour
211
- $5.to_i, # minutes
212
- $6.to_i # seconds
213
- elsif time =~ @minimal_time_regex
214
- # "04:22:05" => %w{04 22 05}
215
- hour = $1.to_i
216
- if $4 == 'PM' and hour != 12
217
- hour += 12
218
- end
219
- year, month, day = @basic_time_info
220
- min = $2.to_i
221
- sec = $3.to_i
222
- else
223
- error("You have found an odd timestamp. Please report it to the developer.")
224
- log_msg("The timestamp: #{time}")
225
- log_msg("Continuing...")
226
- year,month,day,hour,min,sec = ParseDate.parsedate(time)
227
- end
228
- if is_first_line
229
- adium_time = Time.local(year,month,day,hour,min,sec).strftime("%Y-%m-%dT%H.%M.%S#{@tz_offset}")
230
- else
231
- adium_time = Time.local(year,month,day,hour,min,sec).strftime("%Y-%m-%dT%H:%M:%S#{@tz_offset}")
232
- end
233
- return adium_time
234
- end
235
-
236
- # Extract required data from the file. Run by parse.
237
- def pre_parse
238
- # Deal with first line.
239
-
240
- # the first line is special. It tells us (in order of regex groups):
241
- # 1) who we're talking to
242
- # 2) what time/date
243
- # 3) what SN we used
244
- # 4) what protocol (AIM, icq, jabber...)
245
- first_line_match = /Conversation with (.+?) at (.+?) on (.+?) \((.+?)\)/.match(@first_line)
246
- if first_line_match.nil?
247
- raise InvalidFirstLineError
248
- else
249
- service = first_line_match[4]
250
- # @user_SN is normalized to avoid "AIM.name" and "AIM.na me" folders
251
- user_SN = first_line_match[3].downcase.tr(' ', '')
252
- partner_SN = first_line_match[1]
253
- pidgin_chat_time_start = first_line_match[2]
254
- basic_time_info = case pidgin_chat_time_start
255
- when @time_regex: [$1.to_i, $2.to_i, $3.to_i]
256
- when @time_regex_first_line: [$3.to_i, $1.to_i, $2.to_i]
257
- end
258
- adium_chat_time_start = create_adium_time(pidgin_chat_time_start, true)
259
- return [service,
260
- user_SN,
261
- partner_SN,
262
- basic_time_info,
263
- adium_chat_time_start]
264
- end
265
- end
266
-
267
- def get_sender_by_alias(alias_name)
268
- no_action = alias_name.sub(/^\*{3}/, '')
269
- if @user_aliases.include? no_action.downcase.gsub(/\s+/, '')
270
- # Set the current alias being used of the ones in @user_aliases
271
- @user_alias = no_action
272
- return @user_SN
273
- else
274
- return @partner_SN
275
- end
276
- end
277
-
278
- #--
279
- # create_msg takes an array of captures from matching against
280
- # @line_regex and returns a Message object or one of its subclasses.
281
- # It can be used for TextLogParser and HtmlLogParser because both of
282
- # them return data in the same indexes in the matches array.
283
- #++
284
- def create_msg(matches)
285
- msg = nil
286
- # Either a regular message line or an auto-reply/away message.
287
- time = create_adium_time(matches[0])
288
- buddy_alias = matches[1]
289
- sender = get_sender_by_alias(buddy_alias)
290
- body = matches[3]
291
- if matches[2] # auto-reply
292
- msg = AutoReplyMessage.new(sender, time, buddy_alias, body)
293
- else
294
- # normal message
295
- msg = XMLMessage.new(sender, time, buddy_alias, body)
296
- end
297
- return msg
298
- end
299
-
300
- #--
301
- # create_status_or_event_msg takes an array of +MatchData+ captures from
302
- # matching against @line_regex_status and returns an Event or Status.
303
- # Returns nil if it's a message that should be ignored, or false if an
304
- # error occurred.
305
- #++
306
- def create_status_or_event_msg(matches)
307
- # ["22:58:00", "BuddyName logged in."]
308
- # 0: time
309
- # 1: status message or event
310
- msg = nil
311
- time = create_adium_time(matches[0])
312
- str = matches[1]
313
- # Return nil, which will get compact'ed out
314
- return nil if @ignore_events.detect{|regex| str =~ regex }
315
-
316
- regex, status = @status_map.detect{|regex, status| str =~ regex}
317
- if regex and status
318
- # Status message
319
- buddy_alias = regex.match(str)[1]
320
- sender = get_sender_by_alias(buddy_alias)
321
- msg = StatusMessage.new(sender, time, buddy_alias, status)
322
- else
323
- # Test for event
324
- regex = @lib_purple_events.detect{|regex| str =~ regex }
325
- event_type = 'libpurpleEvent' if regex
326
- unless regex and event_type
327
- # not a libpurple event, try others
328
- if @event_map.detect{|regex,event_type| str =~ regex}
329
- regex, event_type = $1, $2
330
- else
331
- error(sprintf("Error parsing status or event message, no status or event found: %p", str))
332
- return false
333
- end
334
- end
335
- if regex and event_type
336
- regex_matches = regex.match(str)
337
- # Event message
338
- if regex_matches.size == 1
339
- # No alias - this means it's the user
340
- buddy_alias = @user_alias
341
- sender = @user_SN
342
- else
343
- buddy_alias = regex_matches[1]
344
- sender = get_sender_by_alias(buddy_alias)
345
- end
346
- msg = Event.new(sender, time, buddy_alias, str, event_type)
347
- end
348
- end
349
- return msg
350
- end
14
+ # Empty class. Raise'd by LogParser if the first line of a log is not
15
+ # parseable.
16
+ class InvalidFirstLineError < StandardError; end
17
+
18
+ # BasicParser is a base class. Its subclasses are TextLogParser and
19
+ # HtmlLogParser.
20
+ #
21
+ # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
22
+ # using this class directly.
23
+ class BasicParser
24
+ include Pidgin2Adium
25
+ def initialize(src_path, user_aliases)
26
+ @src_path = src_path
27
+ # Whitespace is removed for easy matching later on.
28
+ @user_aliases = user_aliases.split(',').map!{|x| x.downcase.gsub(/\s+/,'') }.uniq
29
+ # @user_alias is set each time get_sender_by_alias is called. It is a non-normalized
30
+ # alias.
31
+ # Set an initial value just in case the first message doesn't give
32
+ # us an alias.
33
+ @user_alias = user_aliases.split(',')[0]
34
+
35
+ @tz_offset = get_time_zone_offset()
36
+
37
+ file = File.new(@src_path, 'r')
38
+ @first_line = file.readline
39
+ @file_content = file.read
40
+ file.close
41
+
42
+ # Time regexes must be set before pre_parse().
43
+ # "4/18/2007 11:02:00 AM" => %w{4, 18, 2007, 11, 02, 00, AM}
44
+ # ONLY used (if at all) in first line of chat ("Conversation with...at...")
45
+ @time_regex_first_line = %r{^(\d{1,2})/(\d{1,2})/(\d{4}) (\d{1,2}):(\d{2}):(\d{2}) ([AP]M)$}
46
+ # "2007-04-17 12:33:13" => %w{2007, 04, 17, 12, 33, 13}
47
+ @time_regex = /^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})$/
48
+ # sometimes a line in a chat doesn't have a full timestamp
49
+ # "04:22:05 AM" => %w{04 22 05 AM}
50
+ @minimal_time_regex = /^(\d{1,2}):(\d{2}):(\d{2})( [AP]M)?$/
51
+
52
+ # Whether or not the first line is parseable.
53
+ @first_line_is_valid = true
54
+ begin
55
+ @service,
56
+ @user_SN,
57
+ @partner_SN,
58
+ # @basic_time_info is for files that only have the full
59
+ # timestamp at the top; we can use it to fill in the minimal
60
+ # per-line timestamps. It has only 3 elements (year, month,
61
+ # dayofmonth) because you should be able to fill everything
62
+ # else in. If you can't, something's wrong.
63
+ @basic_time_info,
64
+ # When the chat started, in Adium's format
65
+ @adium_chat_time_start = pre_parse()
66
+ rescue InvalidFirstLineError
67
+ @first_line_is_valid = false
68
+ error("Failed to parse, invalid first line: #{@src_path}")
69
+ return # stop processing
70
+ end
71
+
72
+ # @status_map, @lib_purple_events, and @events are used in
73
+ # create_status_or_event_msg
74
+ @status_map = {
75
+ /(.+) logged in\.$/ => 'online',
76
+ /(.+) logged out\.$/ => 'offline',
77
+ /(.+) has signed on\.$/ => 'online',
78
+ /(.+) has signed off\.$/ => 'offline',
79
+ /(.+) has gone away\.$/ => 'away',
80
+ /(.+) is no longer away\.$/ => 'available',
81
+ /(.+) has become idle\.$/ => 'idle',
82
+ /(.+) is no longer idle\.$/ => 'available'
83
+ }
84
+
85
+ # lib_purple_events are all of event_type libPurple
86
+ @lib_purple_events = [
87
+ # file transfer
88
+ /Starting transfer of .+ from (.+)/,
89
+ /^Offering to send .+ to (.+)$/,
90
+ /(.+) is offering to send file/,
91
+ /^Transfer of file .+ complete$/,
92
+ /Error reading|writing|accessing .+: .+/,
93
+ /You cancell?ed the transfer of/,
94
+ /File transfer cancelled/,
95
+ /(.+?) cancell?ed the transfer of/,
96
+ /(.+?) cancelled the file transfer/,
97
+ # Direct IM - actual (dis)connect events are their own types
98
+ /^Attempting to connect to (.+) at .+ for Direct IM\./,
99
+ /^Asking (.+) to connect to us at .+ for Direct IM\./,
100
+ /^Attempting to connect via proxy server\.$/,
101
+ /^Direct IM with (.+) failed/,
102
+ # encryption
103
+ /Received message encrypted with wrong key/,
104
+ /^Requesting key\.\.\.$/,
105
+ /^Outgoing message lost\.$/,
106
+ /^Conflicting Key Received!$/,
107
+ /^Error in decryption- asking for resend\.\.\.$/,
108
+ /^Making new key pair\.\.\.$/,
109
+ # sending errors
110
+ /^Last outgoing message not received properly- resetting$/,
111
+ /Resending\.\.\./,
112
+ # connection errors
113
+ /Lost connection with the remote user:.+/,
114
+ # chats
115
+ /^.+ entered the room\.$/,
116
+ /^.+ left the room\.$/
117
+ ]
118
+
119
+ # non-libpurple events
120
+ # Each key maps to an event_type string. The keys will be matched against a line of chat
121
+ # and the partner's alias will be in regex group 1, IF the alias is matched.
122
+ @event_map = {
123
+ # .+ is not an alias, it's a proxy server so no grouping
124
+ /^Attempting to connect to .+\.$/ => 'direct-im-connect',
125
+ # NB: pidgin doesn't track when Direct IM is disconnected, AFAIK
126
+ /^Direct IM established$/ => 'directIMConnected',
127
+ /Unable to send message/ => 'chat-error',
128
+ /You missed .+ messages from (.+) because they were too large/ => 'chat-error',
129
+ /User information not available/ => 'chat-error'
130
+ }
131
+
132
+ @ignore_events = [
133
+ # Adium ignores SN/alias changes.
134
+ /^.+? is now known as .+?\.<br\/?>$/
135
+ ]
351
136
  end
352
137
 
353
- # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
354
- # using this class directly.
355
- class TextLogParser < BasicParser
356
- def initialize(src_path, user_aliases)
357
- super(src_path, user_aliases)
358
- @timestamp_rx = '\((\d{1,2}:\d{1,2}:\d{1,2})\)'
359
-
360
- # @line_regex matches a line in a TXT log file other than the first
361
- # @line_regex matchdata:
362
- # 0: timestamp
363
- # 1: screen name or alias, if alias set
364
- # 2: "<AUTO-REPLY>" or nil
365
- # 3: message body
366
- @line_regex = /#{@timestamp_rx} (.*?) ?(<AUTO-REPLY>)?: (.*)/o
367
- # @line_regex_status matches a status line
368
- # @line_regex_status matchdata:
369
- # 0: timestamp
370
- # 1: status message
371
- @line_regex_status = /#{@timestamp_rx} ([^:]+)/o
372
- end
373
-
374
- public :parse
375
-
376
- #################
377
- private
378
- #################
379
-
380
- def cleanup(text)
381
- text.tr!("\r", '')
382
- # Replace newlines with "<br/>" unless they end a chat line.
383
- text.gsub!(/\n(?!#{@timestamp_rx}|\Z)/, '<br/>')
384
- # Escape entities since this will be in XML
385
- text.gsub!('&', '&amp;') # escape '&' first
386
- text.gsub!('<', '&lt;')
387
- text.gsub!('>', '&gt;')
388
- text.gsub!('"', '&quot;')
389
- text.gsub!("'", '&apos;')
390
- return text
391
- end
138
# Converts the raw log text into a LogFile.
#
# The text is passed through the subclass's cleanup method, split on
# newlines, and every line is turned into a message object by create_msg
# (regular chat lines) or create_status_or_event_msg (status/event lines).
#
# Returns a LogFile instance, or false if the first line was invalid, a
# status/event line could not be interpreted, or a line matched neither
# @line_regex nor @line_regex_status.
def parse
  return false unless @first_line_is_valid

  @file_content = cleanup(@file_content).split("\n")

  @file_content.map! do |line|
    # "next" yields nil, which compact! removes afterwards
    next if line =~ /^\s+$/

    if line =~ @line_regex
      create_msg($~.captures)
    elsif line =~ @line_regex_status
      msg = create_status_or_event_msg($~.captures)
      # Error occurred while parsing
      return false if msg == false
      # The modifier-if above evaluates to nil, so the message has to be
      # named explicitly as the block's value; otherwise every status and
      # event line would be compacted away. nil (an ignored event) is
      # still dropped by compact!.
      msg
    else
      error "Could not parse line:"
      p line
      return false
    end
  end
  @file_content.compact!
  return LogFile.new(@file_content, @service, @user_SN, @partner_SN, @adium_chat_time_start)
end
161
+ # Prevent parse from being called directly from BasicParser, since
162
+ # it uses subclassing magic.
163
+ protected :parse
164
+
165
+ #################
166
+ private
167
+ #################
168
+
169
# Returns the UTC offset to append to timestamps, as a string such as
# "-0500".
#
# We must have an offset or else the Adium Chat Log viewer doesn't read
# the date correctly and then:
#   1) the log has an empty start date column in the viewer
#   2) the timestamps are all the same for the whole log
#
# The offset embedded in the log's file name (e.g. "...-0500EST.html") is
# preferred. Otherwise the local zone's current offset is used.
def get_time_zone_offset()
  tz_match = /([-\+]\d+)[A-Z]{3}\.(?:txt|htm|html)/.match(@src_path)
  return tz_match[1] if tz_match

  # strftime('%z') yields the signed "+hhmm" form directly. Unlike dividing
  # the offset in seconds by 3600 it keeps half-hour zones intact, and it
  # does not depend on Time.zone_offset recognising the zone abbreviation.
  return Time.now.strftime('%z')
end
183
+
184
#--
# Adium time format: YYYY-MM-DD\THH:MM:SS[+-]TZ_HRS like:
#      2008-10-05T22:26:20-0800
# HOWEVER:
# If it's the first line, then return it like this (note periods):
#      2008-10-05T22.26.20-0800
# because it will be used in the filename.
#++
# Converts a pidgin datestamp to an Adium one.
#
# _time_ may be a full timestamp ("2007-04-17 12:33:13"), the first-line
# form ("4/18/2007 11:02:00 AM", only tried when _is_first_line_ is true)
# or a bare time of day ("04:22:05 PM"), in which case the date comes from
# @basic_time_info. Anything else is reported through error/log_msg and
# handed to Date._parse as a last resort.
#
# Returns the timestamp as a String with @tz_offset appended.
def create_adium_time(time, is_first_line = false)
  if time =~ @time_regex
    year, month, day, hour, min, sec = $~.captures.map { |part| part.to_i }
  elsif is_first_line && time =~ @time_regex_first_line
    meridiem = $7
    month, day, year, hour, min, sec = $~.captures[0, 6].map { |part| part.to_i }
    hour = hour_in_24h_clock(hour, meridiem)
  elsif time =~ @minimal_time_regex
    # "04:22:05 PM" => 04, 22, 05 and " PM" (the capture keeps its space)
    meridiem = $4
    hour, min, sec = $~.captures[0, 3].map { |part| part.to_i }
    hour = hour_in_24h_clock(hour, meridiem)
    year, month, day = @basic_time_info
  else
    error("You have found an odd timestamp. Please report it to the developer.")
    log_msg("The timestamp: #{time}")
    log_msg("Continuing...")
    # ParseDate was removed from the standard library in Ruby 1.9; it was
    # a thin wrapper around exactly this Date._parse call.
    require 'date'
    year, month, day, hour, min, sec =
      Date._parse(time, false).values_at(:year, :mon, :mday, :hour, :min, :sec)
  end

  pattern = is_first_line ? "%Y-%m-%dT%H.%M.%S" : "%Y-%m-%dT%H:%M:%S"
  # The offset is appended rather than interpolated into the pattern so
  # strftime never interprets it as a format directive.
  return Time.local(year, month, day, hour, min, sec).strftime(pattern) + @tz_offset.to_s
end

# Converts a 12-hour clock hour to a 24-hour one. _meridiem_ may be nil
# (hour is returned untouched) and may carry surrounding whitespace.
def hour_in_24h_clock(hour, meridiem)
  case meridiem.to_s.strip
  when 'PM' then hour < 12 ? hour + 12 : hour
  when 'AM' then hour == 12 ? 0 : hour
  else hour
  end
end
235
+
236
# Pulls the chat metadata out of the log's first line.
#
# The first line is special. Its regex groups are, in order:
#   1) who we're talking to
#   2) the time/date the chat started
#   3) the screen name we used
#   4) the protocol (AIM, icq, jabber...)
#
# Returns [service, user_SN, partner_SN, basic_time_info,
# adium_chat_time_start]; raises InvalidFirstLineError when the line does
# not have the expected shape.
def pre_parse
  header = /Conversation with (.+?) at (.+?) on (.+?) \((.+?)\)/.match(@first_line)
  raise InvalidFirstLineError if header.nil?

  partner, started_at, own_name, protocol = header.captures

  # The user's screen name is normalized to avoid "AIM.name" and
  # "AIM.na me" folders.
  normalized_user = own_name.downcase.tr(' ', '')

  # [year, month, day] of the chat start; nil if neither format matches.
  date_parts = case started_at
               when @time_regex
                 [$1.to_i, $2.to_i, $3.to_i]
               when @time_regex_first_line
                 [$3.to_i, $1.to_i, $2.to_i]
               end

  chat_start = create_adium_time(started_at, true)
  [protocol, normalized_user, partner, date_parts, chat_start]
end
266
+
267
# Maps an alias seen in the log to a screen name.
#
# A leading "***" is stripped before the lookup. The remainder is compared,
# lowercased and with whitespace removed, against @user_aliases. On a hit
# the stripped alias is remembered in @user_alias and @user_SN is
# returned; any other alias yields @partner_SN.
def get_sender_by_alias(alias_name)
  plain_alias = alias_name.sub(/^\*{3}/, '')
  lookup_key = plain_alias.downcase.gsub(/\s+/, '')
  return @partner_SN unless @user_aliases.include?(lookup_key)

  # Set the current alias being used of the ones in @user_aliases
  @user_alias = plain_alias
  return @user_SN
end
277
+
278
#--
# create_msg takes an array of captures from matching against
# @line_regex and returns a Message object or one of its subclasses.
# It can be used for TextLogParser and HtmlLogParser because both of
# them return data in the same indexes in the matches array.
#++
# Builds the message object for a regular chat line.
#
# _matches_ holds, in order: the timestamp, the alias, the auto-reply
# marker (nil for a normal message) and the message body.
def create_msg(matches)
  timestamp, buddy_alias, auto_reply_marker, body = matches[0], matches[1], matches[2], matches[3]
  time = create_adium_time(timestamp)
  sender = get_sender_by_alias(buddy_alias)
  # An auto-reply/away message and a normal one differ only in their class.
  message_class = auto_reply_marker ? AutoReplyMessage : XMLMessage
  return message_class.new(sender, time, buddy_alias, body)
end
299
+
300
#--
# create_status_or_event_msg takes an array of +MatchData+ captures from
# matching against @line_regex_status and returns an Event or Status.
# Returns nil if it's a message that should be ignored, or false if an
# error occurred.
#++
# Builds the object for a status or event line.
#
# _matches_ looks like ["22:58:00", "BuddyName logged in."]:
#   0: time
#   1: status message or event
#
# The text is checked against @ignore_events, then @status_map, then
# @lib_purple_events and finally @event_map; the first hit decides the
# result.
def create_status_or_event_msg(matches)
  time = create_adium_time(matches[0])
  str = matches[1]
  # Return nil, which will get compact'ed out
  return nil if @ignore_events.detect { |ignored| str =~ ignored }

  regex, status = @status_map.detect { |pattern, _status| str =~ pattern }
  if regex and status
    # Status message
    buddy_alias = regex.match(str)[1]
    sender = get_sender_by_alias(buddy_alias)
    return StatusMessage.new(sender, time, buddy_alias, status)
  end

  # Test for event
  regex = @lib_purple_events.detect { |pattern| str =~ pattern }
  if regex
    event_type = 'libpurpleEvent'
  else
    # Not a libpurple event, try the others. detect hands back the matching
    # [regex, event_type] pair itself (or nil); $1/$2 would only be the
    # capture groups of the last regex match, not the pair.
    regex, event_type = @event_map.detect { |pattern, _type| str =~ pattern }
  end

  unless regex and event_type
    error(sprintf("Error parsing status or event message, no status or event found: %p", str))
    return false
  end

  # Event message
  regex_matches = regex.match(str)
  if regex_matches.size == 1
    # No alias - this means it's the user
    buddy_alias = @user_alias
    sender = @user_SN
  else
    buddy_alias = regex_matches[1]
    sender = get_sender_by_alias(buddy_alias)
  end
  return Event.new(sender, time, buddy_alias, str, event_type)
end
351
+ end # END BasicParser class
352
+
353
+ # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
354
+ # using this class directly.
355
+ class TextLogParser < BasicParser
356
# Sets up the patterns used to split a TXT log into lines.
#
# Both arguments are forwarded untouched to the superclass initializer.
def initialize(src_path, user_aliases)
  super

  # A parenthesized time of day, e.g. "(22:58:00)"; the time is captured.
  @timestamp_rx = '\((\d{1,2}:\d{1,2}:\d{1,2})\)'

  # Matches every chat line of a TXT log except the first.
  # Captures:
  #   0: timestamp
  #   1: screen name or alias, if alias set
  #   2: "<AUTO-REPLY>" or nil
  #   3: message body
  @line_regex = /#{@timestamp_rx} (.*?) ?(<AUTO-REPLY>)?: (.*)/o

  # Matches a status line.
  # Captures:
  #   0: timestamp
  #   1: status message
  @line_regex_status = /#{@timestamp_rx} ([^:]+)/o
end
393
374
 
394
- # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead
395
- # of using this class directly.
396
- class HtmlLogParser < BasicParser
397
- def initialize(src_path, user_aliases)
398
- super(src_path, user_aliases)
399
- @timestamp_rx = '\(((?:\d{4}-\d{2}-\d{2} )?\d{1,2}:\d{1,2}:\d{1,2}(?: [AP]M)?)\)'
400
-
401
- # @line_regex matches a line in an HTML log file other than the
402
- # first time matches on either "2008-11-17 14:12" or "14:12"
403
- # @line_regex match obj:
404
- # 0: timestamp, extended or not
405
- # 1: screen name or alias, if alias set
406
- # 2: "&lt;AUTO-REPLY&gt;" or nil
407
- # 3: message body
408
- # The ":" is optional to allow for strings like "(17:12:21) <b>***Gabe B-W</b> is confused<br/>"
409
- @line_regex = /#{@timestamp_rx} ?<b>(.+?) ?(&lt;AUTO-REPLY&gt;)?:?<\/b> ?(.+)<br ?\/>/o
410
- # @line_regex_status matches a status line
411
- # @line_regex_status match obj:
412
- # 0: timestamp
413
- # 1: status message
414
- @line_regex_status = /#{@timestamp_rx} ?<b> (.+)<\/b><br ?\/>/o
415
- end
416
-
417
- public :parse
418
-
419
- #################
420
- private
421
- #################
422
-
423
- # Returns a cleaned string.
424
- # Removes the following tags from _text_:
425
- # * html
426
- # * body
427
- # * font
428
- # * a with no innertext, e.g. <a href="blah"></a>
429
- # And removes the following style declarations:
430
- # * color: #000000 (just turns text black)
431
- # * font-family
432
- # * font-size
433
- # * background
434
- # * em (really it's changed to <span style="font-style: italic;">)
435
- # Since each <span> has only one style declaration, spans with these
436
- # declarations are removed (but the text inside them is preserved).
437
- def cleanup(text)
438
- # Sometimes this is in there. I don't know why.
439
- text.gsub!(%r{&lt;/FONT HSPACE='\d'>}, '')
440
- # We can remove <font> safely since Pidgin and Adium both show bold
441
- # using <span style="font-weight: bold;"> except Pidgin uses single
442
- # quotes while Adium uses double quotes.
443
- text.gsub!(/<\/?(?:html|body|font)(?: .+?)?>/, '') # very important!
444
-
445
- text.tr!("\r", '')
446
- # Remove empty lines
447
- text.gsub!("\n\n", "\n")
448
-
449
- # Remove newlines that end the file, since they screw up the
450
- # newline -> <br/> conversion
451
- text.gsub!(/\n\Z/, '')
452
-
453
- # Replace newlines with "<br/>" unless they end a chat line.
454
- # This must go after we remove <font> tags.
455
- text.gsub!(/\n(?!#{@timestamp_rx})/, '<br/>')
456
-
457
- # These empty links are sometimes appended to every line in a chat,
458
- # for some weird reason. Remove them.
459
- text.gsub!(%r{<a href=('").+?\1>\s*?</a>}, '')
460
-
461
- # Replace single quotes inside tags with double quotes so we can
462
- # easily change single quotes to entities.
463
- # For spans, removes a space after the final declaration if it exists.
464
- text.gsub!(/<span style='([^']+?;) ?'>/, '<span style="\1">')
465
- text.gsub!(/([a-z]+=)'(.+?)'/, '\1"\2"')
375
+ public :parse
376
+
377
+ #################
378
+ private
379
+ #################
380
+
381
# Prepares the raw text of a TXT log for conversion to XML.
#
# Carriage returns are dropped, the five XML special characters are
# escaped, and newlines inside a multi-line message become "<br/>".
# _text_ is modified in place and also returned.
def cleanup(text)
  text.tr!("\r", '')
  # Escape entities since this will be in XML
  text.gsub!('&', '&amp;') # escape '&' first
  text.gsub!('<', '&lt;')
  text.gsub!('>', '&gt;')
  text.gsub!('"', '&quot;')
  text.gsub!("'", '&apos;')
  # Replace newlines with "<br/>" unless they end a chat line. This has to
  # come after the escaping above, or the freshly inserted tag would itself
  # be turned into "&lt;br/&gt;".
  text.gsub!(/\n(?!#{@timestamp_rx}|\Z)/, '<br/>')
  return text
end
393
+ end
394
+
395
+ # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead
396
+ # of using this class directly.
397
+ class HtmlLogParser < BasicParser
398
# Sets up the patterns used to split an HTML log into lines.
#
# Both arguments are forwarded untouched to the superclass initializer.
def initialize(src_path, user_aliases)
  super

  # A parenthesized timestamp. The date part and the AM/PM suffix are
  # optional, so both "(2008-11-17 14:12:21)" and "(14:12:21)" match; the
  # text between the parentheses is captured.
  @timestamp_rx = '\(((?:\d{4}-\d{2}-\d{2} )?\d{1,2}:\d{1,2}:\d{1,2}(?: [AP]M)?)\)'

  # Matches every chat line of an HTML log except the first.
  # Captures:
  #   0: timestamp, extended or not
  #   1: screen name or alias, if alias set
  #   2: "&lt;AUTO-REPLY&gt;" or nil
  #   3: message body
  # The ":" is optional to allow for strings like
  # "(17:12:21) <b>***Gabe B-W</b> is confused<br/>"
  @line_regex = /#{@timestamp_rx} ?<b>(.+?) ?(&lt;AUTO-REPLY&gt;)?:?<\/b> ?(.+)<br ?\/>/o

  # Matches a status line.
  # Captures:
  #   0: timestamp
  #   1: status message
  @line_regex_status = /#{@timestamp_rx} ?<b> (.+)<\/b><br ?\/>/o
end
417
+
418
+ public :parse
419
+
420
+ #################
421
+ private
422
+ #################
423
+
424
# Returns a cleaned string. _text_ is modified in place.
# Removes the following tags from _text_:
# * html
# * body
# * font
# * a with no innertext, e.g. <a href="blah"></a>
# And removes the following style declarations:
# * color: #000000 (just turns text black)
# * font-family
# * font-size
# * background
# Any other declaration (e.g. font-weight) is kept. <em> is changed to
# <span style="font-style: italic;">.
# A <span> whose declarations were all removed is dropped, but the text
# inside it is preserved.
def cleanup(text)
  # Sometimes this is in there. I don't know why.
  text.gsub!(%r{&lt;/FONT HSPACE='\d'>}, '')
  # We can remove <font> safely since Pidgin and Adium both show bold
  # using <span style="font-weight: bold;"> except Pidgin uses single
  # quotes while Adium uses double quotes.
  text.gsub!(/<\/?(?:html|body|font)(?: .+?)?>/, '') # very important!

  text.tr!("\r", '')
  # Remove empty lines (runs of any length, not just pairs)
  text.gsub!(/\n{2,}/, "\n")

  # Remove newlines that end the file, since they screw up the
  # newline -> <br/> conversion
  text.gsub!(/\n\Z/, '')

  # Replace newlines with "<br/>" unless they end a chat line.
  # This must go after we remove <font> tags.
  text.gsub!(/\n(?!#{@timestamp_rx})/, '<br/>')

  # These empty links are sometimes appended to every line in a chat,
  # for some weird reason. Remove them. The quote is a character class so
  # either quote style matches, and the href may not contain quotes or
  # angle brackets so the match cannot run on into a neighbouring link.
  text.gsub!(%r{<a href=(['"])[^'"<>]+\1>\s*</a>}, '')

  # Replace single quotes inside tags with double quotes so we can
  # easily change single quotes to entities.
  # For spans, removes a space after the final declaration if it exists.
  text.gsub!(/<span style='([^']+?;) ?'>/, '<span style="\1">')
  # Generic attribute='value' -> attribute="value"; covers href, src, alt...
  text.gsub!(/([a-z]+=)'(.+?)'/, '\1"\2"')
  text.gsub!("'", '&apos;')

  text.gsub!(%r{<span style="(.+?)">(.*?)</span>}) do |s|
    # style = style declaration
    # innertext = text inside <span>
    style, innertext = $1, $2
    # Remove empty spans.
    next '' if innertext == ''

    # TODO: replace double quotes with "&quot;", but only outside tags; may still be tags inside spans

    kept = style.split(/; ?/).map do |declaration|
      if declaration[0, 5] == 'color'
        if declaration.include?('color: #000000')
          nil
        elsif declaration =~ /(color: #[0-9a-fA-F]{6})(>.*)?/
          # Regarding the bit with the ">", sometimes this happens:
          # <span style="color: #000000>today;">today was busy</span>
          # Then the declaration is "color: #000000>today"
          # Or it can end in ">;", with no text before the semicolon.
          # So keep the color but remove the ">" and anything following it.
          $1
        end
      elsif declaration =~ /^(?:font-family|font-size|background)/
        nil
      else
        # Everything else (font-weight, font-style...) is kept. It has to be
        # named here: a branch that yields nothing would drop it as well.
        declaration
      end
    end.compact

    if kept.empty?
      innertext
    else
      "<span style=\"#{kept.join('; ')};\">#{innertext}</span>"
    end
  end

  # Pidgin uses <em>, Adium uses <span>
  if text.gsub!('<em>', '<span style="font-style: italic;">')
    text.gsub!('</em>', '</span>')
  end
  return text
end
520
+ end # END HtmlLogParser class
521
+
522
# Holds the data shared by every line of the chat and is subclassed as
# appropriate (eg AutoReplyMessage). Each subclass (but not Message
# itself) has its own to_s which prints out its information in a format
# appropriate for putting in an Adium log file.
# Subclasses: XMLMessage, AutoReplyMessage, StatusMessage, Event.
class Message
  # sender::      the sender's screen name
  # time::        the time the message was sent, in Adium format (e.g.
  #               "2008-10-05T22:26:20-0800")
  # buddy_alias:: the receiver's alias (NOT screen name)
  attr_accessor :sender, :time, :buddy_alias

  def initialize(sender, time, buddy_alias)
    @sender, @time, @buddy_alias = sender, time, buddy_alias
  end
end
539
+
540
+ # Basic message with body text (as opposed to pure status messages, which
541
+ # have no body).
542
+ class XMLMessage < Message
543
# Stores _body_, normalizes it, and wraps the result in the styled markup
# that to_s writes out. _sender_, _time_ and _buddy_alias_ are handed to
# the superclass initializer.
def initialize(sender, time, buddy_alias, body)
  super(sender, time, buddy_alias)
  @body = body
  normalize_body!()
  # Built only after normalization: String#% produces a new string, so a
  # styled body created beforehand would never see the changes
  # normalize_body! makes to @body.
  @styled_body = '<div><span style="font-family: Helvetica; font-size: 12pt;">%s</span></div>' % @body
end
549
+ attr_accessor :body
550
+
551
# Renders the message as a <message> element followed by a newline.
def to_s
  template = %(<message sender="%s" time="%s" alias="%s">%s</message>\n)
  return format(template, @sender, @time, @buddy_alias, @styled_body)
end
555
+
556
+ #################
557
+ private
558
+ #################
559
+
560
# Escapes stray entities, balances mismatched tags, and rewrites actions
# into Adium style (Pidgin uses "***Buddy waves at you", Adium uses
# "*Buddy waves at you*").
def normalize_body!
  normalize_body_entities!
  # Fix mismatched tags. Yes, it's faster to do it per-message
  # than all at once.
  @body = Pidgin2Adium.balance_tags_c(@body)

  # "***<alias>" is what pidgin sets as the alias for a /me action;
  # anything else needs no further work.
  return unless @buddy_alias[0, 3] == '***'

  @buddy_alias.slice!(0, 3)
  @body = '*' + @body + '*'
end
520
574
 
521
- # A holding object for each line of the chat. It is subclassed as
522
- # appropriate (eg AutoReplyMessage). Each subclass (but not Message
523
- # itself) has its own to_s which prints out its information in a format
524
- # appropriate for putting in an Adium log file.
525
- # Subclasses: XMLMessage, AutoReplyMessage, StatusMessage, Event.
526
- class Message
527
- def initialize(sender, time, buddy_alias)
528
- # The sender's screen name
529
- @sender = sender
530
- # The time the message was sent, in Adium format (e.g.
531
- # "2008-10-05T22:26:20-0800")
532
- @time = time
533
- # The receiver's alias (NOT screen name)
534
- @buddy_alias = buddy_alias
535
- end
536
- attr_accessor :sender, :time, :buddy_alias
575
# Escapes bare ampersands in @body, in place.
#
# '&' becomes '&amp;' unless it already starts a complete entity: one of
# the five predefined XML entities or a numeric character reference, each
# terminated by ';'. Requiring the ';' means text such as "&ampere" is
# escaped too.
def normalize_body_entities!
  @body.gsub!(/&(?!(?:lt|gt|amp|quot|apos|#\d+|#x[0-9a-fA-F]+);)/, '&amp;')
end
538
-
539
- # Basic message with body text (as opposed to pure status messages, which
540
- # have no body).
541
- class XMLMessage < Message
542
- def initialize(sender, time, buddy_alias, body)
543
- super(sender, time, buddy_alias)
544
- @body = body
545
- @styled_body = '<div><span style="font-family: Helvetica; font-size: 12pt;">%s</span></div>' % @body
546
- normalize_body!()
547
- end
548
- attr_accessor :body
549
-
550
- def to_s
551
- return sprintf('<message sender="%s" time="%s" alias="%s">%s</message>' << "\n",
552
- @sender, @time, @buddy_alias, @styled_body)
553
- end
554
-
555
- #################
556
- private
557
- #################
558
-
559
- # Balances mismatched tags, normalizes body style, and fixes actions
560
- # so they are in Adium style (Pidgin uses "***Buddy waves at you", Adium uses
561
- # "*Buddy waves at you*").
562
- def normalize_body!
563
- normalize_body_entities!()
564
- # Fix mismatched tags. Yes, it's faster to do it per-message
565
- # than all at once.
566
- @body = Pidgin2Adium.balance_tags_c(@body)
567
- if @buddy_alias[0,3] == '***'
568
- # "***<alias>" is what pidgin sets as the alias for a /me action
569
- @buddy_alias.slice!(0,3)
570
- @body = '*' << @body << '*'
571
- end
572
- end
573
-
574
- # Escapes entities.
575
- def normalize_body_entities!
576
- # Convert '&' to '&amp;' only if it's not followed by an entity.
577
- @body.gsub!(/&(?!lt|gt|amp|quot|apos)/, '&amp;')
578
- end
580
+ end # END XMLMessage
581
+
582
# An auto reply message. It is written out like an XMLMessage, with an
# additional auto="true" attribute.
class AutoReplyMessage < XMLMessage
  def to_s
    template = %(<message sender="%s" time="%s" auto="true" alias="%s">%s</message>\n)
    return format(template, @sender, @time, @buddy_alias, @styled_body)
  end
end
580
589
 
581
- # An auto reply message.
582
- class AutoReplyMessage < XMLMessage
583
- def to_s
584
- return sprintf('<message sender="%s" time="%s" auto="true" alias="%s">%s</message>' << "\n",
585
- @sender, @time, @buddy_alias, @styled_body)
586
- end
590
# A message saying e.g. "Blahblah has gone away."
class StatusMessage < Message
  # The status string written into the element's type attribute.
  attr_accessor :status

  def initialize(sender, time, buddy_alias, status)
    super(sender, time, buddy_alias)
    @status = status
  end

  # Renders the status as a self-closing <status> element followed by a
  # newline.
  def to_s
    template = %(<status type="%s" sender="%s" time="%s" alias="%s"/>\n)
    return format(template, @status, @sender, @time, @buddy_alias)
  end
end
602
+
603
# Pidgin does not have Events, but Adium does. Pidgin mostly uses system
# messages to display what Adium calls events. These include sending a file,
# starting a Direct IM connection, or an error in chat.
class Event < XMLMessage
  # The event type string written into the element's type attribute.
  attr_accessor :event_type

  def initialize(sender, time, buddy_alias, body, event_type)
    super(sender, time, buddy_alias, body)
    @event_type = event_type
  end

  # Renders the event as an <event> element. Unlike the other message
  # classes, no newline is appended.
  def to_s
    template = '<event type="%s" sender="%s" time="%s" alias="%s">%s</event>'
    return format(template, @event_type, @sender, @time, @buddy_alias, @styled_body)
  end
end
617
618
  end # end module