pidgin2adium 3.0.0 → 3.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,118 @@
1
module Pidgin2Adium
  # Balances the HTML tags of a string using a stack of currently open tags
  # and returns the balanced string.
  #
  # * Unclosed tags are closed at the end of the text.
  # * A closing tag with no matching open tag is dropped.
  # * A closing tag that matches a tag deeper in the stack first closes
  #   every tag opened after it.
  # * Known single-entity tags (br, hr, img, input, meta) are made
  #   self-closing.
  # * Tag names are downcased.
  #
  # The string passed in is NOT modified; use the return value:
  #   text = Pidgin2Adium.balance_tags(text)
  #
  # From Wordpress's formatting.php; rewritten in Ruby by Gabe
  # Berke-Williams, 2009.
  # Author:: Leonard Lin <leonard@acm.org>
  # License:: GPL v2.0
  # Copyright:: November 4, 2001
  def self.balance_tags(text)
    # Known single-entity/self-closing tags
    single_tags = %w[br hr img input meta]
    # Tags that can be immediately nested within themselves
    nestable_tags = %w[blockquote div span font]
    # 1: tagname, with possible leading "/"
    # 2: attributes
    tag_regex = /<(\/?\w*)\s*([^>]*)>/

    # WP bug fix for comments - in case you REALLY meant to type '< !--'.
    # The literal is swapped for a placeholder with extra spaces so that it
    # can be told apart from real comments, which are re-emitted as '< !--'
    # by the loop below and repaired at the end. gsub (not gsub!) is used so
    # the caller's string is left untouched.
    text = text.gsub('< !--', '<    !--')

    # WP bug fix for LOVE <3 (and other situations with '<' before a number)
    text = text.gsub(/<([0-9]{1})/, '&lt;\1')

    tagstack = [] # names of the tags that are currently open, innermost last
    newtext = ''.dup

    while (match = tag_regex.match(text))
      tag = match[1].downcase
      attributes = match[2]

      # Everything before the tag is copied through unchanged.
      newtext << match.pre_match

      if tag.start_with?('/')
        # End tag
        name = tag[1..-1]
        if tagstack.last == name
          # Closes the innermost open tag: emit it and pop.
          tagstack.pop
          newtext << "</#{name}>"
        elsif (index = tagstack.rindex(name))
          # Closes a tag further down: close everything opened after it
          # (innermost first), then the tag itself.
          tagstack.slice!(index..-1).reverse_each do |open_tag|
            newtext << "</#{open_tag}>"
          end
        end
        # Otherwise there is no matching open tag: drop the stray end tag.
      else
        # Begin tag
        if attributes.end_with?('/') || tag.empty?
          # Self-closing, or not a real tag (e.g. a comment): leave as is.
        elsif single_tags.include?(tag)
          # Known single-entity tag that doesn't close itself, so do it.
          attributes = "#{attributes}/"
        else
          # If the top of the stack is the same non-nestable tag, close the
          # previous one before opening the new one.
          if !nestable_tags.include?(tag) && tagstack.last == tag
            newtext << "</#{tagstack.pop}>"
          end
          tagstack.push(tag)
        end

        attributes = " #{attributes}" unless attributes.empty?
        newtext << "<#{tag}#{attributes}>"
      end

      text = match.post_match
    end

    # Add remaining text
    newtext << text

    # Empty the stack: close whatever is still open, innermost first.
    tagstack.reverse_each do |open_tag|
      newtext << "</#{open_tag}>"
    end

    # WP fix for the bug with HTML comments: real comments were rebuilt as
    # '< !--' above, so restore them, then turn the placeholder back into
    # the literal '< !--' the user typed.
    newtext.gsub!('< !--', '<!--')
    newtext.gsub!('<    !--', '< !--')

    newtext
  end
end
@@ -1,72 +1,72 @@
1
1
  require 'pidgin2adium'
2
2
 
3
3
  module Pidgin2Adium
4
- # An easy way to batch-process a directory. Used by the pidgin2adium
5
- # command-line script.
6
- class LogConverter
7
- include Pidgin2Adium
8
- # You can add options using the _opts_ hash, which can have the
9
- # following keys, all of which are optional:
10
- # * *overwrite*: If true, then overwrite even if log is found.
11
- # Defaults to false.
12
- # * *output_dir*: The top-level dir to put the logs in.
13
- # Logs under output_dir are still each in their own folders, etc.
14
- # Defaults to Pidgin2Adium::ADIUM_LOG_DIR
15
- def initialize(pidgin_log_dir, aliases, opts = {})
16
- # parse_and_generate will process it for us
17
- @opts = opts
4
+ # An easy way to batch-process a directory. Used by the pidgin2adium
5
+ # command-line script.
6
+ class LogConverter
7
+ include Pidgin2Adium
8
+ # You can add options using the _opts_ hash, which can have the
9
+ # following keys, all of which are optional:
10
+ # * *overwrite*: If true, then overwrite even if log is found.
11
+ # Defaults to false.
12
+ # * *output_dir*: The top-level dir to put the logs in.
13
+ # Logs under output_dir are still each in their own folders, etc.
14
+ # Defaults to Pidgin2Adium::ADIUM_LOG_DIR
15
+ def initialize(pidgin_log_dir, aliases, opts = {})
16
+ # parse_and_generate will process it for us
17
+ @opts = opts
18
18
 
19
- @pidgin_log_dir = File.expand_path(pidgin_log_dir)
20
- @my_aliases = aliases
19
+ @pidgin_log_dir = File.expand_path(pidgin_log_dir)
20
+ @my_aliases = aliases
21
21
 
22
- unless File.directory?(@pidgin_log_dir)
23
- puts "Source directory #{@pidgin_log_dir} does not exist or is not a directory."
24
- raise Errno::ENOENT
25
- end
26
- end
22
+ unless File.directory?(@pidgin_log_dir)
23
+ puts "Source directory #{@pidgin_log_dir} does not exist or is not a directory."
24
+ raise Errno::ENOENT
25
+ end
26
+ end
27
27
 
28
- # Runs Pidgin2Adium::parse_and_generate on every log file in directory
29
- # provided in new, then deletes Adium's search indexes to force
30
- # it to rescan logs on startup.
31
- def start
32
- log_msg "Begin converting."
33
- begin
34
- files_path = get_all_chat_files(@pidgin_log_dir)
35
- rescue Errno::EACCES => bang
36
- error("Sorry, permission denied for getting Pidgin chat files from #{@pidgin_log_dir}.")
37
- error("Details: #{bang.message}")
38
- raise Errno::EACCES
39
- end
28
+ # Runs Pidgin2Adium::parse_and_generate on every log file in directory
29
+ # provided in new, then deletes Adium's search indexes to force
30
+ # it to rescan logs on startup.
31
+ def start
32
+ log_msg "Begin converting."
33
+ begin
34
+ files_path = get_all_chat_files(@pidgin_log_dir)
35
+ rescue Errno::EACCES => bang
36
+ error("Sorry, permission denied for getting Pidgin chat files from #{@pidgin_log_dir}.")
37
+ error("Details: #{bang.message}")
38
+ raise Errno::EACCES
39
+ end
40
40
 
41
- total_files = files_path.size
42
- total_successes = 0
43
- log_msg("#{total_files} files to convert.")
44
- files_path.each_with_index do |fname, i|
45
- log_msg(
46
- sprintf("[%d/%d] Converting %s...",
47
- (i+1), total_files, fname)
48
- )
49
- result = parse_and_generate(fname, @my_aliases, @opts)
50
- total_successes += 1 if result == true
51
- end
41
+ total_files = files_path.size
42
+ total_successes = 0
43
+ log_msg("#{total_files} files to convert.")
44
+ files_path.each_with_index do |fname, i|
45
+ log_msg(
46
+ sprintf("[%d/%d] Converting %s...",
47
+ (i+1), total_files, fname)
48
+ )
49
+ result = parse_and_generate(fname, @my_aliases, @opts)
50
+ total_successes += 1 if result == true
51
+ end
52
52
 
53
- delete_search_indexes()
53
+ delete_search_indexes()
54
54
 
55
- log_msg "Finished converting! Converted #{total_successes} files of #{total_files} total."
56
- puts "Minor error messages:"
57
- puts @@oops_messages.join("\n")
58
- puts "Major error messages:"
59
- puts @@error_messages.join("\n")
60
- end
55
+ log_msg "Finished converting! Converted #{total_successes} files of #{total_files} total."
56
+ puts "Minor error messages:"
57
+ puts @@oops_messages.join("\n")
58
+ puts "Major error messages:"
59
+ puts @@error_messages.join("\n")
60
+ end
61
61
 
62
- ###########
63
- private
64
- ###########
62
+ ###########
63
+ private
64
+ ###########
65
65
 
66
- def get_all_chat_files(dir)
67
- return [] if File.basename(dir) == ".system"
68
- # recurse into each subdir
69
- return (Dir.glob("#{@pidgin_log_dir}/**/*.{htm,html,txt}") - BAD_DIRS)
70
- end
66
+ def get_all_chat_files(dir)
67
+ return [] if File.basename(dir) == ".system"
68
+ # recurse into each subdir
69
+ return (Dir.glob("#{@pidgin_log_dir}/**/*.{htm,html,txt}") - BAD_DIRS)
71
70
  end
71
+ end # END LogConverter class
72
72
  end
@@ -1,102 +1,102 @@
1
1
  require 'fileutils'
2
2
 
3
3
  module Pidgin2Adium
4
- # A holding object for the result of LogParser.parse. It makes the
5
- # instance variable @chat_lines available, which is an array of Message
6
- # subclass instances (XMLMessage, Event, etc.)
7
- # Here is a list of the instance variables for each class in @chat_lines:
8
- #
9
- # <b>All of these variables are read/write.</b>
10
- # All:: sender, time, buddy_alias
11
- # XMLMessage:: body
12
- # AutoReplyMessage:: body
13
- # Event:: body, event_type
14
- # StatusMessage:: status
15
- class LogFile
16
- include Pidgin2Adium
17
- def initialize(chat_lines, service, user_SN, partner_SN, adium_chat_time_start)
18
- @chat_lines = chat_lines
19
- @user_SN = user_SN
20
- @partner_SN = partner_SN
21
- @adium_chat_time_start = adium_chat_time_start
4
+ # A holding object for the result of LogParser.parse. It makes the
5
+ # instance variable @chat_lines available, which is an array of Message
6
+ # subclass instances (XMLMessage, Event, etc.)
7
+ # Here is a list of the instance variables for each class in @chat_lines:
8
+ #
9
+ # <b>All of these variables are read/write.</b>
10
+ # All:: sender, time, buddy_alias
11
+ # XMLMessage:: body
12
+ # AutoReplyMessage:: body
13
+ # Event:: body, event_type
14
+ # StatusMessage:: status
15
+ class LogFile
16
+ include Pidgin2Adium
17
+ def initialize(chat_lines, service, user_SN, partner_SN, adium_chat_time_start)
18
+ @chat_lines = chat_lines
19
+ @user_SN = user_SN
20
+ @partner_SN = partner_SN
21
+ @adium_chat_time_start = adium_chat_time_start
22
22
 
23
- # @chat_str is generated when to_s is called
24
- @chat_str = nil
25
-
26
- # key is for Pidgin, value is for Adium
27
- # Just used for <service>.<screenname> in directory structure
28
- service_name_map = {'aim' => 'AIM',
29
- 'jabber' =>'Jabber',
30
- 'gtalk'=> 'GTalk',
31
- 'icq' => 'ICQ',
32
- 'qq' => 'QQ',
33
- 'msn' => 'MSN',
34
- 'yahoo' => 'Yahoo!'}
35
-
36
- @service = service_name_map[service.downcase]
37
- end
38
-
39
- attr_reader :chat_lines, :service, :user_SN, :partner_SN, :adium_chat_time_start
23
+ # @chat_str is generated when to_s is called
24
+ @chat_str = nil
40
25
 
41
- # Returns contents of log file
42
- def to_s
43
- if @chat_str.nil?
44
- # Faster than inject() or each()
45
- @chat_str = @chat_lines.map{|l| l.to_s }.join
46
- end
47
- return @chat_str
48
- end
49
-
50
- def each(&blk)
51
- @chat_lines.each{|l| yield l }
52
- end
26
+ # key is for Pidgin, value is for Adium
27
+ # Just used for <service>.<screenname> in directory structure
28
+ service_name_map = {'aim' => 'AIM',
29
+ 'jabber' =>'Jabber',
30
+ 'gtalk'=> 'GTalk',
31
+ 'icq' => 'ICQ',
32
+ 'qq' => 'QQ',
33
+ 'msn' => 'MSN',
34
+ 'yahoo' => 'Yahoo!'}
53
35
 
54
- # Set overwrite=true to create a logfile even if logfile already exists.
55
- # Returns one of:
56
- # * false (if an error occurred),
57
- # * Pidgin2Adium::FILE_EXISTS if the file to be generated already exists and overwrite=false, or
58
- # * the path to the new Adium log file.
59
- def write_out(overwrite = false, output_dir_base = ADIUM_LOG_DIR)
60
- # output_dir_base + "/buddyname (2009-08-04T18.38.50-0700).chatlog"
61
- output_dir = File.join(output_dir_base, "#{@service}.#{@user_SN}", @partner_SN, "#{@partner_SN} (#{@adium_chat_time_start}).chatlog")
62
- # output_dir + "/buddyname (2009-08-04T18.38.50-0700).chatlog/buddyname (2009-08-04T18.38.50-0700).xml"
63
- output_path = output_dir + '/' + "#{@partner_SN} (#{@adium_chat_time_start}).xml"
64
- begin
65
- FileUtils.mkdir_p(output_dir)
66
- rescue => bang
67
- error "Could not create destination directory for log file. (Details: #{bang.class}: #{bang.message})"
68
- return false
69
- end
70
- if overwrite
71
- unless File.exist?(output_path)
72
- # File doesn't exist, but maybe it does with a different
73
- # time zone. Check for a file that differs only in time
74
- # zone and, if found, change @output_path to target it.
75
- maybe_matches = Dir.glob(output_dir_base + '/' << File.basename(output_path).sub(/-\d{4}\)\.chatlog$/, '') << '/*')
76
- unless maybe_matches.empty?
77
- output_path = maybe_matches[0]
78
- end
79
- end
80
- else
81
- if File.exist?(output_path)
82
- return FILE_EXISTS
83
- end
84
- end
36
+ @service = service_name_map[service.downcase]
37
+ end
38
+
39
+ attr_reader :chat_lines, :service, :user_SN, :partner_SN, :adium_chat_time_start
40
+
41
+ # Returns contents of log file
42
+ def to_s
43
+ if @chat_str.nil?
44
+ # Faster than inject() or each()
45
+ @chat_str = @chat_lines.map{|l| l.to_s }.join
46
+ end
47
+ return @chat_str
48
+ end
49
+
50
+ def each(&blk)
51
+ @chat_lines.each{|l| yield l }
52
+ end
53
+
54
+ # Set overwrite=true to create a logfile even if logfile already exists.
55
+ # Returns one of:
56
+ # * false (if an error occurred),
57
+ # * Pidgin2Adium::FILE_EXISTS if the file to be generated already exists and overwrite=false, or
58
+ # * the path to the new Adium log file.
59
+ def write_out(overwrite = false, output_dir_base = ADIUM_LOG_DIR)
60
+ # output_dir_base + "/buddyname (2009-08-04T18.38.50-0700).chatlog"
61
+ output_dir = File.join(output_dir_base, "#{@service}.#{@user_SN}", @partner_SN, "#{@partner_SN} (#{@adium_chat_time_start}).chatlog")
62
+ # output_dir + "/buddyname (2009-08-04T18.38.50-0700).chatlog/buddyname (2009-08-04T18.38.50-0700).xml"
63
+ output_path = output_dir + '/' + "#{@partner_SN} (#{@adium_chat_time_start}).xml"
64
+ begin
65
+ FileUtils.mkdir_p(output_dir)
66
+ rescue => bang
67
+ error "Could not create destination directory for log file. (Details: #{bang.class}: #{bang.message})"
68
+ return false
69
+ end
70
+ if overwrite
71
+ unless File.exist?(output_path)
72
+ # File doesn't exist, but maybe it does with a different
73
+ # time zone. Check for a file that differs only in time
74
+ # zone and, if found, change @output_path to target it.
75
+ maybe_matches = Dir.glob(output_dir_base + '/' << File.basename(output_path).sub(/-\d{4}\)\.chatlog$/, '') << '/*')
76
+ unless maybe_matches.empty?
77
+ output_path = maybe_matches[0]
78
+ end
79
+ end
80
+ else
81
+ if File.exist?(output_path)
82
+ return FILE_EXISTS
83
+ end
84
+ end
85
85
 
86
- begin
87
- outfile = File.new(output_path, 'w')
88
- rescue => bang
89
- error "Could not open log file for writing. (Details: #{bang.class}: #{bang.message})"
90
- return false
91
- end
86
+ begin
87
+ outfile = File.new(output_path, 'w')
88
+ rescue => bang
89
+ error "Could not open log file for writing. (Details: #{bang.class}: #{bang.message})"
90
+ return false
91
+ end
92
92
 
93
- # no \n before </chat> because @chat_str (from to_s) has it already
94
- outfile.printf('<?xml version="1.0" encoding="UTF-8" ?>'<<"\n"+
95
- '<chat xmlns="http://purl.org/net/ulf/ns/0.4-02" account="%s" service="%s">'<<"\n"<<'%s</chat>',
96
- @user_SN, @service, self.to_s)
97
- outfile.close
93
+ # no \n before </chat> because @chat_str (from to_s) has it already
94
+ outfile.printf('<?xml version="1.0" encoding="UTF-8" ?>'<<"\n"+
95
+ '<chat xmlns="http://purl.org/net/ulf/ns/0.4-02" account="%s" service="%s">'<<"\n"<<'%s</chat>',
96
+ @user_SN, @service, self.to_s)
97
+ outfile.close
98
98
 
99
- return output_path
100
- end
99
+ return output_path
101
100
  end
101
+ end # END LogFile class
102
102
  end
@@ -1,6 +1,6 @@
1
1
  # Contains the class BasicParser and its subclasses, HtmlLogParser and
2
2
  # TextFileParser, which parse the file passed into it and return a LogFile
3
- # object.
3
+ # object.
4
4
  #
5
5
  # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
6
6
  # using these classes directly.
@@ -11,607 +11,608 @@ require 'balance_tags_c'
11
11
  require 'pidgin2adium/log_file'
12
12
 
13
13
  module Pidgin2Adium
14
- # Empty class. Raise'd by LogParser if the first line of a log is not
15
- # parseable.
16
- class InvalidFirstLineError < StandardError; end
17
-
18
- # BasicParser is a base class. Its subclasses are TextLogParser and
19
- # HtmlLogParser.
20
- #
21
- # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
22
- # using this class directly.
23
- class BasicParser
24
- include Pidgin2Adium
25
- def initialize(src_path, user_aliases)
26
- @src_path = src_path
27
- # Whitespace is removed for easy matching later on.
28
- @user_aliases = user_aliases.split(',').map!{|x| x.downcase.gsub(/\s+/,'') }.uniq
29
- # @user_alias is set each time get_sender_by_alias is called. It is a non-normalized
30
- # alias.
31
- # Set an initial value just in case the first message doesn't give
32
- # us an alias.
33
- @user_alias = user_aliases.split(',')[0]
34
-
35
- @tz_offset = get_time_zone_offset()
36
-
37
- file = File.new(@src_path, 'r')
38
- @first_line = file.readline
39
- @file_content = file.read
40
- file.close
41
-
42
- # Time regexes must be set before pre_parse().
43
- # "4/18/2007 11:02:00 AM" => %w{4, 18, 2007, 11, 02, 00, AM}
44
- # ONLY used (if at all) in first line of chat ("Conversation with...at...")
45
- @time_regex_first_line = %r{^(\d{1,2})/(\d{1,2})/(\d{4}) (\d{1,2}):(\d{2}):(\d{2}) ([AP]M)$}
46
- # "2007-04-17 12:33:13" => %w{2007, 04, 17, 12, 33, 13}
47
- @time_regex = /^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})$/
48
- # sometimes a line in a chat doesn't have a full timestamp
49
- # "04:22:05 AM" => %w{04 22 05 AM}
50
- @minimal_time_regex = /^(\d{1,2}):(\d{2}):(\d{2})( [AP]M)?$/
51
-
52
- # Whether or not the first line is parseable.
53
- @first_line_is_valid = true
54
- begin
55
- @service,
56
- @user_SN,
57
- @partner_SN,
58
- # @basic_time_info is for files that only have the full
59
- # timestamp at the top; we can use it to fill in the minimal
60
- # per-line timestamps. It has only 3 elements (year, month,
61
- # dayofmonth) because you should be able to fill everything
62
- # else in. If you can't, something's wrong.
63
- @basic_time_info,
64
- # When the chat started, in Adium's format
65
- @adium_chat_time_start = pre_parse()
66
- rescue InvalidFirstLineError
67
- @first_line_is_valid = false
68
- error("Failed to parse, invalid first line: #{@src_path}")
69
- return # stop processing
70
- end
71
-
72
- # @status_map, @lib_purple_events, and @events are used in
73
- # create_status_or_event_msg
74
- @status_map = {
75
- /(.+) logged in\.$/ => 'online',
76
- /(.+) logged out\.$/ => 'offline',
77
- /(.+) has signed on\.$/ => 'online',
78
- /(.+) has signed off\.$/ => 'offline',
79
- /(.+) has gone away\.$/ => 'away',
80
- /(.+) is no longer away\.$/ => 'available',
81
- /(.+) has become idle\.$/ => 'idle',
82
- /(.+) is no longer idle\.$/ => 'available'
83
- }
84
-
85
- # lib_purple_events are all of event_type libPurple
86
- @lib_purple_events = [
87
- # file transfer
88
- /Starting transfer of .+ from (.+)/,
89
- /^Offering to send .+ to (.+)$/,
90
- /(.+) is offering to send file/,
91
- /^Transfer of file .+ complete$/,
92
- /Error reading|writing|accessing .+: .+/,
93
- /You cancell?ed the transfer of/,
94
- /File transfer cancelled/,
95
- /(.+?) cancell?ed the transfer of/,
96
- /(.+?) cancelled the file transfer/,
97
- # Direct IM - actual (dis)connect events are their own types
98
- /^Attempting to connect to (.+) at .+ for Direct IM\./,
99
- /^Asking (.+) to connect to us at .+ for Direct IM\./,
100
- /^Attempting to connect via proxy server\.$/,
101
- /^Direct IM with (.+) failed/,
102
- # encryption
103
- /Received message encrypted with wrong key/,
104
- /^Requesting key\.\.\.$/,
105
- /^Outgoing message lost\.$/,
106
- /^Conflicting Key Received!$/,
107
- /^Error in decryption- asking for resend\.\.\.$/,
108
- /^Making new key pair\.\.\.$/,
109
- # sending errors
110
- /^Last outgoing message not received properly- resetting$/,
111
- /Resending\.\.\./,
112
- # connection errors
113
- /Lost connection with the remote user:.+/,
114
- # chats
115
- /^.+ entered the room\.$/,
116
- /^.+ left the room\.$/
117
- ]
118
-
119
- # non-libpurple events
120
- # Each key maps to an event_type string. The keys will be matched against a line of chat
121
- # and the partner's alias will be in regex group 1, IF the alias is matched.
122
- @event_map = {
123
- # .+ is not an alias, it's a proxy server so no grouping
124
- /^Attempting to connect to .+\.$/ => 'direct-im-connect',
125
- # NB: pidgin doesn't track when Direct IM is disconnected, AFAIK
126
- /^Direct IM established$/ => 'directIMConnected',
127
- /Unable to send message/ => 'chat-error',
128
- /You missed .+ messages from (.+) because they were too large/ => 'chat-error',
129
- /User information not available/ => 'chat-error'
130
- }
131
-
132
- @ignore_events = [
133
- # Adium ignores SN/alias changes.
134
- /^.+? is now known as .+?\.<br\/?>$/
135
- ]
136
- end
137
-
138
- # This method returns a LogFile instance, or false if an error occurred.
139
- def parse
140
- return false unless @first_line_is_valid
141
- @file_content = cleanup(@file_content).split("\n")
142
-
143
- @file_content.map! do |line|
144
- # "next" returns nil which is removed by compact
145
- next if line =~ /^\s+$/
146
- if line =~ @line_regex
147
- create_msg($~.captures)
148
- elsif line =~ @line_regex_status
149
- msg = create_status_or_event_msg($~.captures)
150
- # Error occurred while parsing
151
- return false if msg == false
152
- else
153
- error "Could not parse line:"
154
- p line
155
- return false
156
- end
157
- end
158
- @file_content.compact!
159
- return LogFile.new(@file_content, @service, @user_SN, @partner_SN, @adium_chat_time_start)
160
- end
161
- # Prevent parse from being called directly from BasicParser, since
162
- # it uses subclassing magic.
163
- protected :parse
164
-
165
- #################
166
- private
167
- #################
168
-
169
- def get_time_zone_offset()
170
- # We must have a tz_offset or else the Adium Chat Log viewer
171
- # doesn't read the date correctly and then:
172
- # 1) the log has an empty start date column in the viewer
173
- # 2) The timestamps are all the same for the whole log
174
- tz_match = /([-\+]\d+)[A-Z]{3}\.(?:txt|htm|html)/.match(@src_path)
175
- if tz_match and tz_match[1]
176
- tz_offset = tz_match[1]
177
- else
178
- # "-0500" (3d rather than 2d to allow for "+")
179
- tz_offset = sprintf('%+03d00', Time.zone_offset(Time.now.zone) / 3600)
180
- end
181
- return tz_offset
182
- end
183
-
184
- #--
185
- # Adium time format: YYYY-MM-DD\THH:MM:SS[+-]TZ_HRS like:
186
- # 2008-10-05T22:26:20-0800
187
- # HOWEVER:
188
- # If it's the first line, then return it like this (note periods):
189
- # 2008-10-05T22.26.20-0800
190
- # because it will be used in the filename.
191
- #++
192
- # Converts a pidgin datestamp to an Adium one.
193
- def create_adium_time(time, is_first_line = false)
194
- # parsed_date = [year, month, day, hour, min, sec]
195
- if time =~ @time_regex
196
- year, month, day, hour, min, sec = $1.to_i,
197
- $2.to_i,
198
- $3.to_i,
199
- $4.to_i,
200
- $5.to_i,
201
- $6.to_i
202
- elsif is_first_line and time =~ @time_regex_first_line
203
- hour = $4.to_i
204
- if $7 == 'PM' and hour != 12
205
- hour += 12
206
- end
207
- year, month, day, min, sec = $3.to_i, # year
208
- $1.to_i, # month
209
- $2.to_i, # day
210
- # already did hour
211
- $5.to_i, # minutes
212
- $6.to_i # seconds
213
- elsif time =~ @minimal_time_regex
214
- # "04:22:05" => %w{04 22 05}
215
- hour = $1.to_i
216
- if $4 == 'PM' and hour != 12
217
- hour += 12
218
- end
219
- year, month, day = @basic_time_info
220
- min = $2.to_i
221
- sec = $3.to_i
222
- else
223
- error("You have found an odd timestamp. Please report it to the developer.")
224
- log_msg("The timestamp: #{time}")
225
- log_msg("Continuing...")
226
- year,month,day,hour,min,sec = ParseDate.parsedate(time)
227
- end
228
- if is_first_line
229
- adium_time = Time.local(year,month,day,hour,min,sec).strftime("%Y-%m-%dT%H.%M.%S#{@tz_offset}")
230
- else
231
- adium_time = Time.local(year,month,day,hour,min,sec).strftime("%Y-%m-%dT%H:%M:%S#{@tz_offset}")
232
- end
233
- return adium_time
234
- end
235
-
236
- # Extract required data from the file. Run by parse.
237
- def pre_parse
238
- # Deal with first line.
239
-
240
- # the first line is special. It tells us (in order of regex groups):
241
- # 1) who we're talking to
242
- # 2) what time/date
243
- # 3) what SN we used
244
- # 4) what protocol (AIM, icq, jabber...)
245
- first_line_match = /Conversation with (.+?) at (.+?) on (.+?) \((.+?)\)/.match(@first_line)
246
- if first_line_match.nil?
247
- raise InvalidFirstLineError
248
- else
249
- service = first_line_match[4]
250
- # @user_SN is normalized to avoid "AIM.name" and "AIM.na me" folders
251
- user_SN = first_line_match[3].downcase.tr(' ', '')
252
- partner_SN = first_line_match[1]
253
- pidgin_chat_time_start = first_line_match[2]
254
- basic_time_info = case pidgin_chat_time_start
255
- when @time_regex: [$1.to_i, $2.to_i, $3.to_i]
256
- when @time_regex_first_line: [$3.to_i, $1.to_i, $2.to_i]
257
- end
258
- adium_chat_time_start = create_adium_time(pidgin_chat_time_start, true)
259
- return [service,
260
- user_SN,
261
- partner_SN,
262
- basic_time_info,
263
- adium_chat_time_start]
264
- end
265
- end
266
-
267
- def get_sender_by_alias(alias_name)
268
- no_action = alias_name.sub(/^\*{3}/, '')
269
- if @user_aliases.include? no_action.downcase.gsub(/\s+/, '')
270
- # Set the current alias being used of the ones in @user_aliases
271
- @user_alias = no_action
272
- return @user_SN
273
- else
274
- return @partner_SN
275
- end
276
- end
277
-
278
- #--
279
- # create_msg takes an array of captures from matching against
280
- # @line_regex and returns a Message object or one of its subclasses.
281
- # It can be used for TextLogParser and HtmlLogParser because both of
282
- # them return data in the same indexes in the matches array.
283
- #++
284
- def create_msg(matches)
285
- msg = nil
286
- # Either a regular message line or an auto-reply/away message.
287
- time = create_adium_time(matches[0])
288
- buddy_alias = matches[1]
289
- sender = get_sender_by_alias(buddy_alias)
290
- body = matches[3]
291
- if matches[2] # auto-reply
292
- msg = AutoReplyMessage.new(sender, time, buddy_alias, body)
293
- else
294
- # normal message
295
- msg = XMLMessage.new(sender, time, buddy_alias, body)
296
- end
297
- return msg
298
- end
299
-
300
- #--
301
- # create_status_or_event_msg takes an array of +MatchData+ captures from
302
- # matching against @line_regex_status and returns an Event or Status.
303
- # Returns nil if it's a message that should be ignored, or false if an
304
- # error occurred.
305
- #++
306
- def create_status_or_event_msg(matches)
307
- # ["22:58:00", "BuddyName logged in."]
308
- # 0: time
309
- # 1: status message or event
310
- msg = nil
311
- time = create_adium_time(matches[0])
312
- str = matches[1]
313
- # Return nil, which will get compact'ed out
314
- return nil if @ignore_events.detect{|regex| str =~ regex }
315
-
316
- regex, status = @status_map.detect{|regex, status| str =~ regex}
317
- if regex and status
318
- # Status message
319
- buddy_alias = regex.match(str)[1]
320
- sender = get_sender_by_alias(buddy_alias)
321
- msg = StatusMessage.new(sender, time, buddy_alias, status)
322
- else
323
- # Test for event
324
- regex = @lib_purple_events.detect{|regex| str =~ regex }
325
- event_type = 'libpurpleEvent' if regex
326
- unless regex and event_type
327
- # not a libpurple event, try others
328
- if @event_map.detect{|regex,event_type| str =~ regex}
329
- regex, event_type = $1, $2
330
- else
331
- error(sprintf("Error parsing status or event message, no status or event found: %p", str))
332
- return false
333
- end
334
- end
335
- if regex and event_type
336
- regex_matches = regex.match(str)
337
- # Event message
338
- if regex_matches.size == 1
339
- # No alias - this means it's the user
340
- buddy_alias = @user_alias
341
- sender = @user_SN
342
- else
343
- buddy_alias = regex_matches[1]
344
- sender = get_sender_by_alias(buddy_alias)
345
- end
346
- msg = Event.new(sender, time, buddy_alias, str, event_type)
347
- end
348
- end
349
- return msg
350
- end
14
+ # Empty class. Raise'd by LogParser if the first line of a log is not
15
+ # parseable.
16
+ class InvalidFirstLineError < StandardError; end
17
+
18
+ # BasicParser is a base class. Its subclasses are TextLogParser and
19
+ # HtmlLogParser.
20
+ #
21
+ # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
22
+ # using this class directly.
23
+ class BasicParser
24
+ include Pidgin2Adium
25
+ def initialize(src_path, user_aliases)
26
+ @src_path = src_path
27
+ # Whitespace is removed for easy matching later on.
28
+ @user_aliases = user_aliases.split(',').map!{|x| x.downcase.gsub(/\s+/,'') }.uniq
29
+ # @user_alias is set each time get_sender_by_alias is called. It is a non-normalized
30
+ # alias.
31
+ # Set an initial value just in case the first message doesn't give
32
+ # us an alias.
33
+ @user_alias = user_aliases.split(',')[0]
34
+
35
+ @tz_offset = get_time_zone_offset()
36
+
37
+ file = File.new(@src_path, 'r')
38
+ @first_line = file.readline
39
+ @file_content = file.read
40
+ file.close
41
+
42
+ # Time regexes must be set before pre_parse().
43
+ # "4/18/2007 11:02:00 AM" => %w{4, 18, 2007, 11, 02, 00, AM}
44
+ # ONLY used (if at all) in first line of chat ("Conversation with...at...")
45
+ @time_regex_first_line = %r{^(\d{1,2})/(\d{1,2})/(\d{4}) (\d{1,2}):(\d{2}):(\d{2}) ([AP]M)$}
46
+ # "2007-04-17 12:33:13" => %w{2007, 04, 17, 12, 33, 13}
47
+ @time_regex = /^(\d{4})-(\d{2})-(\d{2}) (\d{2}):(\d{2}):(\d{2})$/
48
+ # sometimes a line in a chat doesn't have a full timestamp
49
+ # "04:22:05 AM" => %w{04 22 05 AM}
50
+ @minimal_time_regex = /^(\d{1,2}):(\d{2}):(\d{2})( [AP]M)?$/
51
+
52
+ # Whether or not the first line is parseable.
53
+ @first_line_is_valid = true
54
+ begin
55
+ @service,
56
+ @user_SN,
57
+ @partner_SN,
58
+ # @basic_time_info is for files that only have the full
59
+ # timestamp at the top; we can use it to fill in the minimal
60
+ # per-line timestamps. It has only 3 elements (year, month,
61
+ # dayofmonth) because you should be able to fill everything
62
+ # else in. If you can't, something's wrong.
63
+ @basic_time_info,
64
+ # When the chat started, in Adium's format
65
+ @adium_chat_time_start = pre_parse()
66
+ rescue InvalidFirstLineError
67
+ @first_line_is_valid = false
68
+ error("Failed to parse, invalid first line: #{@src_path}")
69
+ return # stop processing
70
+ end
71
+
72
+ # @status_map, @lib_purple_events, and @events are used in
73
+ # create_status_or_event_msg
74
+ @status_map = {
75
+ /(.+) logged in\.$/ => 'online',
76
+ /(.+) logged out\.$/ => 'offline',
77
+ /(.+) has signed on\.$/ => 'online',
78
+ /(.+) has signed off\.$/ => 'offline',
79
+ /(.+) has gone away\.$/ => 'away',
80
+ /(.+) is no longer away\.$/ => 'available',
81
+ /(.+) has become idle\.$/ => 'idle',
82
+ /(.+) is no longer idle\.$/ => 'available'
83
+ }
84
+
85
+ # lib_purple_events are all of event_type libPurple
86
+ @lib_purple_events = [
87
+ # file transfer
88
+ /Starting transfer of .+ from (.+)/,
89
+ /^Offering to send .+ to (.+)$/,
90
+ /(.+) is offering to send file/,
91
+ /^Transfer of file .+ complete$/,
92
+ /Error reading|writing|accessing .+: .+/,
93
+ /You cancell?ed the transfer of/,
94
+ /File transfer cancelled/,
95
+ /(.+?) cancell?ed the transfer of/,
96
+ /(.+?) cancelled the file transfer/,
97
+ # Direct IM - actual (dis)connect events are their own types
98
+ /^Attempting to connect to (.+) at .+ for Direct IM\./,
99
+ /^Asking (.+) to connect to us at .+ for Direct IM\./,
100
+ /^Attempting to connect via proxy server\.$/,
101
+ /^Direct IM with (.+) failed/,
102
+ # encryption
103
+ /Received message encrypted with wrong key/,
104
+ /^Requesting key\.\.\.$/,
105
+ /^Outgoing message lost\.$/,
106
+ /^Conflicting Key Received!$/,
107
+ /^Error in decryption- asking for resend\.\.\.$/,
108
+ /^Making new key pair\.\.\.$/,
109
+ # sending errors
110
+ /^Last outgoing message not received properly- resetting$/,
111
+ /Resending\.\.\./,
112
+ # connection errors
113
+ /Lost connection with the remote user:.+/,
114
+ # chats
115
+ /^.+ entered the room\.$/,
116
+ /^.+ left the room\.$/
117
+ ]
118
+
119
+ # non-libpurple events
120
+ # Each key maps to an event_type string. The keys will be matched against a line of chat
121
+ # and the partner's alias will be in regex group 1, IF the alias is matched.
122
+ @event_map = {
123
+ # .+ is not an alias, it's a proxy server so no grouping
124
+ /^Attempting to connect to .+\.$/ => 'direct-im-connect',
125
+ # NB: pidgin doesn't track when Direct IM is disconnected, AFAIK
126
+ /^Direct IM established$/ => 'directIMConnected',
127
+ /Unable to send message/ => 'chat-error',
128
+ /You missed .+ messages from (.+) because they were too large/ => 'chat-error',
129
+ /User information not available/ => 'chat-error'
130
+ }
131
+
132
+ @ignore_events = [
133
+ # Adium ignores SN/alias changes.
134
+ /^.+? is now known as .+?\.<br\/?>$/
135
+ ]
351
136
  end
352
137
 
353
- # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
354
- # using this class directly.
355
- class TextLogParser < BasicParser
356
- def initialize(src_path, user_aliases)
357
- super(src_path, user_aliases)
358
- @timestamp_rx = '\((\d{1,2}:\d{1,2}:\d{1,2})\)'
359
-
360
- # @line_regex matches a line in a TXT log file other than the first
361
- # @line_regex matchdata:
362
- # 0: timestamp
363
- # 1: screen name or alias, if alias set
364
- # 2: "<AUTO-REPLY>" or nil
365
- # 3: message body
366
- @line_regex = /#{@timestamp_rx} (.*?) ?(<AUTO-REPLY>)?: (.*)/o
367
- # @line_regex_status matches a status line
368
- # @line_regex_status matchdata:
369
- # 0: timestamp
370
- # 1: status message
371
- @line_regex_status = /#{@timestamp_rx} ([^:]+)/o
372
- end
373
-
374
- public :parse
375
-
376
- #################
377
- private
378
- #################
379
-
380
- def cleanup(text)
381
- text.tr!("\r", '')
382
- # Replace newlines with "<br/>" unless they end a chat line.
383
- text.gsub!(/\n(?!#{@timestamp_rx}|\Z)/, '<br/>')
384
- # Escape entities since this will be in XML
385
- text.gsub!('&', '&amp;') # escape '&' first
386
- text.gsub!('<', '&lt;')
387
- text.gsub!('>', '&gt;')
388
- text.gsub!('"', '&quot;')
389
- text.gsub!("'", '&apos;')
390
- return text
391
- end
138
# Parses the cleaned-up log content line by line.
#
# Every line is matched first against @line_regex (regular and auto-reply
# messages) and then against @line_regex_status (status changes and events).
# Blank lines and ignored events produce nil and are dropped from the result.
#
# Returns a LogFile instance, or false if the first line of the file was
# invalid or if any line could not be parsed.
def parse
  return false unless @first_line_is_valid
  @file_content = cleanup(@file_content).split("\n")

  @file_content.map! do |line|
    # "next" yields nil, which compact! removes below. Lines that are empty
    # or contain only whitespace carry no message, so skip them instead of
    # failing the whole file.
    next if line !~ /\S/
    if line =~ @line_regex
      create_msg($~.captures)
    elsif line =~ @line_regex_status
      msg = create_status_or_event_msg($~.captures)
      # false means an error occurred while parsing; nil means "ignore".
      return false if msg == false
      # The message has to be the value of this branch. Ending the branch on
      # the modifier-if above would evaluate to nil and silently drop every
      # status and event message.
      msg
    else
      error "Could not parse line:"
      p line
      return false
    end
  end
  @file_content.compact!
  return LogFile.new(@file_content, @service, @user_SN, @partner_SN, @adium_chat_time_start)
end
161
+ # Prevent parse from being called directly from BasicParser, since
162
+ # it uses subclassing magic.
163
+ protected :parse
164
+
165
+ #################
166
+ private
167
+ #################
168
+
169
# Returns the UTC offset to append to Adium timestamps, e.g. "-0500".
#
# We must have a tz_offset or else the Adium Chat Log viewer
# doesn't read the date correctly and then:
# 1) the log has an empty start date column in the viewer
# 2) The timestamps are all the same for the whole log
#
# The offset is taken from the log's file name when @src_path contains one
# (a signed number followed by a three-letter zone and a txt/htm/html
# extension). Otherwise the current local UTC offset is used.
def get_time_zone_offset()
  tz_match = /([-\+]\d+)[A-Z]{3}\.(?:txt|htm|html)/.match(@src_path)
  return tz_match[1] if tz_match

  # %z formats the local offset as "+hhmm"/"-hhmm". Unlike looking the zone
  # abbreviation up by name, this works for every zone and keeps offsets
  # that are not a whole number of hours.
  Time.now.strftime('%z')
end
183
+
184
#--
# Adium time format: YYYY-MM-DD\THH:MM:SS[+-]TZ_HRS like:
#      2008-10-05T22:26:20-0800
# HOWEVER:
# If it's the first line, then return it like this (note periods):
#      2008-10-05T22.26.20-0800
# because it will be used in the filename.
#++
# Converts a pidgin datestamp to an Adium one.
#
# time:: the timestamp text. It is tried against @time_regex (full
#        "YYYY-MM-DD HH:MM:SS"), then, for the first line only, against
#        @time_regex_first_line, then against @minimal_time_regex
#        ("HH:MM:SS" with an optional AM/PM suffix). For the minimal form
#        the date comes from @basic_time_info.
# is_first_line:: when true, the time parts are separated by periods.
#
# Returns the formatted timestamp string with @tz_offset appended.
def create_adium_time(time, is_first_line = false)
  if time =~ @time_regex
    year, month, day, hour, min, sec = $~.captures[0, 6].map { |part| part.to_i }
  elsif is_first_line && time =~ @time_regex_first_line
    parts = $~.captures
    # First-line order is month, day, year; group 7 is read as the AM/PM marker.
    month, day, year, hour, min, sec = parts[0, 6].map { |part| part.to_i }
    hour = hour_in_24h_clock(hour, parts[6])
  elsif time =~ @minimal_time_regex
    # "04:22:05 PM" => ["04", "22", "05", " PM"]; the suffix group includes
    # its leading space, which hour_in_24h_clock strips.
    parts = $~.captures
    hour, min, sec = parts[0, 3].map { |part| part.to_i }
    hour = hour_in_24h_clock(hour, parts[3])
    year, month, day = @basic_time_info
  else
    error("You have found an odd timestamp. Please report it to the developer.")
    log_msg("The timestamp: #{time}")
    log_msg("Continuing...")
    # NOTE(review): ParseDate is not required anywhere in the visible part of
    # this file and is not available on newer Rubies; confirm this fallback.
    year,month,day,hour,min,sec = ParseDate.parsedate(time)
  end
  separator = is_first_line ? '.' : ':'
  return Time.local(year,month,day,hour,min,sec).strftime("%Y-%m-%dT%H#{separator}%M#{separator}%S#{@tz_offset}")
end

# Converts an hour on a 12-hour clock to the 24-hour clock.
#
# hour:: the hour as an Integer.
# meridiem:: "AM", "PM" (surrounding whitespace is ignored) or nil. Anything
#            else leaves the hour untouched.
def hour_in_24h_clock(hour, meridiem)
  case meridiem.to_s.strip
  when 'PM' then hour < 12 ? hour + 12 : hour
  when 'AM' then hour == 12 ? 0 : hour
  else hour
  end
end
235
+
236
# Extracts the required data from the first line of the file.
#
# The first line is special. Its regex groups tell us, in order:
#   1) who we're talking to
#   2) what time/date the chat started
#   3) what SN we used
#   4) what protocol (AIM, icq, jabber...)
#
# Returns [service, user_SN, partner_SN, basic_time_info,
# adium_chat_time_start]. basic_time_info is [year, month, day], or nil when
# the start time matches neither @time_regex nor @time_regex_first_line.
#
# Raises InvalidFirstLineError if @first_line does not have the expected form.
def pre_parse
  header = /Conversation with (.+?) at (.+?) on (.+?) \((.+?)\)/.match(@first_line)
  raise InvalidFirstLineError if header.nil?

  partner_SN, pidgin_chat_time_start, raw_user_SN, service = header.captures
  # The user's SN is normalized to avoid "AIM.name" and "AIM.na me" folders
  user_SN = raw_user_SN.downcase.tr(' ', '')

  basic_time_info =
    if pidgin_chat_time_start =~ @time_regex
      # groups are year, month, day
      $~.values_at(1, 2, 3).map { |part| part.to_i }
    elsif pidgin_chat_time_start =~ @time_regex_first_line
      # groups are month, day, year
      $~.values_at(3, 1, 2).map { |part| part.to_i }
    end

  adium_chat_time_start = create_adium_time(pidgin_chat_time_start, true)
  [service, user_SN, partner_SN, basic_time_info, adium_chat_time_start]
end
266
+
267
# Maps an alias seen in the log to a screen name.
#
# A leading "***" is removed from alias_name first. If the remainder,
# lowercased and with all whitespace removed, is one of @user_aliases, it is
# remembered in @user_alias and @user_SN is returned. Otherwise @partner_SN
# is returned and @user_alias is left alone.
def get_sender_by_alias(alias_name)
  plain_alias = alias_name.sub(/^\*{3}/, '')
  lookup_key = plain_alias.downcase.gsub(/\s+/, '')
  return @partner_SN unless @user_aliases.include?(lookup_key)

  # Set the current alias being used of the ones in @user_aliases
  @user_alias = plain_alias
  @user_SN
end
277
+
278
#--
# create_msg takes the array of captures from a match against @line_regex
# (0: timestamp, 1: alias, 2: auto-reply marker or nil, 3: body) and returns
# an AutoReplyMessage when the marker is present, an XMLMessage otherwise.
# It can be used for TextLogParser and HtmlLogParser because both of
# them return data in the same indexes in the matches array.
#++
def create_msg(matches)
  timestamp, buddy_alias, auto_reply_marker, body = matches
  time = create_adium_time(timestamp)
  sender = get_sender_by_alias(buddy_alias)
  # Either a regular message line or an auto-reply/away message.
  message_class = auto_reply_marker ? AutoReplyMessage : XMLMessage
  message_class.new(sender, time, buddy_alias, body)
end
299
+
300
#--
# create_status_or_event_msg takes an array of +MatchData+ captures from
# matching against @line_regex_status and returns an Event or Status.
# Returns nil if it's a message that should be ignored, or false if an
# error occurred.
#++
# matches looks like ["22:58:00", "BuddyName logged in."]:
#   0: time
#   1: status message or event
#
# The text is checked against @ignore_events, then @status_map, then
# @lib_purple_events, then @event_map; the first one that matches decides
# the result.
def create_status_or_event_msg(matches)
  time = create_adium_time(matches[0])
  str = matches[1]
  # Return nil, which will get compact'ed out
  return nil if @ignore_events.detect { |ignored| str =~ ignored }

  regex, status = @status_map.detect { |status_regex, _| str =~ status_regex }
  if regex && status
    # Status message; every @status_map regex captures the alias in group 1.
    buddy_alias = regex.match(str)[1]
    sender = get_sender_by_alias(buddy_alias)
    return StatusMessage.new(sender, time, buddy_alias, status)
  end

  # Test for event
  regex = @lib_purple_events.detect { |event_regex| str =~ event_regex }
  if regex
    event_type = 'libpurpleEvent'
  else
    # Not a libpurple event, try others. detect returns the matching
    # [regex, event_type] pair (or nil); the regex globals $1/$2 hold capture
    # groups of the last match and must not be used for this.
    regex, event_type = @event_map.detect { |event_regex, _| str =~ event_regex }
  end

  unless regex && event_type
    error(sprintf("Error parsing status or event message, no status or event found: %p", str))
    return false
  end

  regex_matches = regex.match(str)
  if regex_matches.size == 1
    # No alias group in the regex - this means it's the user
    buddy_alias = @user_alias
    sender = @user_SN
  else
    buddy_alias = regex_matches[1]
    sender = get_sender_by_alias(buddy_alias)
  end
  Event.new(sender, time, buddy_alias, str, event_type)
end
351
+ end # END BasicParser class
352
+
353
+ # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
354
+ # using this class directly.
355
+ class TextLogParser < BasicParser
356
# Sets up the regexes used to parse a plain-text (TXT) log.
#
# src_path and user_aliases are handed to BasicParser#initialize unchanged.
def initialize(src_path, user_aliases)
  super(src_path, user_aliases)
  @timestamp_rx = '\((\d{1,2}:\d{1,2}:\d{1,2})\)'

  # @line_regex matches a line in a TXT log file other than the first
  # @line_regex matchdata:
  # 0: timestamp
  # 1: screen name or alias, if alias set
  # 2: the auto-reply marker or nil
  # 3: message body
  #
  # Lines are matched after cleanup has escaped "<" and ">", so the marker
  # arrives as "&lt;AUTO-REPLY&gt;". The raw "<AUTO-REPLY>" form is still
  # accepted for text that has not been escaped.
  @line_regex = /#{@timestamp_rx} (.*?) ?(&lt;AUTO-REPLY&gt;|<AUTO-REPLY>)?: (.*)/o

  # @line_regex_status matches a status line
  # @line_regex_status matchdata:
  # 0: timestamp
  # 1: status message
  @line_regex_status = /#{@timestamp_rx} ([^:]+)/o
end
393
374
 
394
- # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead
395
- # of using this class directly.
396
- class HtmlLogParser < BasicParser
397
- def initialize(src_path, user_aliases)
398
- super(src_path, user_aliases)
399
- @timestamp_rx = '\(((?:\d{4}-\d{2}-\d{2} )?\d{1,2}:\d{1,2}:\d{1,2}(?: [AP]M)?)\)'
400
-
401
- # @line_regex matches a line in an HTML log file other than the
402
- # first time matches on either "2008-11-17 14:12" or "14:12"
403
- # @line_regex match obj:
404
- # 0: timestamp, extended or not
405
- # 1: screen name or alias, if alias set
406
- # 2: "&lt;AUTO-REPLY&gt;" or nil
407
- # 3: message body
408
- # The ":" is optional to allow for strings like "(17:12:21) <b>***Gabe B-W</b> is confused<br/>"
409
- @line_regex = /#{@timestamp_rx} ?<b>(.+?) ?(&lt;AUTO-REPLY&gt;)?:?<\/b> ?(.+)<br ?\/>/o
410
- # @line_regex_status matches a status line
411
- # @line_regex_status match obj:
412
- # 0: timestamp
413
- # 1: status message
414
- @line_regex_status = /#{@timestamp_rx} ?<b> (.+)<\/b><br ?\/>/o
415
- end
416
-
417
- public :parse
418
-
419
- #################
420
- private
421
- #################
422
-
423
- # Returns a cleaned string.
424
- # Removes the following tags from _text_:
425
- # * html
426
- # * body
427
- # * font
428
- # * a with no innertext, e.g. <a href="blah"></a>
429
- # And removes the following style declarations:
430
- # * color: #000000 (just turns text black)
431
- # * font-family
432
- # * font-size
433
- # * background
434
- # * em (really it's changed to <span style="font-style: italic;">)
435
- # Since each <span> has only one style declaration, spans with these
436
- # declarations are removed (but the text inside them is preserved).
437
- def cleanup(text)
438
- # Sometimes this is in there. I don't know why.
439
- text.gsub!(%r{&lt;/FONT HSPACE='\d'>}, '')
440
- # We can remove <font> safely since Pidgin and Adium both show bold
441
- # using <span style="font-weight: bold;"> except Pidgin uses single
442
- # quotes while Adium uses double quotes.
443
- text.gsub!(/<\/?(?:html|body|font)(?: .+?)?>/, '') # very important!
444
-
445
- text.tr!("\r", '')
446
- # Remove empty lines
447
- text.gsub!("\n\n", "\n")
448
-
449
- # Remove newlines that end the file, since they screw up the
450
- # newline -> <br/> conversion
451
- text.gsub!(/\n\Z/, '')
452
-
453
- # Replace newlines with "<br/>" unless they end a chat line.
454
- # This must go after we remove <font> tags.
455
- text.gsub!(/\n(?!#{@timestamp_rx})/, '<br/>')
456
-
457
- # These empty links are sometimes appended to every line in a chat,
458
- # for some weird reason. Remove them.
459
- text.gsub!(%r{<a href=('").+?\1>\s*?</a>}, '')
460
-
461
- # Replace single quotes inside tags with double quotes so we can
462
- # easily change single quotes to entities.
463
- # For spans, removes a space after the final declaration if it exists.
464
- text.gsub!(/<span style='([^']+?;) ?'>/, '<span style="\1">')
465
- text.gsub!(/([a-z]+=)'(.+?)'/, '\1"\2"')
375
+ public :parse
376
+
377
+ #################
378
+ private
379
+ #################
380
+
381
# Prepares the raw text of a TXT log for line-by-line parsing.
#
# Removes carriage returns, escapes the XML special characters, and turns
# every newline that does not end a chat line into "<br/>". A newline ends a
# chat line when it is followed by a timestamp or by the end of the text.
#
# text is modified in place and also returned.
def cleanup(text)
  text.tr!("\r", '')
  # Escape entities since this will be in XML
  text.gsub!('&', '&amp;') # escape '&' first
  text.gsub!('<', '&lt;')
  text.gsub!('>', '&gt;')
  text.gsub!('"', '&quot;')
  text.gsub!("'", '&apos;')
  # Replace newlines with "<br/>" unless they end a chat line. This has to
  # happen after the escaping above, otherwise the inserted tag would itself
  # be escaped to "&lt;br/&gt;".
  text.gsub!(/\n(?!#{@timestamp_rx}|\Z)/, '<br/>')
  return text
end
393
+ end
394
+
395
+ # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead
396
+ # of using this class directly.
397
+ class HtmlLogParser < BasicParser
398
# Sets up the regexes used to parse an HTML log; both arguments are passed
# on to the superclass initializer as they are.
def initialize(src_path, user_aliases)
  super

  # The date part and the AM/PM suffix are optional, so this matches both
  # "(2008-11-17 14:12:21)" and "(14:12:21)".
  @timestamp_rx = '\(((?:\d{4}-\d{2}-\d{2} )?\d{1,2}:\d{1,2}:\d{1,2}(?: [AP]M)?)\)'

  # @line_regex matches any line of an HTML log other than the first.
  # Match groups:
  #   0: timestamp, extended or not
  #   1: screen name or alias, if alias set
  #   2: "&lt;AUTO-REPLY&gt;" or nil
  #   3: message body
  # The ":" is optional to allow for strings like
  # "(17:12:21) <b>***Gabe B-W</b> is confused<br/>"
  @line_regex = /#{@timestamp_rx} ?<b>(.+?) ?(&lt;AUTO-REPLY&gt;)?:?<\/b> ?(.+)<br ?\/>/o

  # @line_regex_status matches a status line.
  # Match groups:
  #   0: timestamp
  #   1: status message
  @line_regex_status = /#{@timestamp_rx} ?<b> (.+)<\/b><br ?\/>/o
end
417
+
418
+ public :parse
419
+
420
+ #################
421
+ private
422
+ #################
423
+
424
# Returns a cleaned string (text is also modified in place).
# Removes the following tags from _text_:
# * html
# * body
# * font
# * a with no innertext, e.g. <a href="blah"></a>
# And removes the following style declarations:
# * color: #000000 (just turns text black)
# * font-family
# * font-size
# * background
# Any other non-color declaration (e.g. font-weight) is kept as it is.
# <em> is changed to <span style="font-style: italic;">.
# Spans left without any declaration are removed (but the text inside them
# is preserved); spans with no inner text are removed entirely.
def cleanup(text)
  # Sometimes this is in there. I don't know why.
  text.gsub!(%r{&lt;/FONT HSPACE='\d'>}, '')
  # We can remove <font> safely since Pidgin and Adium both show bold
  # using <span style="font-weight: bold;"> except Pidgin uses single
  # quotes while Adium uses double quotes.
  text.gsub!(/<\/?(?:html|body|font)(?: .+?)?>/, '') # very important!

  text.tr!("\r", '')
  # Remove empty lines
  text.gsub!("\n\n", "\n")

  # Remove newlines that end the file, since they screw up the
  # newline -> <br/> conversion
  text.gsub!(/\n\Z/, '')

  # Replace newlines with "<br/>" unless they end a chat line.
  # This must go after we remove <font> tags.
  text.gsub!(/\n(?!#{@timestamp_rx})/, '<br/>')

  # These empty links are sometimes appended to every line in a chat,
  # for some weird reason. Remove them. The href value may be quoted either
  # way; it is matched without crossing its closing quote so that a real
  # link earlier on the same line is never swallowed.
  text.gsub!(%r{<a href=(?:'[^']*'|"[^"]*")>\s*</a>}, '')

  # Replace single quotes inside tags with double quotes so we can
  # easily change single quotes to entities.
  # For spans, removes a space after the final declaration if it exists.
  text.gsub!(/<span style='([^']+?;) ?'>/, '<span style="\1">')
  text.gsub!(/([a-z]+=)'(.+?)'/, '\1"\2"')
  text.gsub!("'", '&apos;')

  text.gsub!(%r{<span style="(.+?)">(.*?)</span>}) do |s|
    # style = style declaration
    # innertext = text inside <span>
    style, innertext = $1, $2
    # Remove empty spans (a nil block value is substituted as "").
    next if innertext == ''

    styleparts = style.split(/; ?/).map do |declaration|
      if declaration[0,5] == 'color'
        if declaration.include?('color: #000000')
          nil
        elsif declaration =~ /(color: #[0-9a-fA-F]{6})(>.*)?/
          # Regarding the bit with the ">", sometimes this happens:
          # <span style="color: #000000>today;">today was busy</span>
          # Then declaration = "color: #000000>today"
          # Or it can end in ">;", with no text before the semicolon.
          # So keep the color but remove the ">" and anything following it.
          $1
        end
        # Any other color value falls through to nil and is dropped.
      else
        case declaration
        when /^font-family/, /^font-size/, /^background/ then nil
        # don't remove font-weight (or anything else not listed above)
        else declaration
        end
      end
    end.compact

    unless styleparts.empty?
      style = styleparts.join('; ')
      innertext = "<span style=\"#{style};\">#{innertext}</span>"
    end
    innertext
  end
  # Pidgin uses <em>, Adium uses <span>
  if text.gsub!('<em>', '<span style="font-style: italic;">')
    text.gsub!('</em>', '</span>')
  end
  return text
end
520
+ end # END HtmlLogParser class
521
+
522
# Base holder for one line of the chat. The subclasses (XMLMessage,
# AutoReplyMessage, StatusMessage, Event) each add a to_s that renders the
# line in the format used in an Adium log file; Message itself has none.
class Message
  # sender::      the sender's screen name
  # time::        when the message was sent, in Adium format
  #               (e.g. "2008-10-05T22:26:20-0800")
  # buddy_alias:: the alias attached to the line (an alias, NOT a screen name)
  attr_accessor :sender, :time, :buddy_alias

  def initialize(sender, time, buddy_alias)
    @sender, @time, @buddy_alias = sender, time, buddy_alias
  end
end
539
+
540
# Basic message with body text (as opposed to pure status messages, which
# have no body).
class XMLMessage < Message
  # sender, time and buddy_alias are handled by Message. body is the message
  # text; it is normalized (see normalize_body!) and then wrapped in the
  # markup that to_s emits.
  def initialize(sender, time, buddy_alias, body)
    super(sender, time, buddy_alias)
    @body = body
    # Normalize before building @styled_body: to_s prints @styled_body, so
    # formatting it first would leave the normalization out of the output.
    normalize_body!()
    @styled_body = '<div><span style="font-family: Helvetica; font-size: 12pt;">%s</span></div>' % @body
  end
  attr_accessor :body

  # Returns the message as a <message> element followed by a newline.
  def to_s
    return sprintf('<message sender="%s" time="%s" alias="%s">%s</message>' << "\n",
                   @sender, @time, @buddy_alias, @styled_body)
  end

  #################
  private
  #################

  # Balances mismatched tags, normalizes body style, and fixes actions
  # so they are in Adium style (Pidgin uses "***Buddy waves at you", Adium uses
  # "*Buddy waves at you*").
  def normalize_body!
    normalize_body_entities!()
    # Fix mismatched tags. Yes, it's faster to do it per-message
    # than all at once.
    @body = Pidgin2Adium.balance_tags_c(@body)
    # The alias can be nil (an event attributed to the user before any user
    # alias has been seen), so check it before indexing.
    if @buddy_alias && @buddy_alias[0,3] == '***'
      # "***<alias>" is what pidgin sets as the alias for a /me action
      @buddy_alias.slice!(0,3)
      @body = "*#{@body}*"
    end
  end

  # Escapes entities.
  def normalize_body_entities!
    # Convert '&' to '&amp;' only if it's not followed by an entity.
    @body.gsub!(/&(?!lt|gt|amp|quot|apos)/, '&amp;')
  end
end # END XMLMessage
581
+
582
# An auto reply message. Identical to XMLMessage except that the rendered
# <message> element carries auto="true".
class AutoReplyMessage < XMLMessage
  def to_s
    template = '<message sender="%s" time="%s" auto="true" alias="%s">%s</message>' + "\n"
    template % [@sender, @time, @buddy_alias, @styled_body]
  end
end
580
589
 
581
- # An auto reply message.
582
- class AutoReplyMessage < XMLMessage
583
- def to_s
584
- return sprintf('<message sender="%s" time="%s" auto="true" alias="%s">%s</message>' << "\n",
585
- @sender, @time, @buddy_alias, @styled_body)
586
- end
590
# A message saying e.g. "Blahblah has gone away."
# It has no body; it only records which status the buddy changed to.
class StatusMessage < Message
  attr_accessor :status

  # status is the status string placed in the "type" attribute by to_s.
  def initialize(sender, time, buddy_alias, status)
    super(sender, time, buddy_alias)
    @status = status
  end

  # Returns the self-closing <status> element followed by a newline.
  def to_s
    template = '<status type="%s" sender="%s" time="%s" alias="%s"/>' + "\n"
    template % [@status, @sender, @time, @buddy_alias]
  end
end
602
+
603
# Pidgin does not have Events, but Adium does. Pidgin mostly uses system
# messages to display what Adium calls events. These include sending a file,
# starting a Direct IM connection, or an error in chat.
class Event < XMLMessage
  attr_accessor :event_type

  # event_type is the string placed in the "type" attribute by to_s; the
  # remaining arguments are handled by XMLMessage.
  def initialize(sender, time, buddy_alias, body, event_type)
    super(sender, time, buddy_alias, body)
    @event_type = event_type
  end

  # Returns the <event> element (no trailing newline is appended).
  def to_s
    fields = [@event_type, @sender, @time, @buddy_alias, @styled_body]
    '<event type="%s" sender="%s" time="%s" alias="%s">%s</event>' % fields
  end
end
617
618
  end # end module