pipio 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +27 -0
  3. data/.rspec +2 -0
  4. data/.simplecov +5 -0
  5. data/.travis.yml +12 -0
  6. data/Gemfile +3 -0
  7. data/LICENSE +20 -0
  8. data/NEWS.md +10 -0
  9. data/README.md +88 -0
  10. data/Rakefile +13 -0
  11. data/lib/pipio.rb +34 -0
  12. data/lib/pipio/alias_registry.rb +26 -0
  13. data/lib/pipio/chat.rb +39 -0
  14. data/lib/pipio/cleaners/html_cleaner.rb +95 -0
  15. data/lib/pipio/cleaners/text_cleaner.rb +15 -0
  16. data/lib/pipio/file_reader.rb +29 -0
  17. data/lib/pipio/message_creators/auto_or_xml_message_creator.rb +25 -0
  18. data/lib/pipio/message_creators/event_message_creator.rb +47 -0
  19. data/lib/pipio/message_creators/status_message_creator.rb +19 -0
  20. data/lib/pipio/messages/auto_reply_message.rb +7 -0
  21. data/lib/pipio/messages/event.rb +67 -0
  22. data/lib/pipio/messages/message.rb +23 -0
  23. data/lib/pipio/messages/status_message.rb +26 -0
  24. data/lib/pipio/messages/xml_message.rb +43 -0
  25. data/lib/pipio/metadata.rb +34 -0
  26. data/lib/pipio/metadata_parser.rb +55 -0
  27. data/lib/pipio/parser_factory.rb +32 -0
  28. data/lib/pipio/parsers/basic_parser.rb +83 -0
  29. data/lib/pipio/parsers/html_log_parser.rb +22 -0
  30. data/lib/pipio/parsers/null_parser.rb +9 -0
  31. data/lib/pipio/parsers/text_log_parser.rb +21 -0
  32. data/lib/pipio/tag_balancer.rb +163 -0
  33. data/lib/pipio/time_parser.rb +36 -0
  34. data/lib/pipio/version.rb +3 -0
  35. data/pipio.gemspec +27 -0
  36. data/spec/pipio/alias_registry_spec.rb +37 -0
  37. data/spec/pipio/chat_spec.rb +66 -0
  38. data/spec/pipio/cleaners/html_cleaner_spec.rb +102 -0
  39. data/spec/pipio/cleaners/text_cleaner_spec.rb +29 -0
  40. data/spec/pipio/file_reader_spec.rb +130 -0
  41. data/spec/pipio/messages/auto_reply_message_spec.rb +40 -0
  42. data/spec/pipio/messages/event_spec.rb +41 -0
  43. data/spec/pipio/messages/status_message_spec.rb +43 -0
  44. data/spec/pipio/messages/xml_message_spec.rb +55 -0
  45. data/spec/pipio/metadata_parser_spec.rb +81 -0
  46. data/spec/pipio/metadata_spec.rb +72 -0
  47. data/spec/pipio/parser_factory_spec.rb +31 -0
  48. data/spec/pipio/parsers/html_log_parser_spec.rb +160 -0
  49. data/spec/pipio/parsers/null_parser_spec.rb +13 -0
  50. data/spec/pipio/parsers/text_log_parser_spec.rb +37 -0
  51. data/spec/pipio/tag_balancer_spec.rb +16 -0
  52. data/spec/pipio/time_parser_spec.rb +66 -0
  53. data/spec/pipio_spec.rb +63 -0
  54. data/spec/spec_helper.rb +18 -0
  55. data/spec/support/chat_builder.rb +29 -0
  56. data/spec/support/chat_builder_helpers.rb +41 -0
  57. data/spec/support/file_builder.rb +22 -0
  58. data/spec/support/html_chat_builder.rb +67 -0
  59. data/spec/support/logfiles/2006-12-21.223606.txt +3 -0
  60. data/spec/support/logfiles/2008-01-15.071445-0500PST.htm +5 -0
  61. data/spec/support/logfiles/2008-01-15.071445-0500PST.html +5 -0
  62. data/spec/support/text_chat_builder.rb +21 -0
  63. data/spec/test-output/README.md +1 -0
  64. data/spec/test-output/html_log_output.xml +6 -0
  65. data/spec/test-output/text_log_output.xml +4 -0
  66. metadata +193 -0
@@ -0,0 +1,25 @@
1
module Pipio
  # Builds the message object for an ordinary chat line: an AutoReplyMessage
  # when the line was flagged as an auto-reply, an XMLMessage otherwise.
  class AutoOrXmlMessageCreator
    # text               - the message body
    # time               - the time the message was sent
    # sender_screen_name - the screen name of the sender
    # sender_alias       - the alias of the sender
    # is_auto_reply      - any truthy value marks the line as an auto-reply
    def initialize(text, time, sender_screen_name, sender_alias, is_auto_reply)
      @text = text
      @time = time
      @sender_screen_name = sender_screen_name
      @sender_alias = sender_alias
      @is_auto_reply = is_auto_reply
    end

    # Returns a new AutoReplyMessage or XMLMessage. Both are constructed with
    # the same arguments: screen name, time, alias, text.
    def create
      message_class.new(@sender_screen_name, @time, @sender_alias, @text)
    end

    private

    # Picks the message class based on the truthiness of the auto-reply flag.
    def message_class
      @is_auto_reply ? AutoReplyMessage : XMLMessage
    end
  end
end
@@ -0,0 +1,47 @@
1
module Pipio
  # Turns the text of a system message into an Event. The libpurple patterns
  # in Event::LIB_PURPLE are tried first, then the patterns in Event::MAP.
  class EventMessageCreator
    # text               - the body of the system message
    # time               - the time of the message
    # sender_alias       - alias to use when the text names no alias
    # sender_screen_name - screen name to use when the text names no alias
    # alias_registry     - responds to [] and maps an alias to a screen name
    def initialize(text, time, sender_alias, sender_screen_name, alias_registry)
      @text = text
      @time = time
      @sender_alias = sender_alias
      @sender_screen_name = sender_screen_name
      @alias_registry = alias_registry
    end

    # Returns an Event for the first pattern that matches the text, or nil
    # when no pattern matches.
    def create
      lib_purple_event || mapped_event
    end

    private

    # Event built from the first matching libpurple pattern, if any.
    def lib_purple_event
      pattern = Event::LIB_PURPLE.detect { |candidate| @text =~ candidate }
      build_event(pattern, 'libpurpleEvent') if pattern
    end

    # Event built from the first matching Event::MAP entry, if any.
    def mapped_event
      pattern, type = Event::MAP.detect { |candidate, _type| @text =~ candidate }
      build_event(pattern, type) if pattern && type
    end

    # A pattern without capture groups carries no alias, which means the
    # event belongs to the user; otherwise group 1 holds the sender's alias.
    def build_event(pattern, type)
      captures = pattern.match(@text).captures
      if captures.empty?
        event_alias = @sender_alias
        event_screen_name = @sender_screen_name
      else
        event_alias = captures.first
        event_screen_name = @alias_registry[event_alias]
      end

      Event.new(event_screen_name, @time, event_alias, @text, type)
    end
  end
end
@@ -0,0 +1,19 @@
1
module Pipio
  # Turns the text of a system message into a StatusMessage when the text
  # matches one of the patterns in StatusMessage::MAP.
  class StatusMessageCreator
    # text           - the body of the system message
    # time           - the time of the message
    # alias_registry - responds to [] and maps an alias to a screen name
    def initialize(text, time, alias_registry)
      @text = text
      @time = time
      @alias_registry = alias_registry
    end

    # Returns a StatusMessage for the first matching pattern, or nil when the
    # text is not a recognised status change. Group 1 of the pattern is the
    # alias of the buddy whose status changed.
    def create
      pattern, status = StatusMessage::MAP.detect { |candidate, _status| @text =~ candidate }
      return unless pattern && status

      buddy_alias = pattern.match(@text)[1]
      buddy_screen_name = @alias_registry[buddy_alias]
      StatusMessage.new(buddy_screen_name, @time, buddy_alias, status)
    end
  end
end
@@ -0,0 +1,7 @@
1
module Pipio
  # An XMLMessage that was sent automatically (for example an away message).
  # It is serialised like a regular message plus an auto="true" attribute.
  class AutoReplyMessage < XMLMessage
    # Returns the <message> element for this auto-reply, terminated by a
    # newline.
    def to_s
      attributes = %(sender="#{sender_screen_name}" time="#{adium_formatted_time}" auto="true" alias="#{@sender_alias}")
      "<message #{attributes}>#{@styled_body}</message>\n"
    end
  end
end
@@ -0,0 +1,67 @@
1
module Pipio
  # Pidgin does not have Events, but Adium does. Pidgin mostly uses system
  # messages to display what Adium calls events. These include sending a file,
  # starting a Direct IM connection, or an error in chat.
  class Event < XMLMessage
    # Patterns whose event_type is always 'libpurpleEvent'. When a pattern has
    # a capture group, group 1 is the alias of the chat partner.
    LIB_PURPLE = [
      # file transfer
      /Starting transfer of .+ from (.+)/,
      /^Offering to send .+ to (.+)$/,
      /(.+) is offering to send file/,
      /^Transfer of file .+ complete$/,
      # The alternation must be grouped: without the group the pattern reads
      # as "Error reading" OR "writing" OR "accessing ...: ..." and matches any
      # text that merely contains the word "writing". The group is
      # non-capturing so the pattern still has no capture groups.
      /Error (?:reading|writing|accessing) .+: .+/,
      /You cancell?ed the transfer of/,
      /File transfer cancelled/,
      /(.+?) cancell?ed the transfer of/,
      /(.+?) cancelled the file transfer/,
      # Direct IM - actual (dis)connect events are their own types
      /^Attempting to connect to (.+) at .+ for Direct IM\./,
      /^Asking (.+) to connect to us at .+ for Direct IM\./,
      /^Attempting to connect via proxy server\.$/,
      /^Direct IM with (.+) failed/,
      # encryption
      /Received message encrypted with wrong key/,
      /^Requesting key\.\.\.$/,
      /^Outgoing message lost\.$/,
      /^Conflicting Key Received!$/,
      /^Error in decryption- asking for resend\.\.\.$/,
      /^Making new key pair\.\.\.$/,
      # sending errors
      /^Last outgoing message not received properly- resetting$/,
      /Resending\.\.\./,
      # connection errors
      /Lost connection with the remote user:.+/,
      # chats
      /^.+ entered the room\.$/,
      /^.+ left the room\.$/
    ].freeze

    # Adium ignores SN/alias changes.
    IGNORE = [/^.+? is now known as .+?\.<br\/?>$/].freeze

    # Each key maps to an event_type string. The keys will be matched against
    # a line of chat and the partner's alias will be in regex group 1, IF the
    # alias is matched.
    MAP = {
      # .+ is not an alias, it's a proxy server so no grouping
      /^Attempting to connect to .+\.$/ => 'direct-im-connect',
      # NB: pidgin doesn't track when Direct IM is disconnected, AFAIK
      /^Direct IM established$/ => 'directIMConnected',
      /Unable to send message/ => 'chat-error',
      /You missed .+ messages from (.+) because they were too large/ => 'chat-error',
      /User information not available/ => 'chat-error'
    }.freeze

    # sender_screen_name, time, sender_alias and body are passed on to
    # XMLMessage; event_type is the string written to the "type" attribute.
    def initialize(sender_screen_name, time, sender_alias, body, event_type)
      super(sender_screen_name, time, sender_alias, body)
      @event_type = event_type
    end

    attr_reader :event_type

    # Returns the <event> element for this event (no trailing newline).
    def to_s
      %(<event type="#{@event_type}" sender="#{@sender_screen_name}" time="#{adium_formatted_time}" alias="#{@sender_alias}">#{@styled_body}</event>)
    end
  end
end
@@ -0,0 +1,23 @@
1
module Pipio
  # Base holder for one line of a chat: who sent it (screen name and alias)
  # and when. Subclasses such as AutoReplyMessage add a to_s that renders the
  # line in the format used by Adium log files; Message itself has no to_s.
  # Comparable is mixed in, but this class does not define <=>.
  class Message
    include Comparable

    attr_reader :sender_screen_name, :time, :sender_alias

    def initialize(sender_screen_name, time, sender_alias)
      @sender_screen_name, @time, @sender_alias = sender_screen_name, time, sender_alias
    end

    private

    # The message time rendered with Time#xmlschema.
    def adium_formatted_time
      time.xmlschema
    end
  end
end
@@ -0,0 +1,26 @@
1
module Pipio
  # A message that records a status change, e.g. "Blahblah has gone away."
  # It has no body; only the new status is stored.
  class StatusMessage < Message
    # Maps a pattern for the Pidgin status text to the status string written
    # to the log. Group 1 of every pattern is the alias of the buddy.
    MAP = {
      /(.+) logged in\.$/ => 'online',
      /(.+) logged out\.$/ => 'offline',
      /(.+) has signed on\.$/ => 'online',
      /(.+) has signed off\.$/ => 'offline',
      /(.+) has gone away\.$/ => 'away',
      /(.+) is no longer away\.$/ => 'available',
      /(.+) has become idle\.$/ => 'idle',
      /(.+) is no longer idle\.$/ => 'available'
    }

    attr_reader :status

    # status is the string written to the "type" attribute; the remaining
    # arguments are passed on to Message.
    def initialize(sender_screen_name, time, sender_alias, status)
      super(sender_screen_name, time, sender_alias)
      @status = status
    end

    # Returns the self-closing <status> element, terminated by a newline.
    def to_s
      attributes = %(type="#{@status}" sender="#{@sender_screen_name}" time="#{adium_formatted_time}" alias="#{@sender_alias}")
      "<status #{attributes}/>\n"
    end
  end
end
@@ -0,0 +1,43 @@
1
module Pipio
  # Basic message with body text (as opposed to pure status messages, which
  # have no body).
  class XMLMessage < Message
    # body is normalized (entities escaped, tags balanced, /me actions
    # rewritten) before it is stored and wrapped in the styled <div>.
    def initialize(sender_screen_name, time, sender_alias, body)
      super(sender_screen_name, time, sender_alias)
      @body = normalize(body)
      @styled_body = %(<div><span style="font-family: Helvetica; font-size: 12pt;">#{@body}</span></div>)
    end

    attr_reader :body

    # Returns the <message> element for this message, terminated by a newline.
    def to_s
      %(<message sender="#{@sender_screen_name}" time="#{adium_formatted_time}" alias="#{@sender_alias}">#{@styled_body}</message>\n)
    end

    private

    # Balances mismatched tags, normalizes body style, and fixes actions
    # so they are in Adium style (Pidgin uses "***Buddy waves at you", Adium uses
    # "*Buddy waves at you*").
    def normalize(string)
      # Fix mismatched tags. Yes, it's faster to do it per-message
      # than all at once.
      new_body = Pipio::TagBalancer.new(normalize_entities(string)).balance
      return new_body unless action_alias?

      # "***<alias>" is what pidgin sets as the alias for a /me action.
      # Build a new string instead of slicing in place: the alias object was
      # handed in by the caller, who may reuse it for later messages or may
      # have frozen it.
      @sender_alias = @sender_alias[3..-1]
      "*#{new_body}*"
    end

    # True when the alias carries Pidgin's "***" action prefix. A missing
    # alias is treated as a regular (non-action) message.
    def action_alias?
      !@sender_alias.nil? && @sender_alias.start_with?('***')
    end

    # Escapes all entities in string except for "&lt;", "&gt;", "&amp;", "&quot;",
    # and "&apos;".
    def normalize_entities(string)
      # Convert '&' to '&amp;' unless it starts one of the entities above.
      # The terminating ';' is part of the check so that text such as "&ltd"
      # is escaped as well.
      string.gsub(/&(?!(?:lt|gt|amp|quot|apos);)/, '&amp;')
    end
  end
end
@@ -0,0 +1,34 @@
1
module Pipio
  # Holds the metadata of one chat log: the service, both screen names and
  # the time the conversation started.
  class Metadata
    attr_reader :my_screen_name, :their_screen_name, :start_time, :service

    # metadata_hash is read with the keys :service, :my_screen_name,
    # :their_screen_name and :start_time. Only my_screen_name is normalized
    # (downcased, spaces removed); the other values are stored as given.
    def initialize(metadata_hash)
      @service = metadata_hash[:service]
      @my_screen_name = normalize_screen_name(metadata_hash[:my_screen_name])
      @their_screen_name = metadata_hash[:their_screen_name]
      @start_time = metadata_hash[:start_time]
    end

    # True only when every one of the four values is present (truthy).
    def valid?
      [@their_screen_name, @my_screen_name, @start_time, @service].none? { |value| !value }
    end

    # Year of the start time.
    def start_year
      start_time.year
    end

    # Month of the start time.
    def start_month
      start_time.mon
    end

    # Day of the month of the start time.
    def start_mday
      start_time.mday
    end

    private

    # Downcases the screen name and strips its spaces; a missing screen name
    # is passed through untouched.
    def normalize_screen_name(screen_name)
      return screen_name unless screen_name

      screen_name.downcase.delete(' ')
    end
  end
end
@@ -0,0 +1,55 @@
1
module Pipio
  # Extracts chat metadata from the first line of a log, which has the form
  # "Conversation with <them> at <timestamp> on <me> (<service>)".
  class MetadataParser
    # first_line may be nil, in which case it is treated as an empty string.
    def initialize(first_line)
      @first_line = first_line || ''
    end

    # Returns a hash with the keys :my_screen_name, :their_screen_name,
    # :start_time and :service. A value is nil when its part of the line
    # could not be found.
    def parse
      {
        my_screen_name: my_screen_name,
        their_screen_name: their_screen_name,
        start_time: start_time,
        service: service
      }
    end

    private

    # The lowercase word in parentheses.
    def service
      @first_line[/\(([a-z]+)\)/, 1]
    end

    # The text between "Conversation with " and " at".
    def their_screen_name
      @first_line[/Conversation with (.+?) at/, 1]
    end

    # The text after " on " that contains no parentheses.
    def my_screen_name
      @first_line[/ on ([^()]+) /, 1]
    end

    # The parsed timestamp found between " at " and " on", or nil when the
    # line has no such timestamp.
    def start_time
      timestamp = @first_line[%r{ at ([-\d/APM: ]+) on}, 1]
      parse_time(timestamp) if timestamp
    end

    # Tries Time.parse first; when it rejects the timestamp with an
    # ArgumentError the timestamp is handed to TimeParser instead.
    def parse_time(timestamp)
      Time.parse(timestamp)
    rescue ArgumentError
      TimeParser.new(nil, nil, nil).parse(timestamp)
    end
  end
end
@@ -0,0 +1,32 @@
1
module Pipio
  # Chooses the parser class for a log file from the file's extension.
  class ParserFactory
    # Lowercase extension (without the leading period) => parser class.
    PARSER_FOR_EXTENSION = {
      "html" => HtmlLogParser,
      "htm" => HtmlLogParser,
      "txt" => TextLogParser
    }

    # logfile_path - path of the log file to parse
    # aliases      - passed through unchanged to the parser's constructor
    def initialize(logfile_path, aliases)
      @logfile_path = logfile_path
      @aliases = aliases
    end

    # Returns a new parser for the log file. Extensions that are not listed
    # in PARSER_FOR_EXTENSION get a NullParser.
    def parser
      parser_class = PARSER_FOR_EXTENSION.fetch(extension, NullParser)
      parser_class.new(@logfile_path, @aliases)
    end

    private

    # The downcased file extension with its leading period dropped.
    def extension
      File.extname(@logfile_path).downcase[1..-1]
    end
  end
end
@@ -0,0 +1,83 @@
1
module Pipio
  # Shared parsing logic for log files: reads the file, extracts the metadata
  # from its first line and turns every following line into a message object.
  class BasicParser
    # source_file_path  - path of the log file
    # my_aliases        - comma-separated string of the user's aliases
    # line_regex        - matches a regular message line; must provide the
    #                     named groups timestamp, sn_or_alias, body, auto_reply
    # line_regex_status - matches a status/event line; must provide the named
    #                     groups timestamp and body
    # cleaner           - handed to FileReader together with the path
    def initialize(source_file_path, my_aliases, line_regex, line_regex_status, cleaner)
      @my_aliases = my_aliases.split(',')
      @my_alias = @my_aliases.first
      @line_regex = line_regex
      @line_regex_status = line_regex_status

      @file_reader = FileReader.new(source_file_path, cleaner)
    end

    # Returns a Chat instance, or nil if the metadata of the file could not
    # be parsed. The Chat receives one entry per line after the first; the
    # entry is nil for a line that produced no message.
    def parse
      return unless pre_parse

      messages = @file_reader.other_lines.map { |line| message_for(line) }
      Chat.new(messages, @metadata)
    end

    # Extract required data from the file. Run by parse. Returns a truthy
    # value when the metadata is valid and nil otherwise.
    def pre_parse
      @file_reader.read
      candidate = Metadata.new(MetadataParser.new(@file_reader.first_line).parse)
      return unless candidate.valid?

      @metadata = candidate
      @alias_registry = AliasRegistry.new(@metadata.their_screen_name)
      # Every alias of the user resolves to the user's own screen name.
      @my_aliases.each do |my_alias|
        @alias_registry[my_alias] = @metadata.my_screen_name
      end
    end

    # Builds either a regular message or an auto-reply/away message from a
    # match of the message line regex. Returns nil when the timestamp cannot
    # be parsed.
    def create_message(match_data)
      time = time_parser.parse(match_data[:timestamp])
      return unless time

      sender_alias = match_data[:sn_or_alias]
      sender_screen_name = @alias_registry[sender_alias]
      AutoOrXmlMessageCreator.new(
        match_data[:body], time, sender_screen_name, sender_alias, match_data[:auto_reply]
      ).create
    end

    # Builds a status message or, failing that, an event from a match of the
    # status line regex. Returns nil when the timestamp cannot be parsed or
    # the text is on the ignore list.
    def create_status_or_event_message(match_data)
      time = time_parser.parse(match_data[:timestamp])
      text = match_data[:body]
      return unless time && event_we_care_about?(text)

      create_status_message(text, time) || create_event_message(text, time)
    end

    # Memoized TimeParser seeded with the date the chat started.
    def time_parser
      @time_parser ||= TimeParser.new(@metadata.start_year, @metadata.start_month, @metadata.start_mday)
    end

    private

    # The message for one line: regular lines take precedence over
    # status/event lines; nil when the line matches neither regex.
    def message_for(line)
      if (message_match = @line_regex.match(line))
        create_message(message_match)
      elsif (status_match = @line_regex_status.match(line))
        create_status_or_event_message(status_match)
      end
    end

    # False for texts that match one of the Event::IGNORE patterns.
    def event_we_care_about?(str)
      Event::IGNORE.none? { |regex| str =~ regex }
    end

    def create_event_message(text, time)
      EventMessageCreator.new(text, time, @my_alias, @metadata.my_screen_name, @alias_registry).create
    end

    def create_status_message(text, time)
      StatusMessageCreator.new(text, time, @alias_registry).create
    end
  end
end