pipio 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +27 -0
  3. data/.rspec +2 -0
  4. data/.simplecov +5 -0
  5. data/.travis.yml +12 -0
  6. data/Gemfile +3 -0
  7. data/LICENSE +20 -0
  8. data/NEWS.md +10 -0
  9. data/README.md +88 -0
  10. data/Rakefile +13 -0
  11. data/lib/pipio.rb +34 -0
  12. data/lib/pipio/alias_registry.rb +26 -0
  13. data/lib/pipio/chat.rb +39 -0
  14. data/lib/pipio/cleaners/html_cleaner.rb +95 -0
  15. data/lib/pipio/cleaners/text_cleaner.rb +15 -0
  16. data/lib/pipio/file_reader.rb +29 -0
  17. data/lib/pipio/message_creators/auto_or_xml_message_creator.rb +25 -0
  18. data/lib/pipio/message_creators/event_message_creator.rb +47 -0
  19. data/lib/pipio/message_creators/status_message_creator.rb +19 -0
  20. data/lib/pipio/messages/auto_reply_message.rb +7 -0
  21. data/lib/pipio/messages/event.rb +67 -0
  22. data/lib/pipio/messages/message.rb +23 -0
  23. data/lib/pipio/messages/status_message.rb +26 -0
  24. data/lib/pipio/messages/xml_message.rb +43 -0
  25. data/lib/pipio/metadata.rb +34 -0
  26. data/lib/pipio/metadata_parser.rb +55 -0
  27. data/lib/pipio/parser_factory.rb +32 -0
  28. data/lib/pipio/parsers/basic_parser.rb +83 -0
  29. data/lib/pipio/parsers/html_log_parser.rb +22 -0
  30. data/lib/pipio/parsers/null_parser.rb +9 -0
  31. data/lib/pipio/parsers/text_log_parser.rb +21 -0
  32. data/lib/pipio/tag_balancer.rb +163 -0
  33. data/lib/pipio/time_parser.rb +36 -0
  34. data/lib/pipio/version.rb +3 -0
  35. data/pipio.gemspec +27 -0
  36. data/spec/pipio/alias_registry_spec.rb +37 -0
  37. data/spec/pipio/chat_spec.rb +66 -0
  38. data/spec/pipio/cleaners/html_cleaner_spec.rb +102 -0
  39. data/spec/pipio/cleaners/text_cleaner_spec.rb +29 -0
  40. data/spec/pipio/file_reader_spec.rb +130 -0
  41. data/spec/pipio/messages/auto_reply_message_spec.rb +40 -0
  42. data/spec/pipio/messages/event_spec.rb +41 -0
  43. data/spec/pipio/messages/status_message_spec.rb +43 -0
  44. data/spec/pipio/messages/xml_message_spec.rb +55 -0
  45. data/spec/pipio/metadata_parser_spec.rb +81 -0
  46. data/spec/pipio/metadata_spec.rb +72 -0
  47. data/spec/pipio/parser_factory_spec.rb +31 -0
  48. data/spec/pipio/parsers/html_log_parser_spec.rb +160 -0
  49. data/spec/pipio/parsers/null_parser_spec.rb +13 -0
  50. data/spec/pipio/parsers/text_log_parser_spec.rb +37 -0
  51. data/spec/pipio/tag_balancer_spec.rb +16 -0
  52. data/spec/pipio/time_parser_spec.rb +66 -0
  53. data/spec/pipio_spec.rb +63 -0
  54. data/spec/spec_helper.rb +18 -0
  55. data/spec/support/chat_builder.rb +29 -0
  56. data/spec/support/chat_builder_helpers.rb +41 -0
  57. data/spec/support/file_builder.rb +22 -0
  58. data/spec/support/html_chat_builder.rb +67 -0
  59. data/spec/support/logfiles/2006-12-21.223606.txt +3 -0
  60. data/spec/support/logfiles/2008-01-15.071445-0500PST.htm +5 -0
  61. data/spec/support/logfiles/2008-01-15.071445-0500PST.html +5 -0
  62. data/spec/support/text_chat_builder.rb +21 -0
  63. data/spec/test-output/README.md +1 -0
  64. data/spec/test-output/html_log_output.xml +6 -0
  65. data/spec/test-output/text_log_output.xml +4 -0
  66. metadata +193 -0
@@ -0,0 +1,25 @@
1
module Pipio
  # Builds the message object for an ordinary chat line: an AutoReplyMessage
  # when the line was flagged as an auto-reply, an XMLMessage otherwise.
  class AutoOrXmlMessageCreator
    # text               - body of the message
    # time               - timestamp handed through to the message
    # sender_screen_name - screen name of the sender
    # sender_alias       - alias of the sender
    # is_auto_reply      - any truthy value marks the line as an auto-reply
    def initialize(text, time, sender_screen_name, sender_alias, is_auto_reply)
      @text, @time = text, time
      @sender_screen_name, @sender_alias = sender_screen_name, sender_alias
      @is_auto_reply = is_auto_reply
    end

    # Returns a new AutoReplyMessage or XMLMessage built from the stored
    # fields.
    def create
      message_class.new(@sender_screen_name, @time, @sender_alias, @text)
    end

    private

    # Picks the class to instantiate from the truthiness of the auto-reply
    # flag.
    def message_class
      @is_auto_reply ? AutoReplyMessage : XMLMessage
    end
  end
end
@@ -0,0 +1,47 @@
1
module Pipio
  # Turns a line of system text into an Event, trying the libpurple patterns
  # (Event::LIB_PURPLE) first and the typed patterns (Event::MAP) second.
  class EventMessageCreator
    # text               - the line of chat text to classify
    # time               - timestamp handed through to the Event
    # sender_alias       - alias used when the pattern captures no alias
    # sender_screen_name - screen name used when the pattern captures no alias
    # alias_registry     - object indexed by alias, yielding a screen name
    def initialize(text, time, sender_alias, sender_screen_name, alias_registry)
      @text, @time = text, time
      @sender_alias, @sender_screen_name = sender_alias, sender_screen_name
      @alias_registry = alias_registry
    end

    # Returns an Event, or nil when no known pattern matches the text.
    def create
      create_lib_purple_event_message || create_non_lib_purple_event_message
    end

    private

    # Event for the first matching Event::LIB_PURPLE pattern, or nil.
    def create_lib_purple_event_message
      pattern = Event::LIB_PURPLE.find { |candidate| @text =~ candidate }
      return unless pattern

      create_event_message_from(pattern, 'libpurpleEvent')
    end

    # Event for the first matching Event::MAP entry, or nil.
    def create_non_lib_purple_event_message
      pattern, event_type = Event::MAP.find { |candidate, _type| @text =~ candidate }
      return unless pattern && event_type

      create_event_message_from(pattern, event_type)
    end

    # Builds the Event. A MatchData of size 1 has no capture groups, which
    # means no alias was captured and the event belongs to the user;
    # otherwise group 1 is the alias and its screen name comes from the
    # registry.
    def create_event_message_from(regex, event_type)
      captured = regex.match(@text)
      sender_alias, sender_screen_name =
        if captured.size == 1
          [@sender_alias, @sender_screen_name]
        else
          [captured[1], @alias_registry[captured[1]]]
        end

      Event.new(sender_screen_name, @time, sender_alias, @text, event_type)
    end
  end
end
@@ -0,0 +1,19 @@
1
module Pipio
  # Turns a line such as "Buddy has gone away." into a StatusMessage by
  # matching it against the patterns in StatusMessage::MAP.
  class StatusMessageCreator
    # text           - the line of chat text to classify
    # time           - timestamp handed through to the StatusMessage
    # alias_registry - object indexed by alias, yielding a screen name
    def initialize(text, time, alias_registry)
      @text, @time, @alias_registry = text, time, alias_registry
    end

    # Returns a StatusMessage for the first matching pattern, or nil when
    # the text matches none of them. Capture group 1 of the pattern is the
    # sender's alias.
    def create
      pattern, status = StatusMessage::MAP.find { |candidate, _status| @text =~ candidate }
      return unless pattern && status

      sender_alias = pattern.match(@text)[1]
      StatusMessage.new(@alias_registry[sender_alias], @time, sender_alias, status)
    end
  end
end
@@ -0,0 +1,7 @@
1
module Pipio
  # An XMLMessage that is rendered with an extra auto="true" attribute.
  class AutoReplyMessage < XMLMessage
    # Returns the <message> element for this auto-reply, terminated by a
    # newline.
    def to_s
      attributes = %(sender="#{sender_screen_name}" time="#{adium_formatted_time}" auto="true" alias="#{@sender_alias}")
      "<message #{attributes}>#{@styled_body}</message>\n"
    end
  end
end
@@ -0,0 +1,67 @@
1
module Pipio
  # Pidgin does not have Events, but Adium does. Pidgin mostly uses system
  # messages to display what Adium calls events. These include sending a file,
  # starting a Direct IM connection, or an error in chat.
  class Event < XMLMessage
    # All of event_type libPurple.
    #
    # When a pattern has a capture group, group 1 is the alias of the sender.
    # EventMessageCreator treats a match with no capture groups as an event
    # caused by the user, so any grouping added purely for alternation must
    # be non-capturing.
    LIB_PURPLE = [
      # file transfer
      /Starting transfer of .+ from (.+)/,
      /^Offering to send .+ to (.+)$/,
      /(.+) is offering to send file/,
      /^Transfer of file .+ complete$/,
      # The alternation is wrapped in a non-capturing group so the pattern
      # reads "Error <reading|writing|accessing> <file>: <reason>". Left
      # unwrapped, `|` splits the whole regex into three independent
      # alternatives and matches any line that merely contains "writing".
      /Error (?:reading|writing|accessing) .+: .+/,
      /You cancell?ed the transfer of/,
      /File transfer cancelled/,
      /(.+?) cancell?ed the transfer of/,
      /(.+?) cancelled the file transfer/,
      # Direct IM - actual (dis)connect events are their own types
      /^Attempting to connect to (.+) at .+ for Direct IM\./,
      /^Asking (.+) to connect to us at .+ for Direct IM\./,
      /^Attempting to connect via proxy server\.$/,
      /^Direct IM with (.+) failed/,
      # encryption
      /Received message encrypted with wrong key/,
      /^Requesting key\.\.\.$/,
      /^Outgoing message lost\.$/,
      /^Conflicting Key Received!$/,
      /^Error in decryption- asking for resend\.\.\.$/,
      /^Making new key pair\.\.\.$/,
      # sending errors
      /^Last outgoing message not received properly- resetting$/,
      /Resending\.\.\./,
      # connection errors
      /Lost connection with the remote user:.+/,
      # chats
      /^.+ entered the room\.$/,
      /^.+ left the room\.$/
    ]

    # Adium ignores SN/alias changes.
    IGNORE = [/^.+? is now known as .+?\.<br\/?>$/]

    # Each key maps to an event_type string. The keys will be matched against
    # a line of chat and the partner's alias will be in regex group 1, IF the
    # alias is matched.
    MAP = {
      # .+ is not an alias, it's a proxy server so no grouping
      /^Attempting to connect to .+\.$/ => 'direct-im-connect',
      # NB: pidgin doesn't track when Direct IM is disconnected, AFAIK
      /^Direct IM established$/ => 'directIMConnected',
      /Unable to send message/ => 'chat-error',
      /You missed .+ messages from (.+) because they were too large/ => 'chat-error',
      /User information not available/ => 'chat-error'
    }

    # sender_screen_name, time, sender_alias and body are passed to
    # XMLMessage unchanged; event_type is the string written to the "type"
    # attribute of the <event> element.
    def initialize(sender_screen_name, time, sender_alias, body, event_type)
      super(sender_screen_name, time, sender_alias, body)
      @event_type = event_type
    end

    attr_reader :event_type

    # Returns the <event> element for this event. Unlike the <message> and
    # <status> renderings, no trailing newline is appended.
    def to_s
      %(<event type="#{@event_type}" sender="#{@sender_screen_name}" time="#{adium_formatted_time}" alias="#{@sender_alias}">#{@styled_body}</event>)
    end
  end
end
@@ -0,0 +1,23 @@
1
module Pipio
  # Base holder for one line of a chat: who sent it, under which alias, and
  # when. Subclasses (e.g. AutoReplyMessage) add a to_s that renders the line
  # for an Adium log file; Message itself defines none.
  class Message
    include Comparable

    attr_reader :sender_screen_name, :time, :sender_alias

    # sender_screen_name - screen name of the sender
    # time               - timestamp of the line; must respond to #xmlschema
    #                      for rendering
    # sender_alias       - alias of the sender
    def initialize(sender_screen_name, time, sender_alias)
      @sender_screen_name, @time, @sender_alias = sender_screen_name, time, sender_alias
    end

    private

    # The timestamp as rendered into the "time" attribute (@time.xmlschema).
    def adium_formatted_time
      stamp = @time
      stamp.xmlschema
    end
  end
end
@@ -0,0 +1,26 @@
1
module Pipio
  # A message saying e.g. "Blahblah has gone away."
  class StatusMessage < Message
    # Pattern => status string. Capture group 1 of every pattern is the
    # alias of the buddy whose status changed.
    MAP = {
      /(.+) logged in\.$/ => 'online',
      /(.+) logged out\.$/ => 'offline',
      /(.+) has signed on\.$/ => 'online',
      /(.+) has signed off\.$/ => 'offline',
      /(.+) has gone away\.$/ => 'away',
      /(.+) is no longer away\.$/ => 'available',
      /(.+) has become idle\.$/ => 'idle',
      /(.+) is no longer idle\.$/ => 'available'
    }

    attr_reader :status

    # status is the string written to the "type" attribute; the remaining
    # arguments are passed to Message unchanged.
    def initialize(sender_screen_name, time, sender_alias, status)
      super(sender_screen_name, time, sender_alias)
      @status = status
    end

    # Returns the self-closing <status> element, terminated by a newline.
    def to_s
      attributes = %(type="#{@status}" sender="#{@sender_screen_name}" time="#{adium_formatted_time}" alias="#{@sender_alias}")
      "<status #{attributes}/>\n"
    end
  end
end
@@ -0,0 +1,43 @@
1
module Pipio
  # Basic message with body text (as opposed to pure status messages, which
  # have no body).
  class XMLMessage < Message
    # body is normalized (entities escaped, tags balanced, "/me" actions
    # converted) and then wrapped in the styled <div><span> markup used by
    # to_s. The remaining arguments are passed to Message unchanged.
    def initialize(sender_screen_name, time, sender_alias, body)
      super(sender_screen_name, time, sender_alias)
      @body = normalize(body)
      @styled_body = %(<div><span style="font-family: Helvetica; font-size: 12pt;">#{@body}</span></div>)
    end

    attr_reader :body

    # Returns the <message> element for this line, terminated by a newline.
    def to_s
      %(<message sender="#{@sender_screen_name}" time="#{adium_formatted_time}" alias="#{@sender_alias}">#{@styled_body}</message>\n)
    end

    private

    # Balances mismatched tags, normalizes body style, and fixes actions
    # so they are in Adium style (Pidgin uses "***Buddy waves at you", Adium uses
    # "*Buddy waves at you*").
    def normalize(string)
      new_body = normalize_entities(string)
      # Fix mismatched tags. Yes, it's faster to do it per-message
      # than all at once.
      new_body = Pipio::TagBalancer.new(new_body).balance
      # A nil alias is tolerated rather than crashing on nil[0, 3].
      if @sender_alias && @sender_alias.start_with?('***')
        # "***<alias>" is what pidgin sets as the alias for a /me action.
        # Assign a new string instead of slicing in place: the alias object
        # belongs to the caller, and mutating it would silently change the
        # caller's copy (or raise if that string is frozen).
        @sender_alias = @sender_alias[3..-1]
        new_body = "*#{new_body}*"
      end

      new_body
    end

    # Escapes every "&" that does not begin one of the entities "&lt;",
    # "&gt;", "&amp;", "&quot;" or "&apos;".
    def normalize_entities(string)
      # The lookahead requires the terminating ";" so that text which merely
      # starts like an entity name (e.g. "&amplifier") is still escaped.
      string.gsub(/&(?!(?:lt|gt|amp|quot|apos);)/, '&amp;')
    end
  end
end
@@ -0,0 +1,34 @@
1
module Pipio
  # Value holder for the facts read from a log's header: the service, both
  # screen names and the time the conversation started.
  class Metadata
    attr_reader :my_screen_name, :their_screen_name, :start_time, :service

    # metadata_hash is indexed with the symbols :service, :my_screen_name,
    # :their_screen_name and :start_time. Only :my_screen_name is normalized
    # (downcased, spaces removed); the other values are stored as given.
    def initialize(metadata_hash)
      @service           = metadata_hash[:service]
      @my_screen_name    = normalize_screen_name(metadata_hash[:my_screen_name])
      @their_screen_name = metadata_hash[:their_screen_name]
      @start_time        = metadata_hash[:start_time]
    end

    # True only when all four fields are truthy.
    def valid?
      required = [@their_screen_name, @my_screen_name, @start_time, @service]
      required.none? { |value| !value }
    end

    # Year of the start time.
    def start_year
      @start_time.year
    end

    # Month of the start time.
    def start_month
      @start_time.mon
    end

    # Day of the month of the start time.
    def start_mday
      @start_time.mday
    end

    private

    # Lower-cases the name and strips its spaces; a nil/false name is
    # returned untouched.
    def normalize_screen_name(screen_name)
      return screen_name unless screen_name

      screen_name.downcase.delete(' ')
    end
  end
end
@@ -0,0 +1,55 @@
1
module Pipio
  # Extracts the screen names, service and start time from the first line of
  # a log file.
  class MetadataParser
    # first_line may be nil, in which case it is treated as an empty string
    # and every parsed value comes back nil.
    def initialize(first_line)
      @first_line = first_line || ''
    end

    # Returns a hash with the keys :my_screen_name, :their_screen_name,
    # :start_time and :service. A value is nil when its pattern does not
    # match the first line.
    def parse
      {
        my_screen_name: my_screen_name,
        their_screen_name: their_screen_name,
        start_time: start_time,
        service: service
      }
    end

    private

    # The lower-case word found between parentheses.
    def service
      @first_line[/\(([a-z]+)\)/, 1]
    end

    # The text between "Conversation with " and the following " at".
    def their_screen_name
      @first_line[/Conversation with (.+?) at/, 1]
    end

    # The parenthesis-free text that follows " on ".
    def my_screen_name
      @first_line[/ on ([^()]+) /, 1]
    end

    # The timestamp between " at " and " on", parsed into a time value.
    def start_time
      timestamp = @first_line[%r{ at ([-\d/APM: ]+) on}, 1]
      parse_time(timestamp) if timestamp
    end

    # Tries Time.parse first; when that raises ArgumentError the timestamp
    # is handed to a TimeParser built without a fallback date.
    def parse_time(timestamp)
      Time.parse(timestamp)
    rescue ArgumentError
      TimeParser.new(nil, nil, nil).parse(timestamp)
    end
  end
end
@@ -0,0 +1,32 @@
1
module Pipio
  # Chooses the parser class for a log file from the file's extension.
  class ParserFactory
    # Lower-case extension (without the period) => parser class.
    PARSER_FOR_EXTENSION = {
      "html" => HtmlLogParser,
      "htm" => HtmlLogParser,
      "txt" => TextLogParser
    }

    # logfile_path - path of the log file to parse
    # aliases      - passed through unchanged to the parser's constructor
    def initialize(logfile_path, aliases)
      @logfile_path, @aliases = logfile_path, aliases
    end

    # Returns a new parser instance for the log file; a NullParser when the
    # extension is not in PARSER_FOR_EXTENSION.
    def parser
      parser_class.new(@logfile_path, @aliases)
    end

    private

    def parser_class
      PARSER_FOR_EXTENSION.fetch(extension) { NullParser }
    end

    # The lower-cased extension with its leading period dropped; nil when
    # the path has no extension.
    def extension
      File.extname(@logfile_path).downcase[1..-1]
    end
  end
end
@@ -0,0 +1,83 @@
1
module Pipio
  # Shared parsing logic: reads a log file, extracts its metadata and turns
  # each remaining line into a message, status or event object.
  class BasicParser
    # source_file_path  - path of the log file
    # my_aliases        - comma-separated string of the user's aliases; the
    #                     first one is used as the user's alias for events
    # line_regex        - pattern for ordinary message lines; read through
    #                     the named groups :timestamp, :sn_or_alias, :body
    #                     and :auto_reply
    # line_regex_status - pattern for status/event lines; read through the
    #                     named groups :timestamp and :body
    # cleaner           - handed to FileReader together with the path
    def initialize(source_file_path, my_aliases, line_regex, line_regex_status, cleaner)
      @my_aliases = my_aliases.split(',')
      @my_alias = @my_aliases.first
      @line_regex = line_regex
      @line_regex_status = line_regex_status

      @file_reader = FileReader.new(source_file_path, cleaner)
    end

    # Returns a Chat built from the file, or nil when the metadata read by
    # pre_parse is not valid. Lines that match neither pattern contribute a
    # nil entry to the list handed to Chat.
    def parse
      return unless pre_parse

      messages = @file_reader.other_lines.map { |line| message_for(line) }
      Chat.new(messages, @metadata)
    end

    # Extract required data from the file. Run by parse. Returns a truthy
    # value only when the metadata is valid; in that case the metadata is
    # stored and every one of the user's aliases is registered under the
    # user's screen name.
    def pre_parse
      @file_reader.read
      metadata = Metadata.new(MetadataParser.new(@file_reader.first_line).parse)
      return unless metadata.valid?

      @metadata = metadata
      @alias_registry = AliasRegistry.new(@metadata.their_screen_name)
      @my_aliases.each { |my_alias| @alias_registry[my_alias] = @metadata.my_screen_name }
    end

    # Either a regular message line or an auto-reply/away message. Returns
    # nil when the timestamp cannot be parsed.
    def create_message(match_data)
      time = time_parser.parse(match_data[:timestamp])
      return unless time

      sender_alias = match_data[:sn_or_alias]
      sender_screen_name = @alias_registry[sender_alias]
      body = match_data[:body]
      auto_reply_flag = match_data[:auto_reply]

      AutoOrXmlMessageCreator.new(body, time, sender_screen_name, sender_alias, auto_reply_flag).create
    end

    # Builds a status message, falling back to an event. Returns nil when
    # the timestamp cannot be parsed or the text matches Event::IGNORE.
    def create_status_or_event_message(match_data)
      time = time_parser.parse(match_data[:timestamp])
      text = match_data[:body]
      return unless time && event_we_care_about?(text)

      create_status_message(text, time) || create_event_message(text, time)
    end

    # Memoized TimeParser seeded with the date the chat started.
    def time_parser
      @time_parser ||= TimeParser.new(@metadata.start_year, @metadata.start_month, @metadata.start_mday)
    end

    private

    # Dispatches one line: ordinary message lines take precedence over
    # status/event lines; nil when the line matches neither pattern.
    def message_for(line)
      if (message_match = @line_regex.match(line))
        create_message(message_match)
      elsif (meta_match = @line_regex_status.match(line))
        create_status_or_event_message(meta_match)
      end
    end

    def event_we_care_about?(str)
      !Event::IGNORE.any? { |ignored| str =~ ignored }
    end

    def create_event_message(text, time)
      EventMessageCreator.new(text, time, @my_alias, @metadata.my_screen_name, @alias_registry).create
    end

    def create_status_message(text, time)
      StatusMessageCreator.new(text, time, @alias_registry).create
    end
  end
end