pipio 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +27 -0
  3. data/.rspec +2 -0
  4. data/.simplecov +5 -0
  5. data/.travis.yml +12 -0
  6. data/Gemfile +3 -0
  7. data/LICENSE +20 -0
  8. data/NEWS.md +10 -0
  9. data/README.md +88 -0
  10. data/Rakefile +13 -0
  11. data/lib/pipio.rb +34 -0
  12. data/lib/pipio/alias_registry.rb +26 -0
  13. data/lib/pipio/chat.rb +39 -0
  14. data/lib/pipio/cleaners/html_cleaner.rb +95 -0
  15. data/lib/pipio/cleaners/text_cleaner.rb +15 -0
  16. data/lib/pipio/file_reader.rb +29 -0
  17. data/lib/pipio/message_creators/auto_or_xml_message_creator.rb +25 -0
  18. data/lib/pipio/message_creators/event_message_creator.rb +47 -0
  19. data/lib/pipio/message_creators/status_message_creator.rb +19 -0
  20. data/lib/pipio/messages/auto_reply_message.rb +7 -0
  21. data/lib/pipio/messages/event.rb +67 -0
  22. data/lib/pipio/messages/message.rb +23 -0
  23. data/lib/pipio/messages/status_message.rb +26 -0
  24. data/lib/pipio/messages/xml_message.rb +43 -0
  25. data/lib/pipio/metadata.rb +34 -0
  26. data/lib/pipio/metadata_parser.rb +55 -0
  27. data/lib/pipio/parser_factory.rb +32 -0
  28. data/lib/pipio/parsers/basic_parser.rb +83 -0
  29. data/lib/pipio/parsers/html_log_parser.rb +22 -0
  30. data/lib/pipio/parsers/null_parser.rb +9 -0
  31. data/lib/pipio/parsers/text_log_parser.rb +21 -0
  32. data/lib/pipio/tag_balancer.rb +163 -0
  33. data/lib/pipio/time_parser.rb +36 -0
  34. data/lib/pipio/version.rb +3 -0
  35. data/pipio.gemspec +27 -0
  36. data/spec/pipio/alias_registry_spec.rb +37 -0
  37. data/spec/pipio/chat_spec.rb +66 -0
  38. data/spec/pipio/cleaners/html_cleaner_spec.rb +102 -0
  39. data/spec/pipio/cleaners/text_cleaner_spec.rb +29 -0
  40. data/spec/pipio/file_reader_spec.rb +130 -0
  41. data/spec/pipio/messages/auto_reply_message_spec.rb +40 -0
  42. data/spec/pipio/messages/event_spec.rb +41 -0
  43. data/spec/pipio/messages/status_message_spec.rb +43 -0
  44. data/spec/pipio/messages/xml_message_spec.rb +55 -0
  45. data/spec/pipio/metadata_parser_spec.rb +81 -0
  46. data/spec/pipio/metadata_spec.rb +72 -0
  47. data/spec/pipio/parser_factory_spec.rb +31 -0
  48. data/spec/pipio/parsers/html_log_parser_spec.rb +160 -0
  49. data/spec/pipio/parsers/null_parser_spec.rb +13 -0
  50. data/spec/pipio/parsers/text_log_parser_spec.rb +37 -0
  51. data/spec/pipio/tag_balancer_spec.rb +16 -0
  52. data/spec/pipio/time_parser_spec.rb +66 -0
  53. data/spec/pipio_spec.rb +63 -0
  54. data/spec/spec_helper.rb +18 -0
  55. data/spec/support/chat_builder.rb +29 -0
  56. data/spec/support/chat_builder_helpers.rb +41 -0
  57. data/spec/support/file_builder.rb +22 -0
  58. data/spec/support/html_chat_builder.rb +67 -0
  59. data/spec/support/logfiles/2006-12-21.223606.txt +3 -0
  60. data/spec/support/logfiles/2008-01-15.071445-0500PST.htm +5 -0
  61. data/spec/support/logfiles/2008-01-15.071445-0500PST.html +5 -0
  62. data/spec/support/text_chat_builder.rb +21 -0
  63. data/spec/test-output/README.md +1 -0
  64. data/spec/test-output/html_log_output.xml +6 -0
  65. data/spec/test-output/text_log_output.xml +4 -0
  66. metadata +193 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a27a6c6a854887062351e5a362482a44990ce35e
4
+ data.tar.gz: 455730fbf37e619f747cfe89d0612fb42ee85999
5
+ SHA512:
6
+ metadata.gz: 0192a3e9a130603f28698edc216007c30321a921c4b66a6a3cea5368a02262f0be2037802408b5a7576552c1d625e246da06447f4f5ac37ccd735695b18c86c0
7
+ data.tar.gz: 532da7ab24471befe059573f39d99dd65b214de408783215f0e3e73272088550d03d531834b882a4af14349b7904c1c3e50ce371947ae3bf562f61cf96acb07b
@@ -0,0 +1,27 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+ tags
21
+
22
+ ## PROJECT::SPECIFIC
23
+ .bundle
24
+ .rvmrc
25
+ spec/output-dir
26
+ spec/nonexistent_output_dir
27
+ Gemfile.lock
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ -r spec_helper
@@ -0,0 +1,5 @@
1
+ if ENV["COVERAGE"]
2
+ SimpleCov.start do
3
+ add_filter "/spec/"
4
+ end
5
+ end
@@ -0,0 +1,12 @@
1
+ rvm:
2
+ - 2.1.2
3
+ - 2.1.1
4
+ - 2.0.0
5
+ - 1.9.3
6
+ - 1.9.2
7
+ branches:
8
+ only:
9
+ - master
10
+ notifications:
11
+ email:
12
+ - false
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 Gabriel Berke-Williams
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/NEWS.md ADDED
@@ -0,0 +1,10 @@
1
+ ## 0.0.1 (unreleased)
2
+
3
+ Extract Pidgin2Adium, commit hash 96c443fd244b3d2d57564bf17c68d3ec1bcb48ff, into
4
+ this library, Pipio. There are also other commits in this release that are not in
5
+ Pidgin2Adium as of that commit:
6
+
7
+ * Expose `Chat#my_screen_name`
8
+ * Rename `Chat#lines` to `Chat#messages`
9
+ * Expose `Chat#service`
10
+ * Join the lines of `Chat#to_s` with "\n" instead of nothing
@@ -0,0 +1,88 @@
1
+ # pipio [![Build Status](https://secure.travis-ci.org/gabebw/pipio.png)](http://travis-ci.org/gabebw/pipio) [![Code Climate](https://codeclimate.com/github/gabebw/pipio.png)](https://codeclimate.com/github/gabebw/pipio)
2
+
3
+ Pipio parses [Pidgin](http://pidgin.im/) (formerly gaim) logs. It can output
4
+ them in Adium format by calling `to_s` on a `Pipio::Chat` object or any of the
5
+ message objects in `Pipio::Chat#messages`.
6
+
7
+ ## For the impatient
8
+
9
+ To deal with meta-information about the chat itself:
10
+
11
+ path_to_chat_log = File.expand_path('~/path/to/chat_log.html') # or .txt
12
+ chat = Pipio.parse(path_to_chat_log, "Gabe B-W,Gabe,Other Alias")
13
+ if chat
14
+ puts "Screen name of the person you chatted with: #{chat.their_screen_name}"
15
+ puts "Time the chat started: #{chat.start_time_xmlschema}"
16
+ puts "Chat contents, in adium format:"
17
+ puts chat.to_s
18
+ else
19
+ puts "Oh no! Could not parse! Please open an issue."
20
+ puts path_to_chat_log
21
+ exit 1
22
+ end
23
+
24
+ Or, to deal with individual messages in a chat:
25
+
26
+ chat = Pipio.parse("/path/to/log/file.html", "gabe,gbw,gabeb-w")
27
+ chat.each do |message|
28
+ puts "Screen name of person who sent this message: #{message.sender_screen_name}"
29
+ puts "Alias of person who sent this message: #{message.sender_alias}"
30
+ puts "Time message was sent: #{message.time}"
31
+
32
+ if message.respond_to?(:body)
33
+ puts "Message body: #{message.body}"
34
+ if message.respond_to?(:event_type)
35
+ puts "Event type: #{message.event_type}"
36
+ end
37
+ elsif message.respond_to?(:status)
38
+ puts "Status: #{message.status}"
39
+ end
40
+
41
+ puts "Message in Adium format: #{message}"
42
+ end
43
+
44
+ ## The fine print
45
+
46
+ This library needs access to aliases to work correctly, which may require a bit
47
+ of explanation. Adium and Pidgin allow you to set aliases for buddies as well as
48
+ for yourself, so that you show up in chats as (for example) `Me` instead of as
49
+ `best_screen_name_ever_018845`.
50
+
51
+ However, Pidgin then uses aliases in the log file instead of the actual screen
52
+ name, which complicates things. To parse properly, this gem needs to know which
53
+ aliases belong to you so it can map them to the correct screen name. If it
54
+ encounters an alias that you did not list, it assumes that it belongs to the
55
+ person with whom you are chatting. Note that aliases are lower-cased and spaces are
56
+ removed, so providing `Gabe B-W, GBW` is the same as providing `gabeb-w,gbw`.
57
+
58
+ You do not need to provide your screen name in the alias list.
59
+
60
+ ## INSTALL
61
+
62
+ gem install pipio
63
+
64
+ ## Testing
65
+
66
+ To get a coverage report, run `rake` with the `COVERAGE` environment variable
67
+ set:
68
+
69
+ COVERAGE=1 rake
70
+
71
+ ## THANKS
72
+
73
+ With thanks to Li Ma, whose [blog post](http://li-ma.blogspot.com/2008/10/pidgin-log-file-to-adium-log-converter.html)
74
+ helped tremendously.
75
+
76
+ ## Note on Patches/Pull Requests
77
+
78
+ * Fork the project.
79
+ * Make your feature addition or bug fix.
80
+ * Add tests for it. This is important so I don't break it in a
81
+ future version unintentionally.
82
+ * Commit, but do not change the Rakefile, version, or history.
83
+ (If you want to have your own version, that is fine, but bump the version in a separate commit that I can ignore when I pull.)
84
+ * Send me a pull request. Bonus points for topic branches.
85
+
86
+ ## Copyright
87
+
88
+ Copyright (c) 2009-2014 Gabe Berke-Williams. See LICENSE for details.
@@ -0,0 +1,13 @@
1
+ require 'bundler'
2
+ require 'bundler/setup'
3
+ Bundler::GemHelper.install_tasks
4
+
5
+ require 'rspec/core/rake_task'
6
+
7
+ RSpec::Core::RakeTask.new(:spec)
8
+
9
+ task :coverage do
10
+ system "COVERAGE=1 rake ; open coverage/index.html"
11
+ end
12
+
13
+ task default: :spec
@@ -0,0 +1,34 @@
1
+ require 'time'
2
+
3
+ require 'pipio/version'
4
+ require 'pipio/chat'
5
+ require 'pipio/tag_balancer'
6
+ require 'pipio/time_parser'
7
+ require 'pipio/metadata'
8
+ require 'pipio/metadata_parser'
9
+ require 'pipio/alias_registry'
10
+ require 'pipio/file_reader'
11
+ require 'pipio/parsers/null_parser'
12
+ require 'pipio/parsers/basic_parser'
13
+ require 'pipio/parsers/text_log_parser'
14
+ require 'pipio/parsers/html_log_parser'
15
+ require 'pipio/parser_factory'
16
+ require 'pipio/messages/message'
17
+ require 'pipio/messages/xml_message'
18
+ require 'pipio/messages/auto_reply_message'
19
+ require 'pipio/messages/event'
20
+ require 'pipio/messages/status_message'
21
+ require 'pipio/message_creators/event_message_creator'
22
+ require 'pipio/message_creators/status_message_creator'
23
+ require 'pipio/message_creators/auto_or_xml_message_creator'
24
+ require 'pipio/cleaners/html_cleaner'
25
+ require 'pipio/cleaners/text_cleaner'
26
+
module Pipio
  # Parses the log at the given path into a Chat.
  #
  # logfile_path - path to a Pidgin log file; it is expanded to an absolute
  #                path before being handed to the parser factory.
  # my_aliases   - your own aliases, passed through unchanged to
  #                ParserFactory (the README passes a comma-separated String
  #                such as "Gabe B-W,Gabe,Other Alias").
  #
  # Returns whatever the selected parser's #parse returns. The README treats
  # a falsy result as "could not parse".
  def self.parse(logfile_path, my_aliases)
    full_path = File.expand_path(logfile_path)
    ParserFactory.new(full_path, my_aliases).parser.parse
  end
end
@@ -0,0 +1,26 @@
module Pipio
  # Map aliases ("Gabe B-W") to screen names ("cool_dragon_88").
  #
  # Any alias that was never registered resolves to the default screen name
  # given to the constructor.
  class AliasRegistry
    # default - screen name returned for unknown aliases. It is stored
    #           normalized (spaces removed, lower-cased).
    def initialize(default)
      @items = Hash.new(normalize(default))
    end

    # Registers alias_name as belonging to screen_name. The screen name is
    # normalized before it is stored; the alias is used as the key as given.
    def []=(alias_name, screen_name)
      @items[alias_name] = normalize(screen_name)
    end

    # Returns the normalized screen name registered for alias_name, or the
    # default when the alias is unknown. A leading "***" on the alias is
    # ignored for the lookup.
    def [](alias_name)
      @items[without_action(alias_name)]
    end

    private

    # Removes spaces and lower-cases the screen name.
    def normalize(screen_name)
      screen_name.delete(' ').downcase
    end

    # Strips a "***" prefix. \A anchors to the start of the string, so a
    # "***" that merely starts a later line inside the alias is not touched.
    def without_action(alias_name)
      alias_name.sub(/\A\*{3}/, '')
    end
  end
end
@@ -0,0 +1,39 @@
module Pipio
  # The container object for each Message in a chat. It includes the Enumerable
  # module, so each/map/reject etc all work and will iterate over the Messages.
  class Chat
    include Enumerable

    # messages - the collection of messages; it must respond to #each.
    # metadata - object answering #start_time, #my_screen_name,
    #            #their_screen_name and #service.
    def initialize(messages, metadata)
      @messages = messages
      @metadata = metadata
    end

    attr_reader :messages

    # Start time of the chat, formatted by calling #xmlschema on the
    # metadata's start time.
    def start_time_xmlschema
      @metadata.start_time.xmlschema
    end

    # Screen name reported by the metadata for the log's owner.
    def my_screen_name
      @metadata.my_screen_name
    end

    # Screen name reported by the metadata for the other party.
    def their_screen_name
      @metadata.their_screen_name
    end

    # Service reported by the metadata.
    def service
      @metadata.service
    end

    # The string form of every message, joined with newlines.
    def to_s
      map(&:to_s).join("\n")
    end

    # Iterate over each Message.
    def each(&block)
      @messages.each(&block)
    end
  end
end
@@ -0,0 +1,95 @@
module Pipio
  module Cleaners
    class HtmlCleaner
      # Returns a cleaned copy of _text_; the argument itself is not modified,
      # so frozen strings are accepted.
      # Removes the following tags from _text_:
      # * html
      # * body
      # * font
      # * a with no innertext, e.g. <a href="blah"></a>
      # And removes the following style declarations:
      # * color: #000000 (just turns text black)
      # * colors that are not a 6-digit hex value
      # * font-family
      # * font-size
      # * background
      # Any other declaration (e.g. font-weight) is kept. A <span> left with
      # no declarations is removed, but the text inside it is preserved.
      # <em> is changed to <span style="font-style: italic;">.
      def self.clean(text)
        # Every step below edits in place, so work on a private copy.
        cleaned = text.dup

        # Sometimes this is in there. I don't know why.
        cleaned.gsub!(%r{&lt;/FONT HSPACE='\d'>}, '')
        # We can remove <font> safely since Pidgin and Adium both show bold
        # using <span style="font-weight: bold;"> except Pidgin uses single
        # quotes while Adium uses double quotes.
        cleaned.gsub!(/<\/?(?:html|body|font)(?: .+?)?>/, '') # very important!

        cleaned.delete!("\r")
        # Remove empty lines, however many occur in a row.
        cleaned.gsub!(/\n{2,}/, "\n")

        # Remove newlines that end the file, since they screw up the
        # newline -> <br/> conversion
        cleaned.gsub!(/\n\Z/, '')

        # Replace newlines with "<br/>" unless they end a chat line.
        # This must go after we remove <font> tags.
        cleaned.gsub!(/\n(?!#{HtmlLogParser::TIMESTAMP_REGEX})/, '<br/>')

        # These empty links are sometimes appended to every line in a chat,
        # for some weird reason. Remove them.
        cleaned.gsub!(%r{<a href=['"].+?['"]>\s*?</a>}, '')

        # Replace single quotes inside tags with double quotes so we can
        # easily change single quotes to entities.
        # For spans, removes a space after the final declaration if it exists.
        cleaned.gsub!(/<span style='([^']+?;) ?'>/, '<span style="\1">')
        cleaned.gsub!(/([a-z]+=)'(.+?)'/, '\1"\2"')
        cleaned.gsub!("'", '&apos;')

        cleaned.gsub!(%r{<span style="(.+?)">(.*?)</span>}) do
          # Grab both captures before any other regex runs and resets them.
          style = Regexp.last_match(1)
          innertext = Regexp.last_match(2)

          # Remove empty spans.
          next '' if innertext == ''

          kept = style.split(/; ?/).map { |part| filter_declaration(part) }.compact
          if kept.empty?
            innertext
          else
            "<span style=\"#{kept.join('; ')};\">#{innertext}</span>"
          end
        end

        # Pidgin uses <em>, Adium uses <span>
        if cleaned.gsub!('<em>', '<span style="font-style: italic;">')
          cleaned.gsub!('</em>', '</span>')
        end
        cleaned
      end

      # Returns the declaration to keep in a span's style attribute, or nil
      # when the declaration should be dropped.
      def self.filter_declaration(declaration)
        if declaration[0, 5] == 'color'
          return nil if declaration.include?('color: #000000')

          # Sometimes this happens:
          #   <span style="color: #ff0000>today;">today was busy</span>
          # Then declaration = "color: #ff0000>today". Or it can end in ">;",
          # with no text before the semicolon. So keep the color but drop the
          # ">" and anything following it. Colors that are not a 6-digit hex
          # value yield nil and are dropped.
          declaration[/color: #[0-9a-fA-F]{6}/]
        else
          case declaration
          when /^font-family/, /^font-size/, /^background/ then nil
          else declaration # don't remove font-weight or anything else
          end
        end
      end
      private_class_method :filter_declaration
    end
  end
end
@@ -0,0 +1,15 @@
module Pipio
  module Cleaners
    class TextCleaner
      # Characters that must be escaped in XML, mapped to their entities.
      XML_ENTITIES = {
        '&' => '&amp;',
        '<' => '&lt;',
        '>' => '&gt;',
        '"' => '&quot;',
        "'" => '&apos;'
      }.freeze

      # Returns a copy of _line_ with carriage returns removed and the five
      # XML special characters replaced by entities, since the result will be
      # embedded in XML. The argument is not modified.
      def self.clean(line)
        # A single pass cannot double-escape: the "&" of an entity produced
        # here is never scanned again.
        line.delete("\r").gsub(/[&<>"']/, XML_ENTITIES)
      end
    end
  end
end
@@ -0,0 +1,29 @@
module Pipio
  # Reads a log file from disk and exposes its first line separately from
  # the remaining lines, which are passed through a cleaner.
  class FileReader
    # path_to_file - path of the file to read.
    # cleaner      - object responding to clean(line) and returning a String;
    #                applied to every line except the first.
    def initialize(path_to_file, cleaner)
      @path_to_file = path_to_file
      @first_line = ''
      @other_lines = []
      @cleaner = cleaner
    end

    # first_line  - the stripped first line ('' until #read succeeds, and for
    #               an empty file).
    # other_lines - the remaining lines, stripped and cleaned, with lines
    #               that are empty after cleaning removed.
    attr_reader :first_line, :other_lines

    # Loads the file into #first_line and #other_lines. Does nothing when
    # the path does not exist.
    def read
      return unless File.exist?(@path_to_file)

      # File.open rather than Kernel#open: the latter runs a path that
      # starts with "|" as a shell command.
      File.open(@path_to_file) do |file|
        # gets returns nil at end-of-file where readline would raise
        # EOFError, so an empty file simply yields an empty first line.
        @first_line = file.gets.to_s.strip
        @other_lines = file.readlines.map(&:strip)
      end

      clean_other_lines
    end

    private

    # Cleans every remaining line in place, then drops the empty ones.
    def clean_other_lines
      @other_lines.map! { |line| @cleaner.clean(line) }
      @other_lines.reject!(&:empty?)
    end
  end
end