pipio 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +27 -0
  3. data/.rspec +2 -0
  4. data/.simplecov +5 -0
  5. data/.travis.yml +12 -0
  6. data/Gemfile +3 -0
  7. data/LICENSE +20 -0
  8. data/NEWS.md +10 -0
  9. data/README.md +88 -0
  10. data/Rakefile +13 -0
  11. data/lib/pipio.rb +34 -0
  12. data/lib/pipio/alias_registry.rb +26 -0
  13. data/lib/pipio/chat.rb +39 -0
  14. data/lib/pipio/cleaners/html_cleaner.rb +95 -0
  15. data/lib/pipio/cleaners/text_cleaner.rb +15 -0
  16. data/lib/pipio/file_reader.rb +29 -0
  17. data/lib/pipio/message_creators/auto_or_xml_message_creator.rb +25 -0
  18. data/lib/pipio/message_creators/event_message_creator.rb +47 -0
  19. data/lib/pipio/message_creators/status_message_creator.rb +19 -0
  20. data/lib/pipio/messages/auto_reply_message.rb +7 -0
  21. data/lib/pipio/messages/event.rb +67 -0
  22. data/lib/pipio/messages/message.rb +23 -0
  23. data/lib/pipio/messages/status_message.rb +26 -0
  24. data/lib/pipio/messages/xml_message.rb +43 -0
  25. data/lib/pipio/metadata.rb +34 -0
  26. data/lib/pipio/metadata_parser.rb +55 -0
  27. data/lib/pipio/parser_factory.rb +32 -0
  28. data/lib/pipio/parsers/basic_parser.rb +83 -0
  29. data/lib/pipio/parsers/html_log_parser.rb +22 -0
  30. data/lib/pipio/parsers/null_parser.rb +9 -0
  31. data/lib/pipio/parsers/text_log_parser.rb +21 -0
  32. data/lib/pipio/tag_balancer.rb +163 -0
  33. data/lib/pipio/time_parser.rb +36 -0
  34. data/lib/pipio/version.rb +3 -0
  35. data/pipio.gemspec +27 -0
  36. data/spec/pipio/alias_registry_spec.rb +37 -0
  37. data/spec/pipio/chat_spec.rb +66 -0
  38. data/spec/pipio/cleaners/html_cleaner_spec.rb +102 -0
  39. data/spec/pipio/cleaners/text_cleaner_spec.rb +29 -0
  40. data/spec/pipio/file_reader_spec.rb +130 -0
  41. data/spec/pipio/messages/auto_reply_message_spec.rb +40 -0
  42. data/spec/pipio/messages/event_spec.rb +41 -0
  43. data/spec/pipio/messages/status_message_spec.rb +43 -0
  44. data/spec/pipio/messages/xml_message_spec.rb +55 -0
  45. data/spec/pipio/metadata_parser_spec.rb +81 -0
  46. data/spec/pipio/metadata_spec.rb +72 -0
  47. data/spec/pipio/parser_factory_spec.rb +31 -0
  48. data/spec/pipio/parsers/html_log_parser_spec.rb +160 -0
  49. data/spec/pipio/parsers/null_parser_spec.rb +13 -0
  50. data/spec/pipio/parsers/text_log_parser_spec.rb +37 -0
  51. data/spec/pipio/tag_balancer_spec.rb +16 -0
  52. data/spec/pipio/time_parser_spec.rb +66 -0
  53. data/spec/pipio_spec.rb +63 -0
  54. data/spec/spec_helper.rb +18 -0
  55. data/spec/support/chat_builder.rb +29 -0
  56. data/spec/support/chat_builder_helpers.rb +41 -0
  57. data/spec/support/file_builder.rb +22 -0
  58. data/spec/support/html_chat_builder.rb +67 -0
  59. data/spec/support/logfiles/2006-12-21.223606.txt +3 -0
  60. data/spec/support/logfiles/2008-01-15.071445-0500PST.htm +5 -0
  61. data/spec/support/logfiles/2008-01-15.071445-0500PST.html +5 -0
  62. data/spec/support/text_chat_builder.rb +21 -0
  63. data/spec/test-output/README.md +1 -0
  64. data/spec/test-output/html_log_output.xml +6 -0
  65. data/spec/test-output/text_log_output.xml +4 -0
  66. metadata +193 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: a27a6c6a854887062351e5a362482a44990ce35e
4
+ data.tar.gz: 455730fbf37e619f747cfe89d0612fb42ee85999
5
+ SHA512:
6
+ metadata.gz: 0192a3e9a130603f28698edc216007c30321a921c4b66a6a3cea5368a02262f0be2037802408b5a7576552c1d625e246da06447f4f5ac37ccd735695b18c86c0
7
+ data.tar.gz: 532da7ab24471befe059573f39d99dd65b214de408783215f0e3e73272088550d03d531834b882a4af14349b7904c1c3e50ce371947ae3bf562f61cf96acb07b
@@ -0,0 +1,27 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+ tags
21
+
22
+ ## PROJECT::SPECIFIC
23
+ .bundle
24
+ .rvmrc
25
+ spec/output-dir
26
+ spec/nonexistent_output_dir
27
+ Gemfile.lock
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ -r spec_helper
@@ -0,0 +1,5 @@
1
+ if ENV["COVERAGE"]
2
+ SimpleCov.start do
3
+ add_filter "/spec/"
4
+ end
5
+ end
@@ -0,0 +1,12 @@
1
+ rvm:
2
+ - 2.1.2
3
+ - 2.1.1
4
+ - 2.0.0
5
+ - 1.9.3
6
+ - 1.9.2
7
+ branches:
8
+ only:
9
+ - master
10
+ notifications:
11
+ email:
12
+ - false
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 Gabriel Berke-Williams
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/NEWS.md ADDED
@@ -0,0 +1,10 @@
1
+ ## 0.0.1 (unreleased)
2
+
3
+ Extract Pidgin2Adium, commit hash 96c443fd244b3d2d57564bf17c68d3ec1bcb48ff, into
4
+ this library, Pipio. There are also other commits in this release that are not in
5
+ Pidgin2Adium as of that commit:
6
+
7
+ * Expose `Chat#my_screen_name`
8
+ * Rename `Chat#lines` to `Chat#messages`
9
+ * Expose `Chat#service`
10
+ * Join the lines of `Chat#to_s` with "\n" instead of nothing
@@ -0,0 +1,88 @@
1
+ # pipio [![Build Status](https://secure.travis-ci.org/gabebw/pipio.png)](http://travis-ci.org/gabebw/pipio) [![Code Climate](https://codeclimate.com/github/gabebw/pipio.png)](https://codeclimate.com/github/gabebw/pipio)
2
+
3
+ Pipio parses [Pidgin](http://pidgin.im/) (formerly gaim) logs. It can output
4
+ them in Adium format by calling `to_s` on a `Pipio::Chat` object or any of the
5
+ message objects in `Pipio::Chat#messages`.
6
+
7
+ ## For the impatient
8
+
9
+ To deal with meta-information about the chat itself:
10
+
11
+ path_to_chat_log = File.expand_path('~/path/to/chat_log.html') # or .txt
12
+ chat = Pipio.parse(path_to_chat_log, "Gabe B-W,Gabe,Other Alias")
13
+ if chat
14
+ puts "Screen name of the person you chatted with: #{chat.their_screen_name}"
15
+ puts "Time the chat started: #{chat.start_time_xmlschema}"
16
+ puts "Chat contents, in adium format:"
17
+ puts chat.to_s
18
+ else
19
+ puts "Oh no! Could not parse! Please open an issue."
20
+ puts path_to_chat_log
21
+ exit 1
22
+ end
23
+
24
+ Or, to deal with individual messages in a chat:
25
+
26
+ chat = Pipio.parse("/path/to/log/file.html", "gabe,gbw,gabeb-w")
27
+ chat.each do |message|
28
+ puts "Screen name of person who sent this message: #{message.sender_screen_name}"
29
+ puts "Alias of person who sent this message: #{message.sender_alias}"
30
+ puts "Time message was sent: #{message.time}"
31
+
32
+ if message.respond_to?(:body)
33
+ puts "Message body: #{message.body}"
34
+ if message.respond_to?(:event_type)
35
+ puts "Event type: #{message.event_type}"
36
+ end
37
+ elsif message.respond_to?(:status)
38
+ puts "Status: #{message.status}"
39
+ end
40
+
41
+ puts "Message in Adium format: #{message}"
42
+ end
43
+
44
+ ## The fine print
45
+
46
+ This library needs access to aliases to work correctly, which may require a bit
47
+ of explanation. Adium and Pidgin allow you to set aliases for buddies as well as
48
+ for yourself, so that you show up in chats as (for example) `Me` instead of as
49
+ `best_screen_name_ever_018845`.
50
+
51
+ However, Pidgin then uses aliases in the log file instead of the actual screen
52
+ name, which complicates things. To parse properly, this gem needs to know which
53
+ aliases belong to you so it can map them to the correct screen name. If it
54
+ encounters an alias that you did not list, it assumes that it belongs to the
55
+ person with whom you are chatting. Note that aliases are lower-cased and spaces are
56
+ removed, so providing `Gabe B-W, GBW` is the same as providing `gabeb-w,gbw`.
57
+
58
+ You do not need to provide your screen name in the alias list.
59
+
60
+ ## INSTALL
61
+
62
+ gem install pipio
63
+
64
+ ## Testing
65
+
66
+ To get a coverage report, run `rake` with the `COVERAGE` environment variable
67
+ set:
68
+
69
+ COVERAGE=1 rake
70
+
71
+ ## THANKS
72
+
73
+ With thanks to Li Ma, whose [blog post](http://li-ma.blogspot.com/2008/10/pidgin-log-file-to-adium-log-converter.html)
74
+ helped tremendously.
75
+
76
+ ## Note on Patches/Pull Requests
77
+
78
+ * Fork the project.
79
+ * Make your feature addition or bug fix.
80
+ * Add tests for it. This is important so I don't break it in a
81
+ future version unintentionally.
82
+ * Commit, do not mess with rakefile, version, or history.
83
+ (If you want to have your own version, that is fine, but bump the version in a separate commit that I can ignore when I pull.)
84
+ * Send me a pull request. Bonus points for topic branches.
85
+
86
+ ## Copyright
87
+
88
+ Copyright (c) 2009-2014 Gabe Berke-Williams. See LICENSE for details.
@@ -0,0 +1,13 @@
1
+ require 'bundler'
2
+ require 'bundler/setup'
3
+ Bundler::GemHelper.install_tasks
4
+
5
+ require 'rspec/core/rake_task'
6
+
7
+ RSpec::Core::RakeTask.new(:spec)
8
+
9
+ task :coverage do
10
+ system "COVERAGE=1 rake ; open coverage/index.html"
11
+ end
12
+
13
+ task default: :spec
@@ -0,0 +1,34 @@
1
+ require 'time'
2
+
3
+ require 'pipio/version'
4
+ require 'pipio/chat'
5
+ require 'pipio/tag_balancer'
6
+ require 'pipio/time_parser'
7
+ require 'pipio/metadata'
8
+ require 'pipio/metadata_parser'
9
+ require 'pipio/alias_registry'
10
+ require 'pipio/file_reader'
11
+ require 'pipio/parsers/null_parser'
12
+ require 'pipio/parsers/basic_parser'
13
+ require 'pipio/parsers/text_log_parser'
14
+ require 'pipio/parsers/html_log_parser'
15
+ require 'pipio/parser_factory'
16
+ require 'pipio/messages/message'
17
+ require 'pipio/messages/xml_message'
18
+ require 'pipio/messages/auto_reply_message'
19
+ require 'pipio/messages/event'
20
+ require 'pipio/messages/status_message'
21
+ require 'pipio/message_creators/event_message_creator'
22
+ require 'pipio/message_creators/status_message_creator'
23
+ require 'pipio/message_creators/auto_or_xml_message_creator'
24
+ require 'pipio/cleaners/html_cleaner'
25
+ require 'pipio/cleaners/text_cleaner'
26
+
27
module Pipio
  class << self
    # Parses the log at the given path into a Chat.
    #
    # logfile_path - path to the log file; it is run through
    #                File.expand_path before being handed to ParserFactory.
    # my_aliases   - forwarded to ParserFactory unchanged.
    #
    # Returns whatever the selected parser's #parse returns.
    def parse(logfile_path, my_aliases)
      absolute_path = File.expand_path(logfile_path)
      ParserFactory.new(absolute_path, my_aliases).parser.parse
    end
  end
end
@@ -0,0 +1,26 @@
1
module Pipio
  # Map aliases ("Gabe B-W") to screen names ("cool_dragon_88").
  #
  # Screen names (including the default) are normalized when stored: spaces
  # are removed and the result is lower-cased. Aliases are used as hash keys
  # exactly as given; the only adjustment is that a leading "***" is stripped
  # from the alias on lookup.
  class AliasRegistry
    # Matches "***" only at the very start of the alias. \A is used rather
    # than ^ so that a "***" at the start of a later line inside the alias is
    # left alone.
    ACTION_PREFIX = /\A\*{3}/

    # default - screen name returned for any alias that was never registered.
    def initialize(default)
      @items = Hash.new(normalize(default))
    end

    # Registers screen_name (normalized) under alias_name.
    def []=(alias_name, screen_name)
      @items[alias_name] = normalize(screen_name)
    end

    # Returns the screen name registered for alias_name, or the normalized
    # default when the alias is unknown.
    def [](alias_name)
      @items[without_action(alias_name)]
    end

    private

    # Removes spaces and lower-cases the screen name.
    def normalize(screen_name)
      screen_name.gsub(' ', '').downcase
    end

    # Strips the leading "***" from the alias, if present.
    def without_action(alias_name)
      alias_name.sub(ACTION_PREFIX, '')
    end
  end
end
@@ -0,0 +1,39 @@
1
module Pipio
  # Holds the Messages of one chat together with the Metadata describing it.
  # Enumerable is mixed in and driven by #each, so map/select/reject and
  # friends all walk the Messages.
  class Chat
    include Enumerable

    attr_reader :messages

    def initialize(messages, metadata)
      @messages, @metadata = messages, metadata
    end

    # Iterate over each Message.
    def each(&block)
      @messages.each(&block)
    end

    # Every message's #to_s, one per line.
    def to_s
      each_with_object([]) { |message, lines| lines << message.to_s }.join("\n")
    end

    # The metadata's start time rendered with Time#xmlschema.
    def start_time_xmlschema
      started_at = @metadata.start_time
      started_at.xmlschema
    end

    def my_screen_name
      @metadata.my_screen_name
    end

    def their_screen_name
      @metadata.their_screen_name
    end

    def service
      @metadata.service
    end
  end
end
@@ -0,0 +1,95 @@
1
module Pipio
  module Cleaners
    class HtmlCleaner
      # Style declarations whose spans are unwrapped (the declaration is
      # dropped, the text inside the span is preserved).
      REMOVED_STYLE_PREFIXES = %w[font-family font-size background].freeze

      # Returns a cleaned string. NOTE: _text_ is modified in place and the
      # same object is returned, so it must not be frozen.
      #
      # Removes the following tags from _text_:
      # * html
      # * body
      # * font
      # * a with no innertext, e.g. <a href="blah"></a>
      # And removes the following style declarations:
      # * color: #000000 (just turns text black)
      # * font-family
      # * font-size
      # * background
      # Every other declaration (font-weight, non-black colors, ...) is kept.
      # <em> is changed to <span style="font-style: italic;">.
      # Spans left with no declarations are removed (but the text inside them
      # is preserved); spans with no text are removed entirely.
      def self.clean(text)
        # Sometimes this is in there. I don't know why.
        text.gsub!(%r{&lt;/FONT HSPACE='\d'>}, '')
        # We can remove <font> safely since Pidgin and Adium both show bold
        # using <span style="font-weight: bold;"> except Pidgin uses single
        # quotes while Adium uses double quotes.
        text.gsub!(/<\/?(?:html|body|font)(?: .+?)?>/, '') # very important!

        text.tr!("\r", '')
        # Remove empty lines. Collapse whole runs of newlines: replacing only
        # "\n\n" pairs would leave a blank line behind in "\n\n\n".
        text.gsub!(/\n{2,}/, "\n")

        # Remove newlines that end the file, since they screw up the
        # newline -> <br/> conversion
        text.gsub!(/\n\Z/, '')

        # Replace newlines with "<br/>" unless they end a chat line.
        # This must go after we remove <font> tags.
        text.gsub!(/\n(?!#{HtmlLogParser::TIMESTAMP_REGEX})/, '<br/>')

        # These empty links are sometimes appended to every line in a chat,
        # for some weird reason. Remove them.
        text.gsub!(%r{<a href=['"].+?['"]>\s*?</a>}, '')

        # Replace single quotes inside tags with double quotes so we can
        # easily change single quotes to entities.
        # For spans, removes a space after the final declaration if it exists.
        text.gsub!(/<span style='([^']+?;) ?'>/, '<span style="\1">')
        text.gsub!(/([a-z]+=)'(.+?)'/, '\1"\2"')
        text.gsub!("'", '&apos;')

        text.gsub!(%r{<span style="(.+?)">(.*?)</span>}) do
          # Capture both groups before any helper runs another regex.
          style, innertext = $1, $2

          if innertext.empty?
            # Remove empty spans.
            ''
          else
            kept = kept_style_declarations(style)
            if kept.empty?
              innertext
            else
              "<span style=\"#{kept.join('; ')};\">#{innertext}</span>"
            end
          end
        end

        # Pidgin uses <em>, Adium uses <span>
        if text.gsub!('<em>', '<span style="font-style: italic;">')
          text.gsub!('</em>', '</span>')
        end
        text
      end

      # Splits a style attribute value into declarations and returns the ones
      # that should survive cleaning.
      def self.kept_style_declarations(style)
        style.split(/; ?/).map do |declaration|
          if declaration[0, 5] == 'color'
            if declaration.include?('color: #000000')
              nil
            else
              # Regarding the hex match, sometimes this happens:
              # <span style="color: #ff0000>today;">today was busy</span>
              # Then declaration = "color: #ff0000>today"
              # Or it can end in ">;", with no text before the semicolon.
              # So keep the color but remove the ">" and anything following
              # it. Colors that are not six-digit hex are kept untouched.
              declaration[/color: #[0-9a-fA-F]{6}/] || declaration
            end
          elsif declaration.start_with?(*REMOVED_STYLE_PREFIXES)
            nil
          else
            # Everything else (e.g. font-weight) must be preserved.
            declaration
          end
        end.compact
      end
      private_class_method :kept_style_declarations
    end
  end
end
@@ -0,0 +1,15 @@
1
module Pipio
  module Cleaners
    class TextCleaner
      # Ordered [needle, substitute] pairs. Carriage returns are dropped, then
      # "&" is escaped before the other characters so that the ampersands
      # introduced by the later entities are not escaped a second time.
      REPLACEMENTS = [
        ["\r", ''],
        ['&', '&amp;'],
        ['<', '&lt;'],
        ['>', '&gt;'],
        ['"', '&quot;'],
        ["'", '&apos;']
      ].freeze

      # Returns a copy of line with carriage returns removed and the XML
      # special characters replaced by entities, since the result will be
      # embedded in XML. The argument itself is not modified.
      def self.clean(line)
        REPLACEMENTS.inject(line) do |cleaned, (needle, substitute)|
          cleaned.gsub(needle, substitute)
        end
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
module Pipio
  # Reads a log file and exposes its first line separately from the remaining
  # lines, which are run through the supplied cleaner.
  class FileReader
    # first_line  - the first line of the file, stripped ('' until #read has
    #               run, or if the file is missing or empty).
    # other_lines - the remaining lines, stripped, cleaned, and with empty
    #               results removed ([] until #read has run).
    attr_reader :first_line, :other_lines

    # path_to_file - path of the file to read.
    # cleaner      - object responding to clean(line); applied to every line
    #                except the first.
    def initialize(path_to_file, cleaner)
      @path_to_file = path_to_file
      @first_line = ''
      @other_lines = []
      @cleaner = cleaner
    end

    # Loads the file into first_line / other_lines.
    #
    # Returns other_lines when the file exists, nil when it does not (in
    # which case nothing is changed). An empty file yields '' and [] instead
    # of raising EOFError.
    def read
      return unless File.exist?(@path_to_file)

      # File.open rather than Kernel#open: the latter treats a leading "|"
      # in the path as a command to run.
      File.open(@path_to_file) do |file|
        # gets returns nil at EOF where readline would raise.
        @first_line = file.gets.to_s.strip
        @other_lines = file.readlines.map(&:strip)
      end

      clean_other_lines
    end

    private

    # Cleans every line in place, then drops lines that are empty afterwards.
    def clean_other_lines
      @other_lines.map! { |line| @cleaner.clean(line) }
      # reject! returns nil when nothing was removed, so do not chain on it.
      @other_lines.reject!(&:empty?)
      @other_lines
    end
  end
end