pidgin2adium 3.3.0 → 4.0.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +2 -3
  3. data/.rspec +1 -0
  4. data/.simplecov +5 -0
  5. data/.travis.yml +12 -0
  6. data/Gemfile +6 -1
  7. data/LICENSE +17 -17
  8. data/NEWS.md +89 -0
  9. data/README.md +60 -0
  10. data/Rakefile +5 -23
  11. data/bin/pidgin2adium +19 -90
  12. data/lib/pidgin2adium.rb +4 -136
  13. data/lib/pidgin2adium/adium_chat_file_creator.rb +64 -0
  14. data/lib/pidgin2adium/file_finder.rb +23 -0
  15. data/lib/pidgin2adium/runner.rb +23 -0
  16. data/lib/pidgin2adium/version.rb +1 -1
  17. data/pidgin2adium.gemspec +25 -21
  18. data/spec/features/parse_pidgin_log_file_spec.rb +50 -0
  19. data/spec/fixtures/input/input.html +3 -0
  20. data/spec/fixtures/output.xml +5 -0
  21. data/spec/pidgin2adium/adium_chat_file_creator_spec.rb +89 -0
  22. data/spec/pidgin2adium/file_finder_spec.rb +63 -0
  23. data/spec/spec_helper.rb +17 -59
  24. metadata +96 -89
  25. data/.autotest +0 -28
  26. data/ChangeLog +0 -79
  27. data/Manifest.txt +0 -18
  28. data/README.rdoc +0 -122
  29. data/config/website.yml +0 -2
  30. data/ext/balance_tags_c/balance_tags_c.c +0 -198
  31. data/ext/balance_tags_c/extconf.rb +0 -4
  32. data/lib/pidgin2adium/log_converter.rb +0 -71
  33. data/lib/pidgin2adium/log_file.rb +0 -100
  34. data/lib/pidgin2adium/log_parser.rb +0 -2
  35. data/lib/pidgin2adium/message.rb +0 -2
  36. data/lib/pidgin2adium/messages/all.rb +0 -5
  37. data/lib/pidgin2adium/messages/auto_reply_message.rb +0 -11
  38. data/lib/pidgin2adium/messages/event.rb +0 -17
  39. data/lib/pidgin2adium/messages/message.rb +0 -39
  40. data/lib/pidgin2adium/messages/status_message.rb +0 -17
  41. data/lib/pidgin2adium/messages/xml_message.rb +0 -40
  42. data/lib/pidgin2adium/parsers/all.rb +0 -3
  43. data/lib/pidgin2adium/parsers/basic_parser.rb +0 -456
  44. data/lib/pidgin2adium/parsers/html_log_parser.rb +0 -125
  45. data/lib/pidgin2adium/parsers/text_log_parser.rb +0 -39
  46. data/spec/balance_tags_c_extn_spec.rb +0 -47
  47. data/spec/basic_parser_spec.rb +0 -219
  48. data/spec/html_log_parser_spec.rb +0 -150
  49. data/spec/log_converter_spec.rb +0 -48
  50. data/spec/log_file_spec.rb +0 -176
  51. data/spec/logfiles/2006-12-21.223606.txt +0 -3
  52. data/spec/logfiles/2008-01-15.071445-0500PST.htm +0 -5
  53. data/spec/logfiles/2008-01-15.071445-0500PST.html +0 -5
  54. data/spec/pidgin2adium_spec.rb +0 -252
  55. data/spec/spec.opts +0 -1
  56. data/spec/test-output/README.md +0 -1
  57. data/spec/test-output/html_log_output.xml +0 -6
  58. data/spec/test-output/text_log_output.xml +0 -4
  59. data/spec/text_log_parser_spec.rb +0 -42
  60. data/tasks/extconf.rake +0 -8
  61. data/tasks/extconf/balance_tags_c.rake +0 -47
@@ -1,17 +0,0 @@
1
- # The Message class's subclasses, each used for holding one line of a chat.
2
-
3
- module Pidgin2Adium
4
- # A message saying e.g. "Blahblah has gone away."
5
- class StatusMessage < Message
6
- def initialize(sender, time, buddy_alias, status)
7
- super(sender, time, buddy_alias)
8
- @status = status
9
- end
10
- attr_accessor :status
11
-
12
- def to_s
13
- return sprintf('<status type="%s" sender="%s" time="%s" alias="%s"/>' << "\n",
14
- @status, @sender, @time, @buddy_alias)
15
- end
16
- end
17
- end
@@ -1,40 +0,0 @@
1
- module Pidgin2Adium
2
- # Basic message with body text (as opposed to pure status messages, which
3
- # have no body).
4
- class XMLMessage < Message
5
- def initialize(sender, time, buddy_alias, body)
6
- super(sender, time, buddy_alias)
7
- @body = body
8
- @styled_body = '<div><span style="font-family: Helvetica; font-size: 12pt;">%s</span></div>' % @body
9
- normalize_body!()
10
- end
11
- attr_accessor :body
12
-
13
- def to_s
14
- return sprintf('<message sender="%s" time="%s" alias="%s">%s</message>' << "\n",
15
- @sender, @time, @buddy_alias, @styled_body)
16
- end
17
-
18
- # Balances mismatched tags, normalizes body style, and fixes actions
19
- # so they are in Adium style (Pidgin uses "***Buddy waves at you", Adium uses
20
- # "*Buddy waves at you*").
21
- def normalize_body!
22
- normalize_body_entities!()
23
- # Fix mismatched tags. Yes, it's faster to do it per-message
24
- # than all at once.
25
- @body = Pidgin2Adium.balance_tags_c(@body)
26
- if @buddy_alias[0,3] == '***'
27
- # "***<alias>" is what pidgin sets as the alias for a /me action
28
- @buddy_alias.slice!(0,3)
29
- @body = '*' << @body << '*'
30
- end
31
- end
32
-
33
- # Escapes all entities in @body except for "&lt;", "&gt;", "&amp;", "&quot;",
34
- # and "&apos;".
35
- def normalize_body_entities!
36
- # Convert '&' to '&amp;' only if it's not followed by an entity.
37
- @body.gsub!(/&(?!lt|gt|amp|quot|apos)/, '&amp;')
38
- end
39
- end
40
- end
@@ -1,3 +0,0 @@
1
- require 'pidgin2adium/parsers/basic_parser.rb'
2
- require 'pidgin2adium/parsers/text_log_parser.rb'
3
- require 'pidgin2adium/parsers/html_log_parser.rb'
@@ -1,456 +0,0 @@
1
- # Contains the BasicParser class.
2
- # For its subclasses, see html_log_parser.rb and text_log_parser.rb.
3
- # The subclasses parse the file passed into it and return a LogFile object.
4
- # The BasicParser class just provides some common functionality.
5
- #
6
- # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
7
- # using these classes directly.
8
-
9
- require 'date'
10
- require 'time'
11
-
12
- require 'pidgin2adium/log_file'
13
- require 'pidgin2adium/messages/all'
14
-
15
- module Pidgin2Adium
16
- # Empty class. Raise'd by LogParser if the first line of a log is not
17
- # parseable.
18
- class InvalidFirstLineError < StandardError; end
19
-
20
- # BasicParser is a base class. Its subclasses are TextLogParser and
21
- # HtmlLogParser.
22
- #
23
- # Please use Pidgin2Adium.parse or Pidgin2Adium.parse_and_generate instead of
24
- # using this class directly.
25
- class BasicParser
26
- include Pidgin2Adium
27
-
28
- # Minimal times don't have a date
29
- MINIMAL_TIME_REGEX = /^\d{1,2}:\d{1,2}:\d{1,2}(?: [AP]M)?$/
30
-
31
- # Time regexes must be set before pre_parse!().
32
- # "4/18/2007 11:02:00 AM" => %w{4, 18, 2007}
33
- # ONLY used (if at all) in first line of chat ("Conversation with...at...")
34
- TIME_REGEX_FIRST_LINE = %r{^(\d{1,2})/(\d{1,2})/(\d{4}) \d{1,2}:\d{2}:\d{2} [AP]M$}
35
- # "2007-04-17 12:33:13" => %w{2007, 04, 17}
36
- TIME_REGEX = /^(\d{4})-(\d{2})-(\d{2}) \d{2}:\d{2}:\d{2}$/
37
-
38
- # force_conversion: Should we continue to convert after hitting an unparseable line?
39
- def initialize(src_path, user_aliases, force_conversion = false)
40
- @src_path = src_path
41
- # Whitespace is removed for easy matching later on.
42
- @user_aliases = user_aliases.split(',').map!{|x| x.downcase.gsub(/\s+/,'') }.uniq
43
-
44
- @force_conversion = force_conversion
45
- # @user_alias is set each time get_sender_by_alias is called. It is a non-normalized
46
- # alias.
47
- # Set an initial value just in case the first message doesn't give
48
- # us an alias.
49
- @user_alias = user_aliases.split(',')[0]
50
-
51
- @log_file_is_valid = true
52
- begin
53
- file = File.new(@src_path, 'r')
54
- @first_line = file.readline
55
- @file_content = file.read
56
- file.close
57
- rescue Errno::ENOENT
58
- oops("#{@src_path} doesn't exist! Continuing...")
59
- @log_file_is_valid = false
60
- return nil
61
- end
62
-
63
- begin
64
- successfully_set_variables = pre_parse!
65
- if not successfully_set_variables
66
- error("Failed to set some key variables: #{@src_path}")
67
- @log_file_is_valid = false
68
- return
69
- end
70
- rescue InvalidFirstLineError
71
- # The first line isn't parseable
72
- @log_file_is_valid = false
73
- error("Failed to parse, invalid first line: #{@src_path}")
74
- return # stop processing
75
- end
76
-
77
- # @status_map, @lib_purple_events, and @events are used in
78
- # create_status_or_event_msg
79
- @status_map = {
80
- /(.+) logged in\.$/ => 'online',
81
- /(.+) logged out\.$/ => 'offline',
82
- /(.+) has signed on\.$/ => 'online',
83
- /(.+) has signed off\.$/ => 'offline',
84
- /(.+) has gone away\.$/ => 'away',
85
- /(.+) is no longer away\.$/ => 'available',
86
- /(.+) has become idle\.$/ => 'idle',
87
- /(.+) is no longer idle\.$/ => 'available'
88
- }
89
-
90
- # lib_purple_events are all of event_type libPurple
91
- @lib_purple_events = [
92
- # file transfer
93
- /Starting transfer of .+ from (.+)/,
94
- /^Offering to send .+ to (.+)$/,
95
- /(.+) is offering to send file/,
96
- /^Transfer of file .+ complete$/,
97
- /Error reading|writing|accessing .+: .+/,
98
- /You cancell?ed the transfer of/,
99
- /File transfer cancelled/,
100
- /(.+?) cancell?ed the transfer of/,
101
- /(.+?) cancelled the file transfer/,
102
- # Direct IM - actual (dis)connect events are their own types
103
- /^Attempting to connect to (.+) at .+ for Direct IM\./,
104
- /^Asking (.+) to connect to us at .+ for Direct IM\./,
105
- /^Attempting to connect via proxy server\.$/,
106
- /^Direct IM with (.+) failed/,
107
- # encryption
108
- /Received message encrypted with wrong key/,
109
- /^Requesting key\.\.\.$/,
110
- /^Outgoing message lost\.$/,
111
- /^Conflicting Key Received!$/,
112
- /^Error in decryption- asking for resend\.\.\.$/,
113
- /^Making new key pair\.\.\.$/,
114
- # sending errors
115
- /^Last outgoing message not received properly- resetting$/,
116
- /Resending\.\.\./,
117
- # connection errors
118
- /Lost connection with the remote user:.+/,
119
- # chats
120
- /^.+ entered the room\.$/,
121
- /^.+ left the room\.$/
122
- ]
123
-
124
- # non-libpurple events
125
- # Each key maps to an event_type string. The keys will be matched against a line of chat
126
- # and the partner's alias will be in regex group 1, IF the alias is matched.
127
- @event_map = {
128
- # .+ is not an alias, it's a proxy server so no grouping
129
- /^Attempting to connect to .+\.$/ => 'direct-im-connect',
130
- # NB: pidgin doesn't track when Direct IM is disconnected, AFAIK
131
- /^Direct IM established$/ => 'directIMConnected',
132
- /Unable to send message/ => 'chat-error',
133
- /You missed .+ messages from (.+) because they were too large/ => 'chat-error',
134
- /User information not available/ => 'chat-error'
135
- }
136
-
137
- @ignore_events = [
138
- # Adium ignores SN/alias changes.
139
- /^.+? is now known as .+?\.<br\/?>$/
140
- ]
141
- end
142
-
143
- # This method returns a LogFile instance, or false if an error occurred.
144
- def parse
145
- # Prevent parse from being called directly from BasicParser, since
146
- # it uses subclassing magic.
147
- if self.class == BasicParser
148
- oops("Please don't call parse directly from BasicParser. Use a subclass :)")
149
- return false
150
- end
151
- return false unless @log_file_is_valid
152
- @file_content = cleanup(@file_content).split("\n")
153
-
154
- @file_content.map! do |line|
155
- # "next" returns nil which is removed by compact
156
- next if line =~ /^\s+$/
157
- if line =~ @line_regex
158
- create_msg($~.captures)
159
- elsif line =~ @line_regex_status
160
- msg = create_status_or_event_msg($~.captures)
161
- if msg == false
162
- if force_conversion?
163
- nil # will get compacted out
164
- else
165
- # Error occurred while parsing
166
- return false
167
- end
168
- end
169
- else
170
- error "Could not parse line:"
171
- p line
172
- return false
173
- end
174
- end
175
- @file_content.compact!
176
- return LogFile.new(@file_content, @service, @user_SN, @partner_SN, @adium_chat_time_start)
177
- end
178
-
179
- # Returns a Time object, or nil if the format string doesn't match the
180
- # time string.
181
- def strptime(time, format)
182
- date_hash = Date._strptime(time, format)
183
- return nil if date_hash.nil?
184
- # Fill in any blanks using @basic_time_info
185
- date_hash = @basic_time_info.merge(date_hash)
186
- time = Time.local(date_hash[:year], date_hash[:mon], date_hash[:mday],
187
- date_hash[:hour], date_hash[:min], date_hash[:sec],
188
- date_hash[:sec_fraction], date_hash[:zone])
189
- time
190
- end
191
-
192
- # Tries to parse _time_ (a string) according to the formats in _formats_, which
193
- # should be an array of strings. For more on acceptable format strings,
194
- # see the official documentation for Time.strptime. Returns a Time
195
- # object or nil (if no formats matched).
196
- def try_to_parse_time_with_formats(time, formats)
197
- parsed = nil
198
- formats.each do |format|
199
- parsed = strptime(time, format)
200
- break unless parsed.nil?
201
- end
202
- parsed
203
- end
204
-
205
- def try_to_parse_time(time)
206
- formats = [
207
- "%m/%d/%Y %I:%M:%S %P", # 01/22/2008 03:01:45 PM
208
- "%Y-%m-%d %H:%M:%S", # 2008-01-22 23:08:24
209
- "%Y/%m/%d %H:%M:%S", # 2008/01/22 04:01:45
210
- "%Y-%m-%d %H:%M:%S", # 2008-01-22 04:01:45
211
- '%a %d %b %Y %H:%M:%S %p %Z', # "Sat 18 Apr 2009 10:43:35 AM PDT"
212
- '%a %b %d %H:%M:%S %Y' # "Wed May 24 19:00:33 2006"
213
- ]
214
- try_to_parse_time_with_formats(time, formats)
215
- end
216
-
217
- def try_to_parse_minimal_time(minimal_time)
218
- formats = [
219
- "%I:%M:%S %P", # 04:01:45 AM
220
- "%H:%M:%S" # 23:01:45
221
- ]
222
-
223
- try_to_parse_time_with_formats(minimal_time, formats)
224
- end
225
-
226
- # Returns true if the time is minimal, i.e. doesn't include a date.
227
- # Otherwise returns false.
228
- def is_minimal_time?(str)
229
- not str.strip.match(MINIMAL_TIME_REGEX).nil?
230
- end
231
-
232
- # Converts a pidgin datestamp to an Adium one.
233
- # Returns a string representation of _time_ or
234
- # nil if it couldn't parse the provided _time_.
235
- def create_adium_time(time)
236
- return nil if time.nil?
237
- if is_minimal_time?(time)
238
- datetime = try_to_parse_minimal_time(time)
239
- else
240
- begin
241
- datetime = DateTime.parse(time)
242
- rescue ArgumentError
243
- datetime = try_to_parse_time(time)
244
- if datetime.nil?
245
- Pidgin2Adium.oops("#{time} couldn't be parsed. Please open an issue on GitHub: https://github.com/gabebw/pidgin2adium/issues")
246
- return nil
247
- end
248
- end
249
- end
250
-
251
- return nil if datetime.nil?
252
-
253
- # Instead of dealing with Ruby 1.9 vs Ruby 1.8, DateTime vs Date vs
254
- # Time, and #xmlschema vs #iso8601, just use strftime.
255
- datetime.strftime('%Y-%m-%dT%H:%M:%S%Z')
256
- end
257
-
258
- # Extract required data from the file. Run by parse. Sets these
259
- # variables:
260
- # * @service
261
- # * @user_SN
262
- # * @partner_SN
263
- # * @basic_time_info
264
- # * @adium_chat_time_start
265
- # Returns true if none of these variables are false or nil.
266
- def pre_parse!
267
- # Deal with first line.
268
-
269
- # the first line is special. It tells us (in order of regex groups):
270
- # 1) who we're talking to
271
- # 2) what time/date
272
- # 3) what SN we used
273
- # 4) what protocol (AIM, icq, jabber...)
274
- first_line_match = /Conversation with (.+?) at (.+?) on (.+?) \((.+?)\)/.match(@first_line)
275
- if first_line_match.nil?
276
- raise InvalidFirstLineError
277
- else
278
- # first_line_match is like so:
279
- # ["Conversation with BUDDY_PERSON at 2006-12-21 22:36:06 on awesome SN (aim)",
280
- # "BUDDY_PERSON",
281
- # "2006-12-21 22:36:06",
282
- # "awesome SN",
283
- # "aim"]
284
- @service = first_line_match[4]
285
- # @user_SN is normalized to avoid "AIM.name" and "AIM.na me" folders
286
- @user_SN = first_line_match[3].downcase.tr(' ', '')
287
- @partner_SN = first_line_match[1]
288
- pidgin_chat_time_start = first_line_match[2]
289
- # @basic_time_info is for files that only have the full
290
- # timestamp at the top; we can use it to fill in the minimal
291
- # per-line timestamps. It is a hash with 3 keys:
292
- # * :year
293
- # * :mon
294
- # * :mday (day of month)
295
- # You should be able to fill everything else in. If you can't,
296
- # something's wrong.
297
- @basic_time_info = case pidgin_chat_time_start
298
- when TIME_REGEX
299
- {:year => $1.to_i,
300
- :mon => $2.to_i,
301
- :mday => $3.to_i}
302
- when TIME_REGEX_FIRST_LINE
303
- {:year => $3.to_i,
304
- :mon => $1.to_i,
305
- :mday => $2.to_i}
306
- else
307
- nil
308
- end
309
- if @basic_time_info.nil?
310
- begin
311
- parsed_time = DateTime.parse(pidgin_chat_time_start)
312
- @basic_time_info = {:year => parsed_time.year,
313
- :mon => parsed_time.mon,
314
- :mday => parsed_time.mday}
315
- rescue ArgumentError
316
- # Couldn't parse the date
317
- Pidgin2Adium.oops("#{@src_path}: couldn't parse the date in the first line.")
318
- @basic_time_info = nil
319
- end
320
- end
321
-
322
- # Note: need @basic_time_info set for create_adium_time
323
- # When the chat started, in Adium's format
324
- @adium_chat_time_start = create_adium_time(pidgin_chat_time_start)
325
-
326
- first_line_variables = [@service,
327
- @user_SN,
328
- @partner_SN,
329
- @basic_time_info,
330
- @adium_chat_time_start]
331
- if first_line_variables.all?
332
- true
333
- else
334
- # Print an informative error message
335
- unset_variable_names = []
336
- unset_variable_names << 'service' if @service.nil?
337
- unset_variable_names << 'user_SN' if @user_SN.nil?
338
- unset_variable_names << 'partner_SN' if @partner_SN.nil?
339
- unset_variable_names << 'basic_time_info' if @basic_time_info.nil?
340
- unset_variable_names << 'adium_chat_time_start' if @adium_chat_time_start.nil?
341
- Pidgin2Adium.oops("Couldn't set these variables: #{unset_variable_names.join(', ')}")
342
- false
343
- end
344
- end
345
- end
346
-
347
- def get_sender_by_alias(alias_name)
348
- no_action = alias_name.sub(/^\*{3}/, '')
349
- if @user_aliases.include? no_action.downcase.gsub(/\s+/, '')
350
- # Set the current alias being used of the ones in @user_aliases
351
- @user_alias = no_action
352
- return @user_SN
353
- else
354
- return @partner_SN
355
- end
356
- end
357
-
358
- #--
359
- # create_msg takes an array of captures from matching against
360
- # @line_regex and returns a Message object or one of its subclasses.
361
- # It can be used for TextLogParser and HtmlLogParser because both of
362
- # they return data in the same indexes in the matches array.
363
- #++
364
- def create_msg(matches)
365
- msg = nil
366
- # Either a regular message line or an auto-reply/away message.
367
- time = create_adium_time(matches[0])
368
- return nil if time.nil?
369
- buddy_alias = matches[1]
370
- sender = get_sender_by_alias(buddy_alias)
371
- body = matches[3]
372
- if matches[2] # auto-reply
373
- msg = AutoReplyMessage.new(sender, time, buddy_alias, body)
374
- else
375
- # normal message
376
- msg = XMLMessage.new(sender, time, buddy_alias, body)
377
- end
378
- return msg
379
- end
380
-
381
- #--
382
- # create_status_or_event_msg takes an array of +MatchData+ captures from
383
- # matching against @line_regex_status and returns an Event or Status.
384
- # Returns nil if it's a message that should be ignored, or false if an
385
- # error occurred.
386
- #++
387
- def create_status_or_event_msg(matches)
388
- # ["22:58:00", "BuddyName logged in."]
389
- # 0: time
390
- # 1: status message or event
391
- msg = nil
392
- time = create_adium_time(matches[0])
393
- return nil if time.nil?
394
- str = matches[1]
395
- # Return nil, which will get compact'ed out
396
- return nil if @ignore_events.detect{|regex| str =~ regex }
397
-
398
- regex, status = @status_map.detect{|rxp, stat| str =~ rxp}
399
- if regex and status
400
- # Status message
401
- buddy_alias = regex.match(str)[1]
402
- sender = get_sender_by_alias(buddy_alias)
403
- msg = StatusMessage.new(sender, time, buddy_alias, status)
404
- else
405
- # Test for event
406
- regex = @lib_purple_events.detect{|rxp| str =~ rxp }
407
- event_type = 'libpurpleEvent' if regex
408
- unless regex and event_type
409
- # not a libpurple event, try others
410
- regex, event_type = @event_map.detect{|rxp,ev_type| str =~ rxp}
411
- unless regex and event_type
412
- if force_conversion?
413
- unless printed_conversion_error?
414
- error("#{@src_path} was converted with the following errors:")
415
- printed_conversion_error!
416
- end
417
- end
418
-
419
- error(sprintf("%sError parsing status or event message, no status or event found: %p",
420
- force_conversion? ? "\t" : '', # indent if we're forcing conversion
421
- str))
422
- return false
423
- end
424
- end
425
-
426
- if regex and event_type
427
- regex_matches = regex.match(str)
428
- # Event message
429
- if regex_matches.size == 1
430
- # No alias - this means it's the user
431
- buddy_alias = @user_alias
432
- sender = @user_SN
433
- else
434
- buddy_alias = regex_matches[1]
435
- sender = get_sender_by_alias(buddy_alias)
436
- end
437
- msg = Event.new(sender, time, buddy_alias, str, event_type)
438
- end
439
- end
440
- return msg
441
- end
442
-
443
- # Should we continue to convert after hitting an unparseable line?
444
- def force_conversion?
445
- !! @force_conversion
446
- end
447
-
448
- def printed_conversion_error?
449
- @printed_conversion_error == true
450
- end
451
-
452
- def printed_conversion_error!
453
- @printed_conversion_error = true
454
- end
455
- end # END BasicParser class
456
- end