virginity 0.3.31

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. checksums.yaml +7 -0
  2. data/lib/virginity.rb +6 -0
  3. data/lib/virginity/api_extensions.rb +87 -0
  4. data/lib/virginity/api_extensions/fields_to_json.rb +82 -0
  5. data/lib/virginity/api_extensions/fields_to_xml.rb +151 -0
  6. data/lib/virginity/bnf.rb +84 -0
  7. data/lib/virginity/dir_info.rb +93 -0
  8. data/lib/virginity/dir_info/content_line.rb +146 -0
  9. data/lib/virginity/dir_info/line_folding.rb +60 -0
  10. data/lib/virginity/dir_info/param.rb +208 -0
  11. data/lib/virginity/dir_info/query.rb +144 -0
  12. data/lib/virginity/encoding_decoding.rb +177 -0
  13. data/lib/virginity/encodings.rb +36 -0
  14. data/lib/virginity/fixes.rb +230 -0
  15. data/lib/virginity/vcard.rb +244 -0
  16. data/lib/virginity/vcard/base_field.rb +126 -0
  17. data/lib/virginity/vcard/categories.rb +57 -0
  18. data/lib/virginity/vcard/cleaning.rb +364 -0
  19. data/lib/virginity/vcard/field.rb +22 -0
  20. data/lib/virginity/vcard/field/params.rb +93 -0
  21. data/lib/virginity/vcard/field_values.rb +10 -0
  22. data/lib/virginity/vcard/field_values/binary.rb +22 -0
  23. data/lib/virginity/vcard/field_values/boolean.rb +14 -0
  24. data/lib/virginity/vcard/field_values/case_insensitive_value.rb +13 -0
  25. data/lib/virginity/vcard/field_values/date.rb +16 -0
  26. data/lib/virginity/vcard/field_values/integer.rb +15 -0
  27. data/lib/virginity/vcard/field_values/optional_structured_text.rb +35 -0
  28. data/lib/virginity/vcard/field_values/separated_text.rb +59 -0
  29. data/lib/virginity/vcard/field_values/structured_text.rb +71 -0
  30. data/lib/virginity/vcard/field_values/text.rb +23 -0
  31. data/lib/virginity/vcard/field_values/uri.rb +15 -0
  32. data/lib/virginity/vcard/fields.rb +284 -0
  33. data/lib/virginity/vcard/fields_osx.rb +95 -0
  34. data/lib/virginity/vcard/fields_soocial.rb +45 -0
  35. data/lib/virginity/vcard/name_handler.rb +151 -0
  36. data/lib/virginity/vcard/patching.rb +262 -0
  37. data/lib/virginity/vcard21.rb +2 -0
  38. data/lib/virginity/vcard21/base.rb +30 -0
  39. data/lib/virginity/vcard21/parser.rb +359 -0
  40. data/lib/virginity/vcard21/reader.rb +103 -0
  41. data/lib/virginity/vcard21/writer.rb +139 -0
  42. metadata +111 -0
@@ -0,0 +1,2 @@
1
+ require "virginity/vcard21/reader"
2
+ require "virginity/vcard21/writer"
@@ -0,0 +1,30 @@
1
module Virginity
  # Constants and predicates shared by the vCard 2.1 parser, reader and writer.
  module Vcard21
    # Parameter values that vCard 2.1 allows without an explicit "TYPE=" key.
    # FIXME: X-FUNAMBOL-INSTANTMESSENGER shouldn't be here! It is only listed
    # to get funambol's thunderbird client to work.
    # The trailing additions work around common errors; as a consequence a
    # param with the name INTERNET cannot exist in vCard 2.1.
    KNOWNTYPES = %w(
      DOM INTL POSTAL PARCEL HOME WORK PREF VOICE FAX MSG CELL PAGER BBS MODEM
      CAR ISDN VIDEO AOL APPLELINK ATTMAIL CIS EWORLD INTERNET IBMMAIL MCIMAIL
      POWERSHARE PRODIGY TLX X400 GIF CGM WMF BMP MET PMB DIB PICT TIFF PDF PS
      JPEG QTIME MPEG MPEG2 AVI WAVE AIFF PCM X509 PGP
    ) + %w(X-FUNAMBOL-INSTANTMESSENGER INTERNET)

    ENCODING = /^ENCODING$/i
    BASE64 = /^BASE64$/i
    QUOTED_PRINTABLE = /^quoted-printable$/i
    SEVEN_BIT = /^7bit$/i
    EIGHT_BIT = /^8bit$/i

    # Each predicate takes an object responding to #key and #value and is
    # truthy (a match offset) when it is an ENCODING param with the given
    # value, nil otherwise.
    class << self
      def base64_param?(param)
        param.key =~ ENCODING && param.value =~ BASE64
      end

      def qp_param?(param)
        param.key =~ ENCODING && param.value =~ QUOTED_PRINTABLE
      end

      def seven_bit?(param)
        param.key =~ ENCODING && param.value =~ SEVEN_BIT
      end

      def eight_bit?(param)
        param.key =~ ENCODING && param.value =~ EIGHT_BIT
      end
    end
  end
end
@@ -0,0 +1,359 @@
1
+ require 'virginity/vcard21/base'
2
+
3
+ module Virginity
4
+ module Vcard21
5
+ class Parser
6
+ include Virginity::Encodings
7
+
8
# Builds a parser over +input+ (the vCard text).
# +options+ is kept as-is and consulted later while parsing.
def initialize(input, options = {})
  @options = options
  @input = StringScanner.new(input)
end
12
+
13
# Debug helper: prints the scanner position, the next 40 characters
# (inspected) and the message +s+, separated by tabs.
def pr(s)
  position = @input.pos
  upcoming = @input.peek(40).inspect
  puts "#{position}:\t#{upcoming}\t#{s}"
end
16
+
17
# Parses the whole input starting at the top-level rule.
# Returns whatever parse_start produced; raises ParseError when it yields
# nothing.
def parse!
  lines = parse_start
  raise ParseError, 'error parsing vCard2.1' unless lines
  lines
end
20
+
21
+ #
22
+ # helpers
23
+ #
24
+
25
# Rewinds the scanner to +pos+ and returns nil, so callers can write
# `return rollback(pos)` to signal a failed match.
def rollback(pos)
  nil.tap { @input.pos = pos }
end
29
+
30
# Calls +method+ (anything responding to #call). A truthy result is returned
# as-is. A falsy result, or a `throw :rollback` from inside the call, rewinds
# the scanner to where it was before the attempt and returns nil.
def try(method)
  start = @input.pos
  catch(:rollback) do
    outcome = method.call
    throw :rollback unless outcome
    return outcome
  end
  rollback(start)
end
37
+
38
# Zero-or-more repetition: keeps trying +method+ and collects every result
# until an attempt yields nil. Always returns an array (possibly empty).
def star(method)
  results = []
  item = try(method)
  until item.nil?
    results << item
    item = try(method)
  end
  results
end
45
+
46
# One-or-more repetition: returns nil when the first attempt fails, otherwise
# an array holding the first result followed by everything star collects.
def one_star(method)
  first = try(method)
  return nil unless first
  star(method).unshift(first)
end
50
+
51
+ #
52
+ # parsing differently encoded and folded strings
53
+ #
54
+
55
# Compares the upcoming input, byte by byte, against the characters of
# +literal+. Returns the (mutated) literal when every character matched,
# nil otherwise. The scanner is not rewound on failure.
# NOTE(review): unescape_literal and parse are not defined in the visible part
# of this file, and nothing visible calls this method -- confirm it is still
# usable before relying on it.
# NOTE(review): each consumed input byte is appended to +literal+ inside the
# loop, so a successful result is the unescaped literal followed by the
# consumed input -- presumably unintended; verify.
+ def parse_folded_literal(literal)
56
+ # pr "parse_literal(#{literal.inspect})@#{@input.pos}".white
57
+ success = true
58
+ literal = unescape_literal(literal)
59
+ literal.split(//).each do |ch|
60
+ input = @input.get_byte
61
+ input = parse("'\n' one_ws") if input == "\r" # CRLF followed by LWSP needs to be regarded as LWSP == Linear White Space
62
+ # pr "#{ch.inspect}==#{input.inspect}?".white
63
+ literal << input
64
+ success &= (ch == input)
65
+ end
66
+ success ? literal : nil
67
+ end
68
+
69
+
70
+ # def parse_folded_literal(literal)
71
+ # exp = /#{literal.split(//).map {|s| "#{s}(=\r\n\s)+" }.join}/
72
+ # puts exp
73
+ # end
74
+
75
+ # def parse_sevenbitascii
76
+ # buffer = ""
77
+ # until ["\r", "\n"].include? @input.peek(1)
78
+ # input = @input.get_byte
79
+ # raise TopDown::DoesNotParse if input.each_byte.any? { |b| b > 127 }
80
+ # buffer << input
81
+ # end
82
+ # buffer
83
+ # end
84
+
85
+ # The process of moving from this folded multiple-line representation of a property definition to its single line representation is called “unfolding”. Unfolding is accomplished by regarding CRLF immediately followed by a LWSP-char as equivalent to the LWSP-char.
86
# A line break followed by one linear-white-space character (space or tab).
# The previous character class `[\ |\t]` also matched a literal "|".
CRLF_LWSP = /\r?\n[ \t]/
CRLF = /\r?\n/
ONE_OR_MORE_CRLF = /(\r?\n)+/

# Byte-wise reader for an unencoded value. Folded lines are unfolded: the
# line break is dropped and the white-space character after it is kept,
# unless the :vcard21_line_folding_with_extra_space option is set.
# Stops in front of the first unfolded line break, or at the end of the
# input, and returns the collected text passed through to_ascii.
# NOTE: a later definition of parse_sevenbitascii in this class replaces
# this one at load time.
def parse_sevenbitascii
  buffer = ""
  # stop at end of input instead of appending the nil that get_byte returns there
  until @input.eos?
    if (fold = @input.scan(CRLF_LWSP))
      buffer << fold[-1] unless @options[:vcard21_line_folding_with_extra_space]
    elsif @input.check(CRLF)
      break
    else
      buffer << @input.get_byte
    end
  end
  to_ascii buffer
end
103
+
104
+ # everything up to the beginning of CRLF_LWSP or CRLF
105
LINE_7BIT = /[^\r\n]*/

# Line-wise reader for an unencoded value. Takes everything up to the next
# line break; if that break is a fold (line break plus white space) the
# white-space character is kept (unless the
# :vcard21_line_folding_with_extra_space option is set) and reading
# continues. Returns the collected text passed through to_ascii, leaving the
# terminating line break unread.
def parse_sevenbitascii
  value = ""
  loop do
    value << @input.scan(LINE_7BIT)
    fold = @input.scan(CRLF_LWSP)
    break unless fold # a plain line break (or end of input) ends the value
    value << fold[-1] unless @options[:vcard21_line_folding_with_extra_space]
  end
  to_ascii(value)
end
118
+
119
+
120
EQUALS = "="

# Byte-wise reader for a quoted-printable value. A "=" directly followed by
# a line break is a soft line break and is dropped together with the break.
# Returns the raw (still encoded) text up to, but not including, the first
# hard line break, or up to the end of the input.
# NOTE: a later definition of parse_quoted_printable in this class replaces
# this one at load time.
def parse_quoted_printable
  buffer = ""
  # StringScanner#get_byte returns nil (not "") once the input is exhausted
  while (byte = @input.get_byte)
    at_line_end = !@input.match?(CRLF).nil?
    if byte == EQUALS && at_line_end
      parse_crlf # soft line break
    else
      buffer << byte
      return buffer if at_line_end
    end
  end
  buffer
end
137
+
138
QP_LINE_CONTINUATION = /(.*)=\r?\n$/

# Line-wise reader for a quoted-printable value. Lines ending in "=" are
# soft line breaks and are joined with the following line. Returns the raw
# (still encoded) text and leaves one newline character unread for the
# caller. Returns nil when the input ends before a terminating line break is
# found, so the caller can treat it as a failed match.
# FIXME: this could be much faster in inline C, since now, we're creating 2
# objects per crlf and scanning a line at least twice.
def parse_quoted_printable
  buffer = ""
  loop do
    # scan_until yields nil once no line break is left (truncated input)
    line = @input.scan_until(CRLF) or return nil
    if (soft_break = line.match(QP_LINE_CONTINUATION))
      buffer << soft_break[1]
    else
      @input.pos -= 1 # leave the newline to be scanned
      buffer << line.chomp
      return buffer
    end
  end
end
153
+
154
+ # def parse_base64
155
+ # buffer = ""
156
+ # while true
157
+ # input = @input.get_byte
158
+ # buffer << input unless input =~ /[\s]/
159
+ # return buffer if input == "" # at end of stream
160
+ # return buffer if input == "\n" and (@input.peek(2) == "\r\n" or @input.peek(1) == "\n")
161
+ # end
162
+ # end
163
+
164
+ # base64 = <MIME RFC 1521 base64 text>
165
+ # ; the end of the text is marked with two CRLF sequences
166
+ # ; this results in one blank line before the start of the next property
167
+ # if this vcard has one broken base64 field and a correct one, our nice fallback will fail. but well...
168
EMPTY_LINE = /\r?\n\s*\r?\n/

# Reads a base64 value that is terminated by an empty line.
# Returns the text with all white space removed (passed through to_ascii),
# leaving one newline character unread; returns nil, without moving the
# scanner, when no empty line follows.
def parse_base64
  raw = @input.scan_until(EMPTY_LINE)
  return nil unless raw
  @input.pos -= 1
  raw.gsub!(/\s/, '')
  to_ascii(raw)
end
176
+
177
# Fallback for base64 values that lack the terminating empty line: reads up
# to the first newline that is followed by a non-white-space character.
# Returns the text with all white space removed (passed through to_ascii),
# leaving that newline unread; returns nil when no such newline exists.
def parse_broken_base64
  raw = @input.scan_until(/\n(?=[^\s])/)
  return nil if raw.nil?
  @input.pos -= 1
  raw.gsub!(/\s/, '')
  to_ascii(raw)
end
184
+
185
# Consumes one line break (LF or CRLF) at the current position.
# Returns the matched text, or nil when no line break is next.
def parse_crlf
  line_break = @input.scan(CRLF)
  line_break
end
188
+
189
+ # NON_WORD_CHARD = ["[", "]", "=", ":", ".", ","]
190
+ # NON_XWORD_CHARS = ["[", "]", "=", ":", ".", ",", ";"]
191
+ # word := char [word]
192
# WORD: a run of characters other than [ ] = : . ,
WORD = /[^\[\]\=\:\.\,]+/
# XWORD: like WORD, but a semicolon ends it as well
XWORD = /[^\[\]\=\:\.\,\;]+/ # /[\w-]+/ ???
# X_XWORD: an XWORD carrying the "X-" extension prefix (any case)
X_XWORD = /X-[^\[\]\=\:\.\,\;]+/i

# Consumes an XWORD at the current position; returns it, or nil when none
# starts here.
def parse_xword
  xword = @input.scan(XWORD)
  xword
end
198
+
199
KNOWNTYPES_LITERALS = Regexp.union(*KNOWNTYPES)

# Parses a bare parameter value (one written without "KEY=") and returns it
# as a TYPE param, or nil when nothing word-like is at the current position.
#
# A known-type literal only counts when the word really ends there. The
# alternation would otherwise accept "MPEG" out of "MPEG2" or "HOME" out of
# "HOMEPAGE", leaving a tail that makes the whole item fail to parse. In that
# case the complete word is taken through the XWORD fallback instead.
def parse_knowntype
  start = @input.pos
  value = @input.scan(KNOWNTYPES_LITERALS)
  if value && @input.check(/[^\[\]=:.,;\s]/) # more word characters follow
    @input.pos = start
    value = nil
  end
  value ||= @input.scan(XWORD)
  value && Param.new("TYPE", value)
end
204
+
205
COMMA = /\,/

# params := 1*(';' [ws] param [ws])
# Collects params for as long as parse_param keeps producing them and
# returns them as an array (empty when there are none).
#
# Some programs send 2.1 cards with params in the 3.0 shorthand
# ("TYPE=fax,work,pref"). This is not according to the specs, but it is
# supported: every additional comma-separated value becomes its own param
# with the same key. All extra values are consumed, not just the first.
def parse_params
  params = []
  while (param = parse_param)
    params << param
    while @input.scan(COMMA)
      extra = @input.scan(XWORD) || ""
      params << Param.new(to_ascii(param.key), extra)
    end
  end
  params
end
220
+
221
SEMICOLON = /\;/

# param := ('TYPE' / 'VALUE' / 'ENCODING' / 'CHARSET' / 'LANGUAGE' / 'X-' xword) [ws] '=' [ws] xword / knowntype
#
# Expects a leading ';'. Returns the parsed param, or nil when there is no
# ';' or nothing parseable follows it. As a side effect an ENCODING param
# sets @encoding to :base64 or :quoted_printable, and to nil for any other
# encoding value.
def parse_param
  return nil unless @input.skip(SEMICOLON)
  @input.skip(OPTIONAL_WS)
  param = parse_param_key_value || parse_knowntype
  return nil unless param
  @input.skip(OPTIONAL_WS)
  if param.key =~ ENCODING
    @encoding =
      case param.value
      when BASE64 then :base64
      when QUOTED_PRINTABLE then :quoted_printable
      end
  end
  param
end
238
+
239
EQUALS_REGEXP = /=/
# '=' with optional spaces/tabs on either side. The previous character class
# `[\ |\t]` also accepted a literal "|" as if it were white space.
WS_EQUALS_WS = /[ \t]*=[ \t]*/

# Parses "KEY = value" at the current position.
# Returns a Param with the upper-cased key and the value (an empty string
# when nothing follows the '='). Returns nil when no param key starts here;
# when a key is found but no '=' follows, the scanner is rewound and nil is
# returned.
def parse_param_key_value
  start = @input.pos
  key = parse_param_key or return nil
  key = key.upcase
  @input.skip(WS_EQUALS_WS) or return rollback(start)
  value = @input.scan(XWORD) || ""
  Param.new(key, value)
end
249
+
250
PARAM_KEY = /(TYPE|VALUE|ENCODING|CHARSET|LANGUAGE)/i

# Consumes a parameter key: one of the standard keys (any case) or an "X-"
# extension word. Returns the key as written, or nil when none starts here.
def parse_param_key
  @input.scan(PARAM_KEY) || @input.scan(X_XWORD)
end
254
+
255
# optional white space including line breaks
OPTIONAL_WSLS = /(\ |\t|\r\n|\n)*/
# Optional spaces/tabs. The previous character class `[\ |\t]` also skipped
# literal "|" characters.
OPTIONAL_WS = /[ \t]*/

# produces an array of hashes
# start := [wsls] vcard [wsls]
# Returns what parse_vcard returned, or nil when no vCard could be parsed.
def parse_start
  @input.skip(OPTIONAL_WSLS)
  vcard = parse_vcard or return nil
  @input.skip(OPTIONAL_WSLS)
  vcard
end
266
+
267
+ # 'BEGIN' [ws] ':' [ws] 'VCARD' [ws] 1*CRLF items *CRLF 'END' [ws] ':' [ws] 'VCARD'
268
+ # vcard := beginvcard items *crlf endvcard
269
# vcard := beginvcard items *crlf endvcard
# Returns an array made of the BEGIN line, every item and the END line, or
# nil as soon as one of the three parts is missing.
def parse_vcard
  opening = parse_beginvcard
  return nil unless opening
  body = parse_items
  return nil unless body
  @input.skip(ONE_OR_MORE_CRLF) # and ignore it if there are none
  closing = parse_endvcard
  return nil unless closing
  [opening, *body, closing]
end
276
+
277
COLON = /:/
# 'BEGIN' [ws] ':' [ws]
# `[:space:]` is only a POSIX class inside a bracket expression; written on
# its own it is the character set ":", "s", "p", "a", "c", "e". Real white
# space around the colon was therefore rejected while text such as
# "BEGINspace:" was accepted. Spaces and tabs are what the grammar allows.
BEGIN_WS_COLON_WS = /BEGIN[ \t]*:[ \t]*/
VCARD = /VCARD/i

# 'BEGIN' [ws] ':' [ws] 'VCARD' [ws] 1*crlf
# Returns the hash for the BEGIN line, or nil when the input does not start
# with one. The scanner is not rewound on failure.
def parse_beginvcard
  @input.skip(BEGIN_WS_COLON_WS) or return nil
  @input.skip(VCARD) or return nil
  @input.skip(OPTIONAL_WS)
  @input.skip(ONE_OR_MORE_CRLF) or return nil
  { :name => "BEGIN", :value => "VCARD" }
end
288
+
289
# 'END' [ws] ':' [ws]
# `[:space:]` outside a bracket expression is just the character set
# ":", "s", "p", "a", "c", "e", so it never matched white space; spaces and
# tabs are what the grammar allows here.
END_WS_COLON_WS = /END[ \t]*:[ \t]*/

# 'END' [ws] ':' [ws] 'VCARD'
# Returns the hash for the END line, or nil when the input does not continue
# with one. The scanner is not rewound on failure.
def parse_endvcard
  @input.skip(END_WS_COLON_WS) or return nil
  @input.skip(VCARD) or return nil
  { :name => "END", :value => "VCARD" }
end
296
+
297
+ # ( items *crlf item ) / item <--- left recursion!
298
+ # (item *crlf) items / item <-- right recursion, better for my parser
299
+ # 1*(item *crlf) <-- simplification
300
# items := 1*(item *crlf)
# Returns the array of parsed items, or nil when not even one item parses.
def parse_items
  item_rule = method(:parse_item)
  one_star(item_rule)
end
303
+
304
+ # item := [groups] name [params] ':' value crlf
305
# item := [groups] name [params] ':' value crlf
# Returns a hash with :groups, :name, :params and :value. When any mandatory
# part is missing the scanner is rewound to where the item started and nil
# is returned. @encoding is reset before the params are read.
def parse_item
  start = @input.pos
  groups = parse_groups
  name = parse_name
  return rollback(start) unless name
  @encoding = nil
  params = parse_params
  return rollback(start) unless @input.skip(COLON)
  value = parse_value
  return rollback(start) unless value
  return rollback(start) unless @input.skip(ONE_OR_MORE_CRLF)
  { :groups => groups, :name => name, :params => params, :value => value }
end
316
+
317
+ # groups := groups . word / word
318
+ # group := group*
319
# Collects consecutive group prefixes ("word.") in front of a name.
# Always returns an array; it is empty when the item has no group.
def parse_groups
  collected = []
  group = parse_group
  while group
    collected << group
    group = parse_group
  end
  collected
end
326
+
327
DOT = /\./

# group := word .
# Returns the word when it is immediately followed by a dot (the dot is
# consumed too); otherwise rewinds the scanner and returns nil.
def parse_group
  start = @input.pos
  word = @input.scan(WORD)
  return word if word && @input.skip(DOT)
  rollback(start)
end
334
+
335
+ # name := 'LOGO' / 'PHOTO' / 'LABEL' / 'FN' / 'TITLE' / 'SOUND' / 'VERSION' / 'TEL' / 'EMAIL' / 'TZ' / 'GEO' / 'NOTE' / 'URL' / 'BDAY' / 'ROLE' / 'REV' / 'UID' / 'KEY' / 'MAILER' / 'X-' word #; these may be "folded"
336
+ # name := xword # any word except begin or end, those are 'special'
337
BEGIN_END = /^(BEGIN|END)$/i

# name := xword, except BEGIN and END, which are 'special'
# Returns the scanned name, or nil when no word starts here or the word is
# BEGIN/END (in which case the word stays consumed).
def parse_name
  word = @input.scan(XWORD)
  return nil if word.nil? || word =~ BEGIN_END
  word
end
343
+
344
+ # value := sevenbitascii / quotedprintable / base64
345
# value := sevenbitascii / quotedprintable / base64
# Picks the value reader matching @encoding (as set while the params were
# parsed). Base64 falls back to the lenient reader when the strict one
# finds nothing.
def parse_value
  if @encoding == :quoted_printable
    parse_quoted_printable
  elsif @encoding == :base64
    parse_base64 || parse_broken_base64
  else
    parse_sevenbitascii
  end
end
355
+ end
356
+
357
+ end
358
+ end
359
+
@@ -0,0 +1,103 @@
1
+ require 'virginity/vcard21/base'
2
+ require 'virginity/vcard21/parser'
3
+
4
+ module Virginity
5
+ module Vcard21
6
+
7
+ class ParseError < Error; end
8
+
9
+ module Reader # for DirectoryInformation
10
# Builds a DirectoryInformation whose lines are the content lines parsed
# from the vCard 2.1 text in +string+.
def from_vcard21(string)
  DirectoryInformation.new.tap do |dirinfo|
    dirinfo.lines = lines_from_vcard21(string)
  end
end
15
+
16
+ # remove QUOTED-PRINTABLE-encoding
17
# Removes the QUOTED-PRINTABLE encoding from +line+ (a hash with :params
# and :value): the encoding param is deleted and the value is decoded and
# re-encoded as text. Returns the modified line.
#
# If the still-encoded value contains ";" it is treated as a ";"-separated
# list, else if it contains "," as a ","-separated list; each component is
# decoded on its own. Components are split with a negative limit so that
# trailing empty components ("Doe;John;;;") survive the round trip instead of
# being silently dropped.
# FIXME encoding. reencoding could fail because the characters are not
# encodable as text
def reencode_quoted_printable!(line)
  line[:params] ||= []
  line[:params].delete_if { |p| Vcard21::qp_param?(p) }
  separator = [";", ","].find { |sep| line[:value].include?(sep) }
  line[:value] =
    if separator
      parts = line[:value].split(separator, -1).map { |e| EncodingDecoding::decode_quoted_printable(e) }
      EncodingDecoding::encode_text_list(parts, separator)
    else
      EncodingDecoding::encode_text(EncodingDecoding::decode_quoted_printable(line[:value]))
    end
  line
end
33
+
34
+ def convert_base64_to_b!(line)
35
+ line[:params] ||= []
36
+ line[:params].delete_if { |p| Vcard21::base64_param?(p) }
37
+ line[:params] << Param.new("ENCODING", "b")
38
+ line
39
+ end
40
+
41
# Re-tags the value of +line+ with the encoding named by its CHARSET param,
# transcodes it with String#encode (no arguments) and removes the CHARSET
# param. Returns the line.
# A line without a CHARSET param is returned unchanged instead of failing
# on a nil param.
def convert_charsets!(line)
  line[:params] ||= []
  charset = line[:params].find { |p| p.key == "CHARSET" }
  return line if charset.nil?
  line[:value] = line[:value].force_encoding(charset.value).encode
  line[:params].delete charset
  line
end
48
+
49
LATIN1 = "ISO-8859-1"

# Returns +s+ as validly encoded text. A binary string is first re-tagged
# (in place) as UTF-8; if the bytes are valid in their encoding, +s+ itself
# is returned. Otherwise a copy is interpreted as ISO-8859-1, transcoded
# with String#encode (no arguments) and returned.
# Raises Virginity::InvalidEncoding when even that copy is not valid.
def guess_charset_for_part!(s)
  s.force_encoding(Encoding::UTF_8) if s.encoding == Encoding::BINARY
  return s if s.valid_encoding?

  latin1 = s.dup.force_encoding(LATIN1).encode
  raise Virginity::InvalidEncoding, "can't fix #{latin1.to_s.inspect}" unless latin1.valid_encoding?
  # return the repaired string; the `raise ... unless` above evaluates to nil
  latin1
end
57
+
58
# Replaces the value of +line+ with what guess_charset_for_part! returns
# for it; that new value is also the return value.
def guess_charset!(line)
  fixed = guess_charset_for_part!(line[:value])
  line[:value] = fixed
end
61
+
62
# Parses a single vCard 2.1 content line (given without its line break) and
# returns [group, name, params, value]: group is the first group or nil,
# and params defaults to an empty array.
# Any StandardError raised on the way is replaced by a ParseError carrying
# the inspected input.
def line21_parts(string)
  begin
    line = Vcard21::Parser.new(string + "\n").parse_item
    fix_vcard21_line!(line)
    groups = line[:groups]
    [groups ? groups.first : nil, line[:name], line[:params] || [], line[:value]]
  rescue
    raise ParseError, string.inspect
  end
end
71
+
72
# Parses a single vCard 2.1 content line and wraps its parts in a
# ContentLine (name, value, params, group).
def read_21_line(string)
  parts = line21_parts(string)
  ContentLine.new(parts[1], parts[3], parts[2], parts[0])
end
76
+
77
# Normalizes one parsed line (a hash with :params and :value) in place:
# quoted-printable values are re-encoded as text, BASE64 becomes ENCODING=b,
# a CHARSET param is applied and removed, and finally the charset of the
# value is guessed. Returns the line.
# Raises a RuntimeError naming the position and hex code of the first
# unsupported control character found in the resulting value.
def fix_vcard21_line!(line)
  params = line[:params]
  unless params.nil?
    reencode_quoted_printable!(line) if params.any? { |p| Vcard21::qp_param?(p) }
    convert_base64_to_b!(line) if line[:params].any? { |p| Vcard21::base64_param?(p) }
    convert_charsets!(line) if line[:params].any? { |p| p.key == "CHARSET" }
  end
  guess_charset!(line)
  if (position = line[:value] =~ UNSUPPORTED_CONTROL_CHARS)
    # String#to_s takes no radix; the code point has to come from #ord
    code = line[:value][position].ord.to_s(16)
    raise "unsupported control character in line #{line.inspect} at character #{position}: 0x#{code}"
  end
  line
end
89
+
90
+ UNSUPPORTED_CONTROL_CHARS = /\x01|\x02|\x03|\x04|\x05|\x06|\x07|\x08|\x0e|\x0f|\x10|\x11|\x12|\x13|\x14|\x15|\x16|\x17|\x18|\x19|\x1a|\x1b|\x1c|\x1d|\x1e|\x1f|\x7f/
91
+
92
# Parses a complete vCard 2.1 text and returns one ContentLine per parsed
# line. All lines are normalized with fix_vcard21_line! before any
# ContentLine is built; only the first group of a line is kept.
# +options+ is handed to the parser unchanged.
def lines_from_vcard21(string, options = {})
  parsed = Vcard21::Parser.new(string, options).parse!
  parsed.each { |line| fix_vcard21_line!(line) }
  parsed.map do |line|
    groups = line[:groups]
    first_group = groups.nil? ? nil : groups.first
    ContentLine.new(line[:name], line[:value], line[:params] || [], first_group, :no_deep_copy => true)
  end
end
100
+ end
101
+
102
+ end
103
+ end