hippie_csv 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 25ff6543c2223eed975723e6b5718081b2f03e33
- data.tar.gz: 5f62a6c390164ade3e7880ed59171f56f6d608fe
+ metadata.gz: f53a562b556d85f2c893855cf0aa34ea6126afab
+ data.tar.gz: 9c1a2954a38b2c01b1a9dcbc13c25fb21e9799be
  SHA512:
- metadata.gz: 43c0883650a3091512b8c60e55c882a22ea60fe0f946a9e74280739e0765306296fc53b8807189f9aecf71b76f41c7d23758056e025281d73e83639a301a4147
- data.tar.gz: 8318a4e154ddce8ef9d5d835faf30b5ae08a94a252350380d59538a34e01a7de7c07ee968e521c1efd34d53a15720f21372beda179d0ca3513c2a02a644abc35
+ metadata.gz: 7db7d04670863b9bffc7c66d8c1410b57892b4729819c6755816bd3e5deffcf0acc2af75870d6e26b41dc5643f1ef7eb8ec9cce86aa9868fff49129c2872e204
+ data.tar.gz: 2a587740e0ee24c021a373c71e4c3038447de70595d7ff1e89d86dc51aa696083a238cef221bdb2e6970a53eca11d407b6a0c5ee11df1449e32305ce25113055
@@ -1,10 +1,12 @@
- require 'csv'
+ require "csv"

  module HippieCSV
  QUOTE_CHARACTERS = %w(" ' |).freeze
  DELIMETERS = %W(, ; \t).freeze
  ENCODING = "utf-8".freeze
  ALTERNATE_ENCODING = "utf-16".freeze
- FIELD_SAMPLE_COUNT = 10
+ FIELD_SAMPLE_COUNT = 10.freeze
+ ENCODING_SAMPLE_CHARACTER_COUNT = 10000.freeze
  ENCODING_WITH_BOM = "bom|#{ENCODING}".freeze
+ BLANK_LINE_REGEX = /^,+\r+$/.freeze
  end
@@ -10,16 +10,18 @@ module HippieCSV

  def encode(string)
  unless string.valid_encoding?
- current_encoding = CharDet.detect(string)["encoding"]
-
- string = if !current_encoding.nil?
- string.encode(ENCODING, current_encoding)
- else
- string.encode(ALTERNATE_ENCODING, ENCODING, invalid: :replace, replace: "")
- .encode(ENCODING, ALTERNATE_ENCODING)
+ string = begin
+ current_encoding = detect_encoding(string)
+ if !current_encoding.nil?
+ string.encode(ENCODING, current_encoding)
+ else
+ magical_encode(string)
+ end
+ rescue Encoding::InvalidByteSequenceError
+ magical_encode(string)
  end
  end
-
+ string.gsub!(BLANK_LINE_REGEX, "")
  string.encode(string.encoding, universal_newline: true)
  end

@@ -59,6 +61,15 @@ module HippieCSV

  private

+ def detect_encoding(string)
+ CharDet.detect(string[0..ENCODING_SAMPLE_CHARACTER_COUNT])["encoding"]
+ end
+
+ def magical_encode(string)
+ string.encode(ALTERNATE_ENCODING, ENCODING, invalid: :replace, replace: "")
+ .encode(ENCODING, ALTERNATE_ENCODING)
+ end
+
  def field_count(file, delimeter, quote_character)
  csv = CSV.new(file, col_sep: delimeter, quote_char: quote_character)
  csv.lazy.take(FIELD_SAMPLE_COUNT).map(&:size).inject(:+)
@@ -1,3 +1,3 @@
  module HippieCSV
- VERSION = "0.0.5"
+ VERSION = "0.0.6"
  end