hippie_csv 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/hippie_csv/constants.rb +4 -2
- data/lib/hippie_csv/support.rb +19 -8
- data/lib/hippie_csv/version.rb +1 -1
- data/spec/fixtures/never_ordered.csv +1038094 -0
- data/spec/hippie_csv/version_spec.rb +1 -1
- data/spec/hippie_csv_spec.rb +11 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f53a562b556d85f2c893855cf0aa34ea6126afab
|
4
|
+
data.tar.gz: 9c1a2954a38b2c01b1a9dcbc13c25fb21e9799be
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7db7d04670863b9bffc7c66d8c1410b57892b4729819c6755816bd3e5deffcf0acc2af75870d6e26b41dc5643f1ef7eb8ec9cce86aa9868fff49129c2872e204
|
7
|
+
data.tar.gz: 2a587740e0ee24c021a373c71e4c3038447de70595d7ff1e89d86dc51aa696083a238cef221bdb2e6970a53eca11d407b6a0c5ee11df1449e32305ce25113055
|
data/lib/hippie_csv/constants.rb
CHANGED
@@ -1,10 +1,12 @@
|
|
1
|
-
require
|
1
|
+
require "csv"
|
2
2
|
|
3
3
|
module HippieCSV
|
4
4
|
QUOTE_CHARACTERS = %w(" ' |).freeze
|
5
5
|
DELIMETERS = %W(, ; \t).freeze
|
6
6
|
ENCODING = "utf-8".freeze
|
7
7
|
ALTERNATE_ENCODING = "utf-16".freeze
|
8
|
-
FIELD_SAMPLE_COUNT = 10
|
8
|
+
FIELD_SAMPLE_COUNT = 10.freeze
|
9
|
+
ENCODING_SAMPLE_CHARACTER_COUNT = 10000.freeze
|
9
10
|
ENCODING_WITH_BOM = "bom|#{ENCODING}".freeze
|
11
|
+
BLANK_LINE_REGEX = /^,+\r+$/.freeze
|
10
12
|
end
|
data/lib/hippie_csv/support.rb
CHANGED
@@ -10,16 +10,18 @@ module HippieCSV
|
|
10
10
|
|
11
11
|
def encode(string)
|
12
12
|
unless string.valid_encoding?
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
13
|
+
string = begin
|
14
|
+
current_encoding = detect_encoding(string)
|
15
|
+
if !current_encoding.nil?
|
16
|
+
string.encode(ENCODING, current_encoding)
|
17
|
+
else
|
18
|
+
magical_encode(string)
|
19
|
+
end
|
20
|
+
rescue Encoding::InvalidByteSequenceError
|
21
|
+
magical_encode(string)
|
20
22
|
end
|
21
23
|
end
|
22
|
-
|
24
|
+
string.gsub!(BLANK_LINE_REGEX, "")
|
23
25
|
string.encode(string.encoding, universal_newline: true)
|
24
26
|
end
|
25
27
|
|
@@ -59,6 +61,15 @@ module HippieCSV
|
|
59
61
|
|
60
62
|
private
|
61
63
|
|
64
|
+
def detect_encoding(string)
|
65
|
+
CharDet.detect(string[0..ENCODING_SAMPLE_CHARACTER_COUNT])["encoding"]
|
66
|
+
end
|
67
|
+
|
68
|
+
def magical_encode(string)
|
69
|
+
string.encode(ALTERNATE_ENCODING, ENCODING, invalid: :replace, replace: "")
|
70
|
+
.encode(ENCODING, ALTERNATE_ENCODING)
|
71
|
+
end
|
72
|
+
|
62
73
|
def field_count(file, delimeter, quote_character)
|
63
74
|
csv = CSV.new(file, col_sep: delimeter, quote_char: quote_character)
|
64
75
|
csv.lazy.take(FIELD_SAMPLE_COUNT).map(&:size).inject(:+)
|
data/lib/hippie_csv/version.rb
CHANGED