bmg 0.18.14 → 0.18.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/bmg/reader/csv.rb +49 -14
- data/lib/bmg/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 14260453d9c3f5d24b5ef6975072e452af309485
|
4
|
+
data.tar.gz: f6de19178d2b4404c9fbe248d42f9c596cee410e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e53da5df4abb7095036dc275948a70e4af9f1945e2fd446244e765d7fead1258a9e564d9076baa3efc4ee29566810a5176c76d2c3c953de906a7ff36a3a86dff
|
7
|
+
data.tar.gz: a6f0cb03795e5b954563f774bc65c7879a78049928d06da935017a8dfcdb2bdd563c4ab08922cb95f774d68714ab108bb480d0722249ec5e6cc67fed5ed0064a
|
data/lib/bmg/reader/csv.rb
CHANGED
@@ -10,20 +10,18 @@ module Bmg
|
|
10
10
|
}
|
11
11
|
|
12
12
|
def initialize(type, path_or_io, options = {})
|
13
|
-
|
13
|
+
require 'csv'
|
14
|
+
|
14
15
|
@path_or_io = path_or_io
|
15
|
-
@options =
|
16
|
-
|
17
|
-
@options[:col_sep] ||= infer_col_sep
|
18
|
-
@options[:quote_char] ||= infer_quote_char
|
19
|
-
end
|
16
|
+
@options = handle_options(options)
|
17
|
+
@type = handle_type(type)
|
20
18
|
end
|
21
19
|
|
22
20
|
def each
|
23
21
|
return to_enum unless block_given?
|
24
|
-
|
25
|
-
|
26
|
-
|
22
|
+
|
23
|
+
with_csv do |csv|
|
24
|
+
csv.each do |row|
|
27
25
|
yield tuple(row)
|
28
26
|
end
|
29
27
|
end
|
@@ -44,33 +42,70 @@ module Bmg
|
|
44
42
|
row.to_hash.each_with_object({}){|(k,v),h| h[k.to_sym] = v }
|
45
43
|
end
|
46
44
|
|
45
|
+
def handle_type(type)
|
46
|
+
return type if type.knows_attrlist?
|
47
|
+
|
48
|
+
type.with_attrlist(infer_attrlist)
|
49
|
+
end
|
50
|
+
|
51
|
+
def infer_attrlist
|
52
|
+
with_csv do |csv|
|
53
|
+
csv.each do |row|
|
54
|
+
return tuple(row).keys
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
def handle_options(options)
|
60
|
+
options = DEFAULT_OPTIONS.merge(options)
|
61
|
+
if options[:smart] && !@path_or_io.is_a?(IO)
|
62
|
+
options[:col_sep] ||= infer_col_sep
|
63
|
+
options[:quote_char] ||= infer_quote_char(options[:col_sep])
|
64
|
+
end
|
65
|
+
options
|
66
|
+
end
|
67
|
+
|
47
68
|
def infer_col_sep
|
48
69
|
sniff(text_portion, [",","\t",";"], ",")
|
49
70
|
end
|
50
71
|
|
51
|
-
def infer_quote_char
|
52
|
-
sniff(text_portion, ["'
|
72
|
+
def infer_quote_char(col_sep)
|
73
|
+
sniff(text_portion, ['"',"'"], '"'){|quote|
|
74
|
+
/#{quote}#{col_sep}#{quote}|^#{quote}|#{quote}$/
|
75
|
+
}
|
53
76
|
end
|
54
77
|
|
55
78
|
def text_portion
|
56
|
-
@text_portion ||= with_io{|io| io.readlines(
|
79
|
+
@text_portion ||= with_io{|io| io.readlines(50).join("\n") }
|
57
80
|
end
|
58
81
|
|
59
82
|
def with_io(&bl)
|
60
83
|
case @path_or_io
|
61
84
|
when IO, StringIO
|
85
|
+
@path_or_io.rewind if @path_or_io.respond_to?(:rewind)
|
62
86
|
bl.call(@path_or_io)
|
63
87
|
else
|
64
88
|
File.open(@path_or_io, "r", &bl)
|
65
89
|
end
|
66
90
|
end
|
67
91
|
|
92
|
+
def with_csv(&bl)
|
93
|
+
with_io do |io|
|
94
|
+
yield ::CSV.new(io, **csv_options)
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
68
98
|
# Finds the best candidate among `candidates` for a separator
|
69
99
|
# found in `str`. If none is found, returns `default`.
|
70
|
-
def sniff(str, candidates, default)
|
100
|
+
def sniff(str, candidates, default, &bl)
|
71
101
|
snif = {}
|
72
102
|
candidates.each {|delim|
|
73
|
-
|
103
|
+
counter = bl ? bl.call(delim) : delim
|
104
|
+
snif[delim] = if counter.is_a?(Regexp)
|
105
|
+
str.scan(counter).length
|
106
|
+
else
|
107
|
+
str.count(counter)
|
108
|
+
end
|
74
109
|
}
|
75
110
|
snif = snif.sort {|a,b| b[1] <=> a[1] }
|
76
111
|
snif.size > 0 ? snif[0][0] : default
|
data/lib/bmg/version.rb
CHANGED