redshift-connector-data_file 7.2.0 → 7.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/redshift_connector/reader/redshift_csv.rb +82 -18
- data/redshift-connector-data_file.gemspec +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 701f14f9327e2b44caf41f66a410518da508a2b9
|
4
|
+
data.tar.gz: 21afe712a15a3ebd31ef0c86fb03150c124608dc
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3ed483e215690fc976b60969b3ab6ada05a68941ef7e17899fdfde6fc3d28f442b05626f1257c8f351a222078daffcedee2c74030baf3aabbe6dd5e77a74de96
|
7
|
+
data.tar.gz: 8dd76812b9a5c79f4c3cd4da6994442f2703546d4750e78770f9faff747dcab9a313843de9f8061b3f443944c9f8233f6ee316f19f26ef7990cabdb0a89785cf
|
@@ -15,41 +15,105 @@ module RedshiftConnector
|
|
15
15
|
# f :: IO
|
16
16
|
def initialize(f)
|
17
17
|
@f = f
|
18
|
+
@s = ScanBuffer.new(@f)
|
18
19
|
end
|
19
20
|
|
20
|
-
def
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
yield parse_row(line, @f.lineno)
|
21
|
+
def each_row
|
22
|
+
s = @s
|
23
|
+
while row = parse_row(@s)
|
24
|
+
yield row
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
28
|
-
|
28
|
+
alias each each_row
|
29
|
+
|
30
|
+
def read_row
|
31
|
+
return nil if @s.eof?
|
32
|
+
parse_row(@s)
|
33
|
+
end
|
34
|
+
|
35
|
+
private
|
36
|
+
|
37
|
+
def parse_row(s)
|
38
|
+
s.next_row or return nil
|
29
39
|
row = []
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
40
|
+
begin
|
41
|
+
first = false
|
42
|
+
column = s.scan_column
|
43
|
+
unless column
|
44
|
+
raise Reader::MalformedCSVException, "CSV parse error: unterminated column or row at line #{s.lineno}"
|
45
|
+
end
|
46
|
+
row.push unescape_column(column)
|
47
|
+
end while s.read_separator
|
48
|
+
unless s.read_eol
|
49
|
+
raise Reader::MalformedCSVException, "CSV parse error: missing column separator at line #{s.lineno}"
|
37
50
|
end
|
38
51
|
row
|
39
52
|
end
|
40
53
|
|
41
54
|
UNESCAPE_MAP = {
|
42
|
-
'\\
|
43
|
-
"\\'" => "'",
|
44
|
-
'\\,' => ',',
|
55
|
+
'\\t' => "\t",
|
45
56
|
'\\r' => "\r",
|
46
57
|
'\\n' => "\n",
|
47
|
-
'\\\\' => '\\'
|
48
58
|
}
|
49
59
|
|
50
60
|
def unescape_column(col)
|
51
61
|
charmap = UNESCAPE_MAP
|
52
|
-
col[1...-1].gsub(/\\./) {|s| charmap[s] }
|
62
|
+
col[1...-1].gsub(/\\./m) {|s| charmap[s] || s[1,1] }
|
63
|
+
end
|
64
|
+
|
65
|
+
class ScanBuffer
|
66
|
+
def initialize(f)
|
67
|
+
@f = f
|
68
|
+
@s = StringScanner.new("")
|
69
|
+
@eof = false
|
70
|
+
end
|
71
|
+
|
72
|
+
def eof?
|
73
|
+
@s.eos? && @eof
|
74
|
+
end
|
75
|
+
|
76
|
+
def lineno
|
77
|
+
@f.lineno
|
78
|
+
end
|
79
|
+
|
80
|
+
def next_row
|
81
|
+
fill_buffer
|
82
|
+
end
|
83
|
+
|
84
|
+
MAX_COLUMN_LENGTH = (1.2 * (1024 ** 3)).to_i # ~1.2GB (1024 ** 3 bytes is 1 GiB, not 1 MiB)
|
85
|
+
|
86
|
+
def scan_column
|
87
|
+
s = @s
|
88
|
+
s.skip(/[ \t]+/)
|
89
|
+
until column = s.scan(/"(?:\\.|[^"\\]+)*"/m)
|
90
|
+
fill_buffer or return nil
|
91
|
+
return nil if s.eos?
|
92
|
+
if s.rest_size > MAX_COLUMN_LENGTH
|
93
|
+
raise Reader::MalformedCSVException, "CSV parse error: too long column at line #{@f.lineno}"
|
94
|
+
end
|
95
|
+
end
|
96
|
+
column
|
97
|
+
end
|
98
|
+
|
99
|
+
def fill_buffer
|
100
|
+
line = @f.gets
|
101
|
+
if line
|
102
|
+
@s << line
|
103
|
+
true
|
104
|
+
else
|
105
|
+
@eof = true
|
106
|
+
false
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
def read_separator
|
111
|
+
@s.skip(/[ \t]*,/)
|
112
|
+
end
|
113
|
+
|
114
|
+
def read_eol
|
115
|
+
@s.skip(/[ \t\r]*(?:\n|\z)/)
|
116
|
+
end
|
53
117
|
end
|
54
118
|
end
|
55
119
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: redshift-connector-data_file
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 7.2.0
|
4
|
+
version: 7.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Hidekazu Kobayashi
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2018-04-09 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: aws-sdk-s3
|
@@ -132,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
132
132
|
version: '0'
|
133
133
|
requirements: []
|
134
134
|
rubyforge_project:
|
135
|
-
rubygems_version: 2.6.
|
135
|
+
rubygems_version: 2.6.14
|
136
136
|
signing_key:
|
137
137
|
specification_version: 4
|
138
138
|
summary: Utility classes for exported data files from Redshift
|