redshift-connector-data_file 7.2.0 → 7.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b6e59a40168cc873c02377298bc0ed3ff93dfdf7
4
- data.tar.gz: 4eb2c6f3c78ef2b742dfea6e5ca795344a686f58
3
+ metadata.gz: 701f14f9327e2b44caf41f66a410518da508a2b9
4
+ data.tar.gz: 21afe712a15a3ebd31ef0c86fb03150c124608dc
5
5
  SHA512:
6
- metadata.gz: 3beaaacb6c6ea5b743bed86d6f0e86ba95f424f6ffa72b60a725482da151f47deda1b489bbe8b7d54bb8254c632b33b7d9e92e70361efc1b48de52bdf7b0ef0d
7
- data.tar.gz: 3189ab027a2e2a5626471475cf18e942c6e4b84223e7f67b07c1572e7a258a20ff851a84fb1b6dcf1406a757e0687650f56abd48fa775aeecb094a5a8c90aca8
6
+ metadata.gz: 3ed483e215690fc976b60969b3ab6ada05a68941ef7e17899fdfde6fc3d28f442b05626f1257c8f351a222078daffcedee2c74030baf3aabbe6dd5e77a74de96
7
+ data.tar.gz: 8dd76812b9a5c79f4c3cd4da6994442f2703546d4750e78770f9faff747dcab9a313843de9f8061b3f443944c9f8233f6ee316f19f26ef7990cabdb0a89785cf
@@ -15,41 +15,105 @@ module RedshiftConnector
15
15
  # f :: IO
16
16
  def initialize(f)
17
17
  @f = f
18
+ @s = ScanBuffer.new(@f)
18
19
  end
19
20
 
20
- def each
21
- # We can use simple #each_line to read single row
22
- # because line terminators are always escaped by UNLOAD.
23
- @f.each_line do |line|
24
- yield parse_row(line, @f.lineno)
21
+ def each_row
22
+ s = @s
23
+ while row = parse_row(@s)
24
+ yield row
25
25
  end
26
26
  end
27
27
 
28
- def parse_row(line, lineno = nil)
28
+ alias each each_row
29
+
30
+ def read_row
31
+ return nil if @s.eof?
32
+ parse_row(@s)
33
+ end
34
+
35
+ private
36
+
37
+ def parse_row(s)
38
+ s.next_row or return nil
29
39
  row = []
30
- s = StringScanner.new(line)
31
- s.skip(/\s+/)
32
- until s.eos?
33
- col = s.scan(/"(?:\\.|[^"\\]+)*"/) or raise Reader::MalformedCSVException, "CSV parse error at line #{lineno}"
34
- row.push unescape_column(col)
35
- s.skip(/\s*/) # skip line terminator on line ends
36
- s.skip(/,\s*/)
40
+ begin
41
+ first = false
42
+ column = s.scan_column
43
+ unless column
44
+ raise Reader::MalformedCSVException, "CSV parse error: unterminated column or row at line #{s.lineno}"
45
+ end
46
+ row.push unescape_column(column)
47
+ end while s.read_separator
48
+ unless s.read_eol
49
+ raise Reader::MalformedCSVException, "CSV parse error: missing column separator at line #{s.lineno}"
37
50
  end
38
51
  row
39
52
  end
40
53
 
41
54
  UNESCAPE_MAP = {
42
- '\\"' => '"',
43
- "\\'" => "'",
44
- '\\,' => ',',
55
+ '\\t' => "\t",
45
56
  '\\r' => "\r",
46
57
  '\\n' => "\n",
47
- '\\\\' => '\\'
48
58
  }
49
59
 
50
60
  def unescape_column(col)
51
61
  charmap = UNESCAPE_MAP
52
- col[1...-1].gsub(/\\./) {|s| charmap[s] }
62
+ col[1...-1].gsub(/\\./m) {|s| charmap[s] || s[1,1] }
63
+ end
64
+
65
+ class ScanBuffer
66
+ def initialize(f)
67
+ @f = f
68
+ @s = StringScanner.new("")
69
+ @eof = false
70
+ end
71
+
72
+ def eof?
73
+ @s.eos? && @eof
74
+ end
75
+
76
+ def lineno
77
+ @f.lineno
78
+ end
79
+
80
+ def next_row
81
+ fill_buffer
82
+ end
83
+
84
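+ MAX_COLUMN_LENGTH = (1.2 * (1024 ** 3)).to_i # 1.2 GiB as written (1024 ** 3 bytes is 1 GiB, not 1 MB); NOTE(review): comment previously said 1.2MB, confirm the intended limit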
85
+
86
+ def scan_column
87
+ s = @s
88
+ s.skip(/[ \t]+/)
89
+ until column = s.scan(/"(?:\\.|[^"\\]+)*"/m)
90
+ fill_buffer or return nil
91
+ return nil if s.eos?
92
+ if s.rest_size > MAX_COLUMN_LENGTH
93
+ raise Reader::MalformedCSVException, "CSV parse error: too long column at line #{@f.lineno}"
94
+ end
95
+ end
96
+ column
97
+ end
98
+
99
+ def fill_buffer
100
+ line = @f.gets
101
+ if line
102
+ @s << line
103
+ true
104
+ else
105
+ @eof = true
106
+ false
107
+ end
108
+ end
109
+
110
+ def read_separator
111
+ @s.skip(/[ \t]*,/)
112
+ end
113
+
114
+ def read_eol
115
+ @s.skip(/[ \t\r]*(?:\n|\z)/)
116
+ end
53
117
  end
54
118
  end
55
119
  end
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "redshift-connector-data_file"
3
- spec.version = "7.2.0"
3
+ spec.version = "7.3.0"
4
4
  spec.authors = ["Hidekazu Kobayashi", "Minero Aoki"]
5
5
  spec.email = ["kobahide789@gmail.com", "aamine@loveruby.net"]
6
6
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: redshift-connector-data_file
3
3
  version: !ruby/object:Gem::Version
4
- version: 7.2.0
4
+ version: 7.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hidekazu Kobayashi
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2017-10-30 00:00:00.000000000 Z
12
+ date: 2018-04-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: aws-sdk-s3
@@ -132,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
132
132
  version: '0'
133
133
  requirements: []
134
134
  rubyforge_project:
135
- rubygems_version: 2.6.11
135
+ rubygems_version: 2.6.14
136
136
  signing_key:
137
137
  specification_version: 4
138
138
  summary: Utility classes for exported data files from Redshift