redshift-connector-data_file 7.2.0 → 7.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b6e59a40168cc873c02377298bc0ed3ff93dfdf7
4
- data.tar.gz: 4eb2c6f3c78ef2b742dfea6e5ca795344a686f58
3
+ metadata.gz: 701f14f9327e2b44caf41f66a410518da508a2b9
4
+ data.tar.gz: 21afe712a15a3ebd31ef0c86fb03150c124608dc
5
5
  SHA512:
6
- metadata.gz: 3beaaacb6c6ea5b743bed86d6f0e86ba95f424f6ffa72b60a725482da151f47deda1b489bbe8b7d54bb8254c632b33b7d9e92e70361efc1b48de52bdf7b0ef0d
7
- data.tar.gz: 3189ab027a2e2a5626471475cf18e942c6e4b84223e7f67b07c1572e7a258a20ff851a84fb1b6dcf1406a757e0687650f56abd48fa775aeecb094a5a8c90aca8
6
+ metadata.gz: 3ed483e215690fc976b60969b3ab6ada05a68941ef7e17899fdfde6fc3d28f442b05626f1257c8f351a222078daffcedee2c74030baf3aabbe6dd5e77a74de96
7
+ data.tar.gz: 8dd76812b9a5c79f4c3cd4da6994442f2703546d4750e78770f9faff747dcab9a313843de9f8061b3f443944c9f8233f6ee316f19f26ef7990cabdb0a89785cf
@@ -15,41 +15,105 @@ module RedshiftConnector
15
15
  # f :: IO
16
16
  def initialize(f)
17
17
  @f = f
18
+ @s = ScanBuffer.new(@f)
18
19
  end
19
20
 
20
- def each
21
- # We can use simple #each_line to read single row
22
- # because line terminators are always escaped by UNLOAD.
23
- @f.each_line do |line|
24
- yield parse_row(line, @f.lineno)
21
+ def each_row
22
+ s = @s
23
+ while row = parse_row(@s)
24
+ yield row
25
25
  end
26
26
  end
27
27
 
28
- def parse_row(line, lineno = nil)
28
+ alias each each_row
29
+
30
+ def read_row
31
+ return nil if @s.eof?
32
+ parse_row(@s)
33
+ end
34
+
35
+ private
36
+
37
+ def parse_row(s)
38
+ s.next_row or return nil
29
39
  row = []
30
- s = StringScanner.new(line)
31
- s.skip(/\s+/)
32
- until s.eos?
33
- col = s.scan(/"(?:\\.|[^"\\]+)*"/) or raise Reader::MalformedCSVException, "CSV parse error at line #{lineno}"
34
- row.push unescape_column(col)
35
- s.skip(/\s*/) # skip line terminator on line ends
36
- s.skip(/,\s*/)
40
+ begin
41
+ first = false
42
+ column = s.scan_column
43
+ unless column
44
+ raise Reader::MalformedCSVException, "CSV parse error: unterminated column or row at line #{s.lineno}"
45
+ end
46
+ row.push unescape_column(column)
47
+ end while s.read_separator
48
+ unless s.read_eol
49
+ raise Reader::MalformedCSVException, "CSV parse error: missing column separator at line #{s.lineno}"
37
50
  end
38
51
  row
39
52
  end
40
53
 
41
54
  UNESCAPE_MAP = {
42
- '\\"' => '"',
43
- "\\'" => "'",
44
- '\\,' => ',',
55
+ '\\t' => "\t",
45
56
  '\\r' => "\r",
46
57
  '\\n' => "\n",
47
- '\\\\' => '\\'
48
58
  }
49
59
 
50
60
  def unescape_column(col)
51
61
  charmap = UNESCAPE_MAP
52
- col[1...-1].gsub(/\\./) {|s| charmap[s] }
62
+ col[1...-1].gsub(/\\./m) {|s| charmap[s] || s[1,1] }
63
+ end
64
+
65
+ class ScanBuffer
66
+ def initialize(f)
67
+ @f = f
68
+ @s = StringScanner.new("")
69
+ @eof = false
70
+ end
71
+
72
+ def eof?
73
+ @s.eos? && @eof
74
+ end
75
+
76
+ def lineno
77
+ @f.lineno
78
+ end
79
+
80
+ def next_row
81
+ fill_buffer
82
+ end
83
+
84
+ MAX_COLUMN_LENGTH = (1.2 * (1024 ** 3)).to_i # 1.2GB (1024 ** 3 bytes is one gigabyte, not one megabyte)
85
+
86
+ def scan_column
87
+ s = @s
88
+ s.skip(/[ \t]+/)
89
+ until column = s.scan(/"(?:\\.|[^"\\]+)*"/m)
90
+ fill_buffer or return nil
91
+ return nil if s.eos?
92
+ if s.rest_size > MAX_COLUMN_LENGTH
93
+ raise Reader::MalformedCSVException, "CSV parse error: too long column at line #{@f.lineno}"
94
+ end
95
+ end
96
+ column
97
+ end
98
+
99
+ def fill_buffer
100
+ line = @f.gets
101
+ if line
102
+ @s << line
103
+ true
104
+ else
105
+ @eof = true
106
+ false
107
+ end
108
+ end
109
+
110
+ def read_separator
111
+ @s.skip(/[ \t]*,/)
112
+ end
113
+
114
+ def read_eol
115
+ @s.skip(/[ \t\r]*(?:\n|\z)/)
116
+ end
53
117
  end
54
118
  end
55
119
  end
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "redshift-connector-data_file"
3
- spec.version = "7.2.0"
3
+ spec.version = "7.3.0"
4
4
  spec.authors = ["Hidekazu Kobayashi", "Minero Aoki"]
5
5
  spec.email = ["kobahide789@gmail.com", "aamine@loveruby.net"]
6
6
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: redshift-connector-data_file
3
3
  version: !ruby/object:Gem::Version
4
- version: 7.2.0
4
+ version: 7.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Hidekazu Kobayashi
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2017-10-30 00:00:00.000000000 Z
12
+ date: 2018-04-09 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: aws-sdk-s3
@@ -132,7 +132,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
132
132
  version: '0'
133
133
  requirements: []
134
134
  rubyforge_project:
135
- rubygems_version: 2.6.11
135
+ rubygems_version: 2.6.14
136
136
  signing_key:
137
137
  specification_version: 4
138
138
  summary: Utility classes for exported data files from Redshift