redshift_csv_file 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 988bff806bddf57141c4d2819c6c440fbdc25955a5b83060299e66699dfdca2b
4
+ data.tar.gz: 013c197fbffc732dc8ade4554059c00d2f9cd4764a25b4b91aca50102a245df3
5
+ SHA512:
6
+ metadata.gz: 19f59a3acde973ef62d222aeb40624555b11c0add2b0d7b194891e2929282e8b56d5a2f175ad5569fa46bfea58b9b7a02f78064c8ca316ed1c29afcea9bd3e7d
7
+ data.tar.gz: 2a65476327e518e612f1011e73a9c1870d0fe1cdeffcb52338c7739c158242ac6b1f2e31a483de808aecff27fc2a4fa7ec27debcc24164441c0a6f935b85a78a
@@ -0,0 +1 @@
1
+ Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
# Resolve gems from the public RubyGems index.
source 'https://rubygems.org'

# Take the dependency list from the gemspec that sits next to this Gemfile.
gemspec
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2019 Bricolage Org
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,25 @@
1
+ # redshift_csv_file library
2
+
3
+ redshift_csv_file is a Redshift-specific CSV file parser in Ruby.
4
+ Amazon Redshift generates a non-standard CSV format, so special handling is required.
5
+
6
+ ## Usage
7
+
8
+ ```
9
+ require 'redshift_csv_file'
10
+
11
+ File.open('unloaded_file.csv') {|f|
12
+ RedshiftCsvFile.new(f).each_row do |col1, col2, col3|
13
+ p [col1, col2, col3] # => ["value1", "value2", "value3"]
14
+ end
15
+ }
16
+ ```
17
+
18
+ ## License
19
+
20
+ MIT license.
21
+ See LICENSE file for details.
22
+
23
+ ## Author
24
+
25
+ Minero Aoki
@@ -0,0 +1,111 @@
1
+ require 'strscan'
2
+
3
# Reads CSV files generated by the Redshift UNLOAD statement (with the
# ADDQUOTES and ESCAPE options).
#
# UNLOAD escapes data with '\' (the backslash character), so the standard CSV
# class cannot be used to parse these files.
class RedshiftCsvFile
  # Raised when the input cannot be parsed as quoted, backslash-escaped CSV.
  class MalformedCSVException < StandardError; end

  # f :: IO (any object that responds to #gets and #lineno)
  def initialize(f)
    @f = f
    @s = ScanBuffer.new(@f)
  end

  # Yields every remaining row as an Array of unescaped column Strings.
  # Returns an Enumerator when called without a block.
  # Raises MalformedCSVException on unparsable input.
  def each_row
    return enum_for(:each_row) unless block_given?
    while row = parse_row(@s)
      yield row
    end
  end

  alias each each_row

  # Reads a single row. Returns an Array of Strings, or nil at end of input.
  # Raises MalformedCSVException on unparsable input.
  def read_row
    return nil if @s.eof?
    parse_row(@s)
  end

  private

  # Parses one row from the scan buffer +s+.
  # Returns nil when the underlying IO has no more lines.
  def parse_row(s)
    s.next_row or return nil
    row = []
    # A row always has at least one column; further columns follow separators.
    loop do
      column = s.scan_column
      unless column
        raise MalformedCSVException, "CSV parse error: unterminated column or row at line #{s.lineno}"
      end
      row.push unescape_column(column)
      break unless s.read_separator
    end
    unless s.read_eol
      raise MalformedCSVException, "CSV parse error: missing column separator at line #{s.lineno}"
    end
    row
  end

  # Escape sequences that map to a control character. Any other escaped
  # character stands for itself (see #unescape_column).
  UNESCAPE_MAP = {
    '\\t' => "\t",
    '\\r' => "\r",
    '\\n' => "\n",
  }.freeze

  # Strips the surrounding quotes from +col+ and resolves backslash escapes.
  def unescape_column(col)
    charmap = UNESCAPE_MAP
    col[1...-1].gsub(/\\./m) {|s| charmap[s] || s[1,1] }
  end

  # Line-oriented input buffer on top of StringScanner. Lines are pulled from
  # the IO on demand, because a quoted column may span several physical lines.
  class ScanBuffer
    def initialize(f)
      @f = f
      @s = StringScanner.new(+'')
      @eof = false
    end

    # True once the IO is exhausted and all buffered text has been consumed.
    def eof?
      @s.eos? && @eof
    end

    # Line number of the underlying IO (number of lines read so far).
    def lineno
      @f.lineno
    end

    # Loads the first line of the next row. Returns false at end of input.
    def next_row
      # StringScanner#<< only ever appends, so without this reset the buffer
      # would keep the text of every row already parsed and grow with the file.
      @s.string = +'' if @s.eos?
      fill_buffer
    end

    # (1.2 * 1024**3) bytes, i.e. about 1.2 GiB.
    # NOTE(review): this comment used to say "1.2MB", which does not match the
    # value. The value is kept unchanged; confirm which limit was intended.
    MAX_COLUMN_LENGTH = (1.2 * (1024 ** 3)).to_i

    # A double-quoted column with backslash escapes. This matches the same
    # strings as /"(?:\\.|[^"\\]+)*"/m but without a quantifier nested inside
    # another one, so a failed match on a still-incomplete column stays cheap.
    COLUMN_PATTERN = /"[^"\\]*(?:\\.[^"\\]*)*"/m

    # Scans one quoted column (quotes included), reading more lines while the
    # column is still open. Returns nil if no column can be read.
    # Raises MalformedCSVException when the pending text exceeds
    # MAX_COLUMN_LENGTH.
    def scan_column
      s = @s
      s.skip(/[ \t]+/)
      # Appended lines never change the text at the scan position, so anything
      # other than an opening quote here can never become a column. Give up
      # now rather than reading the rest of the input first.
      return nil unless s.eos? || s.peek(1) == '"'
      until column = s.scan(COLUMN_PATTERN)
        fill_buffer or return nil
        return nil if s.eos?
        if s.rest_size > MAX_COLUMN_LENGTH
          raise MalformedCSVException, "CSV parse error: too long column at line #{@f.lineno}"
        end
      end
      column
    end

    # Appends the next line of the IO to the buffer.
    # Returns true on success, false (and records EOF) when the IO is exhausted.
    def fill_buffer
      line = @f.gets
      if line
        @s << line
        true
      else
        @eof = true
        false
      end
    end

    # Consumes a column separator (with optional leading blanks), if present.
    def read_separator
      @s.skip(/[ \t]*,/)
    end

    # Consumes the end of the row: trailing blanks/CR followed by a newline or
    # the end of the buffer.
    def read_eol
      @s.skip(/[ \t\r]*(?:\n|\z)/)
    end
  end
end
@@ -0,0 +1,19 @@
1
# Gem specification for redshift_csv_file.
Gem::Specification.new do |s|
  s.platform = Gem::Platform::RUBY
  s.name = 'redshift_csv_file'
  s.version = '1.0.0'
  s.summary = 'Redshift unloaded CSV file parser'
  s.description = 'redshift_csv_file is a Redshift-specific CSV file parser. Amazon Redshift generates non-standard CSV format, special handling is required.'
  s.license = 'MIT'

  # `authors=` is the writer that takes a list; the singular `author=` is
  # meant for a single String.
  s.authors = ['Minero Aoki']
  s.email = 'aamine@loveruby.net'
  s.homepage = 'https://github.com/bricolages/redshift_csv_file'

  # Package every file tracked by git except tests/specs/features.
  s.files = `git ls-files -z`.split("\x0").reject {|f| f.match(%r{^(test|spec|features)/}) }
  s.require_path = 'lib'

  s.required_ruby_version = '>= 2.3.0'
  s.add_development_dependency 'test-unit'
  s.add_development_dependency 'rake'
end
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: redshift_csv_file
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Minero Aoki
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-07-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: test-unit
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: redshift_csv_file is a Redshift-specific CSV file parser. Amazon Redshift
42
+ generates non-standard CSV format, special handling is required.
43
+ email: aamine@loveruby.net
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".gitignore"
49
+ - Gemfile
50
+ - LICENSE
51
+ - README.md
52
+ - lib/redshift_csv_file.rb
53
+ - redshift_csv_file.gemspec
54
+ homepage: https://github.com/bricolages/redshift_csv_file
55
+ licenses:
56
+ - MIT
57
+ metadata: {}
58
+ post_install_message:
59
+ rdoc_options: []
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: 2.3.0
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ requirements: []
73
+ rubygems_version: 3.0.3
74
+ signing_key:
75
+ specification_version: 4
76
+ summary: Redshift unloaded CSV file parser
77
+ test_files: []