redshift_csv_file 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 988bff806bddf57141c4d2819c6c440fbdc25955a5b83060299e66699dfdca2b
4
+ data.tar.gz: 013c197fbffc732dc8ade4554059c00d2f9cd4764a25b4b91aca50102a245df3
5
+ SHA512:
6
+ metadata.gz: 19f59a3acde973ef62d222aeb40624555b11c0add2b0d7b194891e2929282e8b56d5a2f175ad5569fa46bfea58b9b7a02f78064c8ca316ed1c29afcea9bd3e7d
7
+ data.tar.gz: 2a65476327e518e612f1011e73a9c1870d0fe1cdeffcb52338c7739c158242ac6b1f2e31a483de808aecff27fc2a4fa7ec27debcc24164441c0a6f935b85a78a
@@ -0,0 +1 @@
1
+ Gemfile.lock
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source "https://rubygems.org"
2
+ gemspec
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2019 Bricolage Org
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,25 @@
1
+ # redshift_csv_file library
2
+
3
+ redshift_csv_file is a Redshift-specific CSV file parser in Ruby.
4
+ Amazon Redshift generates a non-standard CSV format, so special handling is required.
5
+
6
+ ## Usage
7
+
8
+ ```
9
+ require 'redshift_csv_file'
10
+
11
+ File.open('unloaded_file.csv') {|f|
12
+ RedshiftCsvFile.new(f).each_row do |col1, col2, col3|
13
+ p [col1, col2, col3] # => ["value1", "value2", "value3"]
14
+ end
15
+ }
16
+ ```
17
+
18
+ ## License
19
+
20
+ MIT license.
21
+ See LICENSE file for details.
22
+
23
+ ## Author
24
+
25
+ Minero Aoki
@@ -0,0 +1,111 @@
1
+ require 'strscan'
2
+
3
# Reads CSV files generated by the Redshift UNLOAD statement (with the options
# ADDQUOTES ESCAPE).
# UNLOAD escapes data with '\' (backslash character), so the standard CSV class
# cannot be used; this class implements a dedicated parser instead.
#
# Every column is expected to be enclosed in double quotes.  Rows are returned
# as Arrays of unescaped column Strings.
class RedshiftCsvFile
  # Raised when the input cannot be parsed as a quoted, backslash-escaped row.
  class MalformedCSVException < StandardError; end

  # f :: IO -- line-oriented input; #gets and #lineno are called on it.
  def initialize(f)
    @f = f
    @s = ScanBuffer.new(@f)
  end

  # Yields each remaining row as an Array of Strings.  A block with several
  # parameters receives the columns individually.
  # Returns an Enumerator when called without a block.
  # Raises MalformedCSVException on unparsable input.
  def each_row
    return enum_for(__method__) unless block_given?

    while (row = parse_row(@s))
      yield row
    end
  end

  alias each each_row

  # Reads a single row.  Returns an Array of Strings, or nil at end of input.
  # Raises MalformedCSVException on unparsable input.
  def read_row
    return nil if @s.eof?
    parse_row(@s)
  end

  private

  # Parses one row from the scan buffer +s+; returns nil when no input is left.
  def parse_row(s)
    return nil unless s.next_row

    # A row always has at least one column; each separator announces one more.
    row = [parse_column(s)]
    row.push(parse_column(s)) while s.read_separator
    unless s.read_eol
      raise MalformedCSVException, "CSV parse error: missing column separator at line #{s.lineno}"
    end
    row
  end

  # Scans one quoted column from +s+ and returns its unescaped content.
  def parse_column(s)
    column = s.scan_column
    unless column
      raise MalformedCSVException, "CSV parse error: unterminated column or row at line #{s.lineno}"
    end
    unescape_column(column)
  end

  # Two-character escape sequences that map to a control character.  Any other
  # backslash-escaped character stands for itself.
  UNESCAPE_MAP = {
    '\\t' => "\t",
    '\\r' => "\r",
    '\\n' => "\n",
  }.freeze

  # Strips the surrounding quotes from +col+ and resolves backslash escapes.
  def unescape_column(col)
    charmap = UNESCAPE_MAP
    col[1...-1].gsub(/\\./m) {|s| charmap[s] || s[1,1] }
  end

  # Line-buffered scanner over an IO.  Lines are pulled from the IO on demand so
  # that a column containing (escaped) line breaks can span several lines.
  class ScanBuffer
    def initialize(f)
      @f = f
      @s = StringScanner.new("")
      @eof = false
    end

    # True once the IO has reported end of file and the buffer is consumed.
    def eof?
      @s.eos? && @eof
    end

    def lineno
      @f.lineno
    end

    # Loads the next line into the buffer.  Returns false at end of file.
    def next_row
      fill_buffer
    end

    # Upper bound on the amount of buffered, not yet consumed text while
    # looking for the end of a column.
    # NOTE(review): the original comment said "1.2MB", but 1024 ** 3 is one
    # GiB, so the effective limit is about 1.2 GiB.  The value is kept as is
    # for compatibility; confirm which limit was intended.
    MAX_COLUMN_LENGTH = (1.2 * (1024 ** 3)).to_i

    # A double-quoted column with backslash escapes.  The atomic group (?>...)
    # matches exactly the same strings as a plain group here, because both
    # alternatives start with different characters, but it prevents exponential
    # backtracking when the closing quote is not in the buffer (yet).
    COLUMN_PATTERN = /"(?>\\.|[^"\\]+)*"/m

    # Returns the next column including its surrounding quotes, reading more
    # lines as needed.  Returns nil when the input ends first.
    # Raises MalformedCSVException when the pending text exceeds
    # MAX_COLUMN_LENGTH.
    def scan_column
      s = @s
      s.skip(/[ \t]+/)
      until column = s.scan(COLUMN_PATTERN)
        fill_buffer or return nil
        return nil if s.eos?
        if s.rest_size > MAX_COLUMN_LENGTH
          raise MalformedCSVException, "CSV parse error: too long column at line #{@f.lineno}"
        end
      end
      column
    end

    # Reads one more line from the IO into the buffer.
    # Returns true on success, false (and marks EOF) when the IO is exhausted.
    def fill_buffer
      line = @f.gets
      unless line
        @eof = true
        return false
      end
      if @s.eos?
        # Everything buffered so far has been consumed: start over with the new
        # line instead of appending, so that memory use does not grow with the
        # size of the whole input.
        @s.string = line
      else
        @s << line
      end
      true
    end

    # Skips a column separator; returns nil when there is none.
    def read_separator
      @s.skip(/[ \t]*,/)
    end

    # Skips the end of the row (or of the input); returns nil when other text
    # follows instead.
    def read_eol
      @s.skip(/[ \t\r]*(?:\n|\z)/)
    end
  end
end
@@ -0,0 +1,19 @@
1
# Gem specification for redshift_csv_file.
Gem::Specification.new do |s|
  s.platform = Gem::Platform::RUBY
  s.name = 'redshift_csv_file'
  s.version = '1.0.0'
  s.summary = 'Redshift unloaded CSV file parser'
  s.description = 'redshift_csv_file is a Redshift-specific CSV file parser. Amazon Redshift generates non-standard CSV format, special handling is required.'
  s.license = 'MIT'

  # The plural writer is the one that takes an Array of names.
  s.authors = ['Minero Aoki']
  s.email = 'aamine@loveruby.net'
  s.homepage = 'https://github.com/bricolages/redshift_csv_file'

  # Package every file tracked by git except tests, specs and features.
  s.files = `git ls-files -z`.split("\x0").reject {|f| f.match(%r{^(test|spec|features)/}) }
  s.require_path = 'lib'

  s.required_ruby_version = '>= 2.3.0'
  s.add_development_dependency 'test-unit'
  s.add_development_dependency 'rake'
end
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: redshift_csv_file
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Minero Aoki
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2019-07-16 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: test-unit
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ description: redshift_csv_file is a Redshift-specific CSV file parser. Amazon Redshift
42
+ generates non-standard CSV format, special handling is required.
43
+ email: aamine@loveruby.net
44
+ executables: []
45
+ extensions: []
46
+ extra_rdoc_files: []
47
+ files:
48
+ - ".gitignore"
49
+ - Gemfile
50
+ - LICENSE
51
+ - README.md
52
+ - lib/redshift_csv_file.rb
53
+ - redshift_csv_file.gemspec
54
+ homepage: https://github.com/bricolages/redshift_csv_file
55
+ licenses:
56
+ - MIT
57
+ metadata: {}
58
+ post_install_message:
59
+ rdoc_options: []
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: 2.3.0
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ requirements: []
73
+ rubygems_version: 3.0.3
74
+ signing_key:
75
+ specification_version: 4
76
+ summary: Redshift unloaded CSV file parser
77
+ test_files: []