csvrecord 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +0 -2
- data/Rakefile +5 -1
- data/lib/csvrecord.rb +3 -5
- data/lib/csvrecord/version.rb +1 -1
- metadata +16 -4
- data/lib/csvrecord/reader.rb +0 -188
- data/test/test_reader.rb +0 -119
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 77369b93b97cff13a7159870a4343cb22ad86dec
|
4
|
+
data.tar.gz: cbc687395deb700c55ed7ca79687da11a7fb8913
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f8d27de0a2c8e8ee2ddfcc024802845a6bce1f05dbc94aa1911fec1cc287837a5b5739e46e93cd5d6db6af678c6ad80e326ea09850c95bb342a27c5063d61fdc
|
7
|
+
data.tar.gz: 548b5db8a78788d5c5e624ed44420b961bf0668f9289c876825fb7f09314bc0281f42ca2eb7d6dba11ece65c58de985e6630889230652524f31481e2603cecf2
|
data/Manifest.txt
CHANGED
@@ -6,12 +6,10 @@ Rakefile
|
|
6
6
|
lib/csvrecord.rb
|
7
7
|
lib/csvrecord/base.rb
|
8
8
|
lib/csvrecord/builder.rb
|
9
|
-
lib/csvrecord/reader.rb
|
10
9
|
lib/csvrecord/version.rb
|
11
10
|
test/data/beer.csv
|
12
11
|
test/data/beer11.csv
|
13
12
|
test/helper.rb
|
14
|
-
test/test_reader.rb
|
15
13
|
test/test_record.rb
|
16
14
|
test/test_record_auto.rb
|
17
15
|
test/test_version.rb
|
data/Rakefile
CHANGED
@@ -14,9 +14,13 @@ Hoe.spec 'csvrecord' do
|
|
14
14
|
self.email = 'wwwmake@googlegroups.com'
|
15
15
|
|
16
16
|
# switch extension to .markdown for gihub formatting
|
17
|
-
self.readme_file
|
17
|
+
self.readme_file = 'README.md'
|
18
18
|
self.history_file = 'HISTORY.md'
|
19
19
|
|
20
|
+
self.extra_deps = [
|
21
|
+
['csvreader']
|
22
|
+
]
|
23
|
+
|
20
24
|
self.licenses = ['Public Domain']
|
21
25
|
|
22
26
|
self.spec_extras = {
|
data/lib/csvrecord.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
require '
|
3
|
+
###
|
4
|
+
# 3rd party gems
|
5
|
+
require 'csvreader'
|
6
6
|
|
7
7
|
|
8
8
|
###
|
@@ -10,8 +10,6 @@ require 'pp'
|
|
10
10
|
require 'csvrecord/version' # let version always go first
|
11
11
|
require 'csvrecord/base'
|
12
12
|
require 'csvrecord/builder'
|
13
|
-
require 'csvrecord/reader'
|
14
|
-
|
15
13
|
|
16
14
|
|
17
15
|
puts CsvRecord.banner # say hello
|
data/lib/csvrecord/version.rb
CHANGED
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvrecord
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-08-
|
11
|
+
date: 2018-08-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: csvreader
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: rdoc
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -57,12 +71,10 @@ files:
|
|
57
71
|
- lib/csvrecord.rb
|
58
72
|
- lib/csvrecord/base.rb
|
59
73
|
- lib/csvrecord/builder.rb
|
60
|
-
- lib/csvrecord/reader.rb
|
61
74
|
- lib/csvrecord/version.rb
|
62
75
|
- test/data/beer.csv
|
63
76
|
- test/data/beer11.csv
|
64
77
|
- test/helper.rb
|
65
|
-
- test/test_reader.rb
|
66
78
|
- test/test_record.rb
|
67
79
|
- test/test_record_auto.rb
|
68
80
|
- test/test_version.rb
|
data/lib/csvrecord/reader.rb
DELETED
@@ -1,188 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
module Csv ## check: rename to CsvSettings / CsvPref / CsvGlobals or similar - why? why not???
|
5
|
-
|
6
|
-
## STD_CSV_ENGINE = CSV ## to avoid name confusion use longer name - why? why not? find a better name?
|
7
|
-
## use __CSV__ or similar? or just ::CSV ??
|
8
|
-
|
9
|
-
class Configuration
|
10
|
-
|
11
|
-
puts "CSV::VERSION:"
|
12
|
-
puts CSV::VERSION
|
13
|
-
|
14
|
-
puts "builtin CSV::Converters:"
|
15
|
-
pp CSV::Converters
|
16
|
-
|
17
|
-
puts "CSV::DEFAULT_OPTIONS:"
|
18
|
-
pp CSV::DEFAULT_OPTIONS
|
19
|
-
|
20
|
-
## register our own converters
|
21
|
-
## check if strip gets called for nil values too?
|
22
|
-
CSV::Converters[:strip] = ->(field) { field.strip }
|
23
|
-
|
24
|
-
|
25
|
-
attr_accessor :sep ## col_sep (column separator)
|
26
|
-
|
27
|
-
def initialize
|
28
|
-
@sep = ','
|
29
|
-
## note: do NOT add headers as global - should ALWAYS be explicit
|
30
|
-
## headers (true/false) - changes resultset and requires different processing!!!
|
31
|
-
|
32
|
-
self ## return self for chaining
|
33
|
-
end
|
34
|
-
|
35
|
-
def blank?( line )
|
36
|
-
## note: blank line does NOT include "blank" with spaces only!!
|
37
|
-
## use BLANK_REGEX in skip_lines to clean-up/skip/remove/ignore
|
38
|
-
## see skip_blanks in default_options
|
39
|
-
line.empty?
|
40
|
-
end
|
41
|
-
|
42
|
-
## lines starting with # (note: only leading spaces allowed)
|
43
|
-
COMMENTS_REGEX = /^\s*#/
|
44
|
-
BLANK_REGEX = /^\s*$/ ## skip all whitespace lines - note: use "" or , for a blank record!!!
|
45
|
-
SKIP_REGEX = Regexp.union( COMMENTS_REGEX, BLANK_REGEX )
|
46
|
-
|
47
|
-
def skip?( line )
|
48
|
-
## check if comment line - skip comments
|
49
|
-
## see skip_lines in default_options
|
50
|
-
line =~ SKIP_REGEX
|
51
|
-
end
|
52
|
-
|
53
|
-
## built-in (default) options
|
54
|
-
## todo: find a better name?
|
55
|
-
def default_options
|
56
|
-
## note:
|
57
|
-
## do NOT include sep character and
|
58
|
-
## do NOT include headers true/false here
|
59
|
-
##
|
60
|
-
## make default sep its own "global" default config
|
61
|
-
## e.g. Csv.config.sep =
|
62
|
-
|
63
|
-
## common options
|
64
|
-
## skip comments starting with #
|
65
|
-
## skip blank lines
|
66
|
-
## strip leading and trailing spaces
|
67
|
-
## NOTE/WARN: leading and trailing spaces NOT allowed/working with double quoted values!!!!
|
68
|
-
defaults = {
|
69
|
-
skip_blanks: true, ## note: skips lines with no whitespaces only!! (e.g. line with space is NOT blank!!)
|
70
|
-
skip_lines: SKIP_REGEX,
|
71
|
-
:converters => :strip
|
72
|
-
}
|
73
|
-
defaults
|
74
|
-
end
|
75
|
-
end # class Configuration
|
76
|
-
|
77
|
-
|
78
|
-
## lets you use
|
79
|
-
## Csv.configure do |config|
|
80
|
-
## config.sep = ',' ## or "/t"
|
81
|
-
## end
|
82
|
-
|
83
|
-
def self.configure
|
84
|
-
yield( config )
|
85
|
-
end
|
86
|
-
|
87
|
-
def self.config
|
88
|
-
@config ||= Configuration.new
|
89
|
-
end
|
90
|
-
end # module Csvv
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
####
|
95
|
-
## use our own wrapper
|
96
|
-
|
97
|
-
class CsvReader
|
98
|
-
|
99
|
-
####################
|
100
|
-
# helper methods
|
101
|
-
def self.unwrap( row_or_array ) ## unwrap row - find a better name? why? why not?
|
102
|
-
## return row values as array of strings
|
103
|
-
if row_or_array.is_a?( CSV::Row )
|
104
|
-
row = row_or_array
|
105
|
-
row.fields ## gets array of string of field values
|
106
|
-
else ## assume "classic" array of strings
|
107
|
-
array = row_or_array
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
def self.foreach( path, sep: Csv.config.sep, headers: true )
|
114
|
-
csv_options = Csv.config.default_options.merge(
|
115
|
-
headers: headers,
|
116
|
-
col_sep: sep,
|
117
|
-
external_encoding: 'utf-8' ## note: always (auto-)add utf-8 external encoding for now!!!
|
118
|
-
)
|
119
|
-
|
120
|
-
CSV.foreach( path, csv_options ) do |row|
|
121
|
-
yield( row ) ## check/todo: use block.call( row ) ## why? why not?
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
|
126
|
-
def self.read( path, sep: Csv.config.sep, headers: true )
|
127
|
-
## note: use our own file.open
|
128
|
-
## always use utf-8 for now
|
129
|
-
## check/todo: add skip option bom too - why? why not?
|
130
|
-
txt = File.open( path, 'r:utf-8' )
|
131
|
-
parse( txt, sep: sep, headers: headers )
|
132
|
-
end
|
133
|
-
|
134
|
-
def self.parse( txt, sep: Csv.config.sep, headers: true )
|
135
|
-
csv_options = Csv.config.default_options.merge(
|
136
|
-
headers: headers,
|
137
|
-
col_sep: sep
|
138
|
-
)
|
139
|
-
## pp csv_options
|
140
|
-
CSV.parse( txt, csv_options )
|
141
|
-
end
|
142
|
-
|
143
|
-
def self.parse_line( txt, sep: Csv.config.sep )
|
144
|
-
## note: do NOT include headers option (otherwise single row gets skipped as first header row :-)
|
145
|
-
csv_options = Csv.config.default_options.merge(
|
146
|
-
headers: false, ## note: always turn off headers!!!!!!
|
147
|
-
col_sep: sep
|
148
|
-
)
|
149
|
-
## pp csv_options
|
150
|
-
CSV.parse_line( txt, csv_options )
|
151
|
-
end
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
def self.header( path, sep: Csv.config.sep ) ## use header or headers - or use both (with alias)?
|
156
|
-
# read first lines (only)
|
157
|
-
# and parse with csv to get header from csv library itself
|
158
|
-
#
|
159
|
-
# check - if there's an easier or built-in way for the csv library
|
160
|
-
|
161
|
-
## readlines until
|
162
|
-
## - NOT a comments line or
|
163
|
-
## - NOT a blank line
|
164
|
-
|
165
|
-
lines = ''
|
166
|
-
File.open( path, 'r:utf-8' ) do |f|
|
167
|
-
|
168
|
-
## todo/fix: how to handle empty files or files without headers?!
|
169
|
-
|
170
|
-
## todo/check if readline includes \n\r too??
|
171
|
-
## yes! - line include \n e.g.
|
172
|
-
## "Brewery,City,Name,Abv\n" or
|
173
|
-
## "#######\n# try with some comments\n# and blank lines even before header\n\nBrewery,City,Name,Abv\n"
|
174
|
-
loop do
|
175
|
-
line = f.readline
|
176
|
-
lines << line
|
177
|
-
break unless Csv.config.skip?( line ) || Csv.config.blank?( line )
|
178
|
-
end
|
179
|
-
end
|
180
|
-
|
181
|
-
## puts "lines:"
|
182
|
-
## pp lines
|
183
|
-
|
184
|
-
## note: do NOT use headers: true to get "plain" data array (no hash records)
|
185
|
-
## hash record does NOT work for single line/row
|
186
|
-
parse_line( lines, sep: sep )
|
187
|
-
end # method self.header
|
188
|
-
end # class CsvReader
|
data/test/test_reader.rb
DELETED
@@ -1,119 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
###
|
4
|
-
# to run use
|
5
|
-
# ruby -I ./lib -I ./test test/test_reader.rb
|
6
|
-
|
7
|
-
|
8
|
-
require 'helper'
|
9
|
-
|
10
|
-
class TestReader < MiniTest::Test
|
11
|
-
|
12
|
-
def test_read
|
13
|
-
puts "== read: beer.csv:"
|
14
|
-
table = CsvReader.read( "#{CsvRecord.test_data_dir}/beer.csv" ) ## returns CSV::Table
|
15
|
-
|
16
|
-
pp table.class.name
|
17
|
-
pp table
|
18
|
-
pp table.to_a ## note: includes header (first row with column names)
|
19
|
-
|
20
|
-
table.each do |row| ## note: will skip (NOT include) header row!!
|
21
|
-
pp row
|
22
|
-
end
|
23
|
-
puts " #{table.size} rows" ## note: again will skip (NOT include) header row in count!!!
|
24
|
-
assert_equal 6, table.size
|
25
|
-
end
|
26
|
-
|
27
|
-
def test_read_header_false
|
28
|
-
puts "== read (headers: false): beer.csv:"
|
29
|
-
data = CsvReader.read( "#{CsvRecord.test_data_dir}/beer.csv", headers: false )
|
30
|
-
|
31
|
-
pp data.class.name
|
32
|
-
pp data
|
33
|
-
|
34
|
-
data.each do |row|
|
35
|
-
pp row
|
36
|
-
end
|
37
|
-
puts " #{data.size} rows"
|
38
|
-
assert_equal 7, data.size ## note: include header row in count
|
39
|
-
end
|
40
|
-
|
41
|
-
|
42
|
-
def test_read11
|
43
|
-
puts "== read: beer11.csv:"
|
44
|
-
table = CsvReader.read( "#{CsvRecord.test_data_dir}/beer11.csv" )
|
45
|
-
pp table
|
46
|
-
pp table.to_a ## note: includes header (first row with column names)
|
47
|
-
|
48
|
-
assert true
|
49
|
-
end
|
50
|
-
|
51
|
-
|
52
|
-
def test_parse_line
|
53
|
-
puts "== parse_line:"
|
54
|
-
row = CsvReader.parse_line( <<TXT )
|
55
|
-
Augustiner Bräu München, München, Edelstoff, 5.6%
|
56
|
-
Bayerische Staatsbrauerei Weihenstephan, Freising, Hefe Weissbier, 5.4%
|
57
|
-
TXT
|
58
|
-
|
59
|
-
pp row
|
60
|
-
assert_equal ['Augustiner Bräu München', 'München', 'Edelstoff', '5.6%'], row
|
61
|
-
end
|
62
|
-
|
63
|
-
def test_parse_line11
|
64
|
-
puts "== parse_line:"
|
65
|
-
row = CsvReader.parse_line( <<TXT )
|
66
|
-
#######
|
67
|
-
# try with some comments
|
68
|
-
# and blank lines even before header
|
69
|
-
|
70
|
-
Augustiner Bräu München, München, Edelstoff, 5.6%
|
71
|
-
Bayerische Staatsbrauerei Weihenstephan, Freising, Hefe Weissbier, 5.4%
|
72
|
-
TXT
|
73
|
-
|
74
|
-
pp row
|
75
|
-
assert_equal ['Augustiner Bräu München', 'München', 'Edelstoff', '5.6%'], row
|
76
|
-
end
|
77
|
-
|
78
|
-
def test_header
|
79
|
-
puts "== header: beer.csv:"
|
80
|
-
header = CsvReader.header( "#{CsvRecord.test_data_dir}/beer.csv" )
|
81
|
-
pp header
|
82
|
-
assert_equal ['Brewery','City','Name','Abv'], header
|
83
|
-
end
|
84
|
-
|
85
|
-
def test_header11
|
86
|
-
puts "== header: beer11.csv:"
|
87
|
-
header = CsvReader.header( "#{CsvRecord.test_data_dir}/beer11.csv" )
|
88
|
-
pp header
|
89
|
-
assert_equal ['Brewery','City','Name','Abv'], header
|
90
|
-
end
|
91
|
-
|
92
|
-
|
93
|
-
def test_foreach
|
94
|
-
puts "== foreach: beer.csv:"
|
95
|
-
CsvReader.foreach( "#{CsvRecord.test_data_dir}/beer.csv" ) do |row|
|
96
|
-
pp row
|
97
|
-
pp row.fields
|
98
|
-
end
|
99
|
-
assert true
|
100
|
-
end
|
101
|
-
|
102
|
-
def test_foreach11
|
103
|
-
puts "== foreach: beer11.csv:"
|
104
|
-
CsvReader.foreach( "#{CsvRecord.test_data_dir}/beer11.csv" ) do |row|
|
105
|
-
pp row
|
106
|
-
pp row.fields
|
107
|
-
end
|
108
|
-
assert true
|
109
|
-
end
|
110
|
-
|
111
|
-
def test_foreach_header_false
|
112
|
-
puts "== foreach (headers: false): beer11.csv:"
|
113
|
-
CsvReader.foreach( "#{CsvRecord.test_data_dir}/beer11.csv", headers: false ) do |row|
|
114
|
-
pp row ## note: is Array (no .fields available!!!!!)
|
115
|
-
end
|
116
|
-
assert true
|
117
|
-
end
|
118
|
-
|
119
|
-
end # class TestReader
|