csvrecord 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Manifest.txt +0 -2
- data/Rakefile +5 -1
- data/lib/csvrecord.rb +3 -5
- data/lib/csvrecord/version.rb +1 -1
- metadata +16 -4
- data/lib/csvrecord/reader.rb +0 -188
- data/test/test_reader.rb +0 -119
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 77369b93b97cff13a7159870a4343cb22ad86dec
|
4
|
+
data.tar.gz: cbc687395deb700c55ed7ca79687da11a7fb8913
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f8d27de0a2c8e8ee2ddfcc024802845a6bce1f05dbc94aa1911fec1cc287837a5b5739e46e93cd5d6db6af678c6ad80e326ea09850c95bb342a27c5063d61fdc
|
7
|
+
data.tar.gz: 548b5db8a78788d5c5e624ed44420b961bf0668f9289c876825fb7f09314bc0281f42ca2eb7d6dba11ece65c58de985e6630889230652524f31481e2603cecf2
|
data/Manifest.txt
CHANGED
@@ -6,12 +6,10 @@ Rakefile
|
|
6
6
|
lib/csvrecord.rb
|
7
7
|
lib/csvrecord/base.rb
|
8
8
|
lib/csvrecord/builder.rb
|
9
|
-
lib/csvrecord/reader.rb
|
10
9
|
lib/csvrecord/version.rb
|
11
10
|
test/data/beer.csv
|
12
11
|
test/data/beer11.csv
|
13
12
|
test/helper.rb
|
14
|
-
test/test_reader.rb
|
15
13
|
test/test_record.rb
|
16
14
|
test/test_record_auto.rb
|
17
15
|
test/test_version.rb
|
data/Rakefile
CHANGED
@@ -14,9 +14,13 @@ Hoe.spec 'csvrecord' do
|
|
14
14
|
self.email = 'wwwmake@googlegroups.com'
|
15
15
|
|
16
16
|
# switch extension to .markdown for github formatting
|
17
|
-
self.readme_file
|
17
|
+
self.readme_file = 'README.md'
|
18
18
|
self.history_file = 'HISTORY.md'
|
19
19
|
|
20
|
+
self.extra_deps = [
|
21
|
+
['csvreader']
|
22
|
+
]
|
23
|
+
|
20
24
|
self.licenses = ['Public Domain']
|
21
25
|
|
22
26
|
self.spec_extras = {
|
data/lib/csvrecord.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
require 'csv'
|
3
|
+
###
|
4
|
+
# 3rd party gems
|
5
|
+
require 'csvreader'
|
6
6
|
|
7
7
|
|
8
8
|
###
|
@@ -10,8 +10,6 @@ require 'pp'
|
|
10
10
|
require 'csvrecord/version' # let version always go first
|
11
11
|
require 'csvrecord/base'
|
12
12
|
require 'csvrecord/builder'
|
13
|
-
require 'csvrecord/reader'
|
14
|
-
|
15
13
|
|
16
14
|
|
17
15
|
puts CsvRecord.banner # say hello
|
data/lib/csvrecord/version.rb
CHANGED
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: csvrecord
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-08-
|
11
|
+
date: 2018-08-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: csvreader
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: rdoc
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -57,12 +71,10 @@ files:
|
|
57
71
|
- lib/csvrecord.rb
|
58
72
|
- lib/csvrecord/base.rb
|
59
73
|
- lib/csvrecord/builder.rb
|
60
|
-
- lib/csvrecord/reader.rb
|
61
74
|
- lib/csvrecord/version.rb
|
62
75
|
- test/data/beer.csv
|
63
76
|
- test/data/beer11.csv
|
64
77
|
- test/helper.rb
|
65
|
-
- test/test_reader.rb
|
66
78
|
- test/test_record.rb
|
67
79
|
- test/test_record_auto.rb
|
68
80
|
- test/test_version.rb
|
data/lib/csvrecord/reader.rb
DELETED
@@ -1,188 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
|
4
|
-
module Csv ## check: rename to CsvSettings / CsvPref / CsvGlobals or similar - why? why not???
|
5
|
-
|
6
|
-
## STD_CSV_ENGINE = CSV ## to avoid name confusion use longer name - why? why not? find a better name?
|
7
|
-
## use __CSV__ or similar? or just ::CSV ??
|
8
|
-
|
9
|
-
class Configuration
|
10
|
-
|
11
|
-
puts "CSV::VERSION:"
|
12
|
-
puts CSV::VERSION
|
13
|
-
|
14
|
-
puts "builtin CSV::Converters:"
|
15
|
-
pp CSV::Converters
|
16
|
-
|
17
|
-
puts "CSV::DEFAULT_OPTIONS:"
|
18
|
-
pp CSV::DEFAULT_OPTIONS
|
19
|
-
|
20
|
-
## register our own converters
|
21
|
-
## check if strip gets called for nil values too?
|
22
|
-
CSV::Converters[:strip] = ->(field) { field.strip }
|
23
|
-
|
24
|
-
|
25
|
-
attr_accessor :sep ## col_sep (column separator)
|
26
|
-
|
27
|
-
def initialize
|
28
|
-
@sep = ','
|
29
|
-
## note: do NOT add headers as global - should ALWAYS be explicit
|
30
|
-
## headers (true/false) - changes resultset and requires different processing!!!
|
31
|
-
|
32
|
-
self ## return self for chaining
|
33
|
-
end
|
34
|
-
|
35
|
-
def blank?( line )
|
36
|
-
## note: blank line does NOT include "blank" with spaces only!!
|
37
|
-
## use BLANK_REGEX in skip_lines to clean-up/skip/remove/ignore
|
38
|
-
## see skip_blanks in default_options
|
39
|
-
line.empty?
|
40
|
-
end
|
41
|
-
|
42
|
-
## lines starting with # (note: only leading spaces allowed)
|
43
|
-
COMMENTS_REGEX = /^\s*#/
|
44
|
-
BLANK_REGEX = /^\s*$/ ## skip all whitespace lines - note: use "" or , for a blank record!!!
|
45
|
-
SKIP_REGEX = Regexp.union( COMMENTS_REGEX, BLANK_REGEX )
|
46
|
-
|
47
|
-
def skip?( line )
|
48
|
-
## check if comment line - skip comments
|
49
|
-
## see skip_lines in default_options
|
50
|
-
line =~ SKIP_REGEX
|
51
|
-
end
|
52
|
-
|
53
|
-
## built-in (default) options
|
54
|
-
## todo: find a better name?
|
55
|
-
def default_options
|
56
|
-
## note:
|
57
|
-
## do NOT include sep character and
|
58
|
-
## do NOT include headers true/false here
|
59
|
-
##
|
60
|
-
## make default sep its own "global" default config
|
61
|
-
## e.g. Csv.config.sep =
|
62
|
-
|
63
|
-
## common options
|
64
|
-
## skip comments starting with #
|
65
|
-
## skip blank lines
|
66
|
-
## strip leading and trailing spaces
|
67
|
-
## NOTE/WARN: leading and trailing spaces NOT allowed/working with double quoted values!!!!
|
68
|
-
defaults = {
|
69
|
-
skip_blanks: true, ## note: skips lines with no whitespaces only!! (e.g. line with space is NOT blank!!)
|
70
|
-
skip_lines: SKIP_REGEX,
|
71
|
-
:converters => :strip
|
72
|
-
}
|
73
|
-
defaults
|
74
|
-
end
|
75
|
-
end # class Configuration
|
76
|
-
|
77
|
-
|
78
|
-
## lets you use
|
79
|
-
## Csv.configure do |config|
|
80
|
-
## config.sep = ',' ## or "\t"
|
81
|
-
## end
|
82
|
-
|
83
|
-
def self.configure
|
84
|
-
yield( config )
|
85
|
-
end
|
86
|
-
|
87
|
-
def self.config
|
88
|
-
@config ||= Configuration.new
|
89
|
-
end
|
90
|
-
end # module Csv
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
####
|
95
|
-
## use our own wrapper
|
96
|
-
|
97
|
-
class CsvReader
|
98
|
-
|
99
|
-
####################
|
100
|
-
# helper methods
|
101
|
-
def self.unwrap( row_or_array ) ## unwrap row - find a better name? why? why not?
|
102
|
-
## return row values as array of strings
|
103
|
-
if row_or_array.is_a?( CSV::Row )
|
104
|
-
row = row_or_array
|
105
|
-
row.fields ## gets array of string of field values
|
106
|
-
else ## assume "classic" array of strings
|
107
|
-
array = row_or_array
|
108
|
-
end
|
109
|
-
end
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
def self.foreach( path, sep: Csv.config.sep, headers: true )
|
114
|
-
csv_options = Csv.config.default_options.merge(
|
115
|
-
headers: headers,
|
116
|
-
col_sep: sep,
|
117
|
-
external_encoding: 'utf-8' ## note: always (auto-)add utf-8 external encoding for now!!!
|
118
|
-
)
|
119
|
-
|
120
|
-
CSV.foreach( path, csv_options ) do |row|
|
121
|
-
yield( row ) ## check/todo: use block.call( row ) ## why? why not?
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
|
126
|
-
def self.read( path, sep: Csv.config.sep, headers: true )
|
127
|
-
## note: use our own file.open
|
128
|
-
## always use utf-8 for now
|
129
|
-
## check/todo: add skip option bom too - why? why not?
|
130
|
-
txt = File.open( path, 'r:utf-8' )
|
131
|
-
parse( txt, sep: sep, headers: headers )
|
132
|
-
end
|
133
|
-
|
134
|
-
def self.parse( txt, sep: Csv.config.sep, headers: true )
|
135
|
-
csv_options = Csv.config.default_options.merge(
|
136
|
-
headers: headers,
|
137
|
-
col_sep: sep
|
138
|
-
)
|
139
|
-
## pp csv_options
|
140
|
-
CSV.parse( txt, csv_options )
|
141
|
-
end
|
142
|
-
|
143
|
-
def self.parse_line( txt, sep: Csv.config.sep )
|
144
|
-
## note: do NOT include headers option (otherwise single row gets skipped as first header row :-)
|
145
|
-
csv_options = Csv.config.default_options.merge(
|
146
|
-
headers: false, ## note: always turn off headers!!!!!!
|
147
|
-
col_sep: sep
|
148
|
-
)
|
149
|
-
## pp csv_options
|
150
|
-
CSV.parse_line( txt, csv_options )
|
151
|
-
end
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
def self.header( path, sep: Csv.config.sep ) ## use header or headers - or use both (with alias)?
|
156
|
-
# read first lines (only)
|
157
|
-
# and parse with csv to get header from csv library itself
|
158
|
-
#
|
159
|
-
# check - if there's an easier or built-in way for the csv library
|
160
|
-
|
161
|
-
## readlines until
|
162
|
-
## - NOT a comments line or
|
163
|
-
## - NOT a blank line
|
164
|
-
|
165
|
-
lines = ''
|
166
|
-
File.open( path, 'r:utf-8' ) do |f|
|
167
|
-
|
168
|
-
## todo/fix: how to handle empty files or files without headers?!
|
169
|
-
|
170
|
-
## todo/check if readline includes \n\r too??
|
171
|
-
## yes! - line include \n e.g.
|
172
|
-
## "Brewery,City,Name,Abv\n" or
|
173
|
-
## "#######\n# try with some comments\n# and blank lines even before header\n\nBrewery,City,Name,Abv\n"
|
174
|
-
loop do
|
175
|
-
line = f.readline
|
176
|
-
lines << line
|
177
|
-
break unless Csv.config.skip?( line ) || Csv.config.blank?( line )
|
178
|
-
end
|
179
|
-
end
|
180
|
-
|
181
|
-
## puts "lines:"
|
182
|
-
## pp lines
|
183
|
-
|
184
|
-
## note: do NOT use headers: true to get "plain" data array (no hash records)
|
185
|
-
## hash record does NOT work for single line/row
|
186
|
-
parse_line( lines, sep: sep )
|
187
|
-
end # method self.header
|
188
|
-
end # class CsvReader
|
data/test/test_reader.rb
DELETED
@@ -1,119 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
###
|
4
|
-
# to run use
|
5
|
-
# ruby -I ./lib -I ./test test/test_reader.rb
|
6
|
-
|
7
|
-
|
8
|
-
require 'helper'
|
9
|
-
|
10
|
-
class TestReader < MiniTest::Test
|
11
|
-
|
12
|
-
def test_read
|
13
|
-
puts "== read: beer.csv:"
|
14
|
-
table = CsvReader.read( "#{CsvRecord.test_data_dir}/beer.csv" ) ## returns CSV::Table
|
15
|
-
|
16
|
-
pp table.class.name
|
17
|
-
pp table
|
18
|
-
pp table.to_a ## note: includes header (first row with column names)
|
19
|
-
|
20
|
-
table.each do |row| ## note: will skip (NOT include) header row!!
|
21
|
-
pp row
|
22
|
-
end
|
23
|
-
puts " #{table.size} rows" ## note: again will skip (NOT include) header row in count!!!
|
24
|
-
assert_equal 6, table.size
|
25
|
-
end
|
26
|
-
|
27
|
-
def test_read_header_false
|
28
|
-
puts "== read (headers: false): beer.csv:"
|
29
|
-
data = CsvReader.read( "#{CsvRecord.test_data_dir}/beer.csv", headers: false )
|
30
|
-
|
31
|
-
pp data.class.name
|
32
|
-
pp data
|
33
|
-
|
34
|
-
data.each do |row|
|
35
|
-
pp row
|
36
|
-
end
|
37
|
-
puts " #{data.size} rows"
|
38
|
-
assert_equal 7, data.size ## note: include header row in count
|
39
|
-
end
|
40
|
-
|
41
|
-
|
42
|
-
def test_read11
|
43
|
-
puts "== read: beer11.csv:"
|
44
|
-
table = CsvReader.read( "#{CsvRecord.test_data_dir}/beer11.csv" )
|
45
|
-
pp table
|
46
|
-
pp table.to_a ## note: includes header (first row with column names)
|
47
|
-
|
48
|
-
assert true
|
49
|
-
end
|
50
|
-
|
51
|
-
|
52
|
-
def test_parse_line
|
53
|
-
puts "== parse_line:"
|
54
|
-
row = CsvReader.parse_line( <<TXT )
|
55
|
-
Augustiner Bräu München, München, Edelstoff, 5.6%
|
56
|
-
Bayerische Staatsbrauerei Weihenstephan, Freising, Hefe Weissbier, 5.4%
|
57
|
-
TXT
|
58
|
-
|
59
|
-
pp row
|
60
|
-
assert_equal ['Augustiner Bräu München', 'München', 'Edelstoff', '5.6%'], row
|
61
|
-
end
|
62
|
-
|
63
|
-
def test_parse_line11
|
64
|
-
puts "== parse_line:"
|
65
|
-
row = CsvReader.parse_line( <<TXT )
|
66
|
-
#######
|
67
|
-
# try with some comments
|
68
|
-
# and blank lines even before header
|
69
|
-
|
70
|
-
Augustiner Bräu München, München, Edelstoff, 5.6%
|
71
|
-
Bayerische Staatsbrauerei Weihenstephan, Freising, Hefe Weissbier, 5.4%
|
72
|
-
TXT
|
73
|
-
|
74
|
-
pp row
|
75
|
-
assert_equal ['Augustiner Bräu München', 'München', 'Edelstoff', '5.6%'], row
|
76
|
-
end
|
77
|
-
|
78
|
-
def test_header
|
79
|
-
puts "== header: beer.csv:"
|
80
|
-
header = CsvReader.header( "#{CsvRecord.test_data_dir}/beer.csv" )
|
81
|
-
pp header
|
82
|
-
assert_equal ['Brewery','City','Name','Abv'], header
|
83
|
-
end
|
84
|
-
|
85
|
-
def test_header11
|
86
|
-
puts "== header: beer11.csv:"
|
87
|
-
header = CsvReader.header( "#{CsvRecord.test_data_dir}/beer11.csv" )
|
88
|
-
pp header
|
89
|
-
assert_equal ['Brewery','City','Name','Abv'], header
|
90
|
-
end
|
91
|
-
|
92
|
-
|
93
|
-
def test_foreach
|
94
|
-
puts "== foreach: beer.csv:"
|
95
|
-
CsvReader.foreach( "#{CsvRecord.test_data_dir}/beer.csv" ) do |row|
|
96
|
-
pp row
|
97
|
-
pp row.fields
|
98
|
-
end
|
99
|
-
assert true
|
100
|
-
end
|
101
|
-
|
102
|
-
def test_foreach11
|
103
|
-
puts "== foreach: beer11.csv:"
|
104
|
-
CsvReader.foreach( "#{CsvRecord.test_data_dir}/beer11.csv" ) do |row|
|
105
|
-
pp row
|
106
|
-
pp row.fields
|
107
|
-
end
|
108
|
-
assert true
|
109
|
-
end
|
110
|
-
|
111
|
-
def test_foreach_header_false
|
112
|
-
puts "== foreach (headers: false): beer11.csv:"
|
113
|
-
CsvReader.foreach( "#{CsvRecord.test_data_dir}/beer11.csv", headers: false ) do |row|
|
114
|
-
pp row ## note: is Array (no .fields available!!!!!)
|
115
|
-
end
|
116
|
-
assert true
|
117
|
-
end
|
118
|
-
|
119
|
-
end # class TestReader
|