csvrecord 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f60b9d15d91c6d20b625bb343d895ad750f43a81
4
- data.tar.gz: 5510e7921a509fe40906de7f533f185bf244e8bd
3
+ metadata.gz: 77369b93b97cff13a7159870a4343cb22ad86dec
4
+ data.tar.gz: cbc687395deb700c55ed7ca79687da11a7fb8913
5
5
  SHA512:
6
- metadata.gz: 9853722319a53c73469238440eb2c249cd8c3829711acbd35efc60996316f8d8e763d5f61decdc4e3186baebf13536c46c72217fd92f08e6d675b7a6c088568b
7
- data.tar.gz: bd2922bf8c44a458abe70b5dceee9b405904f878eacdaf34a1454a2313c43517329d31841c2bc4faca5c6d3492220d9c5c3091115171046c087a2889a4415339
6
+ metadata.gz: f8d27de0a2c8e8ee2ddfcc024802845a6bce1f05dbc94aa1911fec1cc287837a5b5739e46e93cd5d6db6af678c6ad80e326ea09850c95bb342a27c5063d61fdc
7
+ data.tar.gz: 548b5db8a78788d5c5e624ed44420b961bf0668f9289c876825fb7f09314bc0281f42ca2eb7d6dba11ece65c58de985e6630889230652524f31481e2603cecf2
@@ -6,12 +6,10 @@ Rakefile
6
6
  lib/csvrecord.rb
7
7
  lib/csvrecord/base.rb
8
8
  lib/csvrecord/builder.rb
9
- lib/csvrecord/reader.rb
10
9
  lib/csvrecord/version.rb
11
10
  test/data/beer.csv
12
11
  test/data/beer11.csv
13
12
  test/helper.rb
14
- test/test_reader.rb
15
13
  test/test_record.rb
16
14
  test/test_record_auto.rb
17
15
  test/test_version.rb
data/Rakefile CHANGED
@@ -14,9 +14,13 @@ Hoe.spec 'csvrecord' do
14
14
  self.email = 'wwwmake@googlegroups.com'
15
15
 
16
16
  # switch extension to .markdown for github formatting
17
- self.readme_file = 'README.md'
17
+ self.readme_file = 'README.md'
18
18
  self.history_file = 'HISTORY.md'
19
19
 
20
+ self.extra_deps = [
21
+ ['csvreader']
22
+ ]
23
+
20
24
  self.licenses = ['Public Domain']
21
25
 
22
26
  self.spec_extras = {
@@ -1,8 +1,8 @@
1
1
  # encoding: utf-8
2
2
 
3
- require 'csv'
4
- require 'json'
5
- require 'pp'
3
+ ###
4
+ # 3rd party gems
5
+ require 'csvreader'
6
6
 
7
7
 
8
8
  ###
@@ -10,8 +10,6 @@ require 'pp'
10
10
  require 'csvrecord/version' # let version always go first
11
11
  require 'csvrecord/base'
12
12
  require 'csvrecord/builder'
13
- require 'csvrecord/reader'
14
-
15
13
 
16
14
 
17
15
  puts CsvRecord.banner # say hello
@@ -4,7 +4,7 @@
4
4
  module CsvRecord
5
5
 
6
6
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
7
- MINOR = 3
7
+ MINOR = 4
8
8
  PATCH = 0
9
9
  VERSION = [MAJOR,MINOR,PATCH].join('.')
10
10
 
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvrecord
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-14 00:00:00.000000000 Z
11
+ date: 2018-08-15 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: csvreader
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: rdoc
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -57,12 +71,10 @@ files:
57
71
  - lib/csvrecord.rb
58
72
  - lib/csvrecord/base.rb
59
73
  - lib/csvrecord/builder.rb
60
- - lib/csvrecord/reader.rb
61
74
  - lib/csvrecord/version.rb
62
75
  - test/data/beer.csv
63
76
  - test/data/beer11.csv
64
77
  - test/helper.rb
65
- - test/test_reader.rb
66
78
  - test/test_record.rb
67
79
  - test/test_record_auto.rb
68
80
  - test/test_version.rb
@@ -1,188 +0,0 @@
1
- # encoding: utf-8
2
-
3
-
4
- module Csv ## check: rename to CsvSettings / CsvPref / CsvGlobals or similar - why? why not???
5
-
6
- ## STD_CSV_ENGINE = CSV ## to avoid name confusion use longer name - why? why not? find a better name?
7
- ## use __CSV__ or similar? or just ::CSV ??
8
-
9
- class Configuration
10
-
11
- puts "CSV::VERSION:"
12
- puts CSV::VERSION
13
-
14
- puts "builtin CSV::Converters:"
15
- pp CSV::Converters
16
-
17
- puts "CSV::DEFAULT_OPTIONS:"
18
- pp CSV::DEFAULT_OPTIONS
19
-
20
- ## register our own converters
21
- ## check if strip gets called for nil values too?
22
- CSV::Converters[:strip] = ->(field) { field.strip }
23
-
24
-
25
- attr_accessor :sep ## col_sep (column separator)
26
-
27
- def initialize
28
- @sep = ','
29
- ## note: do NOT add headers as global - should ALWAYS be explicit
30
- ## headers (true/false) - changes resultset and requires different processing!!!
31
-
32
- self ## return self for chaining
33
- end
34
-
35
- def blank?( line )
36
- ## note: blank line does NOT include "blank" with spaces only!!
37
- ## use BLANK_REGEX in skip_lines to clean-up/skip/remove/ignore
38
- ## see skip_blanks in default_options
39
- line.empty?
40
- end
41
-
42
- ## lines starting with # (note: only leading spaces allowed)
43
- COMMENTS_REGEX = /^\s*#/
44
- BLANK_REGEX = /^\s*$/ ## skip all whitespace lines - note: use "" or , for a blank record!!!
45
- SKIP_REGEX = Regexp.union( COMMENTS_REGEX, BLANK_REGEX )
46
-
47
- def skip?( line )
48
- ## check if comment line - skip comments
49
- ## see skip_lines in default_options
50
- line =~ SKIP_REGEX
51
- end
52
-
53
- ## built-in (default) options
54
- ## todo: find a better name?
55
- def default_options
56
- ## note:
57
- ## do NOT include sep character and
58
- ## do NOT include headers true/false here
59
- ##
60
- ## make default sep its own "global" default config
61
- ## e.g. Csv.config.sep =
62
-
63
- ## common options
64
- ## skip comments starting with #
65
- ## skip blank lines
66
- ## strip leading and trailing spaces
67
- ## NOTE/WARN: leading and trailing spaces NOT allowed/working with double quoted values!!!!
68
- defaults = {
69
- skip_blanks: true, ## note: skips only completely empty lines!! (e.g. a line with just a space is NOT blank!!)
70
- skip_lines: SKIP_REGEX,
71
- :converters => :strip
72
- }
73
- defaults
74
- end
75
- end # class Configuration
76
-
77
-
78
- ## lets you use
79
- ## Csv.configure do |config|
80
- ## config.sep = ',' ## or "\t"
81
- ## end
82
-
83
- def self.configure
84
- yield( config )
85
- end
86
-
87
- def self.config
88
- @config ||= Configuration.new
89
- end
90
- end # module Csv
91
-
92
-
93
-
94
- ####
95
- ## use our own wrapper
96
-
97
- class CsvReader
98
-
99
- ####################
100
- # helper methods
101
- def self.unwrap( row_or_array ) ## unwrap row - find a better name? why? why not?
102
- ## return row values as array of strings
103
- if row_or_array.is_a?( CSV::Row )
104
- row = row_or_array
105
- row.fields ## gets array of string of field values
106
- else ## assume "classic" array of strings
107
- array = row_or_array
108
- end
109
- end
110
-
111
-
112
-
113
- def self.foreach( path, sep: Csv.config.sep, headers: true )
114
- csv_options = Csv.config.default_options.merge(
115
- headers: headers,
116
- col_sep: sep,
117
- external_encoding: 'utf-8' ## note: always (auto-)add utf-8 external encoding for now!!!
118
- )
119
-
120
- CSV.foreach( path, csv_options ) do |row|
121
- yield( row ) ## check/todo: use block.call( row ) ## why? why not?
122
- end
123
- end
124
-
125
-
126
- def self.read( path, sep: Csv.config.sep, headers: true )
127
- ## note: use our own file.open
128
- ## always use utf-8 for now
129
- ## check/todo: add skip option bom too - why? why not?
130
- txt = File.open( path, 'r:utf-8' )
131
- parse( txt, sep: sep, headers: headers )
132
- end
133
-
134
- def self.parse( txt, sep: Csv.config.sep, headers: true )
135
- csv_options = Csv.config.default_options.merge(
136
- headers: headers,
137
- col_sep: sep
138
- )
139
- ## pp csv_options
140
- CSV.parse( txt, csv_options )
141
- end
142
-
143
- def self.parse_line( txt, sep: Csv.config.sep )
144
- ## note: do NOT include headers option (otherwise single row gets skipped as first header row :-)
145
- csv_options = Csv.config.default_options.merge(
146
- headers: false, ## note: always turn off headers!!!!!!
147
- col_sep: sep
148
- )
149
- ## pp csv_options
150
- CSV.parse_line( txt, csv_options )
151
- end
152
-
153
-
154
-
155
- def self.header( path, sep: Csv.config.sep ) ## use header or headers - or use both (with alias)?
156
- # read first lines (only)
157
- # and parse with csv to get header from csv library itself
158
- #
159
- # check - if there's an easier or built-in way for the csv library
160
-
161
- ## readlines until
162
- ## - NOT a comments line or
163
- ## - NOT a blank line
164
-
165
- lines = ''
166
- File.open( path, 'r:utf-8' ) do |f|
167
-
168
- ## todo/fix: how to handle empty files or files without headers?!
169
-
170
- ## todo/check if readline includes \n\r too??
171
- ## yes! - line include \n e.g.
172
- ## "Brewery,City,Name,Abv\n" or
173
- ## "#######\n# try with some comments\n# and blank lines even before header\n\nBrewery,City,Name,Abv\n"
174
- loop do
175
- line = f.readline
176
- lines << line
177
- break unless Csv.config.skip?( line ) || Csv.config.blank?( line )
178
- end
179
- end
180
-
181
- ## puts "lines:"
182
- ## pp lines
183
-
184
- ## note: do NOT use headers: true to get "plain" data array (no hash records)
185
- ## hash record does NOT work for single line/row
186
- parse_line( lines, sep: sep )
187
- end # method self.header
188
- end # class CsvReader
@@ -1,119 +0,0 @@
1
- # encoding: utf-8
2
-
3
- ###
4
- # to run use
5
- # ruby -I ./lib -I ./test test/test_reader.rb
6
-
7
-
8
- require 'helper'
9
-
10
- class TestReader < MiniTest::Test
11
-
12
- def test_read
13
- puts "== read: beer.csv:"
14
- table = CsvReader.read( "#{CsvRecord.test_data_dir}/beer.csv" ) ## returns CSV::Table
15
-
16
- pp table.class.name
17
- pp table
18
- pp table.to_a ## note: includes header (first row with column names)
19
-
20
- table.each do |row| ## note: will skip (NOT include) header row!!
21
- pp row
22
- end
23
- puts " #{table.size} rows" ## note: again will skip (NOT include) header row in count!!!
24
- assert_equal 6, table.size
25
- end
26
-
27
- def test_read_header_false
28
- puts "== read (headers: false): beer.csv:"
29
- data = CsvReader.read( "#{CsvRecord.test_data_dir}/beer.csv", headers: false )
30
-
31
- pp data.class.name
32
- pp data
33
-
34
- data.each do |row|
35
- pp row
36
- end
37
- puts " #{data.size} rows"
38
- assert_equal 7, data.size ## note: include header row in count
39
- end
40
-
41
-
42
- def test_read11
43
- puts "== read: beer11.csv:"
44
- table = CsvReader.read( "#{CsvRecord.test_data_dir}/beer11.csv" )
45
- pp table
46
- pp table.to_a ## note: includes header (first row with column names)
47
-
48
- assert true
49
- end
50
-
51
-
52
- def test_parse_line
53
- puts "== parse_line:"
54
- row = CsvReader.parse_line( <<TXT )
55
- Augustiner Bräu München, München, Edelstoff, 5.6%
56
- Bayerische Staatsbrauerei Weihenstephan, Freising, Hefe Weissbier, 5.4%
57
- TXT
58
-
59
- pp row
60
- assert_equal ['Augustiner Bräu München', 'München', 'Edelstoff', '5.6%'], row
61
- end
62
-
63
- def test_parse_line11
64
- puts "== parse_line:"
65
- row = CsvReader.parse_line( <<TXT )
66
- #######
67
- # try with some comments
68
- # and blank lines even before header
69
-
70
- Augustiner Bräu München, München, Edelstoff, 5.6%
71
- Bayerische Staatsbrauerei Weihenstephan, Freising, Hefe Weissbier, 5.4%
72
- TXT
73
-
74
- pp row
75
- assert_equal ['Augustiner Bräu München', 'München', 'Edelstoff', '5.6%'], row
76
- end
77
-
78
- def test_header
79
- puts "== header: beer.csv:"
80
- header = CsvReader.header( "#{CsvRecord.test_data_dir}/beer.csv" )
81
- pp header
82
- assert_equal ['Brewery','City','Name','Abv'], header
83
- end
84
-
85
- def test_header11
86
- puts "== header: beer11.csv:"
87
- header = CsvReader.header( "#{CsvRecord.test_data_dir}/beer11.csv" )
88
- pp header
89
- assert_equal ['Brewery','City','Name','Abv'], header
90
- end
91
-
92
-
93
- def test_foreach
94
- puts "== foreach: beer.csv:"
95
- CsvReader.foreach( "#{CsvRecord.test_data_dir}/beer.csv" ) do |row|
96
- pp row
97
- pp row.fields
98
- end
99
- assert true
100
- end
101
-
102
- def test_foreach11
103
- puts "== foreach: beer11.csv:"
104
- CsvReader.foreach( "#{CsvRecord.test_data_dir}/beer11.csv" ) do |row|
105
- pp row
106
- pp row.fields
107
- end
108
- assert true
109
- end
110
-
111
- def test_foreach_header_false
112
- puts "== foreach (headers: false): beer11.csv:"
113
- CsvReader.foreach( "#{CsvRecord.test_data_dir}/beer11.csv", headers: false ) do |row|
114
- pp row ## note: is Array (no .fields available!!!!!)
115
- end
116
- assert true
117
- end
118
-
119
- end # class TestReader