csvrecord 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f60b9d15d91c6d20b625bb343d895ad750f43a81
4
- data.tar.gz: 5510e7921a509fe40906de7f533f185bf244e8bd
3
+ metadata.gz: 77369b93b97cff13a7159870a4343cb22ad86dec
4
+ data.tar.gz: cbc687395deb700c55ed7ca79687da11a7fb8913
5
5
  SHA512:
6
- metadata.gz: 9853722319a53c73469238440eb2c249cd8c3829711acbd35efc60996316f8d8e763d5f61decdc4e3186baebf13536c46c72217fd92f08e6d675b7a6c088568b
7
- data.tar.gz: bd2922bf8c44a458abe70b5dceee9b405904f878eacdaf34a1454a2313c43517329d31841c2bc4faca5c6d3492220d9c5c3091115171046c087a2889a4415339
6
+ metadata.gz: f8d27de0a2c8e8ee2ddfcc024802845a6bce1f05dbc94aa1911fec1cc287837a5b5739e46e93cd5d6db6af678c6ad80e326ea09850c95bb342a27c5063d61fdc
7
+ data.tar.gz: 548b5db8a78788d5c5e624ed44420b961bf0668f9289c876825fb7f09314bc0281f42ca2eb7d6dba11ece65c58de985e6630889230652524f31481e2603cecf2
@@ -6,12 +6,10 @@ Rakefile
6
6
  lib/csvrecord.rb
7
7
  lib/csvrecord/base.rb
8
8
  lib/csvrecord/builder.rb
9
- lib/csvrecord/reader.rb
10
9
  lib/csvrecord/version.rb
11
10
  test/data/beer.csv
12
11
  test/data/beer11.csv
13
12
  test/helper.rb
14
- test/test_reader.rb
15
13
  test/test_record.rb
16
14
  test/test_record_auto.rb
17
15
  test/test_version.rb
data/Rakefile CHANGED
@@ -14,9 +14,13 @@ Hoe.spec 'csvrecord' do
14
14
  self.email = 'wwwmake@googlegroups.com'
15
15
 
16
16
  # switch extension to .markdown for github formatting
17
- self.readme_file = 'README.md'
17
+ self.readme_file = 'README.md'
18
18
  self.history_file = 'HISTORY.md'
19
19
 
20
+ self.extra_deps = [
21
+ ['csvreader']
22
+ ]
23
+
20
24
  self.licenses = ['Public Domain']
21
25
 
22
26
  self.spec_extras = {
@@ -1,8 +1,8 @@
1
1
  # encoding: utf-8
2
2
 
3
- require 'csv'
4
- require 'json'
5
- require 'pp'
3
+ ###
4
+ # 3rd party gems
5
+ require 'csvreader'
6
6
 
7
7
 
8
8
  ###
@@ -10,8 +10,6 @@ require 'pp'
10
10
  require 'csvrecord/version' # let version always go first
11
11
  require 'csvrecord/base'
12
12
  require 'csvrecord/builder'
13
- require 'csvrecord/reader'
14
-
15
13
 
16
14
 
17
15
  puts CsvRecord.banner # say hello
@@ -4,7 +4,7 @@
4
4
  module CsvRecord
5
5
 
6
6
  MAJOR = 0 ## todo: namespace inside version or something - why? why not??
7
- MINOR = 3
7
+ MINOR = 4
8
8
  PATCH = 0
9
9
  VERSION = [MAJOR,MINOR,PATCH].join('.')
10
10
 
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvrecord
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-14 00:00:00.000000000 Z
11
+ date: 2018-08-15 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: csvreader
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: rdoc
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -57,12 +71,10 @@ files:
57
71
  - lib/csvrecord.rb
58
72
  - lib/csvrecord/base.rb
59
73
  - lib/csvrecord/builder.rb
60
- - lib/csvrecord/reader.rb
61
74
  - lib/csvrecord/version.rb
62
75
  - test/data/beer.csv
63
76
  - test/data/beer11.csv
64
77
  - test/helper.rb
65
- - test/test_reader.rb
66
78
  - test/test_record.rb
67
79
  - test/test_record_auto.rb
68
80
  - test/test_version.rb
@@ -1,188 +0,0 @@
1
- # encoding: utf-8
2
-
3
-
4
- module Csv ## check: rename to CsvSettings / CsvPref / CsvGlobals or similar - why? why not???
5
-
6
- ## STD_CSV_ENGINE = CSV ## to avoid name confusion use longer name - why? why not? find a better name?
7
- ## use __CSV__ or similar? or just ::CSV ??
8
-
9
- class Configuration
10
-
11
- puts "CSV::VERSION:"
12
- puts CSV::VERSION
13
-
14
- puts "builtin CSV::Converters:"
15
- pp CSV::Converters
16
-
17
- puts "CSV::DEFAULT_OPTIONS:"
18
- pp CSV::DEFAULT_OPTIONS
19
-
20
- ## register our own converters
21
- ## check if strip gets called for nil values too?
22
- CSV::Converters[:strip] = ->(field) { field.strip }
23
-
24
-
25
- attr_accessor :sep ## col_sep (column separator)
26
-
27
- def initialize
28
- @sep = ','
29
- ## note: do NOT add headers as global - should ALWAYS be explicit
30
- ## headers (true/false) - changes resultset and requires different processing!!!
31
-
32
- self ## return self for chaining
33
- end
34
-
35
- def blank?( line )
36
- ## note: blank line does NOT include "blank" with spaces only!!
37
- ## use BLANK_REGEX in skip_lines to clean-up/skip/remove/ignore
38
- ## see skip_blanks in default_options
39
- line.empty?
40
- end
41
-
42
- ## lines starting with # (note: only leading spaces allowed)
43
- COMMENTS_REGEX = /^\s*#/
44
- BLANK_REGEX = /^\s*$/ ## skip all whitespace lines - note: use "" or , for a blank record!!!
45
- SKIP_REGEX = Regexp.union( COMMENTS_REGEX, BLANK_REGEX )
46
-
47
- def skip?( line )
48
- ## check if comment line - skip comments
49
- ## see skip_lines in default_options
50
- line =~ SKIP_REGEX
51
- end
52
-
53
- ## built-in (default) options
54
- ## todo: find a better name?
55
- def default_options
56
- ## note:
57
- ## do NOT include sep character and
58
- ## do NOT include headers true/false here
59
- ##
60
- ## make default sep its own "global" default config
61
- ## e.g. Csv.config.sep =
62
-
63
- ## common options
64
- ## skip comments starting with #
65
- ## skip blank lines
66
- ## strip leading and trailing spaces
67
- ## NOTE/WARN: leading and trailing spaces NOT allowed/working with double quoted values!!!!
68
- defaults = {
69
- skip_blanks: true, ## note: skips lines with no whitespaces only!! (e.g. line with space is NOT blank!!)
70
- skip_lines: SKIP_REGEX,
71
- :converters => :strip
72
- }
73
- defaults
74
- end
75
- end # class Configuration
76
-
77
-
78
- ## lets you use
79
- ## Csv.configure do |config|
80
- ## config.sep = ',' ## or "\t"
81
- ## end
82
-
83
- def self.configure
84
- yield( config )
85
- end
86
-
87
- def self.config
88
- @config ||= Configuration.new
89
- end
90
- end # module Csv
91
-
92
-
93
-
94
- ####
95
- ## use our own wrapper
96
-
97
- class CsvReader
98
-
99
- ####################
100
- # helper methods
101
- def self.unwrap( row_or_array ) ## unwrap row - find a better name? why? why not?
102
- ## return row values as array of strings
103
- if row_or_array.is_a?( CSV::Row )
104
- row = row_or_array
105
- row.fields ## gets array of string of field values
106
- else ## assume "classic" array of strings
107
- array = row_or_array
108
- end
109
- end
110
-
111
-
112
-
113
- def self.foreach( path, sep: Csv.config.sep, headers: true )
114
- csv_options = Csv.config.default_options.merge(
115
- headers: headers,
116
- col_sep: sep,
117
- external_encoding: 'utf-8' ## note: always (auto-)add utf-8 external encoding for now!!!
118
- )
119
-
120
- CSV.foreach( path, csv_options ) do |row|
121
- yield( row ) ## check/todo: use block.call( row ) ## why? why not?
122
- end
123
- end
124
-
125
-
126
- def self.read( path, sep: Csv.config.sep, headers: true )
127
- ## note: use our own file.open
128
- ## always use utf-8 for now
129
- ## check/todo: add skip option bom too - why? why not?
130
- txt = File.open( path, 'r:utf-8' )
131
- parse( txt, sep: sep, headers: headers )
132
- end
133
-
134
- def self.parse( txt, sep: Csv.config.sep, headers: true )
135
- csv_options = Csv.config.default_options.merge(
136
- headers: headers,
137
- col_sep: sep
138
- )
139
- ## pp csv_options
140
- CSV.parse( txt, csv_options )
141
- end
142
-
143
- def self.parse_line( txt, sep: Csv.config.sep )
144
- ## note: do NOT include headers option (otherwise single row gets skipped as first header row :-)
145
- csv_options = Csv.config.default_options.merge(
146
- headers: false, ## note: always turn off headers!!!!!!
147
- col_sep: sep
148
- )
149
- ## pp csv_options
150
- CSV.parse_line( txt, csv_options )
151
- end
152
-
153
-
154
-
155
- def self.header( path, sep: Csv.config.sep ) ## use header or headers - or use both (with alias)?
156
- # read first lines (only)
157
- # and parse with csv to get header from csv library itself
158
- #
159
- # check - if there's an easier or built-in way for the csv library
160
-
161
- ## readlines until
162
- ## - NOT a comments line or
163
- ## - NOT a blank line
164
-
165
- lines = ''
166
- File.open( path, 'r:utf-8' ) do |f|
167
-
168
- ## todo/fix: how to handle empty files or files without headers?!
169
-
170
- ## todo/check if readline includes \n\r too??
171
- ## yes! - line include \n e.g.
172
- ## "Brewery,City,Name,Abv\n" or
173
- ## "#######\n# try with some comments\n# and blank lines even before header\n\nBrewery,City,Name,Abv\n"
174
- loop do
175
- line = f.readline
176
- lines << line
177
- break unless Csv.config.skip?( line ) || Csv.config.blank?( line )
178
- end
179
- end
180
-
181
- ## puts "lines:"
182
- ## pp lines
183
-
184
- ## note: do NOT use headers: true to get "plain" data array (no hash records)
185
- ## hash record does NOT work for single line/row
186
- parse_line( lines, sep: sep )
187
- end # method self.header
188
- end # class CsvReader
@@ -1,119 +0,0 @@
1
- # encoding: utf-8
2
-
3
- ###
4
- # to run use
5
- # ruby -I ./lib -I ./test test/test_reader.rb
6
-
7
-
8
- require 'helper'
9
-
10
- class TestReader < MiniTest::Test
11
-
12
- def test_read
13
- puts "== read: beer.csv:"
14
- table = CsvReader.read( "#{CsvRecord.test_data_dir}/beer.csv" ) ## returns CSV::Table
15
-
16
- pp table.class.name
17
- pp table
18
- pp table.to_a ## note: includes header (first row with column names)
19
-
20
- table.each do |row| ## note: will skip (NOT include) header row!!
21
- pp row
22
- end
23
- puts " #{table.size} rows" ## note: again will skip (NOT include) header row in count!!!
24
- assert_equal 6, table.size
25
- end
26
-
27
- def test_read_header_false
28
- puts "== read (headers: false): beer.csv:"
29
- data = CsvReader.read( "#{CsvRecord.test_data_dir}/beer.csv", headers: false )
30
-
31
- pp data.class.name
32
- pp data
33
-
34
- data.each do |row|
35
- pp row
36
- end
37
- puts " #{data.size} rows"
38
- assert_equal 7, data.size ## note: include header row in count
39
- end
40
-
41
-
42
- def test_read11
43
- puts "== read: beer11.csv:"
44
- table = CsvReader.read( "#{CsvRecord.test_data_dir}/beer11.csv" )
45
- pp table
46
- pp table.to_a ## note: includes header (first row with column names)
47
-
48
- assert true
49
- end
50
-
51
-
52
- def test_parse_line
53
- puts "== parse_line:"
54
- row = CsvReader.parse_line( <<TXT )
55
- Augustiner Bräu München, München, Edelstoff, 5.6%
56
- Bayerische Staatsbrauerei Weihenstephan, Freising, Hefe Weissbier, 5.4%
57
- TXT
58
-
59
- pp row
60
- assert_equal ['Augustiner Bräu München', 'München', 'Edelstoff', '5.6%'], row
61
- end
62
-
63
- def test_parse_line11
64
- puts "== parse_line:"
65
- row = CsvReader.parse_line( <<TXT )
66
- #######
67
- # try with some comments
68
- # and blank lines even before header
69
-
70
- Augustiner Bräu München, München, Edelstoff, 5.6%
71
- Bayerische Staatsbrauerei Weihenstephan, Freising, Hefe Weissbier, 5.4%
72
- TXT
73
-
74
- pp row
75
- assert_equal ['Augustiner Bräu München', 'München', 'Edelstoff', '5.6%'], row
76
- end
77
-
78
- def test_header
79
- puts "== header: beer.csv:"
80
- header = CsvReader.header( "#{CsvRecord.test_data_dir}/beer.csv" )
81
- pp header
82
- assert_equal ['Brewery','City','Name','Abv'], header
83
- end
84
-
85
- def test_header11
86
- puts "== header: beer11.csv:"
87
- header = CsvReader.header( "#{CsvRecord.test_data_dir}/beer11.csv" )
88
- pp header
89
- assert_equal ['Brewery','City','Name','Abv'], header
90
- end
91
-
92
-
93
- def test_foreach
94
- puts "== foreach: beer.csv:"
95
- CsvReader.foreach( "#{CsvRecord.test_data_dir}/beer.csv" ) do |row|
96
- pp row
97
- pp row.fields
98
- end
99
- assert true
100
- end
101
-
102
- def test_foreach11
103
- puts "== foreach: beer11.csv:"
104
- CsvReader.foreach( "#{CsvRecord.test_data_dir}/beer11.csv" ) do |row|
105
- pp row
106
- pp row.fields
107
- end
108
- assert true
109
- end
110
-
111
- def test_foreach_header_false
112
- puts "== foreach (headers: false): beer11.csv:"
113
- CsvReader.foreach( "#{CsvRecord.test_data_dir}/beer11.csv", headers: false ) do |row|
114
- pp row ## note: is Array (no .fields available!!!!!)
115
- end
116
- assert true
117
- end
118
-
119
- end # class TestReader