tabreader 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Manifest.txt +2 -0
- data/README.md +54 -28
- data/Rakefile +3 -3
- data/lib/tabreader.rb +8 -0
- data/lib/tabreader/reader.rb +131 -91
- data/lib/tabreader/reader_hash.rb +111 -0
- data/lib/tabreader/version.rb +2 -2
- data/test/helper.rb +4 -0
- data/test/test_reader.rb +0 -6
- data/test/test_reader_hash.rb +60 -0
- metadata +7 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67dbbade5c8b10576053c868e1514c1f87ebb6c7
|
4
|
+
data.tar.gz: 2a65adb418fbb89876c47f0427473c0ec7cc3448
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e15ec15bf2e2ee097ef9e58aafca69a68c0796e3ccea125367b47a0ca81497373d7058e4e66c9328e463bc915cc7e6c6c7eb6e105612d6a558e2baecdc4c70ca
|
7
|
+
data.tar.gz: a900e51d7b15a509b8457b848c46ffc73fc3c26e41850671d5bc46416a99f45b5b006d6af38f3d23f2c0ee2ff5062ca4f515450d460742dd0551f5183d8f1db4
|
data/Manifest.txt
CHANGED
data/README.md
CHANGED
@@ -1,7 +1,7 @@
|
|
1
|
-
# tabreader - read in tabular datafiles in text in the
|
1
|
+
# tabreader - read in tabular datafiles in text in the tabular (TAB) format
|
2
2
|
|
3
|
-
* home :: [github.com/
|
4
|
-
* bugs :: [github.com/
|
3
|
+
* home :: [github.com/csv11/tabreader](https://github.com/csv11/tabreader)
|
4
|
+
* bugs :: [github.com/csv11/tabreader/issues](https://github.com/csv11/tabreader/issues)
|
5
5
|
* gem :: [rubygems.org/gems/tabreader](https://rubygems.org/gems/tabreader)
|
6
6
|
* rdoc :: [rubydoc.info/gems/tabreader](http://rubydoc.info/gems/tabreader)
|
7
7
|
* forum :: [wwwmake](http://groups.google.com/group/wwwmake)
|
@@ -22,7 +22,7 @@ or the "magic" packaged up in `TabReader`:
|
|
22
22
|
|
23
23
|
``` ruby
|
24
24
|
line = "1\t2\t3"
|
25
|
-
values =
|
25
|
+
values = Tab.parse_line( line ) ## or TAB.parse_line or TabReader.parse_line
|
26
26
|
pp values
|
27
27
|
# => ["1","2","3"]
|
28
28
|
```
|
@@ -30,58 +30,84 @@ pp values
|
|
30
30
|
or use the convenience helpers:
|
31
31
|
|
32
32
|
``` ruby
|
33
|
-
txt <<=
|
33
|
+
txt = <<TXT
|
34
34
|
1\t2\t3
|
35
35
|
4\t5\t6
|
36
|
-
|
36
|
+
TXT
|
37
37
|
|
38
|
-
records =
|
38
|
+
records = Tab.parse( txt ) ## or TAB.parse or TabReader.parse
|
39
39
|
pp records
|
40
40
|
# => [["1","2","3"],
|
41
|
-
# ["
|
41
|
+
# ["4","5","6"]]
|
42
42
|
|
43
43
|
# -or-
|
44
44
|
|
45
|
-
records =
|
45
|
+
records = Tab.read( "values.tab" ) ## or TAB.read or TabReader.read
|
46
46
|
pp records
|
47
47
|
# => [["1","2","3"],
|
48
|
-
# ["
|
48
|
+
# ["4","5","6"]]
|
49
49
|
|
50
50
|
# -or-
|
51
51
|
|
52
|
-
|
52
|
+
Tab.foreach( "values.tab" ) do |rec| ## or TAB.foreach or TabReader.foreach
|
53
53
|
pp rec
|
54
54
|
end
|
55
55
|
# => ["1","2","3"]
|
56
|
-
# => ["
|
56
|
+
# => ["4","5","6"]
|
57
|
+
```
|
58
|
+
|
59
|
+
|
60
|
+
|
61
|
+
### What about Enumerable?
|
62
|
+
|
63
|
+
Yes, every reader includes `Enumerable` and runs on `each`.
|
64
|
+
Use `new` or `open` without a block
|
65
|
+
to get the enumerator (iterator).
|
66
|
+
Example:
|
67
|
+
|
68
|
+
|
69
|
+
``` ruby
|
70
|
+
tab = Tab.new( "a\tb\tc" ) ## or TAB.new or TabReader.new
|
71
|
+
it = tab.to_enum
|
72
|
+
pp it.next
|
73
|
+
# => ["a","b","c"]
|
74
|
+
|
75
|
+
# -or-
|
76
|
+
|
77
|
+
tab = Tab.open( "values.tab" ) ## or TAB.open or TabReader.open
|
78
|
+
it = tab.to_enum
|
79
|
+
pp it.next
|
80
|
+
# => ["1","2","3"]
|
81
|
+
pp it.next
|
82
|
+
# => ["4","5","6"]
|
57
83
|
```
|
58
84
|
|
59
85
|
|
60
86
|
### What about headers?
|
61
87
|
|
62
|
-
Use the `
|
88
|
+
Use the `TabHash`
|
63
89
|
if the first line is a header (or if missing pass in the headers
|
64
90
|
as an array) and you want your records as hashes instead of arrays of values.
|
65
91
|
Example:
|
66
92
|
|
67
93
|
``` ruby
|
68
|
-
txt <<=
|
94
|
+
txt = <<TXT
|
69
95
|
A\tB\tC
|
70
96
|
1\t2\t3
|
71
97
|
4\t5\t6
|
72
|
-
|
98
|
+
TXT
|
73
99
|
|
74
|
-
records =
|
100
|
+
records = TabHash.parse( txt ) ## or TabHashReader
|
75
101
|
pp records
|
76
102
|
|
77
103
|
# -or-
|
78
104
|
|
79
|
-
txt2 <<=
|
105
|
+
txt2 = <<TXT
|
80
106
|
1\t2\t3
|
81
107
|
4\t5\t6
|
82
|
-
|
108
|
+
TXT
|
83
109
|
|
84
|
-
records =
|
110
|
+
records = TabHash.parse( txt2, headers: ["A","B","C"] )
|
85
111
|
pp records
|
86
112
|
|
87
113
|
# => [{"A": "1", "B": "2", "C": "3"},
|
@@ -89,14 +115,14 @@ pp records
|
|
89
115
|
|
90
116
|
# -or-
|
91
117
|
|
92
|
-
records =
|
118
|
+
records = TabHash.read( "hash.tab" )
|
93
119
|
pp records
|
94
120
|
# => [{"A": "1", "B": "2", "C": "3"},
|
95
121
|
# {"A": "4", "B": "5", "C": "6"}]
|
96
122
|
|
97
123
|
# -or-
|
98
124
|
|
99
|
-
|
125
|
+
TabHash.foreach( "hash.tab" ) do |rec|
|
100
126
|
pp rec
|
101
127
|
end
|
102
128
|
# => {"A": "1", "B": "2", "C": "3"}
|
@@ -109,11 +135,11 @@ end
|
|
109
135
|
|
110
136
|
## Frequently Asked Questions (FAQ) and Answers
|
111
137
|
|
112
|
-
### Q: Why NOT use `
|
138
|
+
### Q: Why NOT use `Csv.read( sep: "\t" )`?
|
113
139
|
|
114
|
-
|
140
|
+
TAB != CSV
|
115
141
|
|
116
|
-
The
|
142
|
+
The tabulator (TAB) format is an (even) simpler format than
|
117
143
|
the comma-separated values (CSV) classic format. How?
|
118
144
|
|
119
145
|
The tab format has NO escape rules.
|
@@ -138,23 +164,23 @@ for adding "literal" double quotes in CSV:
|
|
138
164
|
|
139
165
|
```
|
140
166
|
"""1""","""2""","""3"""
|
141
|
-
4,5,"
|
167
|
+
4,5,"Hamlet says, ""Seems,"" madam! Nay it is; I know not ""seems."""
|
142
168
|
```
|
143
169
|
|
144
170
|
vs
|
145
171
|
|
146
172
|
```
|
147
173
|
"1"→"2"→"3"
|
148
|
-
4→5→
|
174
|
+
4→5→Hamlet says, "Seems," madam! Nay it is; I know not "seems."
|
149
175
|
```
|
150
176
|
|
151
177
|
Thus, to avoid any surprises, do NOT use
|
152
|
-
`
|
178
|
+
`Csv.read( sep: "\t" )` and friends for tab.
|
153
179
|
Note: Simpler also equals faster :-).
|
154
180
|
|
155
181
|
|
156
182
|
|
157
|
-
### Q: What's the
|
183
|
+
### Q: What's the tabulator (TAB) format?
|
158
184
|
|
159
185
|
Let's reprint the (complete) tab spec(ification) right here
|
160
186
|
(in an edited simpler version):
|
data/Rakefile
CHANGED
@@ -5,10 +5,10 @@ Hoe.spec 'tabreader' do
|
|
5
5
|
|
6
6
|
self.version = TabReader::VERSION
|
7
7
|
|
8
|
-
self.summary = "tabreader - read in tabular datafiles in text in the
|
8
|
+
self.summary = "tabreader - read in tabular datafiles in text in the tabular (TAB) format"
|
9
9
|
self.description = summary
|
10
10
|
|
11
|
-
self.urls = ['https://github.com/
|
11
|
+
self.urls = ['https://github.com/csv11/tabreader']
|
12
12
|
|
13
13
|
self.author = 'Gerald Bauer'
|
14
14
|
self.email = 'wwwmake@googlegroups.com'
|
@@ -20,7 +20,7 @@ Hoe.spec 'tabreader' do
|
|
20
20
|
self.licenses = ['Public Domain']
|
21
21
|
|
22
22
|
self.spec_extras = {
|
23
|
-
|
23
|
+
required_ruby_version: '>= 2.2.2'
|
24
24
|
}
|
25
25
|
|
26
26
|
end
|
data/lib/tabreader.rb
CHANGED
@@ -2,15 +2,23 @@
|
|
2
2
|
|
3
3
|
|
4
4
|
require 'pp'
|
5
|
+
require 'logger'
|
5
6
|
|
6
7
|
|
7
8
|
###
|
8
9
|
# our own code
|
10
|
+
# check: use require_relative - why? why not?
|
9
11
|
require 'tabreader/version' # let version always go first
|
10
12
|
require 'tabreader/reader'
|
13
|
+
require 'tabreader/reader_hash'
|
11
14
|
|
12
15
|
|
13
16
|
|
17
|
+
## add some "convenience" shortcuts
|
18
|
+
TAB = TabReader
|
19
|
+
Tab = TabReader
|
20
|
+
TabHash = TabHashReader
|
21
|
+
|
14
22
|
|
15
23
|
|
16
24
|
puts TabReader.banner # say hello
|
data/lib/tabreader/reader.rb
CHANGED
@@ -14,124 +14,164 @@ class TabReader
|
|
14
14
|
## todo: add converters: e.g. strip (aka trim / ltrim / rtrim)
|
15
15
|
|
16
16
|
|
17
|
-
def self.read( path, headers: false )
|
18
|
-
txt = File.open( path, 'r:utf-8' ).read
|
19
|
-
## puts "#{path}:"
|
20
|
-
## pp txt
|
21
|
-
parse( txt, headers: headers )
|
22
|
-
end
|
23
17
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
if headers ## add values as name/value pairs e.g. array of hashes
|
36
|
-
if columns.nil?
|
37
|
-
columns = values ## first row is header row
|
38
|
-
else
|
39
|
-
## note: will cut-off values if values.size > columns.size
|
40
|
-
## add warning/error - why? why not?
|
41
|
-
## if values.size <= columns.size will get filled-up with nil
|
42
|
-
pairs = columns.zip(values)
|
43
|
-
## pp pairs
|
44
|
-
h = pairs.to_h
|
45
|
-
## pp h
|
46
|
-
|
47
|
-
rows << h
|
48
|
-
end
|
49
|
-
else ## add values as is e.g. array of array
|
50
|
-
rows << values
|
51
|
-
end
|
52
|
-
end
|
53
|
-
rows
|
18
|
+
###################################
|
19
|
+
## add simple logger with debug flag/switch
|
20
|
+
#
|
21
|
+
# use TabReader.logger.level = :debug # to turn on
|
22
|
+
#
|
23
|
+
# todo/fix: use logutils instead of std logger - why? why not?
|
24
|
+
|
25
|
+
def self.build_logger()
|
26
|
+
l = Logger.new( STDOUT )
|
27
|
+
l.level = :info ## set to :info on start; note: is 0 (debug) by default
|
28
|
+
l
|
54
29
|
end
|
30
|
+
def self.logger() @@logger ||= build_logger; end
|
31
|
+
def logger() self.class.logger; end
|
55
32
|
|
56
|
-
def self.foreach( path, headers: false )
|
57
|
-
if headers.is_a?( Array )
|
58
|
-
columns = headers
|
59
|
-
else
|
60
|
-
columns = nil ## header row a.k.a. columns / fields
|
61
|
-
end
|
62
33
|
|
63
|
-
File.open( path, 'r:utf-8' ).each_line do |line|
|
64
|
-
pp line
|
65
|
-
values = parse_line( line )
|
66
|
-
if headers ## add values as name/value pairs e.g. array of hashes
|
67
|
-
if columns.nil?
|
68
|
-
columns = values ## first row is header row
|
69
|
-
else
|
70
|
-
pairs = columns.zip(values)
|
71
|
-
h = pairs.to_h
|
72
|
-
yield( h )
|
73
|
-
end
|
74
|
-
else ## add values as is e.g. array of array
|
75
|
-
yield( values )
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
# return nil
|
80
|
-
nil
|
81
|
-
end
|
82
34
|
|
83
35
|
|
84
36
|
def self.parse_line( line )
|
85
37
|
## check - can handle comments and blank lines too - why? why not?
|
86
38
|
## remove trailing newlines
|
87
39
|
|
40
|
+
logger.debug "line:" if logger.debug?
|
41
|
+
logger.debug line.pretty_inspect if logger.debug?
|
42
|
+
|
43
|
+
|
88
44
|
## note: chomp('') if is an empty string,
|
89
45
|
## it will remove all trailing newlines from the string.
|
90
|
-
|
91
|
-
line = line.chomp('')
|
46
|
+
## use line.sub(/[\n\r]*$/, '') or similar instead - why? why not?
|
47
|
+
line = line.chomp( '' )
|
48
|
+
|
49
|
+
## line = line.strip ## strip leading and trailing whitespaces (space/tab) too
|
50
|
+
|
51
|
+
logger.debug line.pretty_inspect if logger.debug?
|
52
|
+
|
53
|
+
# if line.empty? ## skip blank lines
|
54
|
+
# logger.debug "skip blank line" if logger.debug?
|
55
|
+
# next
|
56
|
+
# end
|
57
|
+
|
58
|
+
# if line.start_with?( "#" ) ## skip comment lines
|
59
|
+
# logger.debug "skip comment line" if logger.debug?
|
60
|
+
# next
|
61
|
+
# end
|
62
|
+
|
63
|
+
values = line.split( "\t" )
|
64
|
+
logger.debug values.pretty_inspect if logger.debug?
|
92
65
|
|
93
|
-
values = line.split("\t")
|
94
66
|
values
|
95
67
|
end
|
96
68
|
|
97
|
-
def self.header( path )
|
98
|
-
line = File.open( path, 'r:utf-8' ) do |f|
|
99
|
-
if f.eof?
|
100
|
-
## handle empty file; return empty string; no readline call possible
|
101
|
-
## todo/check: return nil from header is no header or [] - why? why not?
|
102
|
-
## or throw exception end of file reached (EOFError) - why? why not?
|
103
|
-
""
|
104
|
-
else
|
105
|
-
f.readline
|
106
|
-
end
|
107
|
-
end
|
108
69
|
|
109
|
-
## note: line includes \n or \r\n at the end
|
110
|
-
## pp line
|
111
|
-
parse_line( line )
|
112
|
-
end
|
113
70
|
|
114
|
-
end # class TabReader
|
115
71
|
|
72
|
+
def self.open( path, mode=nil, &block ) ## rename path to filename or name - why? why not?
|
116
73
|
|
74
|
+
## note: default mode (if nil/not passed in) to 'r:bom|utf-8'
|
75
|
+
f = File.open( path, mode ? mode : 'r:bom|utf-8' )
|
76
|
+
tab = new( f )
|
117
77
|
|
78
|
+
# handle blocks like Ruby's open()
|
79
|
+
if block_given?
|
80
|
+
begin
|
81
|
+
block.call( tab )
|
82
|
+
ensure
|
83
|
+
tab.close
|
84
|
+
end
|
85
|
+
else
|
86
|
+
tab
|
87
|
+
end
|
88
|
+
end # method self.open
|
118
89
|
|
119
|
-
class TabHashReader
|
120
90
|
|
121
|
-
def self.read( path
|
122
|
-
|
91
|
+
def self.read( path )
|
92
|
+
open( path ) { |tab| tab.read }
|
123
93
|
end
|
124
94
|
|
125
|
-
def self.parse( txt, headers: true )
|
126
|
-
TabReader.parse( txt, headers: headers )
|
127
|
-
end
|
128
95
|
|
129
|
-
def self.foreach( path,
|
130
|
-
|
96
|
+
def self.foreach( path, &block )
|
97
|
+
tab = open( path )
|
98
|
+
|
99
|
+
if block_given?
|
100
|
+
begin
|
101
|
+
tab.each( &block )
|
102
|
+
ensure
|
103
|
+
tab.close
|
104
|
+
end
|
105
|
+
else
|
106
|
+
tab.to_enum ## note: caller (responsible) must close file!!!
|
107
|
+
## remove version without block given - why? why not?
|
108
|
+
## use Tab.open().to_enum or Tab.open().each
|
109
|
+
## or Tab.new( File.new() ).to_enum or Tab.new( File.new() ).each ???
|
110
|
+
end
|
111
|
+
end # method self.foreach
|
112
|
+
|
113
|
+
|
114
|
+
def self.parse( data, &block )
|
115
|
+
tab = new( data )
|
116
|
+
|
117
|
+
if block_given?
|
118
|
+
tab.each( &block ) ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
|
119
|
+
else # slurp contents, if no block is given
|
120
|
+
tab.read ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
|
121
|
+
end
|
122
|
+
end # method self.parse
|
123
|
+
|
124
|
+
|
125
|
+
|
126
|
+
## convenience helper for header (first row with column names)
|
127
|
+
def self.header( path ) ## use header or headers - or use both (with alias)?
|
128
|
+
# read first lines (only)
|
129
|
+
|
130
|
+
records = []
|
131
|
+
open( path ) do |tab|
|
132
|
+
tab.each do |record|
|
133
|
+
records << record
|
134
|
+
break ## only parse/read first record
|
135
|
+
end
|
136
|
+
end
|
137
|
+
|
138
|
+
## unwrap record if empty return nil - why? why not?
|
139
|
+
## return empty record e.g. [] - why? why not?
|
140
|
+
## returns nil for empty (for now) - why? why not?
|
141
|
+
records.size == 0 ? nil : records.first
|
142
|
+
end # method self.header
|
143
|
+
|
144
|
+
|
145
|
+
|
146
|
+
|
147
|
+
def initialize( data )
|
148
|
+
if data.is_a?( String )
|
149
|
+
@input = data # note: just needs each for each_line
|
150
|
+
else ## assume io
|
151
|
+
@input = data
|
152
|
+
end
|
131
153
|
end
|
132
154
|
|
133
|
-
|
134
|
-
|
155
|
+
|
156
|
+
include Enumerable
|
157
|
+
|
158
|
+
def each( &block )
|
159
|
+
if block_given?
|
160
|
+
@input.each_line do |line|
|
161
|
+
|
162
|
+
values = self.class.parse_line( line )
|
163
|
+
|
164
|
+
block.call( values )
|
165
|
+
end
|
166
|
+
else
|
167
|
+
to_enum
|
168
|
+
end
|
169
|
+
end # method each
|
170
|
+
|
171
|
+
def read() to_a; end # method read
|
172
|
+
|
173
|
+
def close
|
174
|
+
@input.close if @input.respond_to?(:close) ## note: string needs no close
|
135
175
|
end
|
136
176
|
|
137
|
-
end # class
|
177
|
+
end # class TabReader
|
@@ -0,0 +1,111 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
class TabHashReader
|
4
|
+
|
5
|
+
|
6
|
+
def self.open( path, mode=nil, headers: nil, &block ) ## rename path to filename or name - why? why not?
|
7
|
+
|
8
|
+
## note: default mode (if nil/not passed in) to 'r:bom|utf-8'
|
9
|
+
f = File.open( path, mode ? mode : 'r:bom|utf-8' )
|
10
|
+
tab = new(f, headers: headers )
|
11
|
+
|
12
|
+
# handle blocks like Ruby's open()
|
13
|
+
if block_given?
|
14
|
+
begin
|
15
|
+
block.call( tab )
|
16
|
+
ensure
|
17
|
+
tab.close
|
18
|
+
end
|
19
|
+
else
|
20
|
+
tab
|
21
|
+
end
|
22
|
+
end # method self.open
|
23
|
+
|
24
|
+
|
25
|
+
def self.read( path, headers: nil )
|
26
|
+
open( path, headers: headers ) { |tab| tab.read }
|
27
|
+
end
|
28
|
+
|
29
|
+
|
30
|
+
|
31
|
+
def self.foreach( path, headers: nil, &block )
|
32
|
+
tab = open( path, headers: headers)
|
33
|
+
|
34
|
+
if block_given?
|
35
|
+
begin
|
36
|
+
tab.each( &block )
|
37
|
+
ensure
|
38
|
+
tab.close
|
39
|
+
end
|
40
|
+
else
|
41
|
+
tab.to_enum ## note: caller (responsible) must close file!!!
|
42
|
+
## remove version without block given - why? why not?
|
43
|
+
## use Tab.open().to_enum or Tab.open().each
|
44
|
+
## or Tab.new( File.new() ).to_enum or Tab.new( File.new() ).each ???
|
45
|
+
end
|
46
|
+
end # method self.foreach
|
47
|
+
|
48
|
+
|
49
|
+
def self.parse( data, headers: nil, &block )
|
50
|
+
tab = new( data, headers: headers )
|
51
|
+
|
52
|
+
if block_given?
|
53
|
+
tab.each( &block ) ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
|
54
|
+
else # slurp contents, if no block is given
|
55
|
+
tab.read ## note: caller (responsible) must close file!!! - add autoclose - why? why not?
|
56
|
+
end
|
57
|
+
end # method self.parse
|
58
|
+
|
59
|
+
|
60
|
+
|
61
|
+
|
62
|
+
def initialize( data, headers: nil )
|
63
|
+
raise ArgumentError.new( "Cannot parse nil as TAB" ) if data.nil?
|
64
|
+
|
65
|
+
if data.is_a?( String )
|
66
|
+
@input = data # note: just needs each for each_line
|
67
|
+
else ## assume io
|
68
|
+
@input = data
|
69
|
+
end
|
70
|
+
|
71
|
+
## pass in headers as array e.g. ['A', 'B', 'C']
|
72
|
+
@names = headers ? headers : nil
|
73
|
+
end
|
74
|
+
|
75
|
+
|
76
|
+
|
77
|
+
include Enumerable
|
78
|
+
|
79
|
+
|
80
|
+
def each( &block )
|
81
|
+
|
82
|
+
## todo/fix:
|
83
|
+
## add case for headers/names.size != values.size
|
84
|
+
## - add rest option? for when there are fewer headers than values (see Python csv.DictReader) - why? why not?
|
85
|
+
##
|
86
|
+
## handle case with duplicate and empty header names etc.
|
87
|
+
|
88
|
+
|
89
|
+
if block_given?
|
90
|
+
TabReader.parse( @input ) do |values|
|
91
|
+
if @names.nil? ## check for (first) headers row
|
92
|
+
@names = values ## store header row / a.k.a. field/column names
|
93
|
+
else ## "regular" record
|
94
|
+
record = @names.zip( values ).to_h ## todo/fix: check for more values than names/headers!!!
|
95
|
+
block.call( record )
|
96
|
+
end
|
97
|
+
end
|
98
|
+
else
|
99
|
+
to_enum
|
100
|
+
end
|
101
|
+
end # method each
|
102
|
+
|
103
|
+
def read() to_a; end # method read
|
104
|
+
|
105
|
+
|
106
|
+
def close
|
107
|
+
@input.close if @input.respond_to?(:close) ## note: string needs no close
|
108
|
+
end
|
109
|
+
|
110
|
+
|
111
|
+
end # class TabHashReader
|
data/lib/tabreader/version.rb
CHANGED
@@ -4,8 +4,8 @@
|
|
4
4
|
## note: for now TabReader is a class!!! NOT a module - change - why? why not?
|
5
5
|
class TabReader
|
6
6
|
|
7
|
-
MAJOR =
|
8
|
-
MINOR =
|
7
|
+
MAJOR = 1 ## todo: namespace inside version or something - why? why not??
|
8
|
+
MINOR = 0
|
9
9
|
PATCH = 0
|
10
10
|
VERSION = [MAJOR,MINOR,PATCH].join('.')
|
11
11
|
|
data/test/helper.rb
CHANGED
data/test/test_reader.rb
CHANGED
@@ -26,11 +26,9 @@ TXT
|
|
26
26
|
|
27
27
|
puts "== parse:"
|
28
28
|
pp TabReader.parse( txt1 )
|
29
|
-
pp TabReader.parse( txt1, headers: true )
|
30
29
|
|
31
30
|
puts "== parse:"
|
32
31
|
pp TabReader.parse( txt2 )
|
33
|
-
pp TabReader.parse( txt2, headers: true )
|
34
32
|
|
35
33
|
puts "== parse_line:"
|
36
34
|
pp TabReader.parse_line( "1\t2\t3" )
|
@@ -49,16 +47,12 @@ def test_read
|
|
49
47
|
|
50
48
|
puts "== read:"
|
51
49
|
pp TabReader.read( "#{TabReader.test_data_dir}/test.tab" )
|
52
|
-
pp TabReader.read( "#{TabReader.test_data_dir}/test.tab", headers: true )
|
53
50
|
puts "== header:"
|
54
51
|
pp TabReader.header( "#{TabReader.test_data_dir}/test.tab" )
|
55
52
|
puts "== foreach:"
|
56
53
|
TabReader.foreach( "#{TabReader.test_data_dir}/test.tab" ) do |row|
|
57
54
|
pp row
|
58
55
|
end
|
59
|
-
TabReader.foreach( "#{TabReader.test_data_dir}/test.tab", headers: true ) do |row|
|
60
|
-
pp row
|
61
|
-
end
|
62
56
|
end
|
63
57
|
|
64
58
|
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
###
|
4
|
+
# to run use
|
5
|
+
# ruby -I ./lib -I ./test test/test_reader_hash.rb
|
6
|
+
|
7
|
+
|
8
|
+
require 'helper'
|
9
|
+
|
10
|
+
class TestReader < MiniTest::Test
|
11
|
+
|
12
|
+
|
13
|
+
def test_parse
|
14
|
+
|
15
|
+
txt1 = <<TXT
|
16
|
+
a\tb\tc
|
17
|
+
1\t2\t3
|
18
|
+
4\t5\t6
|
19
|
+
TXT
|
20
|
+
|
21
|
+
txt2 = <<TXT
|
22
|
+
a b c d
|
23
|
+
1 2 3 4
|
24
|
+
5 6 7 8
|
25
|
+
TXT
|
26
|
+
|
27
|
+
puts "== parse:"
|
28
|
+
pp TabHashReader.parse( txt1 )
|
29
|
+
|
30
|
+
puts "== parse:"
|
31
|
+
pp TabHashReader.parse( txt2 )
|
32
|
+
|
33
|
+
assert true
|
34
|
+
end
|
35
|
+
|
36
|
+
|
37
|
+
def test_read
|
38
|
+
|
39
|
+
puts "== read:"
|
40
|
+
pp TabHashReader.read( "#{TabReader.test_data_dir}/test.tab" )
|
41
|
+
puts "== foreach:"
|
42
|
+
TabHashReader.foreach( "#{TabReader.test_data_dir}/test.tab" ) do |row|
|
43
|
+
pp row
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
|
48
|
+
def test_read_empty
|
49
|
+
|
50
|
+
puts "== read (empty):"
|
51
|
+
pp TabHashReader.read( "#{TabReader.test_data_dir}/empty.tab" )
|
52
|
+
puts "== foreach (empty):"
|
53
|
+
TabHashReader.foreach( "#{TabReader.test_data_dir}/empty.tab" ) do |row|
|
54
|
+
pp row
|
55
|
+
end
|
56
|
+
puts "== parse (empty):"
|
57
|
+
pp TabHashReader.parse( "" )
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: tabreader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gerald Bauer
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-10-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdoc
|
@@ -38,7 +38,7 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '3.16'
|
41
|
-
description: tabreader - read in tabular datafiles in text in the
|
41
|
+
description: tabreader - read in tabular datafiles in text in the tabular (TAB) format
|
42
42
|
email: wwwmake@googlegroups.com
|
43
43
|
executables: []
|
44
44
|
extensions: []
|
@@ -55,12 +55,14 @@ files:
|
|
55
55
|
- Rakefile
|
56
56
|
- lib/tabreader.rb
|
57
57
|
- lib/tabreader/reader.rb
|
58
|
+
- lib/tabreader/reader_hash.rb
|
58
59
|
- lib/tabreader/version.rb
|
59
60
|
- test/data/empty.tab
|
60
61
|
- test/data/test.tab
|
61
62
|
- test/helper.rb
|
62
63
|
- test/test_reader.rb
|
63
|
-
|
64
|
+
- test/test_reader_hash.rb
|
65
|
+
homepage: https://github.com/csv11/tabreader
|
64
66
|
licenses:
|
65
67
|
- Public Domain
|
66
68
|
metadata: {}
|
@@ -85,5 +87,5 @@ rubyforge_project:
|
|
85
87
|
rubygems_version: 2.5.2
|
86
88
|
signing_key:
|
87
89
|
specification_version: 4
|
88
|
-
summary: tabreader - read in tabular datafiles in text in the
|
90
|
+
summary: tabreader - read in tabular datafiles in text in the tabular (TAB) format
|
89
91
|
test_files: []
|