tabreader 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/HISTORY.md +3 -0
- data/LICENSE.md +116 -0
- data/Manifest.txt +12 -0
- data/README.md +237 -0
- data/Rakefile +26 -0
- data/lib/tabreader/reader.rb +114 -0
- data/lib/tabreader/version.rb +24 -0
- data/lib/tabreader.rb +16 -0
- data/test/data/empty.tab +0 -0
- data/test/data/test.tab +5 -0
- data/test/helper.rb +16 -0
- data/test/test_reader.rb +80 -0
- metadata +89 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA1:
|
|
3
|
+
metadata.gz: 9f60d622c5ed07603e3ce719cbf1bca92093a1ca
|
|
4
|
+
data.tar.gz: a751de430625b35d0bd6b9343d2f9b6cd3571f40
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: bb921628bb052ae9e5f9ac3ff02db1fe3e578491fab8d222a4b2a6f99a761ba3dbcac0b262a8e91af4a071b99351935b28d670fe6f787614160a314724b872f7
|
|
7
|
+
data.tar.gz: ab8a161b0d8b72e1eaa7ab226fd31dcd1e719a030de7e7e93fd3547a29dd09c1bca3acb19881f20e282387a41cbe29a76dd29431dd1c90f79c686e068ebe3f57
|
data/HISTORY.md
ADDED
data/LICENSE.md
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
CC0 1.0 Universal
|
|
2
|
+
|
|
3
|
+
Statement of Purpose
|
|
4
|
+
|
|
5
|
+
The laws of most jurisdictions throughout the world automatically confer
|
|
6
|
+
exclusive Copyright and Related Rights (defined below) upon the creator and
|
|
7
|
+
subsequent owner(s) (each and all, an "owner") of an original work of
|
|
8
|
+
authorship and/or a database (each, a "Work").
|
|
9
|
+
|
|
10
|
+
Certain owners wish to permanently relinquish those rights to a Work for the
|
|
11
|
+
purpose of contributing to a commons of creative, cultural and scientific
|
|
12
|
+
works ("Commons") that the public can reliably and without fear of later
|
|
13
|
+
claims of infringement build upon, modify, incorporate in other works, reuse
|
|
14
|
+
and redistribute as freely as possible in any form whatsoever and for any
|
|
15
|
+
purposes, including without limitation commercial purposes. These owners may
|
|
16
|
+
contribute to the Commons to promote the ideal of a free culture and the
|
|
17
|
+
further production of creative, cultural and scientific works, or to gain
|
|
18
|
+
reputation or greater distribution for their Work in part through the use and
|
|
19
|
+
efforts of others.
|
|
20
|
+
|
|
21
|
+
For these and/or other purposes and motivations, and without any expectation
|
|
22
|
+
of additional consideration or compensation, the person associating CC0 with a
|
|
23
|
+
Work (the "Affirmer"), to the extent that he or she is an owner of Copyright
|
|
24
|
+
and Related Rights in the Work, voluntarily elects to apply CC0 to the Work
|
|
25
|
+
and publicly distribute the Work under its terms, with knowledge of his or her
|
|
26
|
+
Copyright and Related Rights in the Work and the meaning and intended legal
|
|
27
|
+
effect of CC0 on those rights.
|
|
28
|
+
|
|
29
|
+
1. Copyright and Related Rights. A Work made available under CC0 may be
|
|
30
|
+
protected by copyright and related or neighboring rights ("Copyright and
|
|
31
|
+
Related Rights"). Copyright and Related Rights include, but are not limited
|
|
32
|
+
to, the following:
|
|
33
|
+
|
|
34
|
+
i. the right to reproduce, adapt, distribute, perform, display, communicate,
|
|
35
|
+
and translate a Work;
|
|
36
|
+
|
|
37
|
+
ii. moral rights retained by the original author(s) and/or performer(s);
|
|
38
|
+
|
|
39
|
+
iii. publicity and privacy rights pertaining to a person's image or likeness
|
|
40
|
+
depicted in a Work;
|
|
41
|
+
|
|
42
|
+
iv. rights protecting against unfair competition in regards to a Work,
|
|
43
|
+
subject to the limitations in paragraph 4(a), below;
|
|
44
|
+
|
|
45
|
+
v. rights protecting the extraction, dissemination, use and reuse of data in
|
|
46
|
+
a Work;
|
|
47
|
+
|
|
48
|
+
vi. database rights (such as those arising under Directive 96/9/EC of the
|
|
49
|
+
European Parliament and of the Council of 11 March 1996 on the legal
|
|
50
|
+
protection of databases, and under any national implementation thereof,
|
|
51
|
+
including any amended or successor version of such directive); and
|
|
52
|
+
|
|
53
|
+
vii. other similar, equivalent or corresponding rights throughout the world
|
|
54
|
+
based on applicable law or treaty, and any national implementations thereof.
|
|
55
|
+
|
|
56
|
+
2. Waiver. To the greatest extent permitted by, but not in contravention of,
|
|
57
|
+
applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and
|
|
58
|
+
unconditionally waives, abandons, and surrenders all of Affirmer's Copyright
|
|
59
|
+
and Related Rights and associated claims and causes of action, whether now
|
|
60
|
+
known or unknown (including existing as well as future claims and causes of
|
|
61
|
+
action), in the Work (i) in all territories worldwide, (ii) for the maximum
|
|
62
|
+
duration provided by applicable law or treaty (including future time
|
|
63
|
+
extensions), (iii) in any current or future medium and for any number of
|
|
64
|
+
copies, and (iv) for any purpose whatsoever, including without limitation
|
|
65
|
+
commercial, advertising or promotional purposes (the "Waiver"). Affirmer makes
|
|
66
|
+
the Waiver for the benefit of each member of the public at large and to the
|
|
67
|
+
detriment of Affirmer's heirs and successors, fully intending that such Waiver
|
|
68
|
+
shall not be subject to revocation, rescission, cancellation, termination, or
|
|
69
|
+
any other legal or equitable action to disrupt the quiet enjoyment of the Work
|
|
70
|
+
by the public as contemplated by Affirmer's express Statement of Purpose.
|
|
71
|
+
|
|
72
|
+
3. Public License Fallback. Should any part of the Waiver for any reason be
|
|
73
|
+
judged legally invalid or ineffective under applicable law, then the Waiver
|
|
74
|
+
shall be preserved to the maximum extent permitted taking into account
|
|
75
|
+
Affirmer's express Statement of Purpose. In addition, to the extent the Waiver
|
|
76
|
+
is so judged Affirmer hereby grants to each affected person a royalty-free,
|
|
77
|
+
non transferable, non sublicensable, non exclusive, irrevocable and
|
|
78
|
+
unconditional license to exercise Affirmer's Copyright and Related Rights in
|
|
79
|
+
the Work (i) in all territories worldwide, (ii) for the maximum duration
|
|
80
|
+
provided by applicable law or treaty (including future time extensions), (iii)
|
|
81
|
+
in any current or future medium and for any number of copies, and (iv) for any
|
|
82
|
+
purpose whatsoever, including without limitation commercial, advertising or
|
|
83
|
+
promotional purposes (the "License"). The License shall be deemed effective as
|
|
84
|
+
of the date CC0 was applied by Affirmer to the Work. Should any part of the
|
|
85
|
+
License for any reason be judged legally invalid or ineffective under
|
|
86
|
+
applicable law, such partial invalidity or ineffectiveness shall not
|
|
87
|
+
invalidate the remainder of the License, and in such case Affirmer hereby
|
|
88
|
+
affirms that he or she will not (i) exercise any of his or her remaining
|
|
89
|
+
Copyright and Related Rights in the Work or (ii) assert any associated claims
|
|
90
|
+
and causes of action with respect to the Work, in either case contrary to
|
|
91
|
+
Affirmer's express Statement of Purpose.
|
|
92
|
+
|
|
93
|
+
4. Limitations and Disclaimers.
|
|
94
|
+
|
|
95
|
+
a. No trademark or patent rights held by Affirmer are waived, abandoned,
|
|
96
|
+
surrendered, licensed or otherwise affected by this document.
|
|
97
|
+
|
|
98
|
+
b. Affirmer offers the Work as-is and makes no representations or warranties
|
|
99
|
+
of any kind concerning the Work, express, implied, statutory or otherwise,
|
|
100
|
+
including without limitation warranties of title, merchantability, fitness
|
|
101
|
+
for a particular purpose, non infringement, or the absence of latent or
|
|
102
|
+
other defects, accuracy, or the present or absence of errors, whether or not
|
|
103
|
+
discoverable, all to the greatest extent permissible under applicable law.
|
|
104
|
+
|
|
105
|
+
c. Affirmer disclaims responsibility for clearing rights of other persons
|
|
106
|
+
that may apply to the Work or any use thereof, including without limitation
|
|
107
|
+
any person's Copyright and Related Rights in the Work. Further, Affirmer
|
|
108
|
+
disclaims responsibility for obtaining any necessary consents, permissions
|
|
109
|
+
or other rights required for any use of the Work.
|
|
110
|
+
|
|
111
|
+
d. Affirmer understands and acknowledges that Creative Commons is not a
|
|
112
|
+
party to this document and has no duty or obligation with respect to this
|
|
113
|
+
CC0 or use of the Work.
|
|
114
|
+
|
|
115
|
+
For more information, please see
|
|
116
|
+
<http://creativecommons.org/publicdomain/zero/1.0/>
|
data/Manifest.txt
ADDED
data/README.md
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
# tabreader - read in tabular datafiles in text in the tab format
|
|
2
|
+
|
|
3
|
+
* home :: [github.com/datatxt/tabreader](https://github.com/datatxt/tabreader)
|
|
4
|
+
* bugs :: [github.com/datatxt/tabreader/issues](https://github.com/datatxt/tabreader/issues)
|
|
5
|
+
* gem :: [rubygems.org/gems/tabreader](https://rubygems.org/gems/tabreader)
|
|
6
|
+
* rdoc :: [rubydoc.info/gems/tabreader](http://rubydoc.info/gems/tabreader)
|
|
7
|
+
* forum :: [wwwmake](http://groups.google.com/group/wwwmake)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
## Usage
|
|
11
|
+
|
|
12
|
+
Yes, it's as simple as:
|
|
13
|
+
|
|
14
|
+
``` ruby
|
|
15
|
+
line = "1\t2\t3"
|
|
16
|
+
values = line.split( "\t" )
|
|
17
|
+
pp values
|
|
18
|
+
# => ["1","2","3"]
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
or the "magic" packaged up in `TabReader`:
|
|
22
|
+
|
|
23
|
+
``` ruby
|
|
24
|
+
line = "1\t2\t3"
|
|
25
|
+
values = TabReader.parse_line( line )
|
|
26
|
+
pp values
|
|
27
|
+
# => ["1","2","3"]
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
or use the convenience helpers:
|
|
31
|
+
|
|
32
|
+
``` ruby
|
|
33
|
+
txt = <<TAB
|
|
34
|
+
1\t2\t3
|
|
35
|
+
4\t5\t6
|
|
36
|
+
TAB
|
|
37
|
+
|
|
38
|
+
records = TabReader.parse( txt )
|
|
39
|
+
pp records
|
|
40
|
+
# => [["1","2","3"],
|
|
41
|
+
# ["4","5","6"]]
|
|
42
|
+
|
|
43
|
+
# -or-
|
|
44
|
+
|
|
45
|
+
records = TabReader.read( "values.tab" )
|
|
46
|
+
pp records
|
|
47
|
+
# => [["1","2","3"],
|
|
48
|
+
# ["5","6","7"]]
|
|
49
|
+
|
|
50
|
+
# -or-
|
|
51
|
+
|
|
52
|
+
TabReader.foreach( "values.tab" ) do |rec|
|
|
53
|
+
pp rec
|
|
54
|
+
end
|
|
55
|
+
## => ["1","2","3"]
|
|
56
|
+
## => ["5","6","7"]
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
### What about headers?
|
|
61
|
+
|
|
62
|
+
Use the `TabHashReader`
|
|
63
|
+
if the first line is a header (or if missing pass in the headers
|
|
64
|
+
as an array) and you want your records as hashes instead of arrays of values.
|
|
65
|
+
Example:
|
|
66
|
+
|
|
67
|
+
``` ruby
|
|
68
|
+
txt = <<TAB
|
|
69
|
+
A\tB\tC
|
|
70
|
+
1\t2\t3
|
|
71
|
+
4\t5\t6
|
|
72
|
+
TAB
|
|
73
|
+
|
|
74
|
+
records = TabHashReader.parse( txt )
|
|
75
|
+
pp records
|
|
76
|
+
|
|
77
|
+
# -or-
|
|
78
|
+
|
|
79
|
+
txt2 = <<TAB
|
|
80
|
+
1\t2\t3
|
|
81
|
+
4\t5\t6
|
|
82
|
+
TAB
|
|
83
|
+
|
|
84
|
+
records = TabHashReader.parse( txt2, headers: ["A","B","C"] )
|
|
85
|
+
pp records
|
|
86
|
+
|
|
87
|
+
# => [{"A": "1", "B": "2", "C": "3"},
|
|
88
|
+
# {"A": "4", "B": "5", "C": "6"}]
|
|
89
|
+
|
|
90
|
+
# -or-
|
|
91
|
+
|
|
92
|
+
records = TabHashReader.read( "hash.tab" )
|
|
93
|
+
pp records
|
|
94
|
+
# => [{"A": "1", "B": "2", "C": "3"},
|
|
95
|
+
# {"A": "4", "B": "5", "C": "6"}]
|
|
96
|
+
|
|
97
|
+
# -or-
|
|
98
|
+
|
|
99
|
+
TabHashReader.foreach( "hash.tab" ) do |rec|
|
|
100
|
+
pp rec
|
|
101
|
+
end
|
|
102
|
+
# => {"A": "1", "B": "2", "C": "3"}
|
|
103
|
+
# => {"A": "4", "B": "5", "C": "6"}
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
## Frequently Asked Questions (FAQ) and Answers
|
|
111
|
+
|
|
112
|
+
### Q: Why NOT use `CSV.read( col_sep: "\t", quote_char: "∅" )`?
|
|
113
|
+
|
|
114
|
+
Tab != CSV
|
|
115
|
+
|
|
116
|
+
The tab format is an (even) simpler format than
|
|
117
|
+
the comma-separated values (CSV) classic format. How?
|
|
118
|
+
|
|
119
|
+
The tab format has NO escape rules.
|
|
120
|
+
A double quote (`"`) is a double quote (`"`). Example:
|
|
121
|
+
|
|
122
|
+
```
|
|
123
|
+
"1"→"2"→"3"
|
|
124
|
+
4→5→6
|
|
125
|
+
```
|
|
126
|
+
vs
|
|
127
|
+
|
|
128
|
+
```
|
|
129
|
+
"1","2","3"
|
|
130
|
+
4,5,6
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
Turns into `"1", "2", "3"` and `4, 5, 6`
|
|
134
|
+
in tab and `1, 2, 3` and `4, 5, 6` in CSV.
|
|
135
|
+
Note: The surrounding double quotes get stripped in CSV.
|
|
136
|
+
You have to double up double quotes (e.g. `""`)
|
|
137
|
+
for adding "literal" double quotes in CSV:
|
|
138
|
+
|
|
139
|
+
```
|
|
140
|
+
"""1""","""2""","""3"""
|
|
141
|
+
4,5,"Six says, ""Hello, World!"""
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
vs
|
|
145
|
+
|
|
146
|
+
```
|
|
147
|
+
"1"→"2"→"3"
|
|
148
|
+
4→5→Six says, "Hello, World!"
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
Thus, to avoid any surprises, do NOT use
|
|
152
|
+
`CSV.read( col_sep: "\t", quote_char: "∅" )` and friends for tab.
|
|
153
|
+
Note: Simpler also equals faster :-).
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
### Q: What's the tab format?
|
|
158
|
+
|
|
159
|
+
Let's reprint the (complete) tab spec(ification) right here
|
|
160
|
+
(in an edited simpler version):
|
|
161
|
+
|
|
162
|
+
A tab file encodes a number of records that may contain multiple fields.
|
|
163
|
+
Each record is represented as a single line.
|
|
164
|
+
Each field value is represented as text.
|
|
165
|
+
Fields in a record are separated from each other by a tab character.
|
|
166
|
+
|
|
167
|
+
Note that fields that contain tabs are not allowable in this encoding.
|
|
168
|
+
|
|
169
|
+
Here is a quick grammar in Backus-Naur Form (BNF):
|
|
170
|
+
|
|
171
|
+
```
|
|
172
|
+
field ::= [character]+ # multiple characters
|
|
173
|
+
record ::= field [TAB field]+ EOL # at least one field, or more
|
|
174
|
+
tab ::= record+
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
Example:
|
|
178
|
+
|
|
179
|
+
```
|
|
180
|
+
Name→Age→Address
|
|
181
|
+
Paul→23→1115 W Franklin
|
|
182
|
+
Bessy the Cow→5→Big Farm Way
|
|
183
|
+
Zeke→45→W Main St
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
(Source: [Tab Spec @ IANA Media Types](https://www.iana.org/assignments/media-types/text/tab-separated-values))
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
### Q: Why tab?
|
|
193
|
+
|
|
194
|
+
The tab format works great with
|
|
195
|
+
command line text wrangling / processing tools
|
|
196
|
+
because you can split lines / records on tab
|
|
197
|
+
(with no exceptions or extra escape rules).
|
|
198
|
+
Use classics such as `cut`, `paste`, `sort`, `uniq`, `grep`, `sed`, `awk`
|
|
199
|
+
and many more.
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
### Q: Why NOT tab?
|
|
205
|
+
|
|
206
|
+
Let's quote from the Awesome CSV page:
|
|
207
|
+
|
|
208
|
+
In theory the tab (`\t`) separator is perfect. Values never use tabs, don't they? So why hasn't the tab separator taken off?
|
|
209
|
+
|
|
210
|
+
In practice tab separators are invisible or look like spaces and often you cannot tell if a space is a tab or not.
|
|
211
|
+
|
|
212
|
+
Thus, tab works great only and only (like space) if your values do NOT use spaces and you treat a tab like a space.
|
|
213
|
+
|
|
214
|
+
(Source: [Awesome CSV @ CSV v1.1](https://github.com/csv11/awesome-csv))
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
## Best of Both Worlds
|
|
219
|
+
|
|
220
|
+
Use the `csv2tab` tool to convert comma-separated values (CSV) datafiles
|
|
221
|
+
to tab and use the `tab2csv` tool to convert tab datafiles to
|
|
222
|
+
comma-separated values (CSV).
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
## License
|
|
228
|
+
|
|
229
|
+

|
|
230
|
+
|
|
231
|
+
The `tabreader` scripts are dedicated to the public domain.
|
|
232
|
+
Use it as you please with no restrictions whatsoever.
|
|
233
|
+
|
|
234
|
+
## Questions? Comments?
|
|
235
|
+
|
|
236
|
+
Send them along to the [wwwmake forum](http://groups.google.com/group/wwwmake).
|
|
237
|
+
Thanks!
|
data/Rakefile
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
require 'hoe'
|
|
2
|
+
require './lib/tabreader/version.rb'
|
|
3
|
+
|
|
4
|
+
## Gem specification for tabreader (built via hoe).
##   The version comes from TabReader::VERSION (see lib/tabreader/version.rb).
Hoe.spec 'tabreader' do

  self.version = TabReader::VERSION

  self.summary = "tabreader - read in tabular datafiles in text in the tab format"
  self.description = summary

  ## NOTE(review): the README links to github.com/datatxt/tabreader
  ##   - confirm which organization name is the correct one
  self.urls = ['https://github.com/datatext/tabreader']

  self.author = 'Gerald Bauer'
  self.email = 'wwwmake@googlegroups.com'

  # switch extension to .markdown for github formatting
  self.readme_file = 'README.md'
  self.history_file = 'HISTORY.md'

  ## note: use the SPDX license identifier for the CC0 1.0 Universal
  ##   public domain dedication shipped in LICENSE.md;
  ##   rubygems warns about free-form values such as 'Public Domain'
  self.licenses = ['CC0-1.0']

  self.spec_extras = {
    :required_ruby_version => '>= 2.2.2'
  }

end
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class TabReader

  ##
  ## Reader for tabular text data in the tab format:
  ##   every line is one record and the fields of a record
  ##   get split on the tab character ("\t") - no quote or escape rules.
  ##
  ## add more configs - why? why not?
  ##   add comments and blanks (skip blank lines) - why? why not?
  ##     e.g. comments = '#'
  ##          blanks (skip blank lines)
  ##          rtrim,ltrim,trim
  ##
  ## todo: add converters: e.g. strip (aka trim / ltrim / rtrim )


  ## Reads the complete file at path (opened as utf-8) and parses it.
  ##
  ##   path    - path of the tab file
  ##   headers - false (default), true or an array of column names;
  ##             passed on as is to parse
  ##
  ## Returns an array of records (see parse).
  def self.read( path, headers: false )
    ## note: use the block form of File.open so that the file
    ##         gets closed right after reading
    txt = File.open( path, 'r:utf-8' ) { |f| f.read }
    parse( txt, headers: headers )
  end

  ## Parses the text line by line.
  ##
  ##   headers: false - returns an array of arrays (of string values)
  ##   headers: true  - uses the first line as the header row and
  ##                      returns an array of hashes for all other lines
  ##   headers: [...] - uses the passed in array as the header row and
  ##                      returns an array of hashes for all lines
  def self.parse( txt, headers: false )
    rows = []

    ## header row a.k.a. columns / fields (nil => not yet known)
    columns = headers.is_a?( Array ) ? headers : nil

    txt.each_line do |line|
      values = parse_line( line )
      if headers        ## add values as name/value pairs e.g. array of hashes
        if columns.nil?
          columns = values   ## first row is header row
        else
          ## note: will cut-off values if values.size > columns.size
          ##   add warning/error - why? why not?
          ## if values.size < columns.size will get filled-up with nil
          rows << columns.zip( values ).to_h
        end
      else              ## add values as is e.g. array of array
        rows << values
      end
    end
    rows
  end

  ## Reads the file at path (opened as utf-8) line by line and
  ##   yields every record to the passed in block;
  ##   a record is an array of values or - if headers is set - a hash
  ##   (same rules as in parse).
  ##
  ## Always returns nil.
  def self.foreach( path, headers: false )
    ## header row a.k.a. columns / fields (nil => not yet known)
    columns = headers.is_a?( Array ) ? headers : nil

    ## note: use the block form of File.open so that the file
    ##         gets closed even if the passed in block raises
    File.open( path, 'r:utf-8' ) do |f|
      f.each_line do |line|
        values = parse_line( line )
        if headers        ## yield values as name/value pairs (hash)
          if columns.nil?
            columns = values   ## first row is header row
          else
            yield( columns.zip( values ).to_h )
          end
        else              ## yield values as is (array)
          yield( values )
        end
      end
    end

    # return nil
    nil
  end


  ## Splits a single line into an array of (string) values.
  ##
  ##   All trailing newlines (\n or \r\n) get removed first.
  ##   An empty (or newline-only) line results in an empty array.
  def self.parse_line( line )
    ## check - can handle comments and blank lines too - why? why not?

    ## note: chomp('') if is an empty string,
    ##    it will remove all trailing newlines from the string.
    ##    use line.sub(/[\n\r]*$/, '') or similar instead - why? why not?
    line = line.chomp('')

    ## note: pass in a negative limit - otherwise split will drop
    ##    all trailing empty values e.g. "1\t\t" => ["1"] (instead of ["1","",""])
    line.split( "\t", -1 )
  end

  ## Reads the first line of the file at path (opened as utf-8) and
  ##   returns it split into an array of values;
  ##   returns an empty array for an empty file.
  def self.header( path )
    line = File.open( path, 'r:utf-8' ) do |f|
             if f.eof?
               ## handle empty file; return empty string; no readline call possible
               ##  todo/check: return nil from header is no header or [] - why? why not?
               ##   or throw exception end of file reached (EOFError) - why? why not?
               ""
             else
               f.readline
             end
           end

    ## note: line includes \n or \r\n at the end
    parse_line( line )
  end

end # class TabReader
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
## note: for now TabReader is a class!!! NOT a module - change - why? why not?
|
|
5
|
+
class TabReader

  ## version parts
  ##   todo: namespace inside version or something - why? why not??
  MAJOR = 0
  MINOR = 0
  PATCH = 1

  ## complete version string in the format major.minor.patch
  VERSION = "#{MAJOR}.#{MINOR}.#{PATCH}"

  ## Returns the version string (same as VERSION).
  def self.version() VERSION; end

  ## Returns a one-line banner with the library version plus
  ##   the version, release date and platform of the running ruby.
  def self.banner
    "tabreader/#{VERSION} on Ruby #{RUBY_VERSION} (#{RUBY_RELEASE_DATE}) [#{RUBY_PLATFORM}]"
  end

  ## Returns the absolute path of the directory three levels up
  ##   from this source file (that is, file => dir => parent => root).
  def self.root
    here   = File.dirname( __FILE__ )
    parent = File.dirname( here )
    top    = File.dirname( parent )
    File.expand_path( top )
  end

end # class TabReader
|
data/lib/tabreader.rb
ADDED
data/test/data/empty.tab
ADDED
|
File without changes
|
data/test/data/test.tab
ADDED
data/test/helper.rb
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
## $:.unshift(File.dirname(__FILE__))
|
|
2
|
+
|
|
3
|
+
## minitest setup
|
|
4
|
+
|
|
5
|
+
require 'minitest/autorun'
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
## our own code
|
|
9
|
+
require 'tabreader'
|
|
10
|
+
|
|
11
|
+
## add test_data_dir helper
|
|
12
|
+
class TabReader
  ## test-only helper:
  ##   returns the path of the test data directory (test/data)
  ##   below the root directory returned by TabReader.root
  def self.test_data_dir
    base = root
    "#{base}/test/data"
  end
end
|
data/test/test_reader.rb
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
###
|
|
4
|
+
# to run use
|
|
5
|
+
# ruby -I ./lib -I ./test test/test_reader.rb
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
require 'helper'
|
|
9
|
+
|
|
10
|
+
## Tests for TabReader (parse, parse_line, read, header and foreach).
##   note: uses the Minitest constant -
##     the legacy MiniTest alias is no longer defined by default
##     in current minitest versions
class TestReader < Minitest::Test


  ## parse and parse_line with inline text
  def test_parse

    txt1 = <<TXT
a\tb\tc
1\t2\t3
4\t5\t6
TXT

    ## NOTE(review): the values here are separated by spaces as shown -
    ##   confirm whether literal tabs were intended;
    ##   results for txt2 get printed only (no assertions)
    txt2 = <<TXT
a b c d
1 2 3 4
5 6 7 8
TXT

    puts "== parse:"
    pp TabReader.parse( txt1 )
    pp TabReader.parse( txt1, headers: true )

    assert_equal [["a","b","c"],
                  ["1","2","3"],
                  ["4","5","6"]], TabReader.parse( txt1 )

    assert_equal [{"a"=>"1", "b"=>"2", "c"=>"3"},
                  {"a"=>"4", "b"=>"5", "c"=>"6"}], TabReader.parse( txt1, headers: true )

    puts "== parse:"
    pp TabReader.parse( txt2 )
    pp TabReader.parse( txt2, headers: true )

    puts "== parse_line:"
    pp TabReader.parse_line( "1\t2\t3" )
    assert_equal ["1","2","3"], TabReader.parse_line( "1\t2\t3" )

    puts "== parse_line:"
    pp TabReader.parse_line( "1 2 3 4" )

    puts "== parse_line:"
    pp TabReader.parse_line( "1\t2\t3\r\n" )
    ## trailing newline (\r\n) must get removed
    assert_equal ["1","2","3"], TabReader.parse_line( "1\t2\t3\r\n" )
  end


  ## read, header and foreach with the test.tab datafile;
  ##   note: the records get printed only
  def test_read

    puts "== read:"
    pp TabReader.read( "#{TabReader.test_data_dir}/test.tab" )
    pp TabReader.read( "#{TabReader.test_data_dir}/test.tab", headers: true )
    puts "== header:"
    pp TabReader.header( "#{TabReader.test_data_dir}/test.tab" )
    puts "== foreach:"
    ret = TabReader.foreach( "#{TabReader.test_data_dir}/test.tab" ) do |row|
      pp row
    end
    assert_nil ret    ## foreach always returns nil

    ret = TabReader.foreach( "#{TabReader.test_data_dir}/test.tab", headers: true ) do |row|
      pp row
    end
    assert_nil ret
  end


  ## empty file and empty string edge cases
  def test_read_empty

    puts "== read (empty):"
    pp TabReader.read( "#{TabReader.test_data_dir}/empty.tab" )
    assert_equal [], TabReader.read( "#{TabReader.test_data_dir}/empty.tab" )

    puts "== header (empty):"
    pp TabReader.header( "#{TabReader.test_data_dir}/empty.tab" )
    assert_equal [], TabReader.header( "#{TabReader.test_data_dir}/empty.tab" )

    puts "== foreach (empty):"
    rows = []
    TabReader.foreach( "#{TabReader.test_data_dir}/empty.tab" ) do |row|
      pp row
      rows << row
    end
    assert_equal [], rows    ## nothing to yield for an empty file

    puts "== parse (empty):"
    pp TabReader.parse( "" )
    pp TabReader.parse_line( "" )
    assert_equal [], TabReader.parse( "" )
    assert_equal [], TabReader.parse_line( "" )
  end

end
|
metadata
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: tabreader
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.0.1
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Gerald Bauer
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2018-08-17 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: rdoc
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '4.0'
|
|
20
|
+
type: :development
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '4.0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: hoe
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '3.16'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '3.16'
|
|
41
|
+
description: tabreader - read in tabular datafiles in text in the tab format
|
|
42
|
+
email: wwwmake@googlegroups.com
|
|
43
|
+
executables: []
|
|
44
|
+
extensions: []
|
|
45
|
+
extra_rdoc_files:
|
|
46
|
+
- HISTORY.md
|
|
47
|
+
- LICENSE.md
|
|
48
|
+
- Manifest.txt
|
|
49
|
+
- README.md
|
|
50
|
+
files:
|
|
51
|
+
- HISTORY.md
|
|
52
|
+
- LICENSE.md
|
|
53
|
+
- Manifest.txt
|
|
54
|
+
- README.md
|
|
55
|
+
- Rakefile
|
|
56
|
+
- lib/tabreader.rb
|
|
57
|
+
- lib/tabreader/reader.rb
|
|
58
|
+
- lib/tabreader/version.rb
|
|
59
|
+
- test/data/empty.tab
|
|
60
|
+
- test/data/test.tab
|
|
61
|
+
- test/helper.rb
|
|
62
|
+
- test/test_reader.rb
|
|
63
|
+
homepage: https://github.com/datatext/tabreader
|
|
64
|
+
licenses:
|
|
65
|
+
- Public Domain
|
|
66
|
+
metadata: {}
|
|
67
|
+
post_install_message:
|
|
68
|
+
rdoc_options:
|
|
69
|
+
- "--main"
|
|
70
|
+
- README.md
|
|
71
|
+
require_paths:
|
|
72
|
+
- lib
|
|
73
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
74
|
+
requirements:
|
|
75
|
+
- - ">="
|
|
76
|
+
- !ruby/object:Gem::Version
|
|
77
|
+
version: 2.2.2
|
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
79
|
+
requirements:
|
|
80
|
+
- - ">="
|
|
81
|
+
- !ruby/object:Gem::Version
|
|
82
|
+
version: '0'
|
|
83
|
+
requirements: []
|
|
84
|
+
rubyforge_project:
|
|
85
|
+
rubygems_version: 2.5.2
|
|
86
|
+
signing_key:
|
|
87
|
+
specification_version: 4
|
|
88
|
+
summary: tabreader - read in tabular datafiles in text in the tab format
|
|
89
|
+
test_files: []
|