csvreader 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b7c45afa79ca462aef1a8a9f1f6da26b7b08e125
4
- data.tar.gz: b1a20a1acf5e88331900169f5d3542fb4f866f58
3
+ metadata.gz: c522e332ef3c1fead487b99d5fe147ba43ad2090
4
+ data.tar.gz: 51dd6d88ef8dc35615513961bab7e0e1c3b3512b
5
5
  SHA512:
6
- metadata.gz: 7cf456f8e886e687ade020c70830332c1b2c3bdb1d14a148ed9a01ce2d7d1e9acffab2a3435907641fad220830f3eee39c20d8ca645e096bae5fe0bb2184a075
7
- data.tar.gz: a887567bd67a3080d99adb2a609d8e79a5385b83abe1109f1f4d24a1ebd5fb9536e38701e8fefe5020c16753f674d45b673d00b6300b3422d713a630c3dbf0d6
6
+ metadata.gz: 7e563f75e916829e8de1b0a3b1208dd089de9a7907d010e3ba2cd23f1a70fedcb8d98c95e65c15ab7d3ad8705ae41a4ad6cd543ba20d6a72dc67f27b0060286b
7
+ data.tar.gz: 57036e2457b4dc1837748538062150650b47abef3d2493f4c4f42db4291fdd3001cb6fb218eca38c7c11816c67360cfea74030e137ff7edf8de1fb9e47f991ec
@@ -22,6 +22,7 @@ test/data/beer11.csv
22
22
  test/data/cars11.csv
23
23
  test/data/cities11.csv
24
24
  test/data/customers11.csv
25
+ test/data/iris11.csv
25
26
  test/data/shakespeare.csv
26
27
  test/helper.rb
27
28
  test/test_buffer.rb
data/README.md CHANGED
@@ -422,7 +422,7 @@ Csv.strict.read( ..., sep: "\t" )
422
422
 
423
423
 
424
424
 
425
- ### Q: How can I read records with fixed width (and no separator)?
425
+ ### Q: How can I read records with fixed width fields (and no separator)?
426
426
 
427
427
  Pass in the `width` keyword option with the field widths / lengths
428
428
  to the "fixed" parser. Example:
@@ -432,7 +432,7 @@ txt = <<TXT
432
432
  12345678123456781234567890123456789012345678901212345678901234
433
433
  TXT
434
434
 
435
- Csv.fixed.parse( txt, width: [8,8,32,14] )
435
+ Csv.fixed.parse( txt, width: [8,8,32,14] ) # or Csv.fix or Csv.f
436
436
  # => [["12345678","12345678", "12345678901234567890123456789012", "12345678901234"]]
437
437
 
438
438
 
@@ -441,15 +441,68 @@ John Smith john@example.com 1-888-555-6666
441
441
  Michele O'Reileymichele@example.com 1-333-321-8765
442
442
  TXT
443
443
 
444
- Csv.fixed.parse( txt, width: [8,8,32,14] )
444
+ Csv.fixed.parse( txt, width: [8,8,32,14] ) # or Csv.fix or Csv.f
445
445
  # => [["John", "Smith", "john@example.com", "1-888-555-6666"],
446
446
  # ["Michele", "O'Reiley", "michele@example.com", "1-333-321-8765"]]
447
447
 
448
448
  # and so on
449
449
  ```
450
450
 
451
+ <!--
451
452
  Note: You can use for your convenience the built-in
452
453
  `Csv.fix` or `Csv.f` aliases / shortcuts.
454
+ -->
455
+
456
+
457
+ Note: You can use negative widths (e.g. `-2`, `-3`, and so on)
458
+ to "skip" filler fields (e.g. `--`, `---`, and so on).
459
+ Example:
460
+
461
+ ``` ruby
462
+ txt = <<TXT
463
+ 12345678--12345678---12345678901234567890123456789012--12345678901234XXX
464
+ TXT
465
+
466
+ Csv.fixed.parse( txt, width: [8,-2,8,-3,32,-2,14] ) # or Csv.fix or Csv.f
467
+ # => [["12345678","12345678", "12345678901234567890123456789012", "12345678901234"]]
468
+ ```
469
+
470
+
471
+ Bonus: If the width is a string (not an array)
472
+ (e.g. `'a8 a8 a32 Z*'` or `'A8 A8 A32 Z*'` and so on)
473
+ then the fixed width field parser
474
+ will use `String#unpack` and the value of width as its format string spec.
475
+ Example:
476
+
477
+ ``` ruby
478
+ txt = <<TXT
479
+ 12345678123456781234567890123456789012345678901212345678901234
480
+ TXT
481
+
482
+ Csv.fixed.parse( txt, width: 'a8 a8 a32 Z*' ) # or Csv.fix or Csv.f
483
+ # => [["12345678","12345678", "12345678901234567890123456789012", "12345678901234"]]
484
+
485
+ txt = <<TXT
486
+ John Smith john@example.com 1-888-555-6666
487
+ Michele O'Reileymichele@example.com 1-333-321-8765
488
+ TXT
489
+
490
+ Csv.fixed.parse( txt, width: 'A8 A8 A32 Z*' ) # or Csv.fix or Csv.f
491
+ # => [["John", "Smith", "john@example.com", "1-888-555-6666"],
492
+ # ["Michele", "O'Reiley", "michele@example.com", "1-333-321-8765"]]
493
+
494
+ # and so on
495
+ ```
496
+
497
+ | String Directive | Returns | Meaning |
498
+ |------------------|---------|-------------------------|
499
+ | `A` | String | Arbitrary binary string (remove trailing nulls and ASCII spaces) |
500
+ | `a` | String | Arbitrary binary string |
501
+ | `Z` | String | Null-terminated string |
502
+
503
+
504
+ and many more. See the `String#unpack` documentation
505
+ for the complete format spec and directives.
453
506
 
454
507
 
455
508
 
@@ -69,15 +69,31 @@ def parse_lines( input, width:, &block )
69
69
  end
70
70
 
71
71
 
72
- values = []
73
- offset = 0 # start position / offset
74
- width.each_with_index do |w,i|
75
- logger.debug "[#{i}] start: #{offset}, width: #{w}" if logger.debug?
76
-
77
- value = line[offset, w]
78
- value = value.strip if value ## note: if not nil strip; only use rstrip (for trailing only) - why? why not?
79
- values << value
80
- offset += w
72
+ if width.is_a?( String )
73
+ ## assume it's String#unpack format e.g.
74
+ ## "209231-231992395 MoreData".unpack('aa5A1A9a4Z*')
75
+ ## returns an array as follows :
76
+ ## ["2", "09231", "-", "231992395", " ", "MoreData"]
77
+ ## see String#unpack
78
+
79
+ values = line.unpack( width )
80
+ else ## assume array with integers
81
+ values = []
82
+ offset = 0 # start position / offset
83
+ width.each_with_index do |w,i|
84
+ logger.debug "[#{i}] start: #{offset}, width: #{w}" if logger.debug?
85
+
86
+ if w < 0 ## convention - if width negative, skip column
87
+ # note: minus (-) and minus (-) equal plus (+)
88
+ ## e.g. 2 - -2 = 4
89
+ offset -= w
90
+ else
91
+ value = line[offset, w]
92
+ value = value.strip if value ## note: if not nil strip; only use rstrip (for trailing only) - why? why not?
93
+ values << value
94
+ offset += w
95
+ end
96
+ end
81
97
  end
82
98
 
83
99
  ## note: requires block - enforce? how? why? why not?
@@ -5,7 +5,7 @@ class CsvReader ## note: uses a class for now - change to module - why? why no
5
5
 
6
6
  MAJOR = 1 ## todo: namespace inside version or something - why? why not??
7
7
  MINOR = 1
8
- PATCH = 0
8
+ PATCH = 1
9
9
  VERSION = [MAJOR,MINOR,PATCH].join('.')
10
10
 
11
11
 
@@ -0,0 +1,163 @@
1
+ ###
2
+ # The Iris flower data set or Fisher's Iris data set is a multivariate data set
3
+ # introduced by the British statistician and biologist Ronald Fisher in his 1936 paper
4
+ # The use of multiple measurements in taxonomic problems as an example of
5
+ # linear discriminant analysis.
6
+ # See https://en.wikipedia.org/wiki/Iris_flower_data_set
7
+
8
+
9
+ # The dataset contains a set of 150 records under five attributes
10
+ # - petal length, petal width, sepal length, sepal width and species.
11
+
12
+
13
+ Sepal length, Sepal width, Petal length, Petal width, Species
14
+ 5.1, 3.5, 1.4, 0.2, I. setosa
15
+ 4.9, 3.0, 1.4, 0.2, I. setosa
16
+ 4.7, 3.2, 1.3, 0.2, I. setosa
17
+ 4.6, 3.1, 1.5, 0.2, I. setosa
18
+ 5.0, 3.6, 1.4, 0.3, I. setosa
19
+ 5.4, 3.9, 1.7, 0.4, I. setosa
20
+ 4.6, 3.4, 1.4, 0.3, I. setosa
21
+ 5.0, 3.4, 1.5, 0.2, I. setosa
22
+ 4.4, 2.9, 1.4, 0.2, I. setosa
23
+ 4.9, 3.1, 1.5, 0.1, I. setosa
24
+ 5.4, 3.7, 1.5, 0.2, I. setosa
25
+ 4.8, 3.4, 1.6, 0.2, I. setosa
26
+ 4.8, 3.0, 1.4, 0.1, I. setosa
27
+ 4.3, 3.0, 1.1, 0.1, I. setosa
28
+ 5.8, 4.0, 1.2, 0.2, I. setosa
29
+ 5.7, 4.4, 1.5, 0.4, I. setosa
30
+ 5.4, 3.9, 1.3, 0.4, I. setosa
31
+ 5.1, 3.5, 1.4, 0.3, I. setosa
32
+ 5.7, 3.8, 1.7, 0.3, I. setosa
33
+ 5.1, 3.8, 1.5, 0.3, I. setosa
34
+ 5.4, 3.4, 1.7, 0.2, I. setosa
35
+ 5.1, 3.7, 1.5, 0.4, I. setosa
36
+ 4.6, 3.6, 1.0, 0.2, I. setosa
37
+ 5.1, 3.3, 1.7, 0.5, I. setosa
38
+ 4.8, 3.4, 1.9, 0.2, I. setosa
39
+ 5.0, 3.0, 1.6, 0.2, I. setosa
40
+ 5.0, 3.4, 1.6, 0.4, I. setosa
41
+ 5.2, 3.5, 1.5, 0.2, I. setosa
42
+ 5.2, 3.4, 1.4, 0.2, I. setosa
43
+ 4.7, 3.2, 1.6, 0.2, I. setosa
44
+ 4.8, 3.1, 1.6, 0.2, I. setosa
45
+ 5.4, 3.4, 1.5, 0.4, I. setosa
46
+ 5.2, 4.1, 1.5, 0.1, I. setosa
47
+ 5.5, 4.2, 1.4, 0.2, I. setosa
48
+ 4.9, 3.1, 1.5, 0.2, I. setosa
49
+ 5.0, 3.2, 1.2, 0.2, I. setosa
50
+ 5.5, 3.5, 1.3, 0.2, I. setosa
51
+ 4.9, 3.6, 1.4, 0.1, I. setosa
52
+ 4.4, 3.0, 1.3, 0.2, I. setosa
53
+ 5.1, 3.4, 1.5, 0.2, I. setosa
54
+ 5.0, 3.5, 1.3, 0.3, I. setosa
55
+ 4.5, 2.3, 1.3, 0.3, I. setosa
56
+ 4.4, 3.2, 1.3, 0.2, I. setosa
57
+ 5.0, 3.5, 1.6, 0.6, I. setosa
58
+ 5.1, 3.8, 1.9, 0.4, I. setosa
59
+ 4.8, 3.0, 1.4, 0.3, I. setosa
60
+ 5.1, 3.8, 1.6, 0.2, I. setosa
61
+ 4.6, 3.2, 1.4, 0.2, I. setosa
62
+ 5.3, 3.7, 1.5, 0.2, I. setosa
63
+ 5.0, 3.3, 1.4, 0.2, I. setosa
64
+ 7.0, 3.2, 4.7, 1.4, I. versicolor
65
+ 6.4, 3.2, 4.5, 1.5, I. versicolor
66
+ 6.9, 3.1, 4.9, 1.5, I. versicolor
67
+ 5.5, 2.3, 4.0, 1.3, I. versicolor
68
+ 6.5, 2.8, 4.6, 1.5, I. versicolor
69
+ 5.7, 2.8, 4.5, 1.3, I. versicolor
70
+ 6.3, 3.3, 4.7, 1.6, I. versicolor
71
+ 4.9, 2.4, 3.3, 1.0, I. versicolor
72
+ 6.6, 2.9, 4.6, 1.3, I. versicolor
73
+ 5.2, 2.7, 3.9, 1.4, I. versicolor
74
+ 5.0, 2.0, 3.5, 1.0, I. versicolor
75
+ 5.9, 3.0, 4.2, 1.5, I. versicolor
76
+ 6.0, 2.2, 4.0, 1.0, I. versicolor
77
+ 6.1, 2.9, 4.7, 1.4, I. versicolor
78
+ 5.6, 2.9, 3.6, 1.3, I. versicolor
79
+ 6.7, 3.1, 4.4, 1.4, I. versicolor
80
+ 5.6, 3.0, 4.5, 1.5, I. versicolor
81
+ 5.8, 2.7, 4.1, 1.0, I. versicolor
82
+ 6.2, 2.2, 4.5, 1.5, I. versicolor
83
+ 5.6, 2.5, 3.9, 1.1, I. versicolor
84
+ 5.9, 3.2, 4.8, 1.8, I. versicolor
85
+ 6.1, 2.8, 4.0, 1.3, I. versicolor
86
+ 6.3, 2.5, 4.9, 1.5, I. versicolor
87
+ 6.1, 2.8, 4.7, 1.2, I. versicolor
88
+ 6.4, 2.9, 4.3, 1.3, I. versicolor
89
+ 6.6, 3.0, 4.4, 1.4, I. versicolor
90
+ 6.8, 2.8, 4.8, 1.4, I. versicolor
91
+ 6.7, 3.0, 5.0, 1.7, I. versicolor
92
+ 6.0, 2.9, 4.5, 1.5, I. versicolor
93
+ 5.7, 2.6, 3.5, 1.0, I. versicolor
94
+ 5.5, 2.4, 3.8, 1.1, I. versicolor
95
+ 5.5, 2.4, 3.7, 1.0, I. versicolor
96
+ 5.8, 2.7, 3.9, 1.2, I. versicolor
97
+ 6.0, 2.7, 5.1, 1.6, I. versicolor
98
+ 5.4, 3.0, 4.5, 1.5, I. versicolor
99
+ 6.0, 3.4, 4.5, 1.6, I. versicolor
100
+ 6.7, 3.1, 4.7, 1.5, I. versicolor
101
+ 6.3, 2.3, 4.4, 1.3, I. versicolor
102
+ 5.6, 3.0, 4.1, 1.3, I. versicolor
103
+ 5.5, 2.5, 4.0, 1.3, I. versicolor
104
+ 5.5, 2.6, 4.4, 1.2, I. versicolor
105
+ 6.1, 3.0, 4.6, 1.4, I. versicolor
106
+ 5.8, 2.6, 4.0, 1.2, I. versicolor
107
+ 5.0, 2.3, 3.3, 1.0, I. versicolor
108
+ 5.6, 2.7, 4.2, 1.3, I. versicolor
109
+ 5.7, 3.0, 4.2, 1.2, I. versicolor
110
+ 5.7, 2.9, 4.2, 1.3, I. versicolor
111
+ 6.2, 2.9, 4.3, 1.3, I. versicolor
112
+ 5.1, 2.5, 3.0, 1.1, I. versicolor
113
+ 5.7, 2.8, 4.1, 1.3, I. versicolor
114
+ 6.3, 3.3, 6.0, 2.5, I. virginica
115
+ 5.8, 2.7, 5.1, 1.9, I. virginica
116
+ 7.1, 3.0, 5.9, 2.1, I. virginica
117
+ 6.3, 2.9, 5.6, 1.8, I. virginica
118
+ 6.5, 3.0, 5.8, 2.2, I. virginica
119
+ 7.6, 3.0, 6.6, 2.1, I. virginica
120
+ 4.9, 2.5, 4.5, 1.7, I. virginica
121
+ 7.3, 2.9, 6.3, 1.8, I. virginica
122
+ 6.7, 2.5, 5.8, 1.8, I. virginica
123
+ 7.2, 3.6, 6.1, 2.5, I. virginica
124
+ 6.5, 3.2, 5.1, 2.0, I. virginica
125
+ 6.4, 2.7, 5.3, 1.9, I. virginica
126
+ 6.8, 3.0, 5.5, 2.1, I. virginica
127
+ 5.7, 2.5, 5.0, 2.0, I. virginica
128
+ 5.8, 2.8, 5.1, 2.4, I. virginica
129
+ 6.4, 3.2, 5.3, 2.3, I. virginica
130
+ 6.5, 3.0, 5.5, 1.8, I. virginica
131
+ 7.7, 3.8, 6.7, 2.2, I. virginica
132
+ 7.7, 2.6, 6.9, 2.3, I. virginica
133
+ 6.0, 2.2, 5.0, 1.5, I. virginica
134
+ 6.9, 3.2, 5.7, 2.3, I. virginica
135
+ 5.6, 2.8, 4.9, 2.0, I. virginica
136
+ 7.7, 2.8, 6.7, 2.0, I. virginica
137
+ 6.3, 2.7, 4.9, 1.8, I. virginica
138
+ 6.7, 3.3, 5.7, 2.1, I. virginica
139
+ 7.2, 3.2, 6.0, 1.8, I. virginica
140
+ 6.2, 2.8, 4.8, 1.8, I. virginica
141
+ 6.1, 3.0, 4.9, 1.8, I. virginica
142
+ 6.4, 2.8, 5.6, 2.1, I. virginica
143
+ 7.2, 3.0, 5.8, 1.6, I. virginica
144
+ 7.4, 2.8, 6.1, 1.9, I. virginica
145
+ 7.9, 3.8, 6.4, 2.0, I. virginica
146
+ 6.4, 2.8, 5.6, 2.2, I. virginica
147
+ 6.3, 2.8, 5.1, 1.5, I. virginica
148
+ 6.1, 2.6, 5.6, 1.4, I. virginica
149
+ 7.7, 3.0, 6.1, 2.3, I. virginica
150
+ 6.3, 3.4, 5.6, 2.4, I. virginica
151
+ 6.4, 3.1, 5.5, 1.8, I. virginica
152
+ 6.0, 3.0, 4.8, 1.8, I. virginica
153
+ 6.9, 3.1, 5.4, 2.1, I. virginica
154
+ 6.7, 3.1, 5.6, 2.4, I. virginica
155
+ 6.9, 3.1, 5.1, 2.3, I. virginica
156
+ 5.8, 2.7, 5.1, 1.9, I. virginica
157
+ 6.8, 3.2, 5.9, 2.3, I. virginica
158
+ 6.7, 3.3, 5.7, 2.5, I. virginica
159
+ 6.7, 3.0, 5.2, 2.3, I. virginica
160
+ 6.3, 2.5, 5.0, 1.9, I. virginica
161
+ 6.5, 3.0, 5.2, 2.0, I. virginica
162
+ 6.2, 3.4, 5.4, 2.3, I. virginica
163
+ 5.9, 3.0, 5.1, 1.8, I. virginica
@@ -14,9 +14,9 @@ def parser() CsvReader::Parser::FIXED; end
14
14
  def reader() CsvReader.fixed; end
15
15
 
16
16
 
17
- def test_width
18
- width( parser )
19
- width( reader )
17
+ def test_numbers
18
+ numbers( parser )
19
+ numbers( reader )
20
20
  end
21
21
 
22
22
  def test_contacts
@@ -25,7 +25,7 @@ def test_contacts
25
25
  end
26
26
 
27
27
 
28
- def width( parser )
28
+ def numbers( parser )
29
29
  records = [["12345678","12345678", "12345678901234567890123456789012", "12345678901234"]]
30
30
 
31
31
  assert_equal records, parser.parse( <<TXT, width: [8,8,32,14] )
@@ -37,6 +37,11 @@ TXT
37
37
 
38
38
  assert_equal records, parser.parse( <<TXT, width: [8,8,32,14] )
39
39
  12345678123456781234567890123456789012345678901212345678901234
40
+ TXT
41
+
42
+ ## note: negative width fields get skipped
43
+ assert_equal records, parser.parse( <<TXT, width: [8,-2,8,-3,32,-2,14] )
44
+ 12345678XX12345678XXX12345678901234567890123456789012XX12345678901234XXX
40
45
  TXT
41
46
  end
42
47
 
@@ -61,20 +66,20 @@ TXT
61
66
  end
62
67
 
63
68
 
64
- def test_contacts
65
- records = [["John", "Smith", "john@example.com", "1-888-555-6666"],
66
- ["Michele", "O'Reiley", "michele@example.com", "1-333-321-8765"]]
67
-
68
- assert_equal records, parser.parse( <<TXT, width: [8,8,32,14] )
69
- # fixed width with comments and blank lines
70
69
 
71
- John Smith john@example.com 1-888-555-6666
72
- Michele O'Reileymichele@example.com 1-333-321-8765
70
+ def test_unpack_numbers
71
+ records = [["12345678","12345678", "12345678901234567890123456789012", "12345678901234"]]
73
72
 
73
+ assert_equal records, parser.parse( <<TXT, width: 'a8 a8 a32 Z*' )
74
+ 12345678123456781234567890123456789012345678901212345678901234
74
75
  TXT
76
+ end
75
77
 
78
+ def test_unpack_contacts
79
+ records = [["John", "Smith", "john@example.com", "1-888-555-6666"],
80
+ ["Michele", "O'Reiley", "michele@example.com", "1-333-321-8765"]]
76
81
 
77
- assert_equal records, parser.parse( <<TXT, width: [8,8,32,14] )
82
+ assert_equal records, parser.parse( <<TXT, width: 'A8 A8 A32 Z*' )
78
83
  John Smith john@example.com 1-888-555-6666
79
84
  Michele O'Reileymichele@example.com 1-333-321-8765
80
85
  TXT
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvreader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
@@ -73,6 +73,7 @@ files:
73
73
  - test/data/cars11.csv
74
74
  - test/data/cities11.csv
75
75
  - test/data/customers11.csv
76
+ - test/data/iris11.csv
76
77
  - test/data/shakespeare.csv
77
78
  - test/helper.rb
78
79
  - test/test_buffer.rb