csvreader 1.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b7c45afa79ca462aef1a8a9f1f6da26b7b08e125
4
- data.tar.gz: b1a20a1acf5e88331900169f5d3542fb4f866f58
3
+ metadata.gz: c522e332ef3c1fead487b99d5fe147ba43ad2090
4
+ data.tar.gz: 51dd6d88ef8dc35615513961bab7e0e1c3b3512b
5
5
  SHA512:
6
- metadata.gz: 7cf456f8e886e687ade020c70830332c1b2c3bdb1d14a148ed9a01ce2d7d1e9acffab2a3435907641fad220830f3eee39c20d8ca645e096bae5fe0bb2184a075
7
- data.tar.gz: a887567bd67a3080d99adb2a609d8e79a5385b83abe1109f1f4d24a1ebd5fb9536e38701e8fefe5020c16753f674d45b673d00b6300b3422d713a630c3dbf0d6
6
+ metadata.gz: 7e563f75e916829e8de1b0a3b1208dd089de9a7907d010e3ba2cd23f1a70fedcb8d98c95e65c15ab7d3ad8705ae41a4ad6cd543ba20d6a72dc67f27b0060286b
7
+ data.tar.gz: 57036e2457b4dc1837748538062150650b47abef3d2493f4c4f42db4291fdd3001cb6fb218eca38c7c11816c67360cfea74030e137ff7edf8de1fb9e47f991ec
@@ -22,6 +22,7 @@ test/data/beer11.csv
22
22
  test/data/cars11.csv
23
23
  test/data/cities11.csv
24
24
  test/data/customers11.csv
25
+ test/data/iris11.csv
25
26
  test/data/shakespeare.csv
26
27
  test/helper.rb
27
28
  test/test_buffer.rb
data/README.md CHANGED
@@ -422,7 +422,7 @@ Csv.strict.read( ..., sep: "\t" )
422
422
 
423
423
 
424
424
 
425
- ### Q: How can I read records with fixed width (and no separator)?
425
+ ### Q: How can I read records with fixed width fields (and no separator)?
426
426
 
427
427
  Pass in the `width` keyword option with the field widths / lengths
428
428
  to the "fixed" parser. Example:
@@ -432,7 +432,7 @@ txt = <<TXT
432
432
  12345678123456781234567890123456789012345678901212345678901234
433
433
  TXT
434
434
 
435
- Csv.fixed.parse( txt, width: [8,8,32,14] )
435
+ Csv.fixed.parse( txt, width: [8,8,32,14] ) # or Csv.fix or Csv.f
436
436
  # => [["12345678","12345678", "12345678901234567890123456789012", "12345678901234"]]
437
437
 
438
438
 
@@ -441,15 +441,68 @@ John Smith john@example.com 1-888-555-6666
441
441
  Michele O'Reileymichele@example.com 1-333-321-8765
442
442
  TXT
443
443
 
444
- Csv.fixed.parse( txt, width: [8,8,32,14] )
444
+ Csv.fixed.parse( txt, width: [8,8,32,14] ) # or Csv.fix or Csv.f
445
445
  # => [["John", "Smith", "john@example.com", "1-888-555-6666"],
446
446
  # ["Michele", "O'Reiley", "michele@example.com", "1-333-321-8765"]]
447
447
 
448
448
  # and so on
449
449
  ```
450
450
 
451
+ <!--
451
452
  Note: You can use for your convenience the built-in
452
453
  `Csv.fix` or `Csv.f` aliases / shortcuts.
454
+ -->
455
+
456
+
457
+ Note: You can use negative widths (e.g. `-2`, `-3`, and so on)
458
+ to "skip" filler fields (e.g. `--`, `---`, and so on).
459
+ Example:
460
+
461
+ ``` ruby
462
+ txt = <<TXT
463
+ 12345678--12345678---12345678901234567890123456789012--12345678901234XXX
464
+ TXT
465
+
466
+ Csv.fixed.parse( txt, width: [8,-2,8,-3,32,-2,14] ) # or Csv.fix or Csv.f
467
+ # => [["12345678","12345678", "12345678901234567890123456789012", "12345678901234"]]
468
+ ```
469
+
470
+
471
+ Bonus: If the width is a string (not an array)
472
+ (e.g. `'a8 a8 a32 Z*'` or `'A8 A8 A32 Z*'` and so on)
473
+ then the fixed width field parser
474
+ will use `String#unpack` and the value of width as its format string spec.
475
+ Example:
476
+
477
+ ``` ruby
478
+ txt = <<TXT
479
+ 12345678123456781234567890123456789012345678901212345678901234
480
+ TXT
481
+
482
+ Csv.fixed.parse( txt, width: 'a8 a8 a32 Z*' ) # or Csv.fix or Csv.f
483
+ # => [["12345678","12345678", "12345678901234567890123456789012", "12345678901234"]]
484
+
485
+ txt = <<TXT
486
+ John    Smith   john@example.com                1-888-555-6666
487
+ Michele O'Reileymichele@example.com             1-333-321-8765
488
+ TXT
489
+
490
+ Csv.fixed.parse( txt, width: 'A8 A8 A32 Z*' ) # or Csv.fix or Csv.f
491
+ # => [["John", "Smith", "john@example.com", "1-888-555-6666"],
492
+ # ["Michele", "O'Reiley", "michele@example.com", "1-333-321-8765"]]
493
+
494
+ # and so on
495
+ ```
496
+
497
+ | String Directive | Returns | Meaning |
498
+ |------------------|---------|-------------------------|
499
+ | `A` | String | Arbitrary binary string (remove trailing nulls and ASCII spaces) |
500
+ | `a` | String | Arbitrary binary string |
501
+ | `Z` | String | Null-terminated string |
502
+
503
+
504
+ and many more. See the `String#unpack` documentation
505
+ for the complete format spec and directives.
453
506
 
454
507
 
455
508
 
@@ -69,15 +69,31 @@ def parse_lines( input, width:, &block )
69
69
  end
70
70
 
71
71
 
72
- values = []
73
- offset = 0 # start position / offset
74
- width.each_with_index do |w,i|
75
- logger.debug "[#{i}] start: #{offset}, width: #{w}" if logger.debug?
76
-
77
- value = line[offset, w]
78
- value = value.strip if value ## note: if not nil strip; only use rstrip (for trailing only) - why? why not?
79
- values << value
80
- offset += w
72
+ if width.is_a?( String )
73
+ ## assume it's String#unpack format e.g.
74
+ ## "209231-231992395    MoreData".unpack('aa5A1A9a4Z*')
75
+ ## returns an array as follows :
76
+ ## ["2", "09231", "-", "231992395", "    ", "MoreData"]
77
+ ## see String#unpack
78
+
79
+ values = line.unpack( width )
80
+ else ## assume array with integers
81
+ values = []
82
+ offset = 0 # start position / offset
83
+ width.each_with_index do |w,i|
84
+ logger.debug "[#{i}] start: #{offset}, width: #{w}" if logger.debug?
85
+
86
+ if w < 0 ## convention - if width negative, skip column
87
+ # note: minus (-) and minus (-) equal plus (+)
88
+ ## e.g. 2 - -2 = 4
89
+ offset -= w
90
+ else
91
+ value = line[offset, w]
92
+ value = value.strip if value ## note: if not nil strip; only use rstrip (for trailing only) - why? why not?
93
+ values << value
94
+ offset += w
95
+ end
96
+ end
81
97
  end
82
98
 
83
99
  ## note: requires block - enforce? how? why? why not?
@@ -5,7 +5,7 @@ class CsvReader ## note: uses a class for now - change to module - why? why no
5
5
 
6
6
  MAJOR = 1 ## todo: namespace inside version or something - why? why not??
7
7
  MINOR = 1
8
- PATCH = 0
8
+ PATCH = 1
9
9
  VERSION = [MAJOR,MINOR,PATCH].join('.')
10
10
 
11
11
 
@@ -0,0 +1,163 @@
1
+ ###
2
+ # The Iris flower data set or Fisher's Iris data set is a multivariate data set
3
+ # introduced by the British statistician and biologist Ronald Fisher in his 1936 paper
4
+ # The use of multiple measurements in taxonomic problems as an example of
5
+ # linear discriminant analysis.
6
+ # See https://en.wikipedia.org/wiki/Iris_flower_data_set
7
+
8
+
9
+ # The dataset contains a set of 150 records under five attributes
10
+ # - petal length, petal width, sepal length, sepal width and species.
11
+
12
+
13
+ Sepal length, Sepal width, Petal length, Petal width, Species
14
+ 5.1, 3.5, 1.4, 0.2, I. setosa
15
+ 4.9, 3.0, 1.4, 0.2, I. setosa
16
+ 4.7, 3.2, 1.3, 0.2, I. setosa
17
+ 4.6, 3.1, 1.5, 0.2, I. setosa
18
+ 5.0, 3.6, 1.4, 0.3, I. setosa
19
+ 5.4, 3.9, 1.7, 0.4, I. setosa
20
+ 4.6, 3.4, 1.4, 0.3, I. setosa
21
+ 5.0, 3.4, 1.5, 0.2, I. setosa
22
+ 4.4, 2.9, 1.4, 0.2, I. setosa
23
+ 4.9, 3.1, 1.5, 0.1, I. setosa
24
+ 5.4, 3.7, 1.5, 0.2, I. setosa
25
+ 4.8, 3.4, 1.6, 0.2, I. setosa
26
+ 4.8, 3.0, 1.4, 0.1, I. setosa
27
+ 4.3, 3.0, 1.1, 0.1, I. setosa
28
+ 5.8, 4.0, 1.2, 0.2, I. setosa
29
+ 5.7, 4.4, 1.5, 0.4, I. setosa
30
+ 5.4, 3.9, 1.3, 0.4, I. setosa
31
+ 5.1, 3.5, 1.4, 0.3, I. setosa
32
+ 5.7, 3.8, 1.7, 0.3, I. setosa
33
+ 5.1, 3.8, 1.5, 0.3, I. setosa
34
+ 5.4, 3.4, 1.7, 0.2, I. setosa
35
+ 5.1, 3.7, 1.5, 0.4, I. setosa
36
+ 4.6, 3.6, 1.0, 0.2, I. setosa
37
+ 5.1, 3.3, 1.7, 0.5, I. setosa
38
+ 4.8, 3.4, 1.9, 0.2, I. setosa
39
+ 5.0, 3.0, 1.6, 0.2, I. setosa
40
+ 5.0, 3.4, 1.6, 0.4, I. setosa
41
+ 5.2, 3.5, 1.5, 0.2, I. setosa
42
+ 5.2, 3.4, 1.4, 0.2, I. setosa
43
+ 4.7, 3.2, 1.6, 0.2, I. setosa
44
+ 4.8, 3.1, 1.6, 0.2, I. setosa
45
+ 5.4, 3.4, 1.5, 0.4, I. setosa
46
+ 5.2, 4.1, 1.5, 0.1, I. setosa
47
+ 5.5, 4.2, 1.4, 0.2, I. setosa
48
+ 4.9, 3.1, 1.5, 0.2, I. setosa
49
+ 5.0, 3.2, 1.2, 0.2, I. setosa
50
+ 5.5, 3.5, 1.3, 0.2, I. setosa
51
+ 4.9, 3.6, 1.4, 0.1, I. setosa
52
+ 4.4, 3.0, 1.3, 0.2, I. setosa
53
+ 5.1, 3.4, 1.5, 0.2, I. setosa
54
+ 5.0, 3.5, 1.3, 0.3, I. setosa
55
+ 4.5, 2.3, 1.3, 0.3, I. setosa
56
+ 4.4, 3.2, 1.3, 0.2, I. setosa
57
+ 5.0, 3.5, 1.6, 0.6, I. setosa
58
+ 5.1, 3.8, 1.9, 0.4, I. setosa
59
+ 4.8, 3.0, 1.4, 0.3, I. setosa
60
+ 5.1, 3.8, 1.6, 0.2, I. setosa
61
+ 4.6, 3.2, 1.4, 0.2, I. setosa
62
+ 5.3, 3.7, 1.5, 0.2, I. setosa
63
+ 5.0, 3.3, 1.4, 0.2, I. setosa
64
+ 7.0, 3.2, 4.7, 1.4, I. versicolor
65
+ 6.4, 3.2, 4.5, 1.5, I. versicolor
66
+ 6.9, 3.1, 4.9, 1.5, I. versicolor
67
+ 5.5, 2.3, 4.0, 1.3, I. versicolor
68
+ 6.5, 2.8, 4.6, 1.5, I. versicolor
69
+ 5.7, 2.8, 4.5, 1.3, I. versicolor
70
+ 6.3, 3.3, 4.7, 1.6, I. versicolor
71
+ 4.9, 2.4, 3.3, 1.0, I. versicolor
72
+ 6.6, 2.9, 4.6, 1.3, I. versicolor
73
+ 5.2, 2.7, 3.9, 1.4, I. versicolor
74
+ 5.0, 2.0, 3.5, 1.0, I. versicolor
75
+ 5.9, 3.0, 4.2, 1.5, I. versicolor
76
+ 6.0, 2.2, 4.0, 1.0, I. versicolor
77
+ 6.1, 2.9, 4.7, 1.4, I. versicolor
78
+ 5.6, 2.9, 3.6, 1.3, I. versicolor
79
+ 6.7, 3.1, 4.4, 1.4, I. versicolor
80
+ 5.6, 3.0, 4.5, 1.5, I. versicolor
81
+ 5.8, 2.7, 4.1, 1.0, I. versicolor
82
+ 6.2, 2.2, 4.5, 1.5, I. versicolor
83
+ 5.6, 2.5, 3.9, 1.1, I. versicolor
84
+ 5.9, 3.2, 4.8, 1.8, I. versicolor
85
+ 6.1, 2.8, 4.0, 1.3, I. versicolor
86
+ 6.3, 2.5, 4.9, 1.5, I. versicolor
87
+ 6.1, 2.8, 4.7, 1.2, I. versicolor
88
+ 6.4, 2.9, 4.3, 1.3, I. versicolor
89
+ 6.6, 3.0, 4.4, 1.4, I. versicolor
90
+ 6.8, 2.8, 4.8, 1.4, I. versicolor
91
+ 6.7, 3.0, 5.0, 1.7, I. versicolor
92
+ 6.0, 2.9, 4.5, 1.5, I. versicolor
93
+ 5.7, 2.6, 3.5, 1.0, I. versicolor
94
+ 5.5, 2.4, 3.8, 1.1, I. versicolor
95
+ 5.5, 2.4, 3.7, 1.0, I. versicolor
96
+ 5.8, 2.7, 3.9, 1.2, I. versicolor
97
+ 6.0, 2.7, 5.1, 1.6, I. versicolor
98
+ 5.4, 3.0, 4.5, 1.5, I. versicolor
99
+ 6.0, 3.4, 4.5, 1.6, I. versicolor
100
+ 6.7, 3.1, 4.7, 1.5, I. versicolor
101
+ 6.3, 2.3, 4.4, 1.3, I. versicolor
102
+ 5.6, 3.0, 4.1, 1.3, I. versicolor
103
+ 5.5, 2.5, 4.0, 1.3, I. versicolor
104
+ 5.5, 2.6, 4.4, 1.2, I. versicolor
105
+ 6.1, 3.0, 4.6, 1.4, I. versicolor
106
+ 5.8, 2.6, 4.0, 1.2, I. versicolor
107
+ 5.0, 2.3, 3.3, 1.0, I. versicolor
108
+ 5.6, 2.7, 4.2, 1.3, I. versicolor
109
+ 5.7, 3.0, 4.2, 1.2, I. versicolor
110
+ 5.7, 2.9, 4.2, 1.3, I. versicolor
111
+ 6.2, 2.9, 4.3, 1.3, I. versicolor
112
+ 5.1, 2.5, 3.0, 1.1, I. versicolor
113
+ 5.7, 2.8, 4.1, 1.3, I. versicolor
114
+ 6.3, 3.3, 6.0, 2.5, I. virginica
115
+ 5.8, 2.7, 5.1, 1.9, I. virginica
116
+ 7.1, 3.0, 5.9, 2.1, I. virginica
117
+ 6.3, 2.9, 5.6, 1.8, I. virginica
118
+ 6.5, 3.0, 5.8, 2.2, I. virginica
119
+ 7.6, 3.0, 6.6, 2.1, I. virginica
120
+ 4.9, 2.5, 4.5, 1.7, I. virginica
121
+ 7.3, 2.9, 6.3, 1.8, I. virginica
122
+ 6.7, 2.5, 5.8, 1.8, I. virginica
123
+ 7.2, 3.6, 6.1, 2.5, I. virginica
124
+ 6.5, 3.2, 5.1, 2.0, I. virginica
125
+ 6.4, 2.7, 5.3, 1.9, I. virginica
126
+ 6.8, 3.0, 5.5, 2.1, I. virginica
127
+ 5.7, 2.5, 5.0, 2.0, I. virginica
128
+ 5.8, 2.8, 5.1, 2.4, I. virginica
129
+ 6.4, 3.2, 5.3, 2.3, I. virginica
130
+ 6.5, 3.0, 5.5, 1.8, I. virginica
131
+ 7.7, 3.8, 6.7, 2.2, I. virginica
132
+ 7.7, 2.6, 6.9, 2.3, I. virginica
133
+ 6.0, 2.2, 5.0, 1.5, I. virginica
134
+ 6.9, 3.2, 5.7, 2.3, I. virginica
135
+ 5.6, 2.8, 4.9, 2.0, I. virginica
136
+ 7.7, 2.8, 6.7, 2.0, I. virginica
137
+ 6.3, 2.7, 4.9, 1.8, I. virginica
138
+ 6.7, 3.3, 5.7, 2.1, I. virginica
139
+ 7.2, 3.2, 6.0, 1.8, I. virginica
140
+ 6.2, 2.8, 4.8, 1.8, I. virginica
141
+ 6.1, 3.0, 4.9, 1.8, I. virginica
142
+ 6.4, 2.8, 5.6, 2.1, I. virginica
143
+ 7.2, 3.0, 5.8, 1.6, I. virginica
144
+ 7.4, 2.8, 6.1, 1.9, I. virginica
145
+ 7.9, 3.8, 6.4, 2.0, I. virginica
146
+ 6.4, 2.8, 5.6, 2.2, I. virginica
147
+ 6.3, 2.8, 5.1, 1.5, I. virginica
148
+ 6.1, 2.6, 5.6, 1.4, I. virginica
149
+ 7.7, 3.0, 6.1, 2.3, I. virginica
150
+ 6.3, 3.4, 5.6, 2.4, I. virginica
151
+ 6.4, 3.1, 5.5, 1.8, I. virginica
152
+ 6.0, 3.0, 4.8, 1.8, I. virginica
153
+ 6.9, 3.1, 5.4, 2.1, I. virginica
154
+ 6.7, 3.1, 5.6, 2.4, I. virginica
155
+ 6.9, 3.1, 5.1, 2.3, I. virginica
156
+ 5.8, 2.7, 5.1, 1.9, I. virginica
157
+ 6.8, 3.2, 5.9, 2.3, I. virginica
158
+ 6.7, 3.3, 5.7, 2.5, I. virginica
159
+ 6.7, 3.0, 5.2, 2.3, I. virginica
160
+ 6.3, 2.5, 5.0, 1.9, I. virginica
161
+ 6.5, 3.0, 5.2, 2.0, I. virginica
162
+ 6.2, 3.4, 5.4, 2.3, I. virginica
163
+ 5.9, 3.0, 5.1, 1.8, I. virginica
@@ -14,9 +14,9 @@ def parser() CsvReader::Parser::FIXED; end
14
14
  def reader() CsvReader.fixed; end
15
15
 
16
16
 
17
- def test_width
18
- width( parser )
19
- width( reader )
17
+ def test_numbers
18
+ numbers( parser )
19
+ numbers( reader )
20
20
  end
21
21
 
22
22
  def test_contacts
@@ -25,7 +25,7 @@ def test_contacts
25
25
  end
26
26
 
27
27
 
28
- def width( parser )
28
+ def numbers( parser )
29
29
  records = [["12345678","12345678", "12345678901234567890123456789012", "12345678901234"]]
30
30
 
31
31
  assert_equal records, parser.parse( <<TXT, width: [8,8,32,14] )
@@ -37,6 +37,11 @@ TXT
37
37
 
38
38
  assert_equal records, parser.parse( <<TXT, width: [8,8,32,14] )
39
39
  12345678123456781234567890123456789012345678901212345678901234
40
+ TXT
41
+
42
+ ## note: negative width fields get skipped
43
+ assert_equal records, parser.parse( <<TXT, width: [8,-2,8,-3,32,-2,14] )
44
+ 12345678XX12345678XXX12345678901234567890123456789012XX12345678901234XXX
40
45
  TXT
41
46
  end
42
47
 
@@ -61,20 +66,20 @@ TXT
61
66
  end
62
67
 
63
68
 
64
- def test_contacts
65
- records = [["John", "Smith", "john@example.com", "1-888-555-6666"],
66
- ["Michele", "O'Reiley", "michele@example.com", "1-333-321-8765"]]
67
-
68
- assert_equal records, parser.parse( <<TXT, width: [8,8,32,14] )
69
- # fixed width with comments and blank lines
70
69
 
71
- John Smith john@example.com 1-888-555-6666
72
- Michele O'Reileymichele@example.com 1-333-321-8765
70
+ def test_unpack_numbers
71
+ records = [["12345678","12345678", "12345678901234567890123456789012", "12345678901234"]]
73
72
 
73
+ assert_equal records, parser.parse( <<TXT, width: 'a8 a8 a32 Z*' )
74
+ 12345678123456781234567890123456789012345678901212345678901234
74
75
  TXT
76
+ end
75
77
 
78
+ def test_unpack_contacts
79
+ records = [["John", "Smith", "john@example.com", "1-888-555-6666"],
80
+ ["Michele", "O'Reiley", "michele@example.com", "1-333-321-8765"]]
76
81
 
77
- assert_equal records, parser.parse( <<TXT, width: [8,8,32,14] )
82
+ assert_equal records, parser.parse( <<TXT, width: 'A8 A8 A32 Z*' )
78
83
  John Smith john@example.com 1-888-555-6666
79
84
  Michele O'Reileymichele@example.com 1-333-321-8765
80
85
  TXT
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: csvreader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gerald Bauer
@@ -73,6 +73,7 @@ files:
73
73
  - test/data/cars11.csv
74
74
  - test/data/cities11.csv
75
75
  - test/data/customers11.csv
76
+ - test/data/iris11.csv
76
77
  - test/data/shakespeare.csv
77
78
  - test/helper.rb
78
79
  - test/test_buffer.rb