masticate 0.4.0 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,4 @@
1
1
  # convert date columns to numerics
2
- require "csv"
3
2
 
4
3
  class Masticate::Datify < Masticate::Base
5
4
  def configure(opts)
@@ -20,8 +19,7 @@ class Masticate::Datify < Masticate::Base
20
19
  @index = row.index(@field) or raise "Unable to find column '#{@field}'"
21
20
  end
22
21
  elsif row
23
- ts = DateTime.strptime(row[@index], @format).to_time
24
- row[@index] = ts.to_i rescue nil
22
+ row[@index] = DateTime.strptime(row[@index], @format).to_time.to_i rescue nil
25
23
  end
26
24
  row
27
25
  end
@@ -10,6 +10,7 @@ class Masticate::Mender < Masticate::Base
10
10
  @inlined = opts[:inlined]
11
11
  @snip = opts[:snip]
12
12
  @dejunk = opts[:dejunk]
13
+ @buried = opts[:buried]
13
14
 
14
15
  @expected_field_count = nil
15
16
  @holding = ''
@@ -47,6 +48,15 @@ class Masticate::Mender < Masticate::Base
47
48
  else
48
49
  raise "Do not understand snip instruction [#{@snip.inspect}]"
49
50
  end
51
+
52
+ if @buried
53
+ if @buried.is_a?(Fixnum) || @buried =~ /^\d+/
54
+ @buried_index = @buried.to_i
55
+ else
56
+ @buried_index = row.index(@buried) or raise "Unable to find column '#{@buried}'"
57
+ end
58
+ end
59
+
50
60
  @expected_field_count = @headers.count
51
61
  row = @headers
52
62
  elsif row
@@ -65,6 +75,12 @@ class Masticate::Mender < Masticate::Base
65
75
  @holding = ''
66
76
  end
67
77
 
78
+ if @buried && (row.count > @expected_field_count)
79
+ # buried delimiter
80
+ # take the N+1th field and merge it onto the Nth field, moving up the remaining fields
81
+ row[@buried_index] += row.delete_at(@buried_index + 1)
82
+ end
83
+
68
84
  if @dejunk && row && row.select {|s| s && !s.strip.empty?}.count <= 2
69
85
  # junky row, suppress output
70
86
  nil
@@ -1,3 +1,3 @@
1
1
  module Masticate
2
- VERSION = "0.4.0"
2
+ VERSION = "0.4.1"
3
3
  end
data/masticate.gemspec CHANGED
@@ -16,8 +16,8 @@ Gem::Specification.new do |gem|
16
16
  gem.require_paths = ["lib"]
17
17
  gem.version = Masticate::VERSION
18
18
 
19
- gem.add_development_dependency "rake", "~> 0.9.2"
20
- gem.add_development_dependency "rspec", "~> 2.9.0"
21
- gem.add_development_dependency "guard-rspec", "~> 0.7.0"
22
- gem.add_development_dependency "ruby_gntp", "~> 0.3.4"
19
+ gem.add_development_dependency "rake"
20
+ gem.add_development_dependency "rspec"
21
+ gem.add_development_dependency "guard-rspec"
22
+ gem.add_development_dependency "ruby_gntp"
23
23
  end
@@ -0,0 +1,4 @@
1
+ LastName|FirstName|Comment|ID|Date
2
+ Washington|George|text without embedded pipes|111|3/13/2012
3
+ Adams|John|text with|embedded pipe|222|4/14/2012
4
+ Jefferson|Thomas|text with no embedded pipe|333|5/15/2012
@@ -0,0 +1,4 @@
1
+ LastName,FirstName,Comment,ID,Date
2
+ Washington,George,text without embedded pipes,111,3/13/2012
3
+ Adams,John,text withembedded pipe,222,4/14/2012
4
+ Jefferson,Thomas,text with no embedded pipe,333,5/15/2012
@@ -51,4 +51,16 @@ describe "mending" do
51
51
  results[:output_count].should == 4
52
52
  output.should == correct_output
53
53
  end
54
+
55
+ it "should consolidate fields with embedded delimiters" do
56
+ filename = File.dirname(__FILE__) + "/../data/buried_delims_in_input.psv"
57
+ tmp = Tempfile.new('mending')
58
+ results = Masticate.mend(filename, :col_sep => '|', :buried => 'Comment', :output => tmp)
59
+ output = File.read(tmp)
60
+ correct_output = File.read(File.dirname(__FILE__) + "/../data/delims_untangled.csv")
61
+
62
+ results[:input_count].should == 4
63
+ results[:output_count].should == 4
64
+ output.should == correct_output
65
+ end
54
66
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: masticate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.4.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,72 +9,72 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-21 00:00:00.000000000 Z
12
+ date: 2012-09-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
- - - ~>
19
+ - - ! '>='
20
20
  - !ruby/object:Gem::Version
21
- version: 0.9.2
21
+ version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
24
  version_requirements: !ruby/object:Gem::Requirement
25
25
  none: false
26
26
  requirements:
27
- - - ~>
27
+ - - ! '>='
28
28
  - !ruby/object:Gem::Version
29
- version: 0.9.2
29
+ version: '0'
30
30
  - !ruby/object:Gem::Dependency
31
31
  name: rspec
32
32
  requirement: !ruby/object:Gem::Requirement
33
33
  none: false
34
34
  requirements:
35
- - - ~>
35
+ - - ! '>='
36
36
  - !ruby/object:Gem::Version
37
- version: 2.9.0
37
+ version: '0'
38
38
  type: :development
39
39
  prerelease: false
40
40
  version_requirements: !ruby/object:Gem::Requirement
41
41
  none: false
42
42
  requirements:
43
- - - ~>
43
+ - - ! '>='
44
44
  - !ruby/object:Gem::Version
45
- version: 2.9.0
45
+ version: '0'
46
46
  - !ruby/object:Gem::Dependency
47
47
  name: guard-rspec
48
48
  requirement: !ruby/object:Gem::Requirement
49
49
  none: false
50
50
  requirements:
51
- - - ~>
51
+ - - ! '>='
52
52
  - !ruby/object:Gem::Version
53
- version: 0.7.0
53
+ version: '0'
54
54
  type: :development
55
55
  prerelease: false
56
56
  version_requirements: !ruby/object:Gem::Requirement
57
57
  none: false
58
58
  requirements:
59
- - - ~>
59
+ - - ! '>='
60
60
  - !ruby/object:Gem::Version
61
- version: 0.7.0
61
+ version: '0'
62
62
  - !ruby/object:Gem::Dependency
63
63
  name: ruby_gntp
64
64
  requirement: !ruby/object:Gem::Requirement
65
65
  none: false
66
66
  requirements:
67
- - - ~>
67
+ - - ! '>='
68
68
  - !ruby/object:Gem::Version
69
- version: 0.3.4
69
+ version: '0'
70
70
  type: :development
71
71
  prerelease: false
72
72
  version_requirements: !ruby/object:Gem::Requirement
73
73
  none: false
74
74
  requirements:
75
- - - ~>
75
+ - - ! '>='
76
76
  - !ruby/object:Gem::Version
77
- version: 0.3.4
77
+ version: '0'
78
78
  description: Data file crunching
79
79
  email:
80
80
  - jmay@pobox.com
@@ -111,6 +111,7 @@ files:
111
111
  - spec/data/badnums.csv
112
112
  - spec/data/badnums_fixed.csv
113
113
  - spec/data/broken_psv.txt
114
+ - spec/data/buried_delims_in_input.psv
114
115
  - spec/data/concat_result.txt
115
116
  - spec/data/cookery_input.psv
116
117
  - spec/data/cookery_result.csv
@@ -118,6 +119,7 @@ files:
118
119
  - spec/data/cooking_result.csv
119
120
  - spec/data/datify_input.csv
120
121
  - spec/data/datify_result.csv
122
+ - spec/data/delims_untangled.csv
121
123
  - spec/data/downcase_results.csv
122
124
  - spec/data/events.csv
123
125
  - spec/data/events_reduced.csv
@@ -166,7 +168,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
166
168
  version: '0'
167
169
  segments:
168
170
  - 0
169
- hash: -519721259904741395
171
+ hash: 2222163119454515723
170
172
  required_rubygems_version: !ruby/object:Gem::Requirement
171
173
  none: false
172
174
  requirements:
@@ -175,7 +177,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
175
177
  version: '0'
176
178
  segments:
177
179
  - 0
178
- hash: -519721259904741395
180
+ hash: 2222163119454515723
179
181
  requirements: []
180
182
  rubyforge_project: masticate
181
183
  rubygems_version: 1.8.24
@@ -186,6 +188,7 @@ test_files:
186
188
  - spec/data/badnums.csv
187
189
  - spec/data/badnums_fixed.csv
188
190
  - spec/data/broken_psv.txt
191
+ - spec/data/buried_delims_in_input.psv
189
192
  - spec/data/concat_result.txt
190
193
  - spec/data/cookery_input.psv
191
194
  - spec/data/cookery_result.csv
@@ -193,6 +196,7 @@ test_files:
193
196
  - spec/data/cooking_result.csv
194
197
  - spec/data/datify_input.csv
195
198
  - spec/data/datify_result.csv
199
+ - spec/data/delims_untangled.csv
196
200
  - spec/data/downcase_results.csv
197
201
  - spec/data/events.csv
198
202
  - spec/data/events_reduced.csv