truss_parser 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +3 -1
- data/lib/truss_parser/parser.rb +22 -18
- data/lib/truss_parser/version.rb +1 -1
- data/normalized_data.csv +9 -7
- data/normalized_test_data.csv +9 -7
- data/scrubbed-sample.csv +9 -7
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 21b5b585a5031ba1ab4e56c898cdef4ffba8b2ea
|
4
|
+
data.tar.gz: 2601596e24dcdd0ecdea1ae7516438d3234fa817
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8eb6dc0edc8f3bd84b1e4e4dbc1cd26a2c7bc3de3666a5ae6872837e76c49b6e9ba9370bb566b59c4452c259df5aa5819316d0cf6ede03c5f286dbcc73530405
|
7
|
+
data.tar.gz: 986c117aaa103820b36800985c51250919d6d2a15ed0006eb6fe4a00700718dda236031d43373e3e8d063c3d8ce04d2ddb1a975d987d82c568d8afb41c3a4554
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -12,7 +12,7 @@ Assuming you have an environment set up for the Ruby ecosystem:
|
|
12
12
|
## Usage
|
13
13
|
|
14
14
|
- After `gem install`ing the `truss_parser` gem:
|
15
|
-
- Run `truss_parser sample.csv` to parse and normalize the `sample.csv` that is shipped along in this gem. Alternatively, you can also run `truss_parser sample-with-broken-utf8.csv` as well. The
|
15
|
+
- Run `truss_parser sample.csv` to parse and normalize the `sample.csv` that is shipped along in this gem. Alternatively, you can run `truss_parser sample-with-broken-utf8.csv`. The TrussParser gem takes in a CSV file as an argument, and outputs normalized CSV data in `normalized_data.csv`. The `scrubbed-sample.csv` file is in an in-between state: the CSV is cleaned of broken Unicode, but has not had any data manipulation or transformations yet. The `normalized_data.csv` file is _both_ free of broken Unicode and has had its data manipulated and transformed according to the specifications in `challenge.md`.
|
16
16
|
- Example:
|
17
17
|
|
18
18
|
![Example of gem usage](truss_parser.gif)
|
@@ -20,6 +20,8 @@ Assuming you have an environment set up for the Ruby ecosystem:
|
|
20
20
|
## Testing
|
21
21
|
|
22
22
|
- Run `rake spec` to run the RSpec tests.
|
23
|
+
- To run one spec in particular, run `bundle exec rspec spec/truss_parser/parser_spec.rb`
|
24
|
+
- `parser_spec.rb` outputs its normalized CSV to `normalized_test_data.csv`. This is done in an attempt to keep the test environment separate so that it does not pollute the development environment.
|
23
25
|
- You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
24
26
|
|
25
27
|
|
data/lib/truss_parser/parser.rb
CHANGED
@@ -35,13 +35,13 @@ module Parser
|
|
35
35
|
# generate a new CSV without broken unicode
|
36
36
|
generate_scrubbed_csv(cleaned_arrays)
|
37
37
|
|
38
|
-
table = CSV.table("
|
38
|
+
table = CSV.table("#{scrubbed_csv}")
|
39
39
|
|
40
40
|
# drop rows with unparseable DateTimes
|
41
41
|
drop_unparseable_time(table)
|
42
42
|
|
43
|
-
|
44
|
-
generate_scrubbed_csv(
|
43
|
+
table.to_a.reject! { |row| row.blank? }
|
44
|
+
generate_scrubbed_csv(table.to_a)
|
45
45
|
end
|
46
46
|
|
47
47
|
def normalize
|
@@ -51,15 +51,15 @@ module Parser
|
|
51
51
|
|
52
52
|
# any zip codes with less than 5 digits, prepend 0's to them until they are 5 digits long
|
53
53
|
validate_zipcode(row['zip'])
|
54
|
-
|
55
|
-
|
54
|
+
# uppercase all names
|
55
|
+
upcase_fullname(row['fullname'])
|
56
56
|
|
57
|
-
|
58
|
-
|
59
|
-
|
57
|
+
# pass address column as is, validate everything is valid unicode
|
58
|
+
# else, replace with Unicode Replacement Character
|
59
|
+
validate_address(row['address'])
|
60
60
|
|
61
|
-
|
62
|
-
|
61
|
+
foo_duration_seconds = calculate_duration(row, 'fooduration')
|
62
|
+
bar_duration_seconds = calculate_duration(row, 'barduration')
|
63
63
|
|
64
64
|
calculate_total_duration(row, foo_duration_seconds, bar_duration_seconds)
|
65
65
|
|
@@ -77,14 +77,14 @@ module Parser
|
|
77
77
|
end
|
78
78
|
|
79
79
|
def generate_scrubbed_csv(arrays)
|
80
|
-
CSV.open("
|
80
|
+
CSV.open("#{scrubbed_csv}", "w+") do |csv|
|
81
81
|
arrays.map { |ary| csv << ary }
|
82
82
|
end
|
83
83
|
end
|
84
84
|
|
85
85
|
def validate_args
|
86
|
-
if csv_file.split.length != 1
|
87
|
-
STDERR.puts "Warning:
|
86
|
+
if csv_file.split.length != 1 || scrubbed_csv.split.length != 1 || normalized_data.split.length != 1
|
87
|
+
STDERR.puts "Warning: You did not input parameters correctly. Please try again with one command-line argument."
|
88
88
|
exit
|
89
89
|
end
|
90
90
|
end
|
@@ -99,6 +99,7 @@ module Parser
|
|
99
99
|
end
|
100
100
|
|
101
101
|
def upcase_fullname(fullname)
|
102
|
+
fullname = '' if fullname.nil?
|
102
103
|
fullname.upcase!
|
103
104
|
end
|
104
105
|
|
@@ -128,16 +129,19 @@ module Parser
|
|
128
129
|
row['totalduration'] = foo_duration_seconds + bar_duration_seconds
|
129
130
|
end
|
130
131
|
|
131
|
-
def calculate_duration(row)
|
132
|
-
duration_seconds =
|
133
|
-
|
132
|
+
def calculate_duration(row, key)
|
133
|
+
duration_seconds = row[key].split(':')
|
134
|
+
.map { |num| num.to_i }
|
135
|
+
.reduce(0) { |num1, num2| num1 * 60 + num2 }
|
136
|
+
|
137
|
+
row[key] = duration_seconds
|
134
138
|
end
|
135
139
|
|
136
140
|
def drop_unparseable_time(table)
|
137
141
|
table.each_with_index do |row, i|
|
138
142
|
begin
|
139
|
-
calculate_duration(row
|
140
|
-
calculate_duration(row
|
143
|
+
calculate_duration(row, :fooduration)
|
144
|
+
calculate_duration(row, :barduration)
|
141
145
|
format_timestamp(row[:timestamp])
|
142
146
|
rescue ArgumentError => e
|
143
147
|
STDERR.puts "Warning: Row #{i} will be deleted due to an unparseable Time.
|
data/lib/truss_parser/version.rb
CHANGED
data/normalized_data.csv
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
-
2011-04-01T07:00:00-04:00,"123 4th St, Anywhere, AA",94121,MONKEY ALBERTO,
|
2
|
-
|
3
|
-
2016-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
1
|
+
2011-04-01T07:00:00-04:00,"123 4th St, Anywhere, AA",94121,MONKEY ALBERTO,5012,5553,10565,I am the very model of a modern major general
|
2
|
+
2014-03-11T20:00:00-04:00,"Somewhere Else, In Another Time, BB",00001,SUPERMAN ÜBERTAN,401012,5553,406565,This is some Unicode right h�xxx ü ¡! 😀
|
3
|
+
2016-02-29T07:11:11-05:00,111 Ste. #123123123,01101,RÉSUMÉ RON,113012,5553,118565,🏳️🏴🏳️🏴
|
4
|
+
2010-12-31T19:00:01-05:00,"This Is Not An Address, BusyTown, BT",94121,MARY 1,5012,0,5012,I like Emoji! 🍏🍎😍
|
5
|
+
2016-12-31T18:59:59-05:00,"123 Gangnam Style Lives Here, Gangnam Town",31403,ANTICIPATION OF UNICODE FAILURE,5012,5553,10565,I like Math Symbols! ≱≰⨌⊚
|
6
|
+
2011-11-11T06:11:11-05:00,überTown,10001,PROMPT NEGOTIATOR,5012,5553,10565,"I’m just gonna say, this is AMAZING. WHAT NEGOTIATIONS."
|
7
|
+
2010-05-12T12:48:12-04:00,Høøük¡,01231,SLEEPER SERVICE,5012,5553,10565,2/1/22
|
8
|
+
2012-10-05T18:31:11-04:00,"Test Pattern Town, Test Pattern, TP",00121,株式会社スタジオジブリ,5012,5553,10565,1:11:11.123
|
9
|
+
2004-10-02T04:44:11-04:00,The Moon,00011,HERE WE GO,5012,5553,10565,
|
data/normalized_test_data.csv
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
-
2011-04-01T07:00:00-04:00,"123 4th St, Anywhere, AA",94121,MONKEY ALBERTO,
|
2
|
-
|
3
|
-
2016-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
1
|
+
2011-04-01T07:00:00-04:00,"123 4th St, Anywhere, AA",94121,MONKEY ALBERTO,5012,5553,10565,I am the very model of a modern major general
|
2
|
+
2014-03-11T20:00:00-04:00,"Somewhere Else, In Another Time, BB",00001,SUPERMAN ÜBERTAN,401012,5553,406565,This is some Unicode right h�xxx ü ¡! 😀
|
3
|
+
2016-02-29T07:11:11-05:00,111 Ste. #123123123,01101,RÉSUMÉ RON,113012,5553,118565,🏳️🏴🏳️🏴
|
4
|
+
2010-12-31T19:00:01-05:00,"This Is Not An Address, BusyTown, BT",94121,MARY 1,5012,0,5012,I like Emoji! 🍏🍎😍
|
5
|
+
2016-12-31T18:59:59-05:00,"123 Gangnam Style Lives Here, Gangnam Town",31403,ANTICIPATION OF UNICODE FAILURE,5012,5553,10565,I like Math Symbols! ≱≰⨌⊚
|
6
|
+
2011-11-11T06:11:11-05:00,überTown,10001,PROMPT NEGOTIATOR,5012,5553,10565,"I’m just gonna say, this is AMAZING. WHAT NEGOTIATIONS."
|
7
|
+
2010-05-12T12:48:12-04:00,Høøük¡,01231,SLEEPER SERVICE,5012,5553,10565,2/1/22
|
8
|
+
2012-10-05T18:31:11-04:00,"Test Pattern Town, Test Pattern, TP",00121,株式会社スタジオジブリ,5012,5553,10565,1:11:11.123
|
9
|
+
2004-10-02T04:44:11-04:00,The Moon,00011,HERE WE GO,5012,5553,10565,
|
data/scrubbed-sample.csv
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
timestamp,address,zip,fullname,fooduration,barduration,totalduration,notes
|
2
|
-
4/1/11 11:00:00 AM,"123 4th St, Anywhere, AA",94121,Monkey Alberto,
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
2
|
+
4/1/11 11:00:00 AM,"123 4th St, Anywhere, AA",94121,Monkey Alberto,5012,5553,zzsasdfa,I am the very model of a modern major general
|
3
|
+
3/12/14 12:00:00 AM,"Somewhere Else, In Another Time, BB",1,Superman übertan,401012,5553,zzsasdfa,This is some Unicode right h�xxx ü ¡! 😀
|
4
|
+
2/29/16 12:11:11 PM,111 Ste. #123123123,1101,Résumé Ron,113012,5553,zzsasdfa,🏳️🏴🏳️🏴
|
5
|
+
1/1/11 12:00:01 AM,"This Is Not An Address, BusyTown, BT",94121,Mary 1,5012,0,zzsasdfa,I like Emoji! 🍏🍎😍
|
6
|
+
12/31/16 11:59:59 PM,"123 Gangnam Style Lives Here, Gangnam Town",31403,Anticipation of Unicode Failure,5012,5553,zzsasdfa,I like Math Symbols! ≱≰⨌⊚
|
7
|
+
11/11/11 11:11:11 AM,überTown,10001,Prompt Negotiator,5012,5553,zzsasdfa,"I’m just gonna say, this is AMAZING. WHAT NEGOTIATIONS."
|
8
|
+
5/12/10 4:48:12 PM,Høøük¡,1231,Sleeper Service,5012,5553,zzsasdfa,2/1/22
|
9
|
+
10/5/12 10:31:11 PM,"Test Pattern Town, Test Pattern, TP",121,株式会社スタジオジブリ,5012,5553,zzsasdfa,1:11:11.123
|
10
|
+
10/2/04 8:44:11 AM,The Moon,11,HERE WE GO,5012,5553,zzsasdfa,
|