truss_parser 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +3 -1
- data/lib/truss_parser/parser.rb +22 -18
- data/lib/truss_parser/version.rb +1 -1
- data/normalized_data.csv +9 -7
- data/normalized_test_data.csv +9 -7
- data/scrubbed-sample.csv +9 -7
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 21b5b585a5031ba1ab4e56c898cdef4ffba8b2ea
|
4
|
+
data.tar.gz: 2601596e24dcdd0ecdea1ae7516438d3234fa817
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8eb6dc0edc8f3bd84b1e4e4dbc1cd26a2c7bc3de3666a5ae6872837e76c49b6e9ba9370bb566b59c4452c259df5aa5819316d0cf6ede03c5f286dbcc73530405
|
7
|
+
data.tar.gz: 986c117aaa103820b36800985c51250919d6d2a15ed0006eb6fe4a00700718dda236031d43373e3e8d063c3d8ce04d2ddb1a975d987d82c568d8afb41c3a4554
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -12,7 +12,7 @@ Assuming you have an environment set up for the Ruby ecosystem:
|
|
12
12
|
## Usage
|
13
13
|
|
14
14
|
- After installing the gem with `gem install truss_parser`:
|
15
|
-
- Run `truss_parser sample.csv` to parse and normalize the `sample.csv` that is shipped along in this gem. Alternatively, you can also run `truss_parser sample-with-broken-utf8.csv` as well. The
|
15
|
+
- Run `truss_parser sample.csv` to parse and normalize the `sample.csv` that ships with this gem. Alternatively, you can run `truss_parser sample-with-broken-utf8.csv`. The TrussParser gem takes a CSV file as an argument and writes normalized CSV data to `normalized_data.csv`. The `scrubbed-sample.csv` file is an intermediate state: the CSV has been cleaned of broken Unicode, but no data manipulation or transformation has been applied yet. The `normalized_data.csv` file is _both_ free of broken Unicode and has had its data manipulated and transformed according to the specifications in `challenge.md`.
|
16
16
|
- Example:
|
17
17
|
|
18
18
|

|
@@ -20,6 +20,8 @@ Assuming you have an environment set up for the Ruby ecosystem:
|
|
20
20
|
## Testing
|
21
21
|
|
22
22
|
- Run `rake spec` to run the RSpec tests.
|
23
|
+
- To run one spec in particular, run `bundle exec rspec spec/truss_parser/parser_spec.rb`
|
24
|
+
- `parser_spec.rb` outputs its normalized CSV to `normalized_test_data.csv`. This is done in an attempt to keep the test environment separate and avoid polluting the development environment.
|
23
25
|
- You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
24
26
|
|
25
27
|
|
data/lib/truss_parser/parser.rb
CHANGED
@@ -35,13 +35,13 @@ module Parser
|
|
35
35
|
# generate a new CSV without broken unicode
|
36
36
|
generate_scrubbed_csv(cleaned_arrays)
|
37
37
|
|
38
|
-
table = CSV.table("
|
38
|
+
table = CSV.table("#{scrubbed_csv}")
|
39
39
|
|
40
40
|
# drop rows with unparseable DateTimes
|
41
41
|
drop_unparseable_time(table)
|
42
42
|
|
43
|
-
|
44
|
-
generate_scrubbed_csv(
|
43
|
+
table.to_a.reject! { |row| row.blank? }
|
44
|
+
generate_scrubbed_csv(table.to_a)
|
45
45
|
end
|
46
46
|
|
47
47
|
def normalize
|
@@ -51,15 +51,15 @@ module Parser
|
|
51
51
|
|
52
52
|
# for any zip code with fewer than 5 digits, prepend 0's until it is 5 digits long
|
53
53
|
validate_zipcode(row['zip'])
|
54
|
-
|
55
|
-
|
54
|
+
# uppercase all names
|
55
|
+
upcase_fullname(row['fullname'])
|
56
56
|
|
57
|
-
|
58
|
-
|
59
|
-
|
57
|
+
# pass address column as is, validate everything is valid unicode
|
58
|
+
# else, replace with Unicode Replacement Character
|
59
|
+
validate_address(row['address'])
|
60
60
|
|
61
|
-
|
62
|
-
|
61
|
+
foo_duration_seconds = calculate_duration(row, 'fooduration')
|
62
|
+
bar_duration_seconds = calculate_duration(row, 'barduration')
|
63
63
|
|
64
64
|
calculate_total_duration(row, foo_duration_seconds, bar_duration_seconds)
|
65
65
|
|
@@ -77,14 +77,14 @@ module Parser
|
|
77
77
|
end
|
78
78
|
|
79
79
|
def generate_scrubbed_csv(arrays)
|
80
|
-
CSV.open("
|
80
|
+
CSV.open("#{scrubbed_csv}", "w+") do |csv|
|
81
81
|
arrays.map { |ary| csv << ary }
|
82
82
|
end
|
83
83
|
end
|
84
84
|
|
85
85
|
def validate_args
|
86
|
-
if csv_file.split.length != 1
|
87
|
-
STDERR.puts "Warning:
|
86
|
+
if csv_file.split.length != 1 || scrubbed_csv.split.length != 1 || normalized_data.split.length != 1
|
87
|
+
STDERR.puts "Warning: You did not input parameters correctly. Please try again with one command-line argument."
|
88
88
|
exit
|
89
89
|
end
|
90
90
|
end
|
@@ -99,6 +99,7 @@ module Parser
|
|
99
99
|
end
|
100
100
|
|
101
101
|
def upcase_fullname(fullname)
|
102
|
+
fullname = '' if fullname.nil?
|
102
103
|
fullname.upcase!
|
103
104
|
end
|
104
105
|
|
@@ -128,16 +129,19 @@ module Parser
|
|
128
129
|
row['totalduration'] = foo_duration_seconds + bar_duration_seconds
|
129
130
|
end
|
130
131
|
|
131
|
-
def calculate_duration(row)
|
132
|
-
duration_seconds =
|
133
|
-
|
132
|
+
def calculate_duration(row, key)
|
133
|
+
duration_seconds = row[key].split(':')
|
134
|
+
.map { |num| num.to_i }
|
135
|
+
.reduce(0) { |num1, num2| num1 * 60 + num2 }
|
136
|
+
|
137
|
+
row[key] = duration_seconds
|
134
138
|
end
|
135
139
|
|
136
140
|
def drop_unparseable_time(table)
|
137
141
|
table.each_with_index do |row, i|
|
138
142
|
begin
|
139
|
-
calculate_duration(row
|
140
|
-
calculate_duration(row
|
143
|
+
calculate_duration(row, :fooduration)
|
144
|
+
calculate_duration(row, :barduration)
|
141
145
|
format_timestamp(row[:timestamp])
|
142
146
|
rescue ArgumentError => e
|
143
147
|
STDERR.puts "Warning: Row #{i} will be deleted due to an unparseable Time.
|
data/lib/truss_parser/version.rb
CHANGED
data/normalized_data.csv
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
-
2011-04-01T07:00:00-04:00,"123 4th St, Anywhere, AA",94121,MONKEY ALBERTO,
|
2
|
-
|
3
|
-
2016-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
1
|
+
2011-04-01T07:00:00-04:00,"123 4th St, Anywhere, AA",94121,MONKEY ALBERTO,5012,5553,10565,I am the very model of a modern major general
|
2
|
+
2014-03-11T20:00:00-04:00,"Somewhere Else, In Another Time, BB",00001,SUPERMAN ÜBERTAN,401012,5553,406565,This is some Unicode right h�xxx ü ¡! 😀
|
3
|
+
2016-02-29T07:11:11-05:00,111 Ste. #123123123,01101,RÉSUMÉ RON,113012,5553,118565,🏳️🏴🏳️🏴
|
4
|
+
2010-12-31T19:00:01-05:00,"This Is Not An Address, BusyTown, BT",94121,MARY 1,5012,0,5012,I like Emoji! 🍏🍎😍
|
5
|
+
2016-12-31T18:59:59-05:00,"123 Gangnam Style Lives Here, Gangnam Town",31403,ANTICIPATION OF UNICODE FAILURE,5012,5553,10565,I like Math Symbols! ≱≰⨌⊚
|
6
|
+
2011-11-11T06:11:11-05:00,überTown,10001,PROMPT NEGOTIATOR,5012,5553,10565,"I’m just gonna say, this is AMAZING. WHAT NEGOTIATIONS."
|
7
|
+
2010-05-12T12:48:12-04:00,Høøük¡,01231,SLEEPER SERVICE,5012,5553,10565,2/1/22
|
8
|
+
2012-10-05T18:31:11-04:00,"Test Pattern Town, Test Pattern, TP",00121,株式会社スタジオジブリ,5012,5553,10565,1:11:11.123
|
9
|
+
2004-10-02T04:44:11-04:00,The Moon,00011,HERE WE GO,5012,5553,10565,
|
data/normalized_test_data.csv
CHANGED
@@ -1,7 +1,9 @@
|
|
1
|
-
2011-04-01T07:00:00-04:00,"123 4th St, Anywhere, AA",94121,MONKEY ALBERTO,
|
2
|
-
|
3
|
-
2016-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
1
|
+
2011-04-01T07:00:00-04:00,"123 4th St, Anywhere, AA",94121,MONKEY ALBERTO,5012,5553,10565,I am the very model of a modern major general
|
2
|
+
2014-03-11T20:00:00-04:00,"Somewhere Else, In Another Time, BB",00001,SUPERMAN ÜBERTAN,401012,5553,406565,This is some Unicode right h�xxx ü ¡! 😀
|
3
|
+
2016-02-29T07:11:11-05:00,111 Ste. #123123123,01101,RÉSUMÉ RON,113012,5553,118565,🏳️🏴🏳️🏴
|
4
|
+
2010-12-31T19:00:01-05:00,"This Is Not An Address, BusyTown, BT",94121,MARY 1,5012,0,5012,I like Emoji! 🍏🍎😍
|
5
|
+
2016-12-31T18:59:59-05:00,"123 Gangnam Style Lives Here, Gangnam Town",31403,ANTICIPATION OF UNICODE FAILURE,5012,5553,10565,I like Math Symbols! ≱≰⨌⊚
|
6
|
+
2011-11-11T06:11:11-05:00,überTown,10001,PROMPT NEGOTIATOR,5012,5553,10565,"I’m just gonna say, this is AMAZING. WHAT NEGOTIATIONS."
|
7
|
+
2010-05-12T12:48:12-04:00,Høøük¡,01231,SLEEPER SERVICE,5012,5553,10565,2/1/22
|
8
|
+
2012-10-05T18:31:11-04:00,"Test Pattern Town, Test Pattern, TP",00121,株式会社スタジオジブリ,5012,5553,10565,1:11:11.123
|
9
|
+
2004-10-02T04:44:11-04:00,The Moon,00011,HERE WE GO,5012,5553,10565,
|
data/scrubbed-sample.csv
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
timestamp,address,zip,fullname,fooduration,barduration,totalduration,notes
|
2
|
-
4/1/11 11:00:00 AM,"123 4th St, Anywhere, AA",94121,Monkey Alberto,
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
2
|
+
4/1/11 11:00:00 AM,"123 4th St, Anywhere, AA",94121,Monkey Alberto,5012,5553,zzsasdfa,I am the very model of a modern major general
|
3
|
+
3/12/14 12:00:00 AM,"Somewhere Else, In Another Time, BB",1,Superman übertan,401012,5553,zzsasdfa,This is some Unicode right h�xxx ü ¡! 😀
|
4
|
+
2/29/16 12:11:11 PM,111 Ste. #123123123,1101,Résumé Ron,113012,5553,zzsasdfa,🏳️🏴🏳️🏴
|
5
|
+
1/1/11 12:00:01 AM,"This Is Not An Address, BusyTown, BT",94121,Mary 1,5012,0,zzsasdfa,I like Emoji! 🍏🍎😍
|
6
|
+
12/31/16 11:59:59 PM,"123 Gangnam Style Lives Here, Gangnam Town",31403,Anticipation of Unicode Failure,5012,5553,zzsasdfa,I like Math Symbols! ≱≰⨌⊚
|
7
|
+
11/11/11 11:11:11 AM,überTown,10001,Prompt Negotiator,5012,5553,zzsasdfa,"I’m just gonna say, this is AMAZING. WHAT NEGOTIATIONS."
|
8
|
+
5/12/10 4:48:12 PM,Høøük¡,1231,Sleeper Service,5012,5553,zzsasdfa,2/1/22
|
9
|
+
10/5/12 10:31:11 PM,"Test Pattern Town, Test Pattern, TP",121,株式会社スタジオジブリ,5012,5553,zzsasdfa,1:11:11.123
|
10
|
+
10/2/04 8:44:11 AM,The Moon,11,HERE WE GO,5012,5553,zzsasdfa,
|