json-csv 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +7 -0
  2. data/bin/json-csv +269 -0
  3. data/lib/json-csv.rb +269 -0
  4. metadata +46 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 149d44bb6b05763481545d60fcfbe168849a73e5
4
+ data.tar.gz: e09fbc056bba47cf68731d6ba36ed805a4363ac0
5
+ SHA512:
6
+ metadata.gz: f92db5bd62890c330e801d052f58e3571972d6c3ee3eb6a4377dca37b8f78935cc63e51eed6f11c2d12351cfcb37bfd6532bd14b38d14ab640845a02ce59ddb0
7
+ data.tar.gz: 034b6007c8ef2d7a3043d907f2e4d70c15eb70d27186ede5228ced8cf5f42efc0903a4ef1beec8865efdb8978ea205f5f076b8396ef4eaa04537da68eb5206f7
@@ -0,0 +1,269 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # json-csv
4
+ #
5
+ # Converts JSON to CSV, and vice versa.
6
+ # Run "json-csv -h" to see options.
7
+ #
8
+ # Copyright 2017 Appcues, Inc.
9
+ # https://github.com/appcues/json-csv
10
+ #
11
+ # This code is released under the MIT License, available at:
12
+ # https://opensource.org/licenses/MIT
13
+ #
14
+ require 'optparse'
15
+ require 'json'
16
+
17
# Converts newline-delimited JSON into CSV.
#
# The input is read line by line; every non-blank line must hold one
# complete JSON document. Nested objects and arrays are flattened into
# dotted column names ("a.b", "list.0"). Two passes are made over the data:
# the first collects the set of column names, the second writes the rows.
# When the input is STDIN it is spooled to a temporary file so that it can
# be read a second time.
#
# CSV-to-JSON conversion is declared but not implemented.
class JsonCsv
  VERSION = "0.5.1"
  VERSION_DATE = "2017-06-25"

  # Defaults merged underneath every options hash handed to this class.
  DEFAULT_OPTS = {
    input_file: "-",
    output_file: "-",
    source_encoding: "json",
    tmpdir: ENV['TMPDIR'] || "/tmp",
    debug: false,
    depth: -1,
    line_ending: "\r\n",
  }

  class << self
    # Builds a converter from command-line arguments.
    #
    # argv - Array of argument strings. It is consumed: recognised options
    #        are removed by the parser and up to two positional arguments
    #        (input file, output file) are shifted off.
    #
    # Returns a new JsonCsv. Exits the process for --help, --version and
    # for an invalid --line-ending value.
    def new_from_argv(argv)
      # Work on a copy: writing into DEFAULT_OPTS itself would leak one
      # invocation's flags into every JsonCsv created afterwards.
      opts = DEFAULT_OPTS.dup

      OptionParser.new do |op|
        op.banner = <<-EOT
Converts JSON to CSV, and vice versa.
Usage: #{$0} [options] [--] [input-file [output-file]]
        EOT

        op.on("-i input-file", "--input input-file", "Input file (default STDIN)") do |input_file|
          opts[:input_file] = input_file
        end

        op.on("-o output-file", "--output output-file", "Output file (default STDOUT)") do |output_file|
          opts[:output_file] = output_file
        end

        op.on("-s json|csv", "--source-encoding json|csv", "Encoding of input file (default json)") do |source|
          opts[:source_encoding] = source
        end

        op.on("-d depth", "--depth depth", "Maximum depth of JSON-to-CSV conversion (default -1, unlimited)") do |depth|
          opts[:depth] = depth.to_i
          opts[:depth] += 1 if opts[:depth] > 0 # this is a fudge to use -1 as infinity
        end

        op.on("-e crlf|cr|lf", "--line-ending crlf|cr|lf", "Line endings for output file (default crlf).") do |ending|
          opts[:line_ending] = {"crlf" => "\r\n", "cr" => "\r", "lf" => "\n"}[ending]
          unless opts[:line_ending]
            STDERR.puts "Invalid line ending '#{ending}'. Valid choices: crlf cr lf"
            exit 1
          end
        end

        op.on_tail("--debug", "Turn debugging messages on") do
          opts[:debug] = true
        end

        op.on_tail("--version", "Print version info and exit") do
          puts "json-csv version #{VERSION} (#{VERSION_DATE})"
          puts "https://github.com/appcues/json-csv"
          exit
        end

        op.on_tail("-h", "--help", "Show this message and exit") do
          puts op.to_s
          exit
        end
      end.parse!(argv)

      # Positional arguments take precedence over -i / -o.
      opts[:input_file] = argv.shift if argv.count > 0
      opts[:output_file] = argv.shift if argv.count > 0

      new(opts)
    end

    # One-shot helper: builds a converter from opts and runs JSON -> CSV.
    def convert_json_to_csv(opts)
      new(opts).convert_json_to_csv
    end

    # One-shot helper: builds a converter from opts and runs CSV -> JSON.
    # Raises NotImplementedError (see the instance method).
    def convert_csv_to_json(opts)
      new(opts).convert_csv_to_json
    end
  end

  # opts - Hash of options; any key missing falls back to DEFAULT_OPTS.
  def initialize(opts = {})
    @opts = DEFAULT_OPTS.merge(opts)
  end

  # Runs the conversion selected by opts[:source_encoding].
  #
  # opts - Hash of per-call overrides, merged over the instance options.
  #
  # Prints a message to STDERR and exits with status 1 when the encoding is
  # neither "json" nor "csv".
  def run(opts = {})
    opts = @opts.merge(opts)
    enc = opts[:source_encoding]
    case enc
    when "json"
      # Hand the merged options on so per-call overrides are honoured.
      convert_json_to_csv(opts)
    when "csv"
      convert_csv_to_json(opts)
    else
      STDERR.puts "no such source encoding '#{enc}'"
      exit 1
    end
  end

  # Converts line-delimited JSON from opts[:input_file] ("-" means STDIN)
  # into CSV written to opts[:output_file] ("-" means STDOUT).
  #
  # opts - Hash of per-call overrides, merged over the instance options.
  #
  # Returns nil. Raises JSON::ParserError for a malformed input line and
  # the usual Errno errors when a file cannot be opened.
  def convert_json_to_csv(opts = {})
    opts = @opts.merge(opts)
    tmp_filename = nil

    ## First pass -- create CSV headers from JSON input
    if opts[:input_file] == "-"
      # STDIN can only be read once, so it is copied to disk for pass two.
      # NOTE(review): the name is predictable (PID-based) and may live in a
      # shared directory; consider Tempfile if untrusted users share tmpdir.
      tmp_filename = "#{opts[:tmpdir]}/json-csv-#{$$}.tmp"
      data_filename = tmp_filename
      debug(opts, "STDIN will be written to #{tmp_filename}.")
      debug(opts, "Getting headers from JSON data.")
      headers = File.open(tmp_filename, "w") do |tmp_fh|
        get_headers_from_json(STDIN, tmp_fh, opts[:depth])
      end
    else
      data_filename = opts[:input_file]
      debug(opts, "Getting headers from JSON data.")
      headers = File.open(data_filename, "r") do |input_fh|
        get_headers_from_json(input_fh, nil, opts[:depth])
      end
    end

    ## Second pass -- write CSV data from JSON input
    debug(opts, "Writing CSV output.")
    File.open(data_filename, "r") do |data_fh|
      if opts[:output_file] == "-"
        output_csv(headers, data_fh, STDOUT, opts[:line_ending], opts[:depth])
        # STDOUT belongs to the process, so flush it rather than close it.
        STDOUT.flush
      else
        File.open(opts[:output_file], "w") do |output_fh|
          output_csv(headers, data_fh, output_fh, opts[:line_ending], opts[:depth])
        end
      end
    end
    nil
  ensure
    # Remove the spool file even when a pass failed part-way through.
    if tmp_filename && File.exist?(tmp_filename)
      debug(opts, "Removing #{tmp_filename}.")
      File.unlink(tmp_filename)
    end
  end

  # Not implemented yet; always raises NotImplementedError.
  def convert_csv_to_json(opts = {})
    raise NotImplementedError
  end


  private

  # Writes a timestamped message to STDERR when opts[:debug] is set.
  def debug(opts, msg)
    STDERR.puts("#{Time.now}\t#{msg}") if opts[:debug]
  end

  # Scans every JSON line of input_fh and collects the flattened key names.
  #
  # input_fh - IO to read JSON lines from.
  # tmp_fh   - IO that receives a copy of every line read, or nil.
  # depth    - flattening depth passed on to flatten_json.
  #
  # Returns a hash of `'header' => index` pairs, sorted.
  def get_headers_from_json(input_fh, tmp_fh, depth)
    headers = {}
    input_fh.each_line do |input|
      tmp_fh.puts(input) if tmp_fh
      # Blank lines (e.g. a trailing newline) carry no document to parse.
      next if input.strip.empty?
      flatten_json(JSON.parse(input), depth).each_key do |key|
        headers[key] = true
      end
    end
    sort_keys(headers)
  end

  # Sorts a hash with string keys by number of dots in the string,
  # then alphabetically.
  # Returns a hash of `'key' => index` pairs, in order of index.
  def sort_keys(hash)
    sorted = {}
    ordered = hash.keys.sort_by { |key| [count_dots(key), key] }
    ordered.each_with_index { |key, i| sorted[key] = i }
    sorted
  end

  # Number of "." characters in str.
  def count_dots(str)
    str.count(".")
  end

  # Flattens value one level further down and stores the outcome in dest:
  # a nested hash is merged in under "key.subkey" names, anything else is
  # stored under "key". Returns dest.
  def flat_assign(dest, key, value, depth)
    flat_value = flatten_json(value, depth - 1)
    if flat_value.is_a?(Hash)
      flat_value.each do |k, v|
        dest["#{key}.#{k}"] = v
      end
    else
      dest["#{key}"] = flat_value
    end
    dest
  end

  # Flattens parsed JSON into a single-level hash with dotted keys; array
  # elements are keyed by their index.
  #
  # depth - levels left to descend. Reaching 0 yields {}, which drops
  #         everything at that level; a negative value never reaches 0 and
  #         therefore means "unlimited".
  #
  # Returns a Hash for objects and arrays, or the value itself for scalars.
  def flatten_json(json, depth = -1)
    return {} if depth == 0

    if json.is_a?(Hash)
      flat = {}
      json.each do |key, value|
        flat_assign(flat, key, value, depth)
      end
      flat

    elsif json.is_a?(Array)
      flat = {}
      json.each_with_index do |value, i|
        flat_assign(flat, i, value, depth)
      end
      flat

    else # number or string
      json
    end
  end

  # Renders val as one CSV field: embedded double quotes are doubled, and
  # the field is wrapped in double quotes when it contains a quote, a comma
  # or a line break (CR or LF).
  def armor(val)
    str = val.to_s.gsub('"', '""')
    str =~ /[",\r\n]/ ? '"' + str + '"' : str
  end

  # Writes the header row and then one CSV row per JSON line.
  #
  # headers     - hash of `'header' => column index`, as built by sort_keys.
  # data_fh     - IO to read JSON lines from.
  # output_fh   - IO the CSV is written to.
  # line_ending - string terminating every row (default: CRLF).
  # depth       - flattening depth; should match the one used for headers.
  def output_csv(headers, data_fh, output_fh, line_ending = DEFAULT_OPTS[:line_ending], depth = -1)
    # Write header line
    output_fh.write(headers.keys.map { |name| armor(name) }.join(","))
    output_fh.write(line_ending)

    header_count = headers.count
    data_fh.each_line do |input|
      next if input.strip.empty?
      row = Array.new(header_count)
      flatten_json(JSON.parse(input), depth).each do |key, value|
        index = headers[key]
        # Keys without a column (beyond the header depth) are left out.
        row[index] = value if index
      end
      output_fh.write(row.map { |cell| armor(cell) }.join(","))
      output_fh.write(line_ending)
    end
  end
end
265
+
266
+
267
## command line mode
#
# Start the converter when this file is the program being executed.
# Comparing full paths alone is not enough for an installed executable:
# a RubyGems-generated wrapper `load`s this file, so $0 is the wrapper's
# path while __FILE__ is the path inside the gem directory. The wrapper
# keeps the executable's file name, hence the additional basename check.
JsonCsv.new_from_argv(ARGV).run if $0 == __FILE__ || File.basename($0) == File.basename(__FILE__)
269
+
@@ -0,0 +1,269 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # json-csv
4
+ #
5
+ # Converts JSON to CSV, and vice versa.
6
+ # Run "json-csv -h" to see options.
7
+ #
8
+ # Copyright 2017 Appcues, Inc.
9
+ # https://github.com/appcues/json-csv
10
+ #
11
+ # This code is released under the MIT License, available at:
12
+ # https://opensource.org/licenses/MIT
13
+ #
14
+ require 'optparse'
15
+ require 'json'
16
+
17
# Converts newline-delimited JSON into CSV.
#
# The input is read line by line; every non-blank line must hold one
# complete JSON document. Nested objects and arrays are flattened into
# dotted column names ("a.b", "list.0"). Two passes are made over the data:
# the first collects the set of column names, the second writes the rows.
# When the input is STDIN it is spooled to a temporary file so that it can
# be read a second time.
#
# CSV-to-JSON conversion is declared but not implemented.
class JsonCsv
  VERSION = "0.5.1"
  VERSION_DATE = "2017-06-25"

  # Defaults merged underneath every options hash handed to this class.
  DEFAULT_OPTS = {
    input_file: "-",
    output_file: "-",
    source_encoding: "json",
    tmpdir: ENV['TMPDIR'] || "/tmp",
    debug: false,
    depth: -1,
    line_ending: "\r\n",
  }

  class << self
    # Builds a converter from command-line arguments.
    #
    # argv - Array of argument strings. It is consumed: recognised options
    #        are removed by the parser and up to two positional arguments
    #        (input file, output file) are shifted off.
    #
    # Returns a new JsonCsv. Exits the process for --help, --version and
    # for an invalid --line-ending value.
    def new_from_argv(argv)
      # Work on a copy: writing into DEFAULT_OPTS itself would leak one
      # invocation's flags into every JsonCsv created afterwards.
      opts = DEFAULT_OPTS.dup

      OptionParser.new do |op|
        op.banner = <<-EOT
Converts JSON to CSV, and vice versa.
Usage: #{$0} [options] [--] [input-file [output-file]]
        EOT

        op.on("-i input-file", "--input input-file", "Input file (default STDIN)") do |input_file|
          opts[:input_file] = input_file
        end

        op.on("-o output-file", "--output output-file", "Output file (default STDOUT)") do |output_file|
          opts[:output_file] = output_file
        end

        op.on("-s json|csv", "--source-encoding json|csv", "Encoding of input file (default json)") do |source|
          opts[:source_encoding] = source
        end

        op.on("-d depth", "--depth depth", "Maximum depth of JSON-to-CSV conversion (default -1, unlimited)") do |depth|
          opts[:depth] = depth.to_i
          opts[:depth] += 1 if opts[:depth] > 0 # this is a fudge to use -1 as infinity
        end

        op.on("-e crlf|cr|lf", "--line-ending crlf|cr|lf", "Line endings for output file (default crlf).") do |ending|
          opts[:line_ending] = {"crlf" => "\r\n", "cr" => "\r", "lf" => "\n"}[ending]
          unless opts[:line_ending]
            STDERR.puts "Invalid line ending '#{ending}'. Valid choices: crlf cr lf"
            exit 1
          end
        end

        op.on_tail("--debug", "Turn debugging messages on") do
          opts[:debug] = true
        end

        op.on_tail("--version", "Print version info and exit") do
          puts "json-csv version #{VERSION} (#{VERSION_DATE})"
          puts "https://github.com/appcues/json-csv"
          exit
        end

        op.on_tail("-h", "--help", "Show this message and exit") do
          puts op.to_s
          exit
        end
      end.parse!(argv)

      # Positional arguments take precedence over -i / -o.
      opts[:input_file] = argv.shift if argv.count > 0
      opts[:output_file] = argv.shift if argv.count > 0

      new(opts)
    end

    # One-shot helper: builds a converter from opts and runs JSON -> CSV.
    def convert_json_to_csv(opts)
      new(opts).convert_json_to_csv
    end

    # One-shot helper: builds a converter from opts and runs CSV -> JSON.
    # Raises NotImplementedError (see the instance method).
    def convert_csv_to_json(opts)
      new(opts).convert_csv_to_json
    end
  end

  # opts - Hash of options; any key missing falls back to DEFAULT_OPTS.
  def initialize(opts = {})
    @opts = DEFAULT_OPTS.merge(opts)
  end

  # Runs the conversion selected by opts[:source_encoding].
  #
  # opts - Hash of per-call overrides, merged over the instance options.
  #
  # Prints a message to STDERR and exits with status 1 when the encoding is
  # neither "json" nor "csv".
  def run(opts = {})
    opts = @opts.merge(opts)
    enc = opts[:source_encoding]
    case enc
    when "json"
      # Hand the merged options on so per-call overrides are honoured.
      convert_json_to_csv(opts)
    when "csv"
      convert_csv_to_json(opts)
    else
      STDERR.puts "no such source encoding '#{enc}'"
      exit 1
    end
  end

  # Converts line-delimited JSON from opts[:input_file] ("-" means STDIN)
  # into CSV written to opts[:output_file] ("-" means STDOUT).
  #
  # opts - Hash of per-call overrides, merged over the instance options.
  #
  # Returns nil. Raises JSON::ParserError for a malformed input line and
  # the usual Errno errors when a file cannot be opened.
  def convert_json_to_csv(opts = {})
    opts = @opts.merge(opts)
    tmp_filename = nil

    ## First pass -- create CSV headers from JSON input
    if opts[:input_file] == "-"
      # STDIN can only be read once, so it is copied to disk for pass two.
      # NOTE(review): the name is predictable (PID-based) and may live in a
      # shared directory; consider Tempfile if untrusted users share tmpdir.
      tmp_filename = "#{opts[:tmpdir]}/json-csv-#{$$}.tmp"
      data_filename = tmp_filename
      debug(opts, "STDIN will be written to #{tmp_filename}.")
      debug(opts, "Getting headers from JSON data.")
      headers = File.open(tmp_filename, "w") do |tmp_fh|
        get_headers_from_json(STDIN, tmp_fh, opts[:depth])
      end
    else
      data_filename = opts[:input_file]
      debug(opts, "Getting headers from JSON data.")
      headers = File.open(data_filename, "r") do |input_fh|
        get_headers_from_json(input_fh, nil, opts[:depth])
      end
    end

    ## Second pass -- write CSV data from JSON input
    debug(opts, "Writing CSV output.")
    File.open(data_filename, "r") do |data_fh|
      if opts[:output_file] == "-"
        output_csv(headers, data_fh, STDOUT, opts[:line_ending], opts[:depth])
        # STDOUT belongs to the process, so flush it rather than close it.
        STDOUT.flush
      else
        File.open(opts[:output_file], "w") do |output_fh|
          output_csv(headers, data_fh, output_fh, opts[:line_ending], opts[:depth])
        end
      end
    end
    nil
  ensure
    # Remove the spool file even when a pass failed part-way through.
    if tmp_filename && File.exist?(tmp_filename)
      debug(opts, "Removing #{tmp_filename}.")
      File.unlink(tmp_filename)
    end
  end

  # Not implemented yet; always raises NotImplementedError.
  def convert_csv_to_json(opts = {})
    raise NotImplementedError
  end


  private

  # Writes a timestamped message to STDERR when opts[:debug] is set.
  def debug(opts, msg)
    STDERR.puts("#{Time.now}\t#{msg}") if opts[:debug]
  end

  # Scans every JSON line of input_fh and collects the flattened key names.
  #
  # input_fh - IO to read JSON lines from.
  # tmp_fh   - IO that receives a copy of every line read, or nil.
  # depth    - flattening depth passed on to flatten_json.
  #
  # Returns a hash of `'header' => index` pairs, sorted.
  def get_headers_from_json(input_fh, tmp_fh, depth)
    headers = {}
    input_fh.each_line do |input|
      tmp_fh.puts(input) if tmp_fh
      # Blank lines (e.g. a trailing newline) carry no document to parse.
      next if input.strip.empty?
      flatten_json(JSON.parse(input), depth).each_key do |key|
        headers[key] = true
      end
    end
    sort_keys(headers)
  end

  # Sorts a hash with string keys by number of dots in the string,
  # then alphabetically.
  # Returns a hash of `'key' => index` pairs, in order of index.
  def sort_keys(hash)
    sorted = {}
    ordered = hash.keys.sort_by { |key| [count_dots(key), key] }
    ordered.each_with_index { |key, i| sorted[key] = i }
    sorted
  end

  # Number of "." characters in str.
  def count_dots(str)
    str.count(".")
  end

  # Flattens value one level further down and stores the outcome in dest:
  # a nested hash is merged in under "key.subkey" names, anything else is
  # stored under "key". Returns dest.
  def flat_assign(dest, key, value, depth)
    flat_value = flatten_json(value, depth - 1)
    if flat_value.is_a?(Hash)
      flat_value.each do |k, v|
        dest["#{key}.#{k}"] = v
      end
    else
      dest["#{key}"] = flat_value
    end
    dest
  end

  # Flattens parsed JSON into a single-level hash with dotted keys; array
  # elements are keyed by their index.
  #
  # depth - levels left to descend. Reaching 0 yields {}, which drops
  #         everything at that level; a negative value never reaches 0 and
  #         therefore means "unlimited".
  #
  # Returns a Hash for objects and arrays, or the value itself for scalars.
  def flatten_json(json, depth = -1)
    return {} if depth == 0

    if json.is_a?(Hash)
      flat = {}
      json.each do |key, value|
        flat_assign(flat, key, value, depth)
      end
      flat

    elsif json.is_a?(Array)
      flat = {}
      json.each_with_index do |value, i|
        flat_assign(flat, i, value, depth)
      end
      flat

    else # number or string
      json
    end
  end

  # Renders val as one CSV field: embedded double quotes are doubled, and
  # the field is wrapped in double quotes when it contains a quote, a comma
  # or a line break (CR or LF).
  def armor(val)
    str = val.to_s.gsub('"', '""')
    str =~ /[",\r\n]/ ? '"' + str + '"' : str
  end

  # Writes the header row and then one CSV row per JSON line.
  #
  # headers     - hash of `'header' => column index`, as built by sort_keys.
  # data_fh     - IO to read JSON lines from.
  # output_fh   - IO the CSV is written to.
  # line_ending - string terminating every row (default: CRLF).
  # depth       - flattening depth; should match the one used for headers.
  def output_csv(headers, data_fh, output_fh, line_ending = DEFAULT_OPTS[:line_ending], depth = -1)
    # Write header line
    output_fh.write(headers.keys.map { |name| armor(name) }.join(","))
    output_fh.write(line_ending)

    header_count = headers.count
    data_fh.each_line do |input|
      next if input.strip.empty?
      row = Array.new(header_count)
      flatten_json(JSON.parse(input), depth).each do |key, value|
        index = headers[key]
        # Keys without a column (beyond the header depth) are left out.
        row[index] = value if index
      end
      output_fh.write(row.map { |cell| armor(cell) }.join(","))
      output_fh.write(line_ending)
    end
  end
end
265
+
266
+
267
## command line mode
# Only act as a program when this file itself was launched; merely loading
# it as a library defines the class and does nothing else.
if __FILE__ == $PROGRAM_NAME
  JsonCsv.new_from_argv(ARGV).run
end
269
+
metadata ADDED
@@ -0,0 +1,46 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: json-csv
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.5.1
5
+ platform: ruby
6
+ authors:
7
+ - pete gamache
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-06-25 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description:
14
+ email: pete@appcues.com
15
+ executables:
16
+ - json-csv
17
+ extensions: []
18
+ extra_rdoc_files: []
19
+ files:
20
+ - bin/json-csv
21
+ - lib/json-csv.rb
22
+ homepage: http://github.com/appcues/json-csv
23
+ licenses:
24
+ - MIT
25
+ metadata: {}
26
+ post_install_message:
27
+ rdoc_options: []
28
+ require_paths:
29
+ - lib
30
+ required_ruby_version: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: '0'
35
+ required_rubygems_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: '0'
40
+ requirements: []
41
+ rubyforge_project:
42
+ rubygems_version: 2.4.8
43
+ signing_key:
44
+ specification_version: 4
45
+ summary: A command-line JSON/CSV converter
46
+ test_files: []