json_csv 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/json_csv/csv_builder.rb +53 -0
- data/lib/json_csv/csv_to_json.rb +97 -95
- data/lib/json_csv/json_to_csv.rb +93 -35
- data/lib/json_csv/version.rb +1 -1
- data/lib/json_csv.rb +2 -2
- metadata +2 -2
- data/lib/json_csv/array_notation.rb +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '0043834553b3243296e8b2b55c6806721b82673c'
|
4
|
+
data.tar.gz: 857ab74bc2b9f95ffb0f5d9d47ba520f8ba0a17f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 427f4714f7b6999c798bef38b8e80bef3f0fe5033b31909e4a3aa064aee79e407e5f5957a207dfaa55d9d389b7da763d813c053ce184f3dd8bc97c0b38ead32a
|
7
|
+
data.tar.gz: cdf0311ca31c8c830f22daf426f18e2a36f77cb5ebc47ebdc97ceaf8eeecea1bf1b88f2dec60dd2155e4f6d94c17ccfa5aded6cb64efe9b399d1aaf27beb5738
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'json_csv/json_to_csv'
|
3
|
+
|
4
|
+
module JsonCsv
|
5
|
+
class CsvBuilder
|
6
|
+
private_class_method :new # private constructor. we don't want users to initialize this class.
|
7
|
+
attr_reader :known_headers_to_indexes # map of all headers seen by this CsvBuilder, mapped to their column order indexes
|
8
|
+
|
9
|
+
def initialize(open_csv_handle)
|
10
|
+
@known_headers_to_indexes = {}
|
11
|
+
@open_csv_handle = open_csv_handle
|
12
|
+
end
|
13
|
+
|
14
|
+
# Adds data from the given json hash to the CSV we're building.
|
15
|
+
def add(json_hash)
|
16
|
+
row_to_write = []
|
17
|
+
JsonCsv.json_hash_to_flat_csv_row_hash(json_hash).each do |column_header, cell_value|
|
18
|
+
known_headers_to_indexes[column_header] = known_headers_to_indexes.length unless known_headers_to_indexes.key?(column_header)
|
19
|
+
row_to_write[known_headers_to_indexes[column_header]] = cell_value
|
20
|
+
end
|
21
|
+
@open_csv_handle << row_to_write
|
22
|
+
end
|
23
|
+
|
24
|
+
# Writes out a CSV file that does NOT contain a header row. Only data values.
|
25
|
+
# Returns an array of headers that correspond to the written-out CSV file's columns.
|
26
|
+
#
|
27
|
+
# Why don't we include CSV headers in the CSV? Because we don't know what set of headers
|
28
|
+
# we're working with while we dynamically create this CSV. Different JSON documents may
|
29
|
+
# or may not all contain the same headers. For this reason, this is more of an internal
|
30
|
+
# method that isn't called directly by users of this gem.
|
31
|
+
def self.create_csv_without_headers(csv_outfile_path, csv_write_mode = 'wb')
|
32
|
+
csv_builder = nil
|
33
|
+
|
34
|
+
CSV.open(csv_outfile_path, csv_write_mode) do |csv|
|
35
|
+
csv_builder = new(csv)
|
36
|
+
yield csv_builder
|
37
|
+
end
|
38
|
+
|
39
|
+
csv_builder.known_headers_to_indexes.keys
|
40
|
+
end
|
41
|
+
|
42
|
+
def self.original_header_indexes_to_sorted_indexes(csv_headers, column_header_comparator)
|
43
|
+
original_headers_to_indexes = Hash[csv_headers.map.with_index { |header, index| [header, index] }]
|
44
|
+
headers_to_sorted_indexes = Hash[csv_headers.sort(&column_header_comparator).map.with_index { |header, index| [header, index] }]
|
45
|
+
original_to_sorted_index_map = {}
|
46
|
+
original_headers_to_indexes.each do |header, original_index|
|
47
|
+
original_to_sorted_index_map[original_index] = headers_to_sorted_indexes[header]
|
48
|
+
end
|
49
|
+
original_to_sorted_index_map
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
53
|
+
end
|
data/lib/json_csv/csv_to_json.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'json_csv/array_notation'
|
2
1
|
require 'json_csv/utils'
|
3
2
|
require 'csv'
|
4
3
|
|
@@ -11,117 +10,120 @@ module JsonCsv
|
|
11
10
|
TYPE_BOOLEAN = 'boolean'.freeze
|
12
11
|
FIELD_CASTING_TYPES = [TYPE_STRING, TYPE_INTEGER, TYPE_FLOAT, TYPE_BOOLEAN].freeze
|
13
12
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
13
|
+
def self.included(base)
|
14
|
+
base.extend ClassMethods
|
15
|
+
end
|
16
|
+
|
17
|
+
module ClassMethods
|
18
|
+
# Takes flat csv data and yields to a block for each row,
|
19
|
+
# presenting that row as un-flattened json.
|
20
|
+
# This method works for large CSVs and uses very little memory
|
21
|
+
# because it only keeps one row in memory at a time.
|
22
|
+
# Sample usage: csv_file_to_hierarchical_json_hash(path_to_csv, field_casting_rules = {}, strip_value_whitespace = true) do |row_json_hash, row_number|
|
23
|
+
def csv_file_to_hierarchical_json_hash(path_to_csv, field_casting_rules = {}, strip_value_whitespace = true)
|
24
|
+
i = 1 # start with row 1 because this corresponds to the first row of 0-indexed CSV data
|
25
|
+
CSV.foreach(path_to_csv, headers: true, header_converters: lambda { |header|
|
26
|
+
header.strip # remove leading and trailing header whitespace
|
27
|
+
}) do |row_data_hash|
|
28
|
+
yield csv_row_hash_to_hierarchical_json_hash(row_data_hash, field_casting_rules, strip_value_whitespace), i
|
29
|
+
i += 1
|
25
30
|
end
|
26
|
-
}) do |row_data_hash|
|
27
|
-
yield csv_row_hash_to_hierarchical_json_hash(row_data_hash, field_casting_rules, strip_value_whitespace), i
|
28
|
-
i += 1
|
29
31
|
end
|
30
|
-
end
|
31
32
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
33
|
+
def csv_row_hash_to_hierarchical_json_hash(row_data_hash, field_casting_rules, strip_value_whitespace = true)
|
34
|
+
hierarchical_hash = {}
|
35
|
+
row_data_hash.each do |key, value|
|
36
|
+
next if value.nil? || value == '' # ignore nil or empty string values
|
37
|
+
put_value_at_json_path(hierarchical_hash, key, value, field_casting_rules)
|
38
|
+
end
|
39
|
+
# Clean up empty array elements, which may have come about from CSV data
|
40
|
+
# that was 1-indexed instead of 0-indexed.
|
41
|
+
JsonCsv::Utils.recursively_remove_blank_fields!(hierarchical_hash)
|
42
|
+
JsonCsv::Utils.recursively_strip_value_whitespace!(hierarchical_hash) if strip_value_whitespace
|
43
|
+
hierarchical_hash
|
37
44
|
end
|
38
|
-
# Clean up empty array elements, which may have come about from CSV data
|
39
|
-
# that was 1-indexed instead of 0-indexed.
|
40
|
-
JsonCsv::Utils.recursively_remove_blank_fields!(hierarchical_hash)
|
41
|
-
JsonCsv::Utils.recursively_strip_value_whitespace!(hierarchical_hash) if strip_value_whitespace
|
42
|
-
hierarchical_hash
|
43
|
-
end
|
44
45
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
46
|
+
# For the given obj, puts the given value at the given json_path,
|
47
|
+
# creating nested elements as needed. This method calls itself
|
48
|
+
# recursively when placing a value at a nested path, and during
|
49
|
+
# this sequence of calls the obj param may either be a hash or an array.
|
50
|
+
def put_value_at_json_path(obj, json_path, value, field_casting_rules = {}, full_json_path_from_top = json_path)
|
51
|
+
json_path_pieces = json_path_to_pieces(json_path)
|
51
52
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
53
|
+
if json_path_pieces.length == 1
|
54
|
+
# If the full_json_path_from_top matches one of the field_casting_rules,
|
55
|
+
# then cast this field to the specified cast type
|
56
|
+
full_json_path_from_top_as_field_casting_rule_pattern = real_json_path_to_field_casting_rule_pattern(full_json_path_from_top)
|
57
|
+
obj[json_path_pieces[0]] = field_casting_rules.key?(full_json_path_from_top_as_field_casting_rule_pattern) ? apply_field_casting_type(value, field_casting_rules[full_json_path_from_top_as_field_casting_rule_pattern]) : value
|
58
|
+
else
|
59
|
+
obj[json_path_pieces[0]] ||= (json_path_pieces[1].is_a?(Integer) ? [] : {})
|
60
|
+
put_value_at_json_path(obj[json_path_pieces[0]], pieces_to_json_path(json_path_pieces[1..-1]), value, field_casting_rules, full_json_path_from_top)
|
61
|
+
end
|
60
62
|
end
|
61
|
-
end
|
62
63
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
64
|
+
# Takes a real json_path like "related_books[1].notes_from_reviewers[0]" and
|
65
|
+
# converts it to a field_casting_rule_pattern like: "related_books[x].notes_from_reviewers[x]"
|
66
|
+
def real_json_path_to_field_casting_rule_pattern(full_json_path_from_top)
|
67
|
+
full_json_path_from_top.gsub(/\d+/, 'x')
|
68
|
+
end
|
68
69
|
|
69
|
-
|
70
|
-
|
70
|
+
def apply_field_casting_type(value, field_casting_type)
|
71
|
+
raise ArgumentError, "Invalid cast type #{field_casting_type}" unless FIELD_CASTING_TYPES.include?(field_casting_type)
|
71
72
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
73
|
+
case field_casting_type
|
74
|
+
when TYPE_INTEGER
|
75
|
+
raise ArgumentError, "\"#{value}\" is not an integer" unless value =~ /^[0-9]+$/
|
76
|
+
value.to_i
|
77
|
+
when TYPE_FLOAT
|
78
|
+
raise ArgumentError, "\"#{value}\" is not a float" unless value =~ /^[0-9]+(\.[0-9]+)*$/ || value =~ /^\.[0-9]+$/
|
79
|
+
value.to_f
|
80
|
+
when TYPE_BOOLEAN
|
81
|
+
if value.downcase == 'true'
|
82
|
+
true
|
83
|
+
elsif value.downcase == 'false'
|
84
|
+
false
|
85
|
+
else
|
86
|
+
raise ArgumentError, "\"#{value}\" is not a boolean"
|
87
|
+
end
|
84
88
|
else
|
85
|
-
|
89
|
+
value # fall back to string, which is the original form
|
86
90
|
end
|
87
|
-
else
|
88
|
-
value # fall back to string, which is the original form
|
89
91
|
end
|
90
|
-
end
|
91
92
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
93
|
+
# Takes the given json_path and splits it into individual json path pieces.
|
94
|
+
# e.g. Takes "related_books[1].notes_from_reviewers[0]" and converts it to:
|
95
|
+
# ["related_books", 1, "notes_from_reviewers", 0]
|
96
|
+
def json_path_to_pieces(json_path)
|
97
|
+
# split on...
|
98
|
+
# '].' (when preceded by a number)
|
99
|
+
# OR
|
100
|
+
# '[' (when followed by a number)
|
101
|
+
# OR
|
102
|
+
# ']' (when preceded by a number)
|
103
|
+
# OR
|
104
|
+
# '.' (always)
|
105
|
+
# ...and remove empty elements (which only come up when you're working with
|
106
|
+
# a json_path like '[0]', which splits between the first bracket and the number)
|
107
|
+
pieces = json_path.split(/(?<=\d)\]\.|\[(?=\d)|(?<=\d)\]|\./).reject { |piece| piece == '' }
|
108
|
+
pieces.map { |piece| piece.to_i.to_s == piece ? piece.to_i : piece } # numeric pieces should be actual numbers
|
109
|
+
end
|
109
110
|
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
111
|
+
# Generates a string json path from the given pieces
|
112
|
+
# e.g. Takes ["related_books", 1, "notes_from_reviewers", 0] and converts it to:
|
113
|
+
# "related_books[1].notes_from_reviewers[0]"
|
114
|
+
def pieces_to_json_path(pieces)
|
115
|
+
json_path = ''
|
116
|
+
pieces.each do |piece|
|
117
|
+
if piece.is_a?(Integer)
|
118
|
+
json_path += "[#{piece}]"
|
119
|
+
else
|
120
|
+
json_path += '.' unless json_path.empty?
|
121
|
+
json_path += piece
|
122
|
+
end
|
121
123
|
end
|
124
|
+
json_path
|
122
125
|
end
|
123
|
-
json_path
|
124
|
-
end
|
125
126
|
|
127
|
+
end
|
126
128
|
end
|
127
129
|
end
|
data/lib/json_csv/json_to_csv.rb
CHANGED
@@ -1,50 +1,108 @@
|
|
1
1
|
require 'json'
|
2
|
+
require 'json_csv/csv_builder'
|
2
3
|
|
3
4
|
module JsonCsv
|
4
5
|
module JsonToCsv
|
5
6
|
|
6
|
-
|
7
|
-
|
8
|
-
# Set first_index to 1 if you want the first element in an array to
|
9
|
-
#
|
10
|
-
def json_hash_to_flat_csv_row_hash(json_hash, array_notation = JsonCsv::ArrayNotation::BRACKETS)
|
11
|
-
flat = flatten_hash(json_hash)
|
12
|
-
# Convert values to strings because in the CSV file, all values are strings
|
13
|
-
flat.each { |key, val| flat[key] = val.nil? ? '' : val.to_s }
|
14
|
-
# If we're using dash array notation, convert the headers
|
15
|
-
if array_notation == JsonCsv::ArrayNotation::DASH
|
16
|
-
Hash[flat.map { |key, val| [JsonCsv::ArrayNotation.bracket_header_to_dash_header(key), val] }]
|
17
|
-
else
|
18
|
-
flat
|
19
|
-
end
|
7
|
+
def self.included(base)
|
8
|
+
base.extend ClassMethods
|
20
9
|
end
|
21
10
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
11
|
+
module ClassMethods
|
12
|
+
DEFAULT_HEADER_SORT_COMPARATOR = lambda do |header1, header2|
|
13
|
+
# Ensure correct alphabetical sorting AND numeric sorting via zero-padding of numbers
|
14
|
+
header1_with_zero_padding = header1.gsub(/(?<=\[)\d+(?=\])/) { |capture| capture.to_i.to_s.rjust(5, '0') }
|
15
|
+
header2_with_zero_padding = header2.gsub(/(?<=\[)\d+(?=\])/) { |capture| capture.to_i.to_s.rjust(5, '0') }
|
16
|
+
header1_with_zero_padding <=> header2_with_zero_padding
|
17
|
+
end
|
18
|
+
|
19
|
+
def default_header_comparison(header1, header2)
|
20
|
+
DEFAULT_HEADER_SORT_COMPARATOR.call(header1, header2)
|
21
|
+
end
|
22
|
+
|
23
|
+
# Example usage:
|
24
|
+
# create_csv_for_json_records('/path/to/file.csv') do |csv_builder|
|
25
|
+
# json_docs.each do |json_doc|
|
26
|
+
# csv_builder.add(json_doc)
|
27
|
+
# end
|
28
|
+
# end
|
29
|
+
def create_csv_for_json_records(csv_outfile_path, header_sort_comparator = DEFAULT_HEADER_SORT_COMPARATOR)
|
30
|
+
csv_temp_outfile_path = csv_outfile_path + '.temp'
|
31
|
+
|
32
|
+
begin
|
33
|
+
# Step 1: Build CSV with unsorted headers in temp file
|
34
|
+
csv_headers = JsonCsv::CsvBuilder.create_csv_without_headers(csv_temp_outfile_path, 'wb') do |csv_builder|
|
35
|
+
yield csv_builder
|
29
36
|
end
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
+
|
38
|
+
# Step 2: Sort CSV columns by header, based on header_sort_comparator
|
39
|
+
original_to_sorted_index_map = JsonCsv::CsvBuilder.original_header_indexes_to_sorted_indexes(csv_headers, header_sort_comparator)
|
40
|
+
CSV.open(csv_outfile_path, 'wb') do |final_csv|
|
41
|
+
# Open temporary CSV for reading
|
42
|
+
CSV.open(csv_temp_outfile_path, 'rb') do |temp_csv|
|
43
|
+
|
44
|
+
# write out ordered header row
|
45
|
+
reordered_header_row = []
|
46
|
+
csv_headers.each_with_index do |header, index|
|
47
|
+
reordered_header_row[original_to_sorted_index_map[index]] = header
|
48
|
+
end
|
49
|
+
|
50
|
+
final_csv << reordered_header_row
|
51
|
+
|
52
|
+
temp_csv.each do |temp_csv_row|
|
53
|
+
reordered_temp_csv_row = []
|
54
|
+
# write out ordered data row
|
55
|
+
temp_csv_row.each_with_index do |cell_value, index|
|
56
|
+
reordered_temp_csv_row[original_to_sorted_index_map[index]] = cell_value
|
57
|
+
end
|
58
|
+
final_csv << reordered_temp_csv_row
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
ensure
|
63
|
+
# Always delete the temporary CSV
|
64
|
+
FileUtils.rm_f(csv_temp_outfile_path)
|
37
65
|
end
|
38
|
-
else
|
39
|
-
flat_hash_to_build[parent_path] = obj unless obj.nil? || obj == '' # ignore nil or empty string values
|
40
66
|
end
|
41
67
|
|
42
|
-
|
43
|
-
|
68
|
+
# Converts the given json_hash into a flat csv hash, converting all values to
|
69
|
+
# strings (because CSVs are dumb and don't store info about data types)
|
70
|
+
# Array elements are addressed in the flattened keys by zero-based bracket index (e.g. "items[0]").
|
71
|
+
#
|
72
|
+
def json_hash_to_flat_csv_row_hash(json_hash)
|
73
|
+
flat = flatten_hash(json_hash)
|
74
|
+
# Convert values to strings because in the CSV file, all values are strings
|
75
|
+
flat.each { |key, val| flat[key] = val.nil? ? '' : val.to_s }
|
76
|
+
flat
|
77
|
+
end
|
44
78
|
|
45
|
-
|
46
|
-
|
47
|
-
|
79
|
+
# This method calls itself recursively while flattening a hash, and during
|
80
|
+
# this sequence of calls the obj param may either be a hash or an array.
|
81
|
+
def flatten_hash(obj, parent_path = '', flat_hash_to_build = {})
|
82
|
+
if obj.is_a?(Hash)
|
83
|
+
obj.each do |key, val|
|
84
|
+
if key_contains_unallowed_characters?(key)
|
85
|
+
raise ArgumentError, 'Cannot deal with hash keys that contain "[" or "]" or "." because these characters have special meanings in CSV headers.'
|
86
|
+
end
|
87
|
+
path = parent_path + (parent_path.empty? ? '' : '.') + key
|
88
|
+
flatten_hash(val, path, flat_hash_to_build)
|
89
|
+
end
|
90
|
+
elsif obj.is_a?(Array)
|
91
|
+
obj.each_with_index do |el, index|
|
92
|
+
path = parent_path + "[#{index}]"
|
93
|
+
flatten_hash(el, path, flat_hash_to_build)
|
94
|
+
end
|
95
|
+
else
|
96
|
+
flat_hash_to_build[parent_path] = obj unless obj.nil? || obj == '' # ignore nil or empty string values
|
97
|
+
end
|
98
|
+
|
99
|
+
flat_hash_to_build
|
100
|
+
end
|
101
|
+
|
102
|
+
def key_contains_unallowed_characters?(key)
|
103
|
+
return true if key.index('[') || key.index(']') || key.index('.')
|
104
|
+
false
|
105
|
+
end
|
48
106
|
end
|
49
107
|
|
50
108
|
end
|
data/lib/json_csv/version.rb
CHANGED
data/lib/json_csv.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric O'Hanlon
|
@@ -88,7 +88,7 @@ extra_rdoc_files: []
|
|
88
88
|
files:
|
89
89
|
- README.md
|
90
90
|
- lib/json_csv.rb
|
91
|
-
- lib/json_csv/array_notation.rb
|
91
|
+
- lib/json_csv/csv_builder.rb
|
92
92
|
- lib/json_csv/csv_to_json.rb
|
93
93
|
- lib/json_csv/json_to_csv.rb
|
94
94
|
- lib/json_csv/utils.rb
|
@@ -1,23 +0,0 @@
|
|
1
|
-
module JsonCsv
|
2
|
-
module ArrayNotation
|
3
|
-
BRACKETS = 'BRACKETS'.freeze
|
4
|
-
DASH = 'DASH'.freeze
|
5
|
-
|
6
|
-
VALID_ARRAY_NOTATIONS = [BRACKETS, DASH].freeze
|
7
|
-
|
8
|
-
def self.bracket_header_to_dash_header(bracket_header)
|
9
|
-
# e.g. replace occurrences of '[1]' with '-1'
|
10
|
-
bracket_header.gsub(/(\[(\d+)\])/, '-\2')
|
11
|
-
end
|
12
|
-
|
13
|
-
def self.dash_header_to_bracket_header(dash_header)
|
14
|
-
# e.g. replace occurrences of '-1' with '[1]'
|
15
|
-
dash_header.gsub(/(-(\d+))/, '[\2]')
|
16
|
-
end
|
17
|
-
|
18
|
-
def self.raise_error_if_invalid_array_notation_value!(error_class, array_notation)
|
19
|
-
raise error_class, "Invalid array notation. Must be one of #{VALID_ARRAY_NOTATIONS.join(' or ')}." unless VALID_ARRAY_NOTATIONS.include?(array_notation)
|
20
|
-
end
|
21
|
-
|
22
|
-
end
|
23
|
-
end
|