json_csv 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/json_csv/csv_builder.rb +53 -0
- data/lib/json_csv/csv_to_json.rb +97 -95
- data/lib/json_csv/json_to_csv.rb +93 -35
- data/lib/json_csv/version.rb +1 -1
- data/lib/json_csv.rb +2 -2
- metadata +2 -2
- data/lib/json_csv/array_notation.rb +0 -23
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '0043834553b3243296e8b2b55c6806721b82673c'
|
4
|
+
data.tar.gz: 857ab74bc2b9f95ffb0f5d9d47ba520f8ba0a17f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 427f4714f7b6999c798bef38b8e80bef3f0fe5033b31909e4a3aa064aee79e407e5f5957a207dfaa55d9d389b7da763d813c053ce184f3dd8bc97c0b38ead32a
|
7
|
+
data.tar.gz: cdf0311ca31c8c830f22daf426f18e2a36f77cb5ebc47ebdc97ceaf8eeecea1bf1b88f2dec60dd2155e4f6d94c17ccfa5aded6cb64efe9b399d1aaf27beb5738
|
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'json_csv/json_to_csv'
|
3
|
+
|
4
|
+
module JsonCsv
|
5
|
+
class CsvBuilder
|
6
|
+
private_class_method :new # private constructor. we don't want users to initialize this class.
|
7
|
+
attr_reader :known_headers_to_indexes # map of all headers seen by this CsvBuilder, mapped to their column order indexes
|
8
|
+
|
9
|
+
def initialize(open_csv_handle)
|
10
|
+
@known_headers_to_indexes = {}
|
11
|
+
@open_csv_handle = open_csv_handle
|
12
|
+
end
|
13
|
+
|
14
|
+
# Adds data from the given json hash to the CSV we're building.
|
15
|
+
def add(json_hash)
|
16
|
+
row_to_write = []
|
17
|
+
JsonCsv.json_hash_to_flat_csv_row_hash(json_hash).each do |column_header, cell_value|
|
18
|
+
known_headers_to_indexes[column_header] = known_headers_to_indexes.length unless known_headers_to_indexes.key?(column_header)
|
19
|
+
row_to_write[known_headers_to_indexes[column_header]] = cell_value
|
20
|
+
end
|
21
|
+
@open_csv_handle << row_to_write
|
22
|
+
end
|
23
|
+
|
24
|
+
# Writes out a CSV file that does NOT contain a header row. Only data values.
|
25
|
+
# Returns an array of headers that correspond to the written-out CSV file's columns.
|
26
|
+
#
|
27
|
+
# Why don't we include CSV headers in the CSV? Because we don't know what set of headers
|
28
|
+
# we're working with while we dynamically create this CSV. Different JSON documents may
|
29
|
+
# or may not all contain the same headers. For this reason, this is more of an internal
|
30
|
+
# method that isn't called directly by users of this gem.
|
31
|
+
def self.create_csv_without_headers(csv_outfile_path, csv_write_mode = 'wb')
|
32
|
+
csv_builder = nil
|
33
|
+
|
34
|
+
CSV.open(csv_outfile_path, csv_write_mode) do |csv|
|
35
|
+
csv_builder = new(csv)
|
36
|
+
yield csv_builder
|
37
|
+
end
|
38
|
+
|
39
|
+
csv_builder.known_headers_to_indexes.keys
|
40
|
+
end
|
41
|
+
|
42
|
+
def self.original_header_indexes_to_sorted_indexes(csv_headers, column_header_comparator)
|
43
|
+
original_headers_to_indexes = Hash[csv_headers.map.with_index { |header, index| [header, index] }]
|
44
|
+
headers_to_sorted_indexes = Hash[csv_headers.sort(&column_header_comparator).map.with_index { |header, index| [header, index] }]
|
45
|
+
original_to_sorted_index_map = {}
|
46
|
+
original_headers_to_indexes.each do |header, original_index|
|
47
|
+
original_to_sorted_index_map[original_index] = headers_to_sorted_indexes[header]
|
48
|
+
end
|
49
|
+
original_to_sorted_index_map
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
53
|
+
end
|
data/lib/json_csv/csv_to_json.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'json_csv/array_notation'
|
2
1
|
require 'json_csv/utils'
|
3
2
|
require 'csv'
|
4
3
|
|
@@ -11,117 +10,120 @@ module JsonCsv
|
|
11
10
|
TYPE_BOOLEAN = 'boolean'.freeze
|
12
11
|
FIELD_CASTING_TYPES = [TYPE_STRING, TYPE_INTEGER, TYPE_FLOAT, TYPE_BOOLEAN].freeze
|
13
12
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
13
|
+
def self.included(base)
|
14
|
+
base.extend ClassMethods
|
15
|
+
end
|
16
|
+
|
17
|
+
module ClassMethods
|
18
|
+
# Takes flat csv data and yields to a block for each row,
|
19
|
+
# presenting that row as un-flattened json.
|
20
|
+
# This method works for large CSVs and uses very little memory
|
21
|
+
# because it only keeps one row in memory at a time.
|
22
|
+
# Sample usage: csv_file_to_hierarchical_json_hash(path_to_csv, field_casting_rules = {}, strip_value_whitespace = true) do |row_json_hash, row_number|
|
23
|
+
def csv_file_to_hierarchical_json_hash(path_to_csv, field_casting_rules = {}, strip_value_whitespace = true)
|
24
|
+
i = 1 # start with row 1 because this corresponds to the first row of 0-indexed CSV data
|
25
|
+
CSV.foreach(path_to_csv, headers: true, header_converters: lambda { |header|
|
26
|
+
header.strip # remove leading and trailing header whitespace
|
27
|
+
}) do |row_data_hash|
|
28
|
+
yield csv_row_hash_to_hierarchical_json_hash(row_data_hash, field_casting_rules, strip_value_whitespace), i
|
29
|
+
i += 1
|
25
30
|
end
|
26
|
-
}) do |row_data_hash|
|
27
|
-
yield csv_row_hash_to_hierarchical_json_hash(row_data_hash, field_casting_rules, strip_value_whitespace), i
|
28
|
-
i += 1
|
29
31
|
end
|
30
|
-
end
|
31
32
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
33
|
+
def csv_row_hash_to_hierarchical_json_hash(row_data_hash, field_casting_rules, strip_value_whitespace = true)
|
34
|
+
hierarchical_hash = {}
|
35
|
+
row_data_hash.each do |key, value|
|
36
|
+
next if value.nil? || value == '' # ignore nil or empty string values
|
37
|
+
put_value_at_json_path(hierarchical_hash, key, value, field_casting_rules)
|
38
|
+
end
|
39
|
+
# Clean up empty array elements, which may have come about from CSV data
|
40
|
+
# that was 1-indexed instead of 0-indexed.
|
41
|
+
JsonCsv::Utils.recursively_remove_blank_fields!(hierarchical_hash)
|
42
|
+
JsonCsv::Utils.recursively_strip_value_whitespace!(hierarchical_hash) if strip_value_whitespace
|
43
|
+
hierarchical_hash
|
37
44
|
end
|
38
|
-
# Clean up empty array elements, which may have come about from CSV data
|
39
|
-
# that was 1-indexed instead of 0-indexed.
|
40
|
-
JsonCsv::Utils.recursively_remove_blank_fields!(hierarchical_hash)
|
41
|
-
JsonCsv::Utils.recursively_strip_value_whitespace!(hierarchical_hash) if strip_value_whitespace
|
42
|
-
hierarchical_hash
|
43
|
-
end
|
44
45
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
46
|
+
# For the given obj, puts the given value at the given json_path,
|
47
|
+
# creating nested elements as needed. This method calls itself
|
48
|
+
# recursively when placing a value at a nested path, and during
|
49
|
+
# this sequence of calls the obj param may either be a hash or an array.
|
50
|
+
def put_value_at_json_path(obj, json_path, value, field_casting_rules = {}, full_json_path_from_top = json_path)
|
51
|
+
json_path_pieces = json_path_to_pieces(json_path)
|
51
52
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
53
|
+
if json_path_pieces.length == 1
|
54
|
+
# If the full_json_path_from_top matches one of the field_casting_rules,
|
55
|
+
# then cast this field to the specified cast type
|
56
|
+
full_json_path_from_top_as_field_casting_rule_pattern = real_json_path_to_field_casting_rule_pattern(full_json_path_from_top)
|
57
|
+
obj[json_path_pieces[0]] = field_casting_rules.key?(full_json_path_from_top_as_field_casting_rule_pattern) ? apply_field_casting_type(value, field_casting_rules[full_json_path_from_top_as_field_casting_rule_pattern]) : value
|
58
|
+
else
|
59
|
+
obj[json_path_pieces[0]] ||= (json_path_pieces[1].is_a?(Integer) ? [] : {})
|
60
|
+
put_value_at_json_path(obj[json_path_pieces[0]], pieces_to_json_path(json_path_pieces[1..-1]), value, field_casting_rules, full_json_path_from_top)
|
61
|
+
end
|
60
62
|
end
|
61
|
-
end
|
62
63
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
64
|
+
# Takes a real json_path like "related_books[1].notes_from_reviewers[0]" and
|
65
|
+
# converts it to a field_casting_rule_pattern like: "related_books[x].notes_from_reviewers[x]"
|
66
|
+
def real_json_path_to_field_casting_rule_pattern(full_json_path_from_top)
|
67
|
+
full_json_path_from_top.gsub(/\d+/, 'x')
|
68
|
+
end
|
68
69
|
|
69
|
-
|
70
|
-
|
70
|
+
def apply_field_casting_type(value, field_casting_type)
|
71
|
+
raise ArgumentError, "Invalid cast type #{field_casting_type}" unless FIELD_CASTING_TYPES.include?(field_casting_type)
|
71
72
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
73
|
+
case field_casting_type
|
74
|
+
when TYPE_INTEGER
|
75
|
+
raise ArgumentError, "\"#{value}\" is not an integer" unless value =~ /^[0-9]+$/
|
76
|
+
value.to_i
|
77
|
+
when TYPE_FLOAT
|
78
|
+
raise ArgumentError, "\"#{value}\" is not a float" unless value =~ /^[0-9]+(\.[0-9]+)*$/ || value =~ /^\.[0-9]+$/
|
79
|
+
value.to_f
|
80
|
+
when TYPE_BOOLEAN
|
81
|
+
if value.downcase == 'true'
|
82
|
+
true
|
83
|
+
elsif value.downcase == 'false'
|
84
|
+
false
|
85
|
+
else
|
86
|
+
raise ArgumentError, "\"#{value}\" is not a boolean"
|
87
|
+
end
|
84
88
|
else
|
85
|
-
|
89
|
+
value # fall back to string, which is the original form
|
86
90
|
end
|
87
|
-
else
|
88
|
-
value # fall back to string, which is the original form
|
89
91
|
end
|
90
|
-
end
|
91
92
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
93
|
+
# Takes the given json_path and splits it into individual json path pieces.
|
94
|
+
# e.g. Takes "related_books[1].notes_from_reviewers[0]" and converts it to:
|
95
|
+
# ["related_books", 1, "notes_from_reviewers", 0]
|
96
|
+
def json_path_to_pieces(json_path)
|
97
|
+
# split on...
|
98
|
+
# '].' (when preceded by a number)
|
99
|
+
# OR
|
100
|
+
# '[' (when followed by a number)
|
101
|
+
# OR
|
102
|
+
# ']' (when preceded by a number)
|
103
|
+
# OR
|
104
|
+
# '.' (always)
|
105
|
+
# ...and remove empty elements (which only come up when you're working with
|
106
|
+
# a json_path like '[0]', which splits between the first bracket and the number)
|
107
|
+
pieces = json_path.split(/(?<=\d)\]\.|\[(?=\d)|(?<=\d)\]|\./).reject { |piece| piece == '' }
|
108
|
+
pieces.map { |piece| piece.to_i.to_s == piece ? piece.to_i : piece } # numeric pieces should be actual numbers
|
109
|
+
end
|
109
110
|
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
111
|
+
# Generates a string json path from the given pieces
|
112
|
+
# e.g. Takes ["related_books", 1, "notes_from_reviewers", 0] and converts it to:
|
113
|
+
# "related_books[1].notes_from_reviewers[0]"
|
114
|
+
def pieces_to_json_path(pieces)
|
115
|
+
json_path = ''
|
116
|
+
pieces.each do |piece|
|
117
|
+
if piece.is_a?(Integer)
|
118
|
+
json_path += "[#{piece}]"
|
119
|
+
else
|
120
|
+
json_path += '.' unless json_path.empty?
|
121
|
+
json_path += piece
|
122
|
+
end
|
121
123
|
end
|
124
|
+
json_path
|
122
125
|
end
|
123
|
-
json_path
|
124
|
-
end
|
125
126
|
|
127
|
+
end
|
126
128
|
end
|
127
129
|
end
|
data/lib/json_csv/json_to_csv.rb
CHANGED
@@ -1,50 +1,108 @@
|
|
1
1
|
require 'json'
|
2
|
+
require 'json_csv/csv_builder'
|
2
3
|
|
3
4
|
module JsonCsv
|
4
5
|
module JsonToCsv
|
5
6
|
|
6
|
-
|
7
|
-
|
8
|
-
# Set first_index to 1 if you want the first element in an array to
|
9
|
-
#
|
10
|
-
def json_hash_to_flat_csv_row_hash(json_hash, array_notation = JsonCsv::ArrayNotation::BRACKETS)
|
11
|
-
flat = flatten_hash(json_hash)
|
12
|
-
# Convert values to strings because in the CSV file, all values are strings
|
13
|
-
flat.each { |key, val| flat[key] = val.nil? ? '' : val.to_s }
|
14
|
-
# If we're using dash array notation, convert the headers
|
15
|
-
if array_notation == JsonCsv::ArrayNotation::DASH
|
16
|
-
Hash[flat.map { |key, val| [JsonCsv::ArrayNotation.bracket_header_to_dash_header(key), val] }]
|
17
|
-
else
|
18
|
-
flat
|
19
|
-
end
|
7
|
+
def self.included(base)
|
8
|
+
base.extend ClassMethods
|
20
9
|
end
|
21
10
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
11
|
+
module ClassMethods
|
12
|
+
DEFAULT_HEADER_SORT_COMPARATOR = lambda do |header1, header2|
|
13
|
+
# Ensure correct alphabetical sorting AND numeric sorting via zero-padding of numbers
|
14
|
+
header1_with_zero_padding = header1.gsub(/(?<=\[)\d+(?=\])/) { |capture| capture.to_i.to_s.rjust(5, '0') }
|
15
|
+
header2_with_zero_padding = header2.gsub(/(?<=\[)\d+(?=\])/) { |capture| capture.to_i.to_s.rjust(5, '0') }
|
16
|
+
header1_with_zero_padding <=> header2_with_zero_padding
|
17
|
+
end
|
18
|
+
|
19
|
+
def default_header_comparison(header1, header2)
|
20
|
+
DEFAULT_HEADER_SORT_COMPARATOR.call(header1, header2)
|
21
|
+
end
|
22
|
+
|
23
|
+
# Example usage:
|
24
|
+
# create_csv_for_json_records('/path/to/file.csv') do |csv_builder|
|
25
|
+
# json_docs.each do |json_doc|
|
26
|
+
# csv_builder.add(json_doc)
|
27
|
+
# end
|
28
|
+
# end
|
29
|
+
def create_csv_for_json_records(csv_outfile_path, header_sort_comparator = DEFAULT_HEADER_SORT_COMPARATOR)
|
30
|
+
csv_temp_outfile_path = csv_outfile_path + '.temp'
|
31
|
+
|
32
|
+
begin
|
33
|
+
# Step 1: Build CSV with unsorted headers in temp file
|
34
|
+
csv_headers = JsonCsv::CsvBuilder.create_csv_without_headers(csv_temp_outfile_path, 'wb') do |csv_builder|
|
35
|
+
yield csv_builder
|
29
36
|
end
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
+
|
38
|
+
# Step 2: Sort CSV columns by header, based on header_sort_comparator
|
39
|
+
original_to_sorted_index_map = JsonCsv::CsvBuilder.original_header_indexes_to_sorted_indexes(csv_headers, header_sort_comparator)
|
40
|
+
CSV.open(csv_outfile_path, 'wb') do |final_csv|
|
41
|
+
# Open temporary CSV for reading
|
42
|
+
CSV.open(csv_temp_outfile_path, 'rb') do |temp_csv|
|
43
|
+
|
44
|
+
# write out ordered header row
|
45
|
+
reordered_header_row = []
|
46
|
+
csv_headers.each_with_index do |header, index|
|
47
|
+
reordered_header_row[original_to_sorted_index_map[index]] = header
|
48
|
+
end
|
49
|
+
|
50
|
+
final_csv << reordered_header_row
|
51
|
+
|
52
|
+
temp_csv.each do |temp_csv_row|
|
53
|
+
reordered_temp_csv_row = []
|
54
|
+
# write out ordered data row
|
55
|
+
temp_csv_row.each_with_index do |cell_value, index|
|
56
|
+
reordered_temp_csv_row[original_to_sorted_index_map[index]] = cell_value
|
57
|
+
end
|
58
|
+
final_csv << reordered_temp_csv_row
|
59
|
+
end
|
60
|
+
end
|
61
|
+
end
|
62
|
+
ensure
|
63
|
+
# Always delete the temporary CSV
|
64
|
+
FileUtils.rm_f(csv_temp_outfile_path)
|
37
65
|
end
|
38
|
-
else
|
39
|
-
flat_hash_to_build[parent_path] = obj unless obj.nil? || obj == '' # ignore nil or empty string values
|
40
66
|
end
|
41
67
|
|
42
|
-
|
43
|
-
|
68
|
+
# Converts the given json_hash into a flat csv hash, converting all values to
|
69
|
+
# strings (because CSVs are dumb and don't store info about data types)
|
70
|
+
# Set first_index to 1 if you want the first element in an array to
|
71
|
+
#
|
72
|
+
def json_hash_to_flat_csv_row_hash(json_hash)
|
73
|
+
flat = flatten_hash(json_hash)
|
74
|
+
# Convert values to strings because in the CSV file, all values are strings
|
75
|
+
flat.each { |key, val| flat[key] = val.nil? ? '' : val.to_s }
|
76
|
+
flat
|
77
|
+
end
|
44
78
|
|
45
|
-
|
46
|
-
|
47
|
-
|
79
|
+
# This method calls itself recursively while flattening a hash, and during
|
80
|
+
# this sequence of calls the obj param may either be a hash or an array.
|
81
|
+
def flatten_hash(obj, parent_path = '', flat_hash_to_build = {})
|
82
|
+
if obj.is_a?(Hash)
|
83
|
+
obj.each do |key, val|
|
84
|
+
if key_contains_unallowed_characters?(key)
|
85
|
+
raise ArgumentError, 'Cannot deal with hash keys that contain "[" or "]" or "." because these characters have special meanings in CSV headers.'
|
86
|
+
end
|
87
|
+
path = parent_path + (parent_path.empty? ? '' : '.') + key
|
88
|
+
flatten_hash(val, path, flat_hash_to_build)
|
89
|
+
end
|
90
|
+
elsif obj.is_a?(Array)
|
91
|
+
obj.each_with_index do |el, index|
|
92
|
+
path = parent_path + "[#{index}]"
|
93
|
+
flatten_hash(el, path, flat_hash_to_build)
|
94
|
+
end
|
95
|
+
else
|
96
|
+
flat_hash_to_build[parent_path] = obj unless obj.nil? || obj == '' # ignore nil or empty string values
|
97
|
+
end
|
98
|
+
|
99
|
+
flat_hash_to_build
|
100
|
+
end
|
101
|
+
|
102
|
+
def key_contains_unallowed_characters?(key)
|
103
|
+
return true if key.index('[') || key.index(']') || key.index('.')
|
104
|
+
false
|
105
|
+
end
|
48
106
|
end
|
49
107
|
|
50
108
|
end
|
data/lib/json_csv/version.rb
CHANGED
data/lib/json_csv.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric O'Hanlon
|
@@ -88,7 +88,7 @@ extra_rdoc_files: []
|
|
88
88
|
files:
|
89
89
|
- README.md
|
90
90
|
- lib/json_csv.rb
|
91
|
-
- lib/json_csv/array_notation.rb
|
91
|
+
- lib/json_csv/csv_builder.rb
|
92
92
|
- lib/json_csv/csv_to_json.rb
|
93
93
|
- lib/json_csv/json_to_csv.rb
|
94
94
|
- lib/json_csv/utils.rb
|
@@ -1,23 +0,0 @@
|
|
1
|
-
module JsonCsv
|
2
|
-
module ArrayNotation
|
3
|
-
BRACKETS = 'BRACKETS'.freeze
|
4
|
-
DASH = 'DASH'.freeze
|
5
|
-
|
6
|
-
VALID_ARRAY_NOTATIONS = [BRACKETS, DASH].freeze
|
7
|
-
|
8
|
-
def self.bracket_header_to_dash_header(bracket_header)
|
9
|
-
# e.g. replace occurrences of '[1]' with '-1'
|
10
|
-
bracket_header.gsub(/(\[(\d+)\])/, '-\2')
|
11
|
-
end
|
12
|
-
|
13
|
-
def self.dash_header_to_bracket_header(dash_header)
|
14
|
-
# e.g. replace occurrences of '-1' with '[1]'
|
15
|
-
dash_header.gsub(/(-(\d+))/, '[\2]')
|
16
|
-
end
|
17
|
-
|
18
|
-
def self.raise_error_if_invalid_array_notation_value!(error_class, array_notation)
|
19
|
-
raise error_class, "Invalid array notation. Must be one of #{VALID_ARRAY_NOTATIONS.join(' or ')}." unless VALID_ARRAY_NOTATIONS.include?(array_notation)
|
20
|
-
end
|
21
|
-
|
22
|
-
end
|
23
|
-
end
|