json_csv 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/json_csv/csv_to_json.rb +111 -0
- data/lib/json_csv/json_to_csv.rb +46 -0
- data/lib/json_csv/version.rb +1 -1
- data/lib/json_csv.rb +6 -3
- data/lib/tasks/json_csv/ci.rake +13 -1
- metadata +45 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f70535d234603ac01375388383a3823da0f73673
|
4
|
+
data.tar.gz: cdae7c0ddfa419662a18a097a7373e8d196a1f48
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8793368170c8003b8bea3af8b6e84e410973589458c8ac0d244702bed2d779ee914c24c028929053ef10e4cc4ce3d8cc054324700a453d3858a95401df4d402c
|
7
|
+
data.tar.gz: 871e4bfcfb63b869b60f9bc413c32823f118f2bfaec4296032ea7b7bffa07876ff0db5e66cf782b2c9d76800a0fd81e8b38fd046120ee782f18390657c20fd69
|
@@ -0,0 +1,111 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
module JsonCsv
  # Converts flat CSV rows, whose headers are JSON paths such as
  # "related_books[1].notes_from_reviewers[0]", back into nested
  # hashes and arrays. Values can optionally be cast from their CSV
  # string form to integers, floats or booleans.
  module CsvToJson
    TYPE_STRING = 'string'.freeze
    TYPE_INTEGER = 'integer'.freeze
    TYPE_FLOAT = 'float'.freeze
    TYPE_BOOLEAN = 'boolean'.freeze
    FIELD_CASTING_TYPES = [TYPE_STRING, TYPE_INTEGER, TYPE_FLOAT, TYPE_BOOLEAN].freeze

    # Whole-string patterns for numeric casts. \A and \z are used instead of
    # ^ and $ because the latter match at every line break, which would let a
    # multi-line value such as "abc\n123" pass validation.
    # A leading minus sign is accepted so that negative numbers written out by
    # the JSON-to-CSV direction can be read back in.
    INTEGER_PATTERN = /\A-?[0-9]+\z/.freeze
    FLOAT_PATTERN = /\A-?(?:[0-9]+(?:\.[0-9]+)?|\.[0-9]+)\z/.freeze

    # Splits a json path on...
    #   '].' (when preceded by a number)
    #   OR
    #   '[' (when followed by a number)
    #   OR
    #   ']' (when preceded by a number)
    #   OR
    #   '.' (always)
    JSON_PATH_SPLIT_PATTERN = /(?<=\d)\]\.|\[(?=\d)|(?<=\d)\]|\./.freeze

    # Reads the CSV file at path_to_csv (the first line is the header row) and
    # yields once per data row with two arguments: the row as an un-flattened,
    # hierarchical hash, and the zero-based row index.
    #
    # Only one row is held in memory at a time, so this works for large files.
    # Cells that are nil or empty strings are skipped.
    #
    # field_casting_rules maps a path pattern (e.g. "books[x].page_count") to
    # one of FIELD_CASTING_TYPES.
    #
    # Returns an Enumerator over [hash, index] pairs when no block is given.
    def csv_file_to_hierarchical_json_hash(path_to_csv, field_casting_rules = {})
      return to_enum(__method__, path_to_csv, field_casting_rules) unless block_given?

      row_index = 0
      CSV.foreach(path_to_csv, headers: true) do |row|
        hierarchical_hash = {}
        row.each do |json_path, value|
          next if value.nil? || value == '' # ignore nil or empty string values
          put_value_at_json_path(hierarchical_hash, json_path, value, field_casting_rules)
        end
        yield hierarchical_hash, row_index
        row_index += 1
      end
    end

    # For the given obj, puts the given value at the given json_path,
    # creating nested elements as needed. This method calls itself
    # recursively when placing a value at a nested path, and during
    # this sequence of calls the obj param may either be a hash or an array.
    #
    # full_json_path_from_top always carries the original, complete path so
    # that the casting rule lookup at the leaf sees the whole path rather than
    # the remaining tail.
    #
    # Raises ArgumentError when json_path contains no path pieces.
    def put_value_at_json_path(obj, json_path, value, field_casting_rules = {}, full_json_path_from_top = json_path)
      pieces = json_path_to_pieces(json_path)
      raise ArgumentError, 'json_path must contain at least one path piece' if pieces.empty?

      current_piece = pieces[0]

      if pieces.length == 1
        # If the full_json_path_from_top matches one of the field_casting_rules,
        # then cast this field to the specified cast type
        rule_pattern = real_json_path_to_field_casting_rule_pattern(full_json_path_from_top)
        obj[current_piece] =
          if field_casting_rules.key?(rule_pattern)
            apply_field_casting_type(value, field_casting_rules[rule_pattern])
          else
            value
          end
      else
        # The next piece decides the container type: an integer piece is an
        # array index, anything else is a hash key.
        obj[current_piece] ||= (pieces[1].is_a?(Integer) ? [] : {})
        put_value_at_json_path(
          obj[current_piece], pieces_to_json_path(pieces[1..-1]), value, field_casting_rules, full_json_path_from_top
        )
      end
    end

    # Takes a real json_path like "related_books[1].notes_from_reviewers[0]" and
    # converts it to a field_casting_rule_pattern like: "related_books[x].notes_from_reviewers[x]"
    #
    # Only bracketed array indexes are replaced; digits that are part of a
    # field name (e.g. "isbn13") are left untouched.
    def real_json_path_to_field_casting_rule_pattern(full_json_path_from_top)
      full_json_path_from_top.gsub(/\[\d+\]/, '[x]')
    end

    # Casts the given string value to field_casting_type, which must be one of
    # FIELD_CASTING_TYPES. The string type returns the value unchanged.
    #
    # Raises ArgumentError when the cast type is unknown or when the value is
    # not a valid representation of the requested type.
    def apply_field_casting_type(value, field_casting_type)
      unless FIELD_CASTING_TYPES.include?(field_casting_type)
        raise ArgumentError, "Invalid cast type #{field_casting_type}"
      end

      case field_casting_type
      when TYPE_INTEGER
        raise ArgumentError, "\"#{value}\" is not an integer" unless value =~ INTEGER_PATTERN
        value.to_i
      when TYPE_FLOAT
        raise ArgumentError, "\"#{value}\" is not a float" unless value =~ FLOAT_PATTERN
        value.to_f
      when TYPE_BOOLEAN
        case value.downcase
        when 'true' then true
        when 'false' then false
        else raise ArgumentError, "\"#{value}\" is not a boolean"
        end
      else
        value # fall back to string, which is the original form
      end
    end

    # Takes the given json_path and splits it into individual json path pieces.
    # e.g. Takes "related_books[1].notes_from_reviewers[0]" and converts it to:
    # ["related_books", 1, "notes_from_reviewers", 0]
    def json_path_to_pieces(json_path)
      # Empty elements only come up when you're working with a json_path
      # like '[0]', which splits between the first bracket and the number.
      pieces = json_path.split(JSON_PATH_SPLIT_PATTERN).reject { |piece| piece == '' }
      # numeric pieces should be actual numbers
      pieces.map { |piece| piece.to_i.to_s == piece ? piece.to_i : piece }
    end

    # Generates a string json path from the given pieces
    # e.g. Takes ["related_books", 1, "notes_from_reviewers", 0] and converts it to:
    # "related_books[1].notes_from_reviewers[0]"
    def pieces_to_json_path(pieces)
      pieces.each_with_object(''.dup) do |piece, json_path|
        if piece.is_a?(Integer)
          json_path << "[#{piece}]"
        else
          json_path << '.' unless json_path.empty?
          json_path << piece
        end
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
module JsonCsv
  # Flattens nested hashes and arrays into a single-level hash whose keys are
  # JSON paths (e.g. "related_books[1].title"), suitable for one CSV row.
  module JsonToCsv
    # Characters that are part of the path syntax and therefore cannot appear
    # inside a hash key.
    UNALLOWED_KEY_CHARACTERS = ['[', ']', '.'].freeze

    # Converts the given json_hash into a flat csv hash, converting all values to
    # strings (because CSVs are dumb and don't store info about data types).
    # Array elements are addressed with zero-based indexes. Values that are nil
    # or empty strings are left out of the result.
    #
    # Raises ArgumentError if any hash key contains "[", "]" or ".".
    def json_hash_to_flat_csv_row_hash(json_hash)
      # Build a fresh hash rather than reassigning entries while iterating.
      flatten_hash(json_hash).each_with_object({}) do |(path, val), row|
        row[path] = val.nil? ? '' : val.to_s
      end
    end

    # Adds an entry to flat_hash_to_build for every leaf value found under
    # obj, keyed by its full path, and returns flat_hash_to_build.
    #
    # This method calls itself recursively while flattening a hash, and during
    # this sequence of calls the obj param may either be a hash or an array
    # (or, at a leaf, any other value). Hash keys are converted to strings,
    # so symbol keys are accepted.
    def flatten_hash(obj, parent_path = '', flat_hash_to_build = {})
      if obj.is_a?(Hash)
        obj.each do |key, val|
          if key_contains_unallowed_characters?(key)
            raise ArgumentError, 'Cannot deal with hash keys that contain "[", "]" or "." ' \
                                 'because these are used for internal processing.'
          end
          path = parent_path.empty? ? key.to_s : "#{parent_path}.#{key}"
          flatten_hash(val, path, flat_hash_to_build)
        end
      elsif obj.is_a?(Array)
        obj.each_with_index do |el, index|
          flatten_hash(el, "#{parent_path}[#{index}]", flat_hash_to_build)
        end
      else
        # ignore nil or empty string values
        flat_hash_to_build[parent_path] = obj unless obj.nil? || obj == ''
      end

      flat_hash_to_build
    end

    # Returns true when the key contains any character reserved by the path
    # syntax ("[", "]" or "."), false otherwise.
    def key_contains_unallowed_characters?(key)
      key_string = key.to_s
      UNALLOWED_KEY_CHARACTERS.any? { |char| key_string.include?(char) }
    end
  end
end
|
data/lib/json_csv/version.rb
CHANGED
data/lib/json_csv.rb
CHANGED
data/lib/tasks/json_csv/ci.rake
CHANGED
@@ -13,13 +13,25 @@ namespace :json_csv do
|
|
13
13
|
spec.rspec_opts = ['--backtrace'] if ENV['CI']
|
14
14
|
end
|
15
15
|
|
16
|
+
require 'rubocop/rake_task'
|
17
|
+
|
18
|
+
desc 'Run style checker'
|
19
|
+
RuboCop::RakeTask.new(:rubocop) do |task|
|
20
|
+
task.requires << 'rubocop-rspec'
|
21
|
+
task.fail_on_error = true
|
22
|
+
end
|
16
23
|
rescue LoadError => e
|
17
24
|
puts "[Warning] Exception creating rspec rake tasks. This message can be ignored in environments that intentionally do not pull in the RSpec gem (i.e. production)."
|
18
25
|
puts e
|
19
26
|
end
|
20
27
|
|
21
28
|
desc "CI build"
|
22
|
-
task :
|
29
|
+
task ci: ['json_csv:rubocop'] do
|
30
|
+
Rake::Task["json_csv:rspec"].invoke
|
31
|
+
end
|
32
|
+
|
33
|
+
desc "CI build"
|
34
|
+
task :ci_nocop do
|
23
35
|
Rake::Task["json_csv:rspec"].invoke
|
24
36
|
end
|
25
37
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric O'Hanlon
|
@@ -38,6 +38,48 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '3.7'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rubocop
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.51.0
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.51.0
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rubocop-rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 1.20.1
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 1.20.1
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: simplecov
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 0.15.1
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 0.15.1
|
41
83
|
description: A library for converting json to csv...and back!
|
42
84
|
email: elo2112@columbia.edu
|
43
85
|
executables: []
|
@@ -46,6 +88,8 @@ extra_rdoc_files: []
|
|
46
88
|
files:
|
47
89
|
- README.md
|
48
90
|
- lib/json_csv.rb
|
91
|
+
- lib/json_csv/csv_to_json.rb
|
92
|
+
- lib/json_csv/json_to_csv.rb
|
49
93
|
- lib/json_csv/version.rb
|
50
94
|
- lib/tasks/json_csv.rake
|
51
95
|
- lib/tasks/json_csv/ci.rake
|