json_csv 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/json_csv/csv_to_json.rb +111 -0
- data/lib/json_csv/json_to_csv.rb +46 -0
- data/lib/json_csv/version.rb +1 -1
- data/lib/json_csv.rb +6 -3
- data/lib/tasks/json_csv/ci.rake +13 -1
- metadata +45 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f70535d234603ac01375388383a3823da0f73673
|
4
|
+
data.tar.gz: cdae7c0ddfa419662a18a097a7373e8d196a1f48
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8793368170c8003b8bea3af8b6e84e410973589458c8ac0d244702bed2d779ee914c24c028929053ef10e4cc4ce3d8cc054324700a453d3858a95401df4d402c
|
7
|
+
data.tar.gz: 871e4bfcfb63b869b60f9bc413c32823f118f2bfaec4296032ea7b7bffa07876ff0db5e66cf782b2c9d76800a0fd81e8b38fd046120ee782f18390657c20fd69
|
@@ -0,0 +1,111 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
module JsonCsv
  # Converts flat CSV data, whose column headers are json paths such as
  # "related_books[1].notes_from_reviewers[0]", into nested hashes and arrays.
  module CsvToJson
    TYPE_STRING = 'string'.freeze
    TYPE_INTEGER = 'integer'.freeze
    TYPE_FLOAT = 'float'.freeze
    TYPE_BOOLEAN = 'boolean'.freeze
    FIELD_CASTING_TYPES = [TYPE_STRING, TYPE_INTEGER, TYPE_FLOAT, TYPE_BOOLEAN].freeze

    # Whole-string patterns used when validating values before casting.
    # \A and \z are used instead of ^ and $ because ^/$ match at every line
    # boundary, which would let a multi-line value such as "12\nabc" through.
    INTEGER_PATTERN = /\A-?[0-9]+\z/
    FLOAT_PATTERN = /\A-?(?:[0-9]+(?:\.[0-9]+)?|\.[0-9]+)\z/

    # Takes flat csv data and yields to a block for each row,
    # presenting that row as un-flattened json.
    # This method works for large CSVs and uses very little memory
    # because it only keeps one row in memory at a time.
    #
    # @param path_to_csv [String] path of a CSV file whose first row holds the headers
    # @param field_casting_rules [Hash] maps a field casting rule pattern
    #   (e.g. "related_books[x].page_count") to one of FIELD_CASTING_TYPES
    # @yield [hierarchical_hash, i] the nested hash built from one row and the
    #   zero-based index of that row
    # @return [Enumerator] when no block is given; otherwise whatever CSV.foreach returns
    def csv_file_to_hierarchical_json_hash(path_to_csv, field_casting_rules = {})
      return enum_for(__method__, path_to_csv, field_casting_rules) unless block_given?

      i = 0
      CSV.foreach(path_to_csv, headers: true) do |row_data_hash|
        hierarchical_hash = {}
        row_data_hash.each do |key, value|
          next if value.nil? || value == '' # ignore nil or empty string values
          put_value_at_json_path(hierarchical_hash, key, value, field_casting_rules)
        end
        yield hierarchical_hash, i
        i += 1
      end
    end

    # For the given obj, puts the given value at the given json_path,
    # creating nested hashes/arrays as needed. The path is parsed once and
    # then walked from the outermost container to the innermost one.
    #
    # @param obj [Hash, Array] container that is modified in place
    # @param json_path [String] path, relative to obj, at which to store value
    # @param value [Object] value to store (cast first when a rule matches)
    # @param field_casting_rules [Hash] field casting rule pattern => cast type
    # @param full_json_path_from_top [String] path used to look up the casting
    #   rule; defaults to json_path
    # @raise [ArgumentError] if json_path contains no path pieces, or if
    #   casting the value fails
    def put_value_at_json_path(obj, json_path, value, field_casting_rules = {}, full_json_path_from_top = json_path)
      json_path_pieces = json_path_to_pieces(json_path)
      raise ArgumentError, "Invalid json path: #{json_path.inspect}" if json_path_pieces.empty?

      # Walk down to the container that will hold the final piece. Whether a
      # missing container is created as an array or a hash depends on whether
      # the NEXT piece is an integer index.
      container = obj
      json_path_pieces.each_cons(2) do |piece, next_piece|
        container[piece] ||= (next_piece.is_a?(Integer) ? [] : {})
        container = container[piece]
      end

      # If the full path matches one of the field_casting_rules,
      # then cast this field to the specified cast type.
      rule_pattern = real_json_path_to_field_casting_rule_pattern(full_json_path_from_top)
      container[json_path_pieces.last] =
        if field_casting_rules.key?(rule_pattern)
          apply_field_casting_type(value, field_casting_rules[rule_pattern])
        else
          value
        end
    end

    # Takes a real json_path like "related_books[1].notes_from_reviewers[0]" and
    # converts it to a field_casting_rule_pattern like: "related_books[x].notes_from_reviewers[x]"
    #
    # Only bracketed array indices are replaced, so digits that are part of a
    # field name (e.g. "isbn13") are left untouched.
    def real_json_path_to_field_casting_rule_pattern(full_json_path_from_top)
      full_json_path_from_top.gsub(/\[\d+\]/, '[x]')
    end

    # Casts the given string value to the given field casting type.
    #
    # @param value [String] raw value, as read from the CSV
    # @param field_casting_type [String] one of FIELD_CASTING_TYPES
    # @return [String, Integer, Float, Boolean] the cast value
    # @raise [ArgumentError] if the cast type is unknown or the value is not
    #   a valid representation of that type
    def apply_field_casting_type(value, field_casting_type)
      unless FIELD_CASTING_TYPES.include?(field_casting_type)
        raise ArgumentError, "Invalid cast type #{field_casting_type}"
      end

      case field_casting_type
      when TYPE_INTEGER
        raise ArgumentError, "\"#{value}\" is not an integer" unless value =~ INTEGER_PATTERN
        value.to_i
      when TYPE_FLOAT
        raise ArgumentError, "\"#{value}\" is not a float" unless value =~ FLOAT_PATTERN
        value.to_f
      when TYPE_BOOLEAN
        case value.downcase
        when 'true' then true
        when 'false' then false
        else raise ArgumentError, "\"#{value}\" is not a boolean"
        end
      else
        value # fall back to string, which is the original form
      end
    end

    # Takes the given json_path and splits it into individual json path pieces.
    # e.g. Takes "related_books[1].notes_from_reviewers[0]" and converts it to:
    # ["related_books", 1, "notes_from_reviewers", 0]
    def json_path_to_pieces(json_path)
      # split on...
      # '].' (when preceded by a number)
      # OR
      # '[' (when followed by a number)
      # OR
      # ']' (when preceded by a number)
      # OR
      # '.' (always)
      # ...and remove empty elements (which only come up when you're working with
      # a json_path like '[0]', which splits between the first bracket and the number)
      pieces = json_path.split(/(?<=\d)\]\.|\[(?=\d)|(?<=\d)\]|\./).reject(&:empty?)

      # numeric pieces should be actual numbers
      pieces.map do |piece|
        number = piece.to_i
        number.to_s == piece ? number : piece
      end
    end

    # Generates a string json path from the given pieces
    # e.g. Takes ["related_books", 1, "notes_from_reviewers", 0] and converts it to:
    # "related_books[1].notes_from_reviewers[0]"
    def pieces_to_json_path(pieces)
      pieces.each_with_object(''.dup) do |piece, json_path|
        if piece.is_a?(Integer)
          json_path << "[#{piece}]"
        else
          # a '.' separator is only needed once something precedes this piece
          json_path << '.' unless json_path.empty?
          json_path << piece
        end
      end
    end
  end
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'json'
|
2
|
+
|
3
|
+
module JsonCsv
  # Flattens nested json-style hashes/arrays into single-level hashes whose
  # keys are json paths (e.g. "authors[0].name"), suitable for writing as a CSV row.
  module JsonToCsv
    # Characters that may not appear in hash keys because they are the
    # delimiters used when building and parsing json paths.
    UNALLOWED_KEY_CHARACTERS = ['[', ']', '.'].freeze

    # Converts the given json_hash into a flat csv hash, converting all values to
    # strings (because CSVs are dumb and don't store info about data types).
    # Nil and empty-string values are omitted from the result.
    #
    # @param json_hash [Hash] nested structure of hashes, arrays and scalar values
    # @return [Hash{String => String}] json path => stringified value
    # @raise [ArgumentError] if any hash key contains "[", "]" or "."
    def json_hash_to_flat_csv_row_hash(json_hash)
      # flatten_hash never emits nil values, so a plain to_s is sufficient here
      flatten_hash(json_hash).each_with_object({}) do |(path, val), row|
        row[path] = val.to_s
      end
    end

    # This method calls itself recursively while flattening a hash, and during
    # this sequence of calls the obj param may either be a hash or an array
    # (or, at the leaves, a scalar value).
    #
    # @param obj [Hash, Array, Object] the (sub)structure to flatten
    # @param parent_path [String] json path of obj relative to the top-level hash
    # @param flat_hash_to_build [Hash] accumulator that receives path => value pairs
    # @return [Hash] flat_hash_to_build
    # @raise [ArgumentError] if any hash key contains "[", "]" or "."
    def flatten_hash(obj, parent_path = '', flat_hash_to_build = {})
      if obj.is_a?(Hash)
        obj.each do |key, val|
          if key_contains_unallowed_characters?(key)
            raise ArgumentError, 'Cannot deal with hash keys that contain "[", "]" or "." ' \
                                 'because these are used for internal processing.'
          end
          # keys are stringified so that symbol keys work as well as string keys
          path = parent_path.empty? ? key.to_s : "#{parent_path}.#{key}"
          flatten_hash(val, path, flat_hash_to_build)
        end
      elsif obj.is_a?(Array)
        obj.each_with_index do |el, index|
          flatten_hash(el, "#{parent_path}[#{index}]", flat_hash_to_build)
        end
      else
        # ignore nil or empty string values
        flat_hash_to_build[parent_path] = obj unless obj.nil? || obj == ''
      end

      flat_hash_to_build
    end

    # @param key [#to_s] a hash key
    # @return [Boolean] true if the key contains "[", "]" or "."
    def key_contains_unallowed_characters?(key)
      key_string = key.to_s
      UNALLOWED_KEY_CHARACTERS.any? { |char| key_string.include?(char) }
    end
  end
end
|
data/lib/json_csv/version.rb
CHANGED
data/lib/json_csv.rb
CHANGED
data/lib/tasks/json_csv/ci.rake
CHANGED
@@ -13,13 +13,25 @@ namespace :json_csv do
|
|
13
13
|
spec.rspec_opts = ['--backtrace'] if ENV['CI']
|
14
14
|
end
|
15
15
|
|
16
|
+
require 'rubocop/rake_task'
|
17
|
+
|
18
|
+
desc 'Run style checker'
|
19
|
+
RuboCop::RakeTask.new(:rubocop) do |task|
|
20
|
+
task.requires << 'rubocop-rspec'
|
21
|
+
task.fail_on_error = true
|
22
|
+
end
|
16
23
|
rescue LoadError => e
|
17
24
|
puts "[Warning] Exception creating rspec rake tasks. This message can be ignored in environments that intentionally do not pull in the RSpec gem (i.e. production)."
|
18
25
|
puts e
|
19
26
|
end
|
20
27
|
|
21
28
|
desc "CI build"
|
22
|
-
task :
|
29
|
+
task ci: ['json_csv:rubocop'] do
|
30
|
+
Rake::Task["json_csv:rspec"].invoke
|
31
|
+
end
|
32
|
+
|
33
|
+
desc "CI build"
|
34
|
+
task :ci_nocop do
|
23
35
|
Rake::Task["json_csv:rspec"].invoke
|
24
36
|
end
|
25
37
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: json_csv
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eric O'Hanlon
|
@@ -38,6 +38,48 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '3.7'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rubocop
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.51.0
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.51.0
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rubocop-rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 1.20.1
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 1.20.1
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: simplecov
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: 0.15.1
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 0.15.1
|
41
83
|
description: A library for converting json to csv...and back!
|
42
84
|
email: elo2112@columbia.edu
|
43
85
|
executables: []
|
@@ -46,6 +88,8 @@ extra_rdoc_files: []
|
|
46
88
|
files:
|
47
89
|
- README.md
|
48
90
|
- lib/json_csv.rb
|
91
|
+
- lib/json_csv/csv_to_json.rb
|
92
|
+
- lib/json_csv/json_to_csv.rb
|
49
93
|
- lib/json_csv/version.rb
|
50
94
|
- lib/tasks/json_csv.rake
|
51
95
|
- lib/tasks/json_csv/ci.rake
|