structured_csv 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
metadata CHANGED
@@ -1,26 +1,143 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: structured_csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-05-14 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2021-05-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: csv
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3.1'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.1'
27
+ - !ruby/object:Gem::Dependency
28
+ name: yaml
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.1'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: byebug
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '11.1'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '11.1'
55
+ - !ruby/object:Gem::Dependency
56
+ name: guard
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '2.17'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '2.17'
69
+ - !ruby/object:Gem::Dependency
70
+ name: guard-rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '4.7'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '4.7'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '13.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '13.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rspec
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '3.10'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '3.10'
111
+ - !ruby/object:Gem::Dependency
112
+ name: simplecov
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '0.21'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '0.21'
13
125
  description: Library to process structured CSV files
14
126
  email:
15
127
  - open.source@ribose.com
16
128
  executables:
129
+ - csv_join
17
130
  - csv_join.rb
131
+ - structured_csv_to_yaml
18
132
  - structured_csv_to_yaml.rb
19
133
  extensions: []
20
134
  extra_rdoc_files: []
21
135
  files:
136
+ - ".editorconfig"
137
+ - ".gitattributes"
22
138
  - ".github/workflows/main.yml"
23
139
  - ".gitignore"
140
+ - ".hound.yml"
24
141
  - ".rspec"
25
142
  - ".rubocop.yml"
26
143
  - CODE_OF_CONDUCT.md
@@ -29,9 +146,14 @@ files:
29
146
  - Rakefile
30
147
  - bin/console
31
148
  - bin/setup
149
+ - exe/csv_join
32
150
  - exe/csv_join.rb
151
+ - exe/structured_csv_to_yaml
33
152
  - exe/structured_csv_to_yaml.rb
34
153
  - lib/structured_csv.rb
154
+ - lib/structured_csv/common.rb
155
+ - lib/structured_csv/csv2yaml.rb
156
+ - lib/structured_csv/csv_join.rb
35
157
  - lib/structured_csv/version.rb
36
158
  - samples/T-SP-E.212B-2018.et.csv
37
159
  - samples/T-SP-E.212B-2018.et.yaml
@@ -53,14 +175,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
53
175
  requirements:
54
176
  - - ">="
55
177
  - !ruby/object:Gem::Version
56
- version: 2.4.0
178
+ version: 2.6.7
57
179
  required_rubygems_version: !ruby/object:Gem::Requirement
58
180
  requirements:
59
181
  - - ">="
60
182
  - !ruby/object:Gem::Version
61
183
  version: '0'
62
184
  requirements: []
63
- rubygems_version: 3.0.3
185
+ rubygems_version: 3.1.4
64
186
  signing_key:
65
187
  specification_version: 4
66
188
  summary: Library to process structured CSV files
data/exe/csv_join.rb DELETED
@@ -1,95 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'csv'
3
- require 'yaml'
4
- require 'pathname'
5
-
6
- module CsvJoin
7
-
8
- def self.join(csv, section_name)
9
- first_row = nil
10
- last_row = -1
11
-
12
- puts "section_name #{section_name}"
13
-
14
- csv.each_with_index do |row, index|
15
- if first_row.nil? && is_start_of_portion?(row, section_name)
16
- puts "found first"
17
- first_row = index+1
18
- next
19
- end
20
-
21
- if !first_row.nil? && is_row_empty?(row)
22
- puts "found last"
23
- last_row = index
24
- break
25
- end
26
- end
27
-
28
- puts "first #{first_row} last #{last_row}"
29
- csv[first_row..last_row]
30
- end
31
-
32
- def self.load_csv(csvfile)
33
- # puts csvfile
34
-
35
- content = File.read(csvfile, encoding: "bom|utf-8").scrub
36
- CSV.parse(content, liberal_parsing: true, encoding: "UTF-8")
37
- end
38
-
39
- def self.convert(csvdir, outfile)
40
- raise "first argument must be a directory!" unless File.directory?(csvdir)
41
-
42
- csv = CSV.open(outfile, "wb", encoding: "UTF-8")
43
-
44
- csvfiles = Dir.glob(File.join(csvdir, "**", "*.csv")).sort
45
- raise "directory must contain .csv files!" if csvfiles.empty?
46
-
47
- # Assume all files use the same header structure as the first CSV file
48
- header = []
49
- csvheader = ""
50
-
51
- csvfiles.each do |csvfile|
52
- content = load_csv(csvfile)
53
-
54
- csvheader = content.shift
55
- if header.empty?
56
- header = ['name'] + csvheader
57
- csv << header
58
- end
59
-
60
- basename = Pathname.new(csvfile).basename.sub_ext('').to_s
61
- content.each do |filerow|
62
- row = []
63
- filerow.each do |value|
64
- row << case value
65
- when String
66
- value.strip
67
- else
68
- value
69
- end
70
- end
71
-
72
- all_empty = row.all? do |f|
73
- f.nil? || f.empty?
74
- end
75
- next if all_empty
76
-
77
- row.unshift(basename)
78
-
79
- csv << row
80
- end
81
- end
82
-
83
- csv
84
- end
85
-
86
- end
87
-
88
- csvdir = ARGV.pop
89
- outfile = Pathname.new(csvdir).sub_ext(".csv").to_s
90
-
91
- # puts outfile
92
-
93
- CsvJoin.convert(csvdir, outfile)
94
-
95
- # puts CsvJoin.convert(csvdir)
@@ -1,254 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'csv'
3
- require 'yaml'
4
-
5
- module CsvToStructuredHash
6
-
7
- def self.get_portion(csv, section_name)
8
- first_row = nil
9
- last_row = -1
10
- data_meta = {}
11
-
12
- puts "section_name #{section_name}"
13
-
14
- csv.each_with_index do |row, index|
15
- if first_row.nil? && is_start_of_portion?(row, section_name)
16
- # puts"found first"
17
-
18
- row[1].split(';').each do |opt|
19
- k, v = opt.split('=')
20
- data_meta[k.to_sym] = v
21
- end if row[1] && !row[1].empty?
22
-
23
- first_row = index+1
24
- next
25
- end
26
-
27
- if !first_row.nil? && is_row_empty?(row)
28
- # puts "found last"
29
- last_row = index
30
- break
31
- end
32
- end
33
-
34
- # puts "first #{first_row} last #{last_row}"
35
- {
36
- first_row: first_row,
37
- last_row: last_row,
38
- rows: csv[first_row..last_row],
39
- meta: data_meta
40
- }
41
- end
42
-
43
- def self.is_start_of_portion?(row, section_name)
44
- return false if row.first.nil?
45
- row.first.strip.to_s == section_name.to_s
46
- end
47
-
48
- def self.is_row_empty?(row)
49
- row.map do |f|
50
- f.is_a?(String) ? f.strip : f
51
- end.all?(&:nil?)
52
- end
53
-
54
- def self.get_csv(csv_filename)
55
- content = File.read(csv_filename, encoding: "bom|utf-8")
56
- CSV.parse(content, liberal_parsing: true, encoding: "UTF-8")
57
- end
58
-
59
- def self.split_header_key_type(header_field)
60
- field_name = ""
61
- field_type = CAST_DEFAULT_TYPE
62
-
63
- # puts header_field
64
- arr = header_field.match(/\A(.*)\[(.*)\]\Z/)
65
-
66
- if arr.nil?
67
- field_name = header_field
68
- else
69
- field_name = arr[1]
70
- field_type = arr[2]
71
- end
72
-
73
- {
74
- name: field_name,
75
- type: field_type
76
- }
77
- end
78
-
79
- CAST_DEFAULT_TYPE = "string".freeze
80
-
81
- def self.cast_type(value, type_in_string)
82
- return if value.nil?
83
- type = type_in_string.downcase
84
-
85
- case type
86
- when "boolean"
87
- if value.downcase == "true"
88
- true
89
- elsif value.downcase == "false"
90
- false
91
- end
92
- when "integer"
93
- value.to_s.strip.to_i
94
- when "string"
95
- value.to_s.strip
96
- when /^array\{(.*)\}/
97
- val_type = Regexp.last_match[1] || CAST_DEFAULT_TYPE
98
- value.split(";").map do |v|
99
- # puts "cast type as #{v}, #{val_type.to_s}"
100
- cast_type(v, val_type.to_s)
101
- end
102
- else
103
- value.to_s
104
- end
105
- end
106
-
107
- def self.parse_metadata(rows)
108
- hash = {}
109
-
110
- rows.each_with_index do |row,index|
111
- # Skip all the empty rows
112
- next if is_row_empty?(row)
113
-
114
- name_type = split_header_key_type(row.first)
115
- key = name_type[:name]
116
- type = name_type[:type]
117
-
118
- value = cast_type(row[1], type)
119
- hash[key] = value
120
- end
121
-
122
- # puts "=============================METADATA================="
123
- # pp hash
124
- normalize_namespaces(hash)
125
- end
126
-
127
- def self.parse_data(rows, data_meta)
128
- header = []
129
- data_name = data_meta[:name]
130
- data_type = data_meta[:type] || "hash"
131
- data_key = data_meta[:key]
132
-
133
- base_structure = case data_type
134
- when "hash"
135
- {}
136
- when "array"
137
- []
138
- end
139
-
140
- rows.each_with_index do |row,index|
141
- # Assume the first column is always the key
142
- if index == 0
143
- # puts "row #{row}"
144
- header = row.map do |field|
145
- split_header_key_type(field) unless field.nil?
146
- end.compact
147
-
148
- if data_type == "hash" && data_key.nil?
149
- data_key = header.first
150
- end
151
-
152
- next
153
- end
154
- # puts "header #{header.inspect}"
155
-
156
- # Skip all the empty rows
157
- next if is_row_empty?(row)
158
-
159
- # Skip if no key value
160
- next if row[0].nil?
161
-
162
- header_names = header.inject([]) do |acc,v|
163
- acc << v[:name]
164
- end
165
-
166
- row_values = []
167
- header.each_with_index do |h, i|
168
- v = row[i]
169
- v = v.strip unless v.nil?
170
- row_values[i] = cast_type(v, h[:type])
171
- end
172
-
173
- k = row_values[0]
174
- d = Hash[header_names[0..-1].zip(row_values[0..-1])]
175
- # .transform_keys { |k| k.to_sym }
176
-
177
- # Remove keys if they point to nil
178
- d.keys.each do |k|
179
- d.delete(k) if d[k].nil?
180
- end
181
-
182
- case data_type
183
- when "hash"
184
- unless base_structure[k].nil?
185
- puts "[WARNING] there is already data inside key [#{k}] -- maybe you should set type=array?"
186
- end
187
- base_structure[k] = normalize_namespaces(d)
188
- when "array"
189
- base_structure << normalize_namespaces(d)
190
- end
191
- end
192
-
193
- if data_name
194
- base_structure = {
195
- data_name => base_structure
196
- }
197
- end
198
-
199
- base_structure
200
- end
201
-
202
- def self.convert(csv_filename)
203
- raw_data = get_csv(csv_filename)
204
-
205
- metadata_section = get_portion(raw_data, "METADATA")
206
- data_section = get_portion(raw_data, "DATA")
207
-
208
- # puts '----------'
209
- # pp data_section[:rows]
210
- # puts '----------'
211
-
212
- {
213
- "metadata" => parse_metadata(metadata_section[:rows]),
214
- "data" => parse_data(data_section[:rows], data_section[:meta])
215
- }
216
- end
217
-
218
- # Structure all child hashes if the key is namespaced.
219
- # e.g. { "hello.me" => data } becomes
220
- # { "hello" => { "me" => data } }
221
- #
222
- def self.normalize_namespaces(hash)
223
- new_hash = {}
224
-
225
- hash.each_pair do |k, v|
226
- # puts"k (#{k}) v (#{v})"
227
- key_components = k.to_s.split('.')
228
-
229
- level = new_hash
230
- last_component = key_components.pop
231
- key_components.each do |component|
232
- # puts"c (#{component})"
233
- level[component] ||= {}
234
- level = level[component]
235
- end
236
-
237
- level[last_component] = v
238
- end
239
-
240
- new_hash
241
- end
242
- end
243
-
244
- csvfile = ARGV.pop
245
- raise "first argument must be a .csv file!" unless csvfile =~ /\.csv$/
246
-
247
- outfile = csvfile.gsub(/csv$/, "yaml")
248
-
249
- IO.write(
250
- outfile,
251
- CsvToStructuredHash.convert(csvfile).to_yaml
252
- )
253
-
254
- # pp CsvToStructuredHash.convert(filename)