structured_csv 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
metadata CHANGED
@@ -1,26 +1,143 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: structured_csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-05-14 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2021-05-17 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: csv
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '3.1'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '3.1'
27
+ - !ruby/object:Gem::Dependency
28
+ name: yaml
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.1'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.1'
41
+ - !ruby/object:Gem::Dependency
42
+ name: byebug
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '11.1'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '11.1'
55
+ - !ruby/object:Gem::Dependency
56
+ name: guard
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '2.17'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '2.17'
69
+ - !ruby/object:Gem::Dependency
70
+ name: guard-rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '4.7'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '4.7'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rake
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '13.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '13.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: rspec
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: '3.10'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '3.10'
111
+ - !ruby/object:Gem::Dependency
112
+ name: simplecov
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '0.21'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '0.21'
13
125
  description: Library to process structured CSV files
14
126
  email:
15
127
  - open.source@ribose.com
16
128
  executables:
129
+ - csv_join
17
130
  - csv_join.rb
131
+ - structured_csv_to_yaml
18
132
  - structured_csv_to_yaml.rb
19
133
  extensions: []
20
134
  extra_rdoc_files: []
21
135
  files:
136
+ - ".editorconfig"
137
+ - ".gitattributes"
22
138
  - ".github/workflows/main.yml"
23
139
  - ".gitignore"
140
+ - ".hound.yml"
24
141
  - ".rspec"
25
142
  - ".rubocop.yml"
26
143
  - CODE_OF_CONDUCT.md
@@ -29,9 +146,14 @@ files:
29
146
  - Rakefile
30
147
  - bin/console
31
148
  - bin/setup
149
+ - exe/csv_join
32
150
  - exe/csv_join.rb
151
+ - exe/structured_csv_to_yaml
33
152
  - exe/structured_csv_to_yaml.rb
34
153
  - lib/structured_csv.rb
154
+ - lib/structured_csv/common.rb
155
+ - lib/structured_csv/csv2yaml.rb
156
+ - lib/structured_csv/csv_join.rb
35
157
  - lib/structured_csv/version.rb
36
158
  - samples/T-SP-E.212B-2018.et.csv
37
159
  - samples/T-SP-E.212B-2018.et.yaml
@@ -53,14 +175,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
53
175
  requirements:
54
176
  - - ">="
55
177
  - !ruby/object:Gem::Version
56
- version: 2.4.0
178
+ version: 2.6.7
57
179
  required_rubygems_version: !ruby/object:Gem::Requirement
58
180
  requirements:
59
181
  - - ">="
60
182
  - !ruby/object:Gem::Version
61
183
  version: '0'
62
184
  requirements: []
63
- rubygems_version: 3.0.3
185
+ rubygems_version: 3.1.4
64
186
  signing_key:
65
187
  specification_version: 4
66
188
  summary: Library to process structured CSV files
data/exe/csv_join.rb DELETED
@@ -1,95 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'csv'
3
- require 'yaml'
4
- require 'pathname'
5
-
6
- module CsvJoin
7
-
8
- def self.join(csv, section_name)
9
- first_row = nil
10
- last_row = -1
11
-
12
- puts "section_name #{section_name}"
13
-
14
- csv.each_with_index do |row, index|
15
- if first_row.nil? && is_start_of_portion?(row, section_name)
16
- puts "found first"
17
- first_row = index+1
18
- next
19
- end
20
-
21
- if !first_row.nil? && is_row_empty?(row)
22
- puts "found last"
23
- last_row = index
24
- break
25
- end
26
- end
27
-
28
- puts "first #{first_row} last #{last_row}"
29
- csv[first_row..last_row]
30
- end
31
-
32
- def self.load_csv(csvfile)
33
- # puts csvfile
34
-
35
- content = File.read(csvfile, encoding: "bom|utf-8").scrub
36
- CSV.parse(content, liberal_parsing: true, encoding: "UTF-8")
37
- end
38
-
39
- def self.convert(csvdir, outfile)
40
- raise "first argument must be a directory!" unless File.directory?(csvdir)
41
-
42
- csv = CSV.open(outfile, "wb", encoding: "UTF-8")
43
-
44
- csvfiles = Dir.glob(File.join(csvdir, "**", "*.csv")).sort
45
- raise "directory must contain .csv files!" if csvfiles.empty?
46
-
47
- # Assume all files use the same header structure as the first CSV file
48
- header = []
49
- csvheader = ""
50
-
51
- csvfiles.each do |csvfile|
52
- content = load_csv(csvfile)
53
-
54
- csvheader = content.shift
55
- if header.empty?
56
- header = ['name'] + csvheader
57
- csv << header
58
- end
59
-
60
- basename = Pathname.new(csvfile).basename.sub_ext('').to_s
61
- content.each do |filerow|
62
- row = []
63
- filerow.each do |value|
64
- row << case value
65
- when String
66
- value.strip
67
- else
68
- value
69
- end
70
- end
71
-
72
- all_empty = row.all? do |f|
73
- f.nil? || f.empty?
74
- end
75
- next if all_empty
76
-
77
- row.unshift(basename)
78
-
79
- csv << row
80
- end
81
- end
82
-
83
- csv
84
- end
85
-
86
- end
87
-
88
- csvdir = ARGV.pop
89
- outfile = Pathname.new(csvdir).sub_ext(".csv").to_s
90
-
91
- # puts outfile
92
-
93
- CsvJoin.convert(csvdir, outfile)
94
-
95
- # puts CsvJoin.convert(csvdir)
@@ -1,254 +0,0 @@
1
- #!/usr/bin/env ruby
2
- require 'csv'
3
- require 'yaml'
4
-
5
- module CsvToStructuredHash
6
-
7
- def self.get_portion(csv, section_name)
8
- first_row = nil
9
- last_row = -1
10
- data_meta = {}
11
-
12
- puts "section_name #{section_name}"
13
-
14
- csv.each_with_index do |row, index|
15
- if first_row.nil? && is_start_of_portion?(row, section_name)
16
- # puts"found first"
17
-
18
- row[1].split(';').each do |opt|
19
- k, v = opt.split('=')
20
- data_meta[k.to_sym] = v
21
- end if row[1] && !row[1].empty?
22
-
23
- first_row = index+1
24
- next
25
- end
26
-
27
- if !first_row.nil? && is_row_empty?(row)
28
- # puts "found last"
29
- last_row = index
30
- break
31
- end
32
- end
33
-
34
- # puts "first #{first_row} last #{last_row}"
35
- {
36
- first_row: first_row,
37
- last_row: last_row,
38
- rows: csv[first_row..last_row],
39
- meta: data_meta
40
- }
41
- end
42
-
43
- def self.is_start_of_portion?(row, section_name)
44
- return false if row.first.nil?
45
- row.first.strip.to_s == section_name.to_s
46
- end
47
-
48
- def self.is_row_empty?(row)
49
- row.map do |f|
50
- f.is_a?(String) ? f.strip : f
51
- end.all?(&:nil?)
52
- end
53
-
54
- def self.get_csv(csv_filename)
55
- content = File.read(csv_filename, encoding: "bom|utf-8")
56
- CSV.parse(content, liberal_parsing: true, encoding: "UTF-8")
57
- end
58
-
59
- def self.split_header_key_type(header_field)
60
- field_name = ""
61
- field_type = CAST_DEFAULT_TYPE
62
-
63
- # puts header_field
64
- arr = header_field.match(/\A(.*)\[(.*)\]\Z/)
65
-
66
- if arr.nil?
67
- field_name = header_field
68
- else
69
- field_name = arr[1]
70
- field_type = arr[2]
71
- end
72
-
73
- {
74
- name: field_name,
75
- type: field_type
76
- }
77
- end
78
-
79
- CAST_DEFAULT_TYPE = "string".freeze
80
-
81
- def self.cast_type(value, type_in_string)
82
- return if value.nil?
83
- type = type_in_string.downcase
84
-
85
- case type
86
- when "boolean"
87
- if value.downcase == "true"
88
- true
89
- elsif value.downcase == "false"
90
- false
91
- end
92
- when "integer"
93
- value.to_s.strip.to_i
94
- when "string"
95
- value.to_s.strip
96
- when /^array\{(.*)\}/
97
- val_type = Regexp.last_match[1] || CAST_DEFAULT_TYPE
98
- value.split(";").map do |v|
99
- # puts "cast type as #{v}, #{val_type.to_s}"
100
- cast_type(v, val_type.to_s)
101
- end
102
- else
103
- value.to_s
104
- end
105
- end
106
-
107
- def self.parse_metadata(rows)
108
- hash = {}
109
-
110
- rows.each_with_index do |row,index|
111
- # Skip all the empty rows
112
- next if is_row_empty?(row)
113
-
114
- name_type = split_header_key_type(row.first)
115
- key = name_type[:name]
116
- type = name_type[:type]
117
-
118
- value = cast_type(row[1], type)
119
- hash[key] = value
120
- end
121
-
122
- # puts "=============================METADATA================="
123
- # pp hash
124
- normalize_namespaces(hash)
125
- end
126
-
127
- def self.parse_data(rows, data_meta)
128
- header = []
129
- data_name = data_meta[:name]
130
- data_type = data_meta[:type] || "hash"
131
- data_key = data_meta[:key]
132
-
133
- base_structure = case data_type
134
- when "hash"
135
- {}
136
- when "array"
137
- []
138
- end
139
-
140
- rows.each_with_index do |row,index|
141
- # Assume the first column is always the key
142
- if index == 0
143
- # puts "row #{row}"
144
- header = row.map do |field|
145
- split_header_key_type(field) unless field.nil?
146
- end.compact
147
-
148
- if data_type == "hash" && data_key.nil?
149
- data_key = header.first
150
- end
151
-
152
- next
153
- end
154
- # puts "header #{header.inspect}"
155
-
156
- # Skip all the empty rows
157
- next if is_row_empty?(row)
158
-
159
- # Skip if no key value
160
- next if row[0].nil?
161
-
162
- header_names = header.inject([]) do |acc,v|
163
- acc << v[:name]
164
- end
165
-
166
- row_values = []
167
- header.each_with_index do |h, i|
168
- v = row[i]
169
- v = v.strip unless v.nil?
170
- row_values[i] = cast_type(v, h[:type])
171
- end
172
-
173
- k = row_values[0]
174
- d = Hash[header_names[0..-1].zip(row_values[0..-1])]
175
- # .transform_keys { |k| k.to_sym }
176
-
177
- # Remove keys if they point to nil
178
- d.keys.each do |k|
179
- d.delete(k) if d[k].nil?
180
- end
181
-
182
- case data_type
183
- when "hash"
184
- unless base_structure[k].nil?
185
- puts "[WARNING] there is already data inside key [#{k}] -- maybe you should set type=array?"
186
- end
187
- base_structure[k] = normalize_namespaces(d)
188
- when "array"
189
- base_structure << normalize_namespaces(d)
190
- end
191
- end
192
-
193
- if data_name
194
- base_structure = {
195
- data_name => base_structure
196
- }
197
- end
198
-
199
- base_structure
200
- end
201
-
202
- def self.convert(csv_filename)
203
- raw_data = get_csv(csv_filename)
204
-
205
- metadata_section = get_portion(raw_data, "METADATA")
206
- data_section = get_portion(raw_data, "DATA")
207
-
208
- # puts '----------'
209
- # pp data_section[:rows]
210
- # puts '----------'
211
-
212
- {
213
- "metadata" => parse_metadata(metadata_section[:rows]),
214
- "data" => parse_data(data_section[:rows], data_section[:meta])
215
- }
216
- end
217
-
218
- # Structure all child hashes if the key is namespaced.
219
- # e.g. { "hello.me" => data } becomes
220
- # { "hello" => { "me" => data } }
221
- #
222
- def self.normalize_namespaces(hash)
223
- new_hash = {}
224
-
225
- hash.each_pair do |k, v|
226
- # puts"k (#{k}) v (#{v})"
227
- key_components = k.to_s.split('.')
228
-
229
- level = new_hash
230
- last_component = key_components.pop
231
- key_components.each do |component|
232
- # puts"c (#{component})"
233
- level[component] ||= {}
234
- level = level[component]
235
- end
236
-
237
- level[last_component] = v
238
- end
239
-
240
- new_hash
241
- end
242
- end
243
-
244
- csvfile = ARGV.pop
245
- raise "first argument must be a .csv file!" unless csvfile =~ /\.csv$/
246
-
247
- outfile = csvfile.gsub(/csv$/, "yaml")
248
-
249
- IO.write(
250
- outfile,
251
- CsvToStructuredHash.convert(csvfile).to_yaml
252
- )
253
-
254
- # pp CsvToStructuredHash.convert(filename)