spout 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -1
- data/README.md +35 -2
- data/lib/spout/actions.rb +25 -13
- data/lib/spout/tasks/engine.rake +211 -55
- data/lib/spout/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b17c5cb1d0059611bb4ebb9807ffb83f5015a765
|
4
|
+
data.tar.gz: 3c3e5d7493590ecfffcb6a7534d73b4ce0f56bee
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 50846a11d41e8654a283a5836bd9d5496745bdf4c0707e2a7494ceca86633ac743ac2f9fda054185a5ebe6f0c72bf218414c4b7d71fa05deda1541ab368f2bad
|
7
|
+
data.tar.gz: 08da58a7d5533b2210144e66d30158e825131a24f968f956cd7d64aae584084b3c97aed6acdb00d47351132158ca44dbab0aa9862874681d010ba4d08438d8b3
|
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,11 @@
|
|
1
|
-
## 0.
|
1
|
+
## 0.2.0 (June 26, 2013)
|
2
|
+
|
3
|
+
### Enhancements
|
4
|
+
- Domains can now be imported using `spout import_domains CSVFILE`
|
5
|
+
- Data Dictionary can now be exported to the Hybrid data dictionary CSV format along with an optional version number:
|
6
|
+
- `spout hybrid [1.0.0]`
|
7
|
+
|
8
|
+
## 0.1.0 (May 21, 2013)
|
2
9
|
|
3
10
|
### Enhancements
|
4
11
|
- Existing Data Dictionaries can be converted to JSON format from a CSV file
|
data/README.md
CHANGED
@@ -34,7 +34,7 @@ spout import data_dictionary.csv
|
|
34
34
|
|
35
35
|
The CSV should contain at minimum the two column headers:
|
36
36
|
|
37
|
-
`id`: This column will give the variable
|
37
|
+
`id`: This column will give the variable its name, and also be used to name the file, i.e. `<id>.json`
|
38
38
|
`folder`: This can be blank, however it is used to place variables into a folder hierarchy. The folder column can contain forward slashes `/` to place a variable into a subfolder. An example may be, `id`: `myvarid`, `folder`: `Demographics/Subfolder` would create a file `variables/Demographics/Subfolder/myvarid.json`
|
39
39
|
|
40
40
|
Other columns that will be interpreted include:
|
@@ -64,6 +64,24 @@ Other columns that will be interpreted include:
|
|
64
64
|
|
65
65
|
All other columns get grouped into a hash labeled `other`.
|
66
66
|
|
67
|
+
#### Importing domains from an existing CSV file
|
68
|
+
|
69
|
+
```
|
70
|
+
spout import_domains data_dictionary_domains.csv
|
71
|
+
```
|
72
|
+
|
73
|
+
The CSV should contain at minimum three column headers:
|
74
|
+
|
75
|
+
`domain_id`: The name of the associated domain for the choice/option.
|
76
|
+
`value`: The value of the choice/option.
|
77
|
+
`display_name`: The display name of the choice/option.
|
78
|
+
|
79
|
+
Other columns that are imported include:
|
80
|
+
|
81
|
+
`description`: A longer description of the choice/option.
|
82
|
+
`folder`: The name of the folder path where the domain resides.
|
83
|
+
|
84
|
+
|
67
85
|
### Test your repository
|
68
86
|
|
69
87
|
If you created your data dictionary repository using `spout new`, you can go ahead and test using:
|
@@ -99,7 +117,7 @@ Then run either `spout test` or `bundle exec rake` to run your tests.
|
|
99
117
|
|
100
118
|
### Create a CSV Data Dictionary from your JSON repository
|
101
119
|
|
102
|
-
Provide an optional version parameter to name the folder the CSVs will be generated in, defaults to 1.0.0
|
120
|
+
Provide an optional version parameter to name the folder the CSVs will be generated in, defaults to 1.0.0.
|
103
121
|
|
104
122
|
```
|
105
123
|
spout export
|
@@ -116,3 +134,18 @@ or
|
|
116
134
|
```
|
117
135
|
bundle exec rake dd:create [VERSION=1.0.0]
|
118
136
|
```
|
137
|
+
|
138
|
+
|
139
|
+
### Export to the Hybrid Data Dictionary format from your JSON repository
|
140
|
+
|
141
|
+
Exporting to a format compatible with [Hybrid](https://github.com/sleepepi/hybrid) is also available.
|
142
|
+
|
143
|
+
```
|
144
|
+
spout hybrid
|
145
|
+
```
|
146
|
+
|
147
|
+
You can optionally provide a version string:
|
148
|
+
|
149
|
+
```
|
150
|
+
spout hybrid [1.0.0]
|
151
|
+
```
|
data/lib/spout/actions.rb
CHANGED
@@ -11,8 +11,12 @@ module Spout
|
|
11
11
|
system "bundle exec rake"
|
12
12
|
when 'import', 'i', 'im', 'imp', '--import', '-i', '-im', '-imp'
|
13
13
|
import_from_csv(argv)
|
14
|
+
when 'import_domain', '--import_domain', 'import_domains', '--import_domains'
|
15
|
+
import_from_csv(argv, 'domains')
|
14
16
|
when 'export', 'e', 'ex', 'exp', '--export', '-e', '-ex', '-exp'
|
15
17
|
new_data_dictionary_export(argv)
|
18
|
+
when 'hybrid', '-hybrid', '--hybrid', 'y', 'hy', '-y', '-hy'
|
19
|
+
new_data_dictionary_export(argv, 'hybrid')
|
16
20
|
else
|
17
21
|
help
|
18
22
|
end
|
@@ -20,7 +24,7 @@ module Spout
|
|
20
24
|
|
21
25
|
protected
|
22
26
|
|
23
|
-
def
|
27
|
+
def csv_usage
|
24
28
|
usage = <<-EOT
|
25
29
|
|
26
30
|
Usage: spout import CSVFILE
|
@@ -28,12 +32,15 @@ Usage: spout import CSVFILE
|
|
28
32
|
The CSVFILE must be the location of a valid CSV file.
|
29
33
|
|
30
34
|
EOT
|
35
|
+
usage
|
36
|
+
end
|
31
37
|
|
38
|
+
# Imports a CSV file into the JSON repository by running the `dd:import`
# rake task.
#
# argv - Array of command-line arguments; argv[1] is used as the CSV path.
# type - Optional import type (String). When non-blank it is passed to rake
#        as `TYPE=<type>`.
#
# Prints the CSV usage text (and returns nil) when the path does not exist;
# otherwise returns the result of `system`.
def import_from_csv(argv, type = "")
  csv_file = argv[1].to_s.strip

  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  unless File.exist?(csv_file)
    puts csv_usage
    return
  end

  command = ['bundle', 'exec', 'rake', 'dd:import', "CSV=#{csv_file}"]
  command << "TYPE=#{type}" unless type.to_s == ''

  # Argument-list form: the path is never interpreted by a shell, so file
  # names containing spaces or shell metacharacters are passed through intact.
  system(*command)
end
|
39
46
|
|
@@ -43,23 +50,28 @@ EOT
|
|
43
50
|
Usage: spout COMMAND [ARGS]
|
44
51
|
|
45
52
|
The most common spout commands are:
|
46
|
-
[n]ew
|
47
|
-
|
48
|
-
|
49
|
-
[
|
50
|
-
[
|
51
|
-
[
|
52
|
-
|
53
|
-
|
53
|
+
[n]ew Create a new Spout dictionary.
|
54
|
+
"spout new my_dd" creates a new data
|
55
|
+
dictionary called MyDD in "./my_dd"
|
56
|
+
[t]est Running the test file
|
57
|
+
[i]mport Import a CSV file into the JSON repository
|
58
|
+
[e]xport [1.0.0] Export the JSON respository to a CSV
|
59
|
+
h[y]brid [1.0.0] Export the JSON repository in the Hybrid
|
60
|
+
Dictionary format
|
61
|
+
[v]ersion Returns the version of Spout
|
62
|
+
|
63
|
+
Commands can be referenced by the first letter:
|
64
|
+
Ex: `spout t`, for test
|
54
65
|
|
55
66
|
EOT
|
56
67
|
puts help_message
|
57
68
|
end
|
58
69
|
|
59
|
-
def new_data_dictionary_export(argv)
|
70
|
+
# Exports the data dictionary by running the `dd:create` rake task.
#
# argv - Array of command-line arguments; argv[1] is an optional version
#        string. Characters other than letters, digits, `.` and `-` are
#        replaced with `_`.
# type - Optional export type (String), e.g. 'hybrid'. When non-blank it is
#        passed to rake as `TYPE=<type>`.
#
# Returns the result of `system`.
def new_data_dictionary_export(argv, type = '')
  # Strip first: stripping after the substitution never removes anything,
  # because surrounding whitespace has already been turned into underscores.
  version = argv[1].to_s.strip.gsub(/[^a-zA-Z0-9\.-]/, '_')

  command = ['bundle', 'exec', 'rake', 'dd:create']
  command << "VERSION=#{version}" unless version == ''
  command << "TYPE=#{type}" unless type.to_s == ''

  # Argument-list form avoids building (and re-parsing) a shell string.
  system(*command)
end
|
64
76
|
|
65
77
|
def new_template_dictionary(argv)
|
data/lib/spout/tasks/engine.rake
CHANGED
@@ -19,75 +19,231 @@ namespace :dd do
|
|
19
19
|
desc 'Create Data Dictionary from repository'
|
20
20
|
# Builds the data dictionary export inside dd/<VERSION> (VERSION defaults to
# 1.0.0). TYPE=hybrid selects the Hybrid export; anything else produces the
# standard variables/domains CSV export.
task :create do
  version = ENV['VERSION'] || '1.0.0'
  folder = "dd/#{version}"
  FileUtils.mkpath folder

  if ENV['TYPE'] == 'hybrid'
    hybrid_export(folder)
  else
    standard_export(folder)
  end

  puts "Data Dictionary Created in #{folder}"
end
|
51
35
|
|
52
36
|
desc 'Initialize JSON repository from a CSV file: CSV=datadictionary.csv'
|
53
37
|
# Imports the CSV named by ENV['CSV'] into the JSON repository.
# TYPE=domains imports domain options; any other TYPE imports variables.
task :import do
  puts ENV['CSV'].inspect
  # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
  if File.exist?(ENV['CSV'].to_s)
    ENV['TYPE'] == 'domains' ? import_domains : import_variables
  else
    puts "\nPlease specify a valid CSV file.".colorize( :red ) + additional_csv_info
  end
end
|
45
|
+
end
|
46
|
+
|
47
|
+
# Writes the standard export into `folder`:
#   variables.csv - one row per parseable variables/**/*.json file
#   domains.csv   - one row per option of each parseable domains/**/*.json file
# Files that cannot be read or parsed are skipped.
def standard_export(folder)
  variable_keys = %w(id display_name description type units domain labels calculation)
  CSV.open("#{folder}/variables.csv", "wb") do |csv|
    csv << ['folder'] + variable_keys
    Dir.glob("variables/**/*.json").each do |path|
      json = JSON.parse(File.read(path)) rescue false
      next unless json
      variable_folder = variable_folder_path(path)
      # Array values (e.g. labels) are flattened into a ';'-separated cell.
      cells = variable_keys.collect do |key|
        value = json[key]
        value.kind_of?(Array) ? value.join(';') : value.to_s
      end
      csv << [variable_folder] + cells
    end
  end

  option_keys = %w(value display_name description)
  CSV.open("#{folder}/domains.csv", "wb") do |csv|
    csv << ['folder', 'domain_id'] + option_keys
    Dir.glob("domains/**/*.json").each do |path|
      json = JSON.parse(File.read(path)) rescue false
      next unless json
      location = [domain_folder_path(path), extract_domain_name(path)]
      json.each do |option|
        csv << location + option_keys.collect { |key| option[key] }
      end
    end
  end
end
|
72
|
+
|
73
|
+
# Returns the domain name for a domain JSON path: the file name without its
# directory and without the trailing `.json` extension.
#
#   extract_domain_name('domains/Demographics/gender.json') # => "gender"
#
# File.basename only removes a literal `.json` suffix. The previous regex
# (`/.json/`, unescaped and unanchored) also ate text such as "_json" in the
# middle of a name, and the global `domains/` substitution merged folder and
# file names for paths like `domains/subdomains/x.json`.
def extract_domain_name(file)
  File.basename(file.to_s, '.json')
end
|
76
|
+
|
77
|
+
# Returns the folder portion of a domain JSON path, relative to the
# top-level `domains/` directory ('' for files directly inside it).
#
#   domain_folder_path('domains/Demographics/Sub/gender.json') # => "Demographics/Sub"
#
# Only the leading `domains/` prefix is removed; a global substitution would
# also strip it out of nested folder names such as `subdomains/`.
def domain_folder_path(file)
  file.sub(%r{\Adomains/}, '').split('/')[0..-2].join('/')
end
|
80
|
+
|
81
|
+
# Returns the folder portion of a variable JSON path, relative to the
# top-level `variables/` directory ('' for files directly inside it).
#
#   variable_folder_path('variables/Demographics/age.json') # => "Demographics"
#
# Only the leading `variables/` prefix is removed; a global substitution
# would also strip it out of nested folder names such as `subvariables/`.
def variable_folder_path(file)
  file.sub(%r{\Avariables/}, '').split('/')[0..-2].join('/')
end
|
84
|
+
|
85
|
+
# Picks the Hybrid concept type for a variable.
#
# An explicit, non-blank json['hybrid']['type'] is returned as-is; otherwise
# the variable's own json['type'] is translated by hybrid_concept_type_map.
def hybrid_concept_type(json)
  hybrid = json['hybrid']
  explicit_type = hybrid ? hybrid['type'] : nil
  return explicit_type unless explicit_type.to_s == ''

  hybrid_concept_type_map(json['type'])
end
|
92
|
+
|
93
|
+
# Translates a Spout variable type into its Hybrid concept type.
# Types without a translation are returned unchanged.
def hybrid_concept_type_map(variable_type)
  case variable_type
  when "choices"                then "categorical"
  when "numeric", "integer"     then "continuous"
  when "string", "text", "file" then "free text"
  when "date", "time"           then "datetime"
  else variable_type
  end
end
|
104
|
+
|
105
|
+
# Reads a Hybrid-specific property from a variable's `hybrid` hash.
#
# json     - parsed variable JSON (Hash).
# property - key to look up inside json['hybrid'].
#
# Returns the stored value, or '' when there is no `hybrid` hash or the
# property is absent. Previously a missing property yielded nil while a
# missing hash yielded '', and a truthy non-Hash `hybrid` value (e.g. true)
# raised.
def hybrid_property(json, property)
  hybrid = json['hybrid']
  return '' unless hybrid.is_a?(Hash)

  value = hybrid[property]
  # Only nil means "absent"; a stored false must be passed through.
  value.nil? ? '' : value
end
|
108
|
+
|
109
|
+
# Writes `<folder>/hybrid.csv` in the Hybrid data dictionary layout.
#
# One row is written per parseable variables/**/*.json file, followed by one
# 'boolean' row per option of each parseable domains/**/*.json file. The
# "Parents" cell of an option row lists the ids of the variables whose
# `domain` matches the domain file name (compared case-insensitively).
# Files that cannot be read or parsed are skipped.
#
# Every row carries one cell per header column; option rows previously
# stopped after "Calculation" and were two cells short of the header.
def hybrid_export(folder)
  headers = ["Folder", "Short Name", "Description", "Concept Type", "Units", "Terms", "Internal Terms", "Parents", "Children", "Field Values", "Sensitivity", "Display Name", "Commonly Used", "Calculation", "Source Name", "Source File"]
  domain_parents = {}

  CSV.open("#{folder}/hybrid.csv", "wb") do |csv|
    csv << headers

    Dir.glob("variables/**/*.json").each do |file|
      json = JSON.parse(File.read(file)) rescue false
      next unless json

      # Remember which variables use each domain so option rows can list them.
      domain_key = json['domain'].to_s.downcase
      (domain_parents[domain_key] ||= []) << json['id'].to_s unless domain_key == ''

      csv << [
        variable_folder_path(file),                  # Folder
        json['id'],                                  # Short Name
        json['description'],                         # Description
        hybrid_concept_type(json),                   # Concept Type
        json['units'],                               # Units
        (json['labels'] || []).join(';'),            # Terms
        '',                                          # Internal Terms
        '',                                          # Parents
        '',                                          # Children
        '',                                          # Field Values
        hybrid_property(json, 'access level'),       # Sensitivity
        json['display_name'],                        # Display Name
        hybrid_property(json, 'most commonly used'), # Commonly Used
        json['calculation'],                         # Calculation
        hybrid_property(json, 'SOURCE'),             # Source Name
        hybrid_property(json, 'filename')            # Source File
      ]
    end

    Dir.glob("domains/**/*.json").each do |file|
      json = JSON.parse(File.read(file)) rescue false
      next unless json

      json.each do |option|
        domain_name = extract_domain_name(file)
        csv << [
          domain_folder_path(file),                             # Folder
          domain_name + '_' + option['value'].to_s,             # Short Name
          option['description'],                                # Description
          'boolean',                                            # Concept Type
          '',                                                   # Units
          '',                                                   # Terms
          option['value'],                                      # Internal Terms
          (domain_parents[domain_name.downcase] || []).join(';'), # Parents
          '',                                                   # Children
          '',                                                   # Field Values
          '0',                                                  # Sensitivity
          option['display_name'],                               # Display Name
          '',                                                   # Commonly Used
          '',                                                   # Calculation
          '',                                                   # Source Name
          ''                                                    # Source File
        ]
      end
    end
  end
end
|
165
|
+
|
166
|
+
# Imports variables from the CSV named by ENV['CSV'], writing one
# `variables/<folder>/<id>.json` file per row.
#
# The CSV must have an `id` column; when it is missing an error is printed
# and the process exits with status 1. The columns folder, display_name,
# description, type, domain, units, calculation and labels (';'-separated)
# are interpreted; any remaining columns are stored under the `other` key.
#
# Rows with a blank id are skipped. An empty CSV cell is parsed as nil rather
# than '', so the check goes through to_s; comparing against '' alone let
# those rows through to `id.downcase`, which raised.
def import_variables
  csv_text = File.open(ENV['CSV'].to_s, 'r:iso-8859-1:utf-8') { |f| f.read }

  CSV.parse(csv_text, headers: true) do |line|
    row = line.to_hash
    unless row.key?('id')
      puts "\nMissing column header `".colorize( :red ) + "id".colorize( :light_cyan ) + "` in data dictionary.".colorize( :red ) + additional_csv_info
      exit(1)
    end
    next if row['id'].to_s.strip == ''

    folder = File.join('variables', row.delete('folder').to_s)
    FileUtils.mkpath folder

    id = row.delete('id')
    hash = {}
    hash['id'] = id
    hash['display_name'] = row.delete('display_name')
    hash['description'] = row.delete('description').to_s
    hash['type'] = row.delete('type')

    # Optional attributes are only written when the CSV supplies a value.
    %w(domain units calculation).each do |key|
      value = row.delete(key).to_s
      hash[key] = value if value != ''
    end
    labels = row.delete('labels').to_s.split(';')
    hash['labels'] = labels if labels.size > 0

    # Whatever columns are left over are kept verbatim.
    hash['other'] = row unless row.empty?

    file_name = File.join(folder, id.downcase + '.json')
    File.open(file_name, 'w') do |file|
      file.write(JSON.pretty_generate(hash) + "\n")
    end
    puts "      create".colorize( :green ) + "  #{file_name}"
  end
end
|
199
|
+
|
200
|
+
# Imports domains from the CSV named by ENV['CSV'], writing one
# `domains/<folder>/<domain_id>.json` file per domain. Each file holds the
# list of that domain's options (value, display_name, description).
#
# The CSV must have `domain_id`, `value` and `display_name` columns; when one
# is missing an error is printed and the process exits with status 1. Rows
# where any of those three cells is blank are skipped.
#
# Characters outside [a-zA-Z0-9_.-] in a domain id are replaced with `_`.
# Unlike the folder column, `/` is NOT allowed in the id: it would point the
# file into a sub-directory that is never created, and opening it would fail.
def import_domains
  required_headers = %w(domain_id value display_name)
  domains = {}

  csv_text = File.open(ENV['CSV'].to_s, 'r:iso-8859-1:utf-8') { |f| f.read }

  CSV.parse(csv_text, headers: true) do |line|
    row = line.to_hash

    required_headers.each do |header|
      next if row.key?(header)
      puts "\nMissing column header `".colorize( :red ) + header.colorize( :light_cyan ) + "` in data dictionary.".colorize( :red ) + additional_csv_info
      exit(1)
    end

    next if required_headers.any? { |header| row[header].to_s == '' }

    folder = File.join('domains', row['folder'].to_s).gsub(/[^a-zA-Z0-9_\/\.-]/, '_')
    domain_name = row['domain_id'].to_s.gsub(/[^a-zA-Z0-9_\.-]/, '_')

    domain = (domains[domain_name] ||= {})
    # The folder of the last row seen for a domain wins.
    domain["folder"] = folder
    domain["options"] ||= []

    hash = {}
    hash['value'] = row.delete('value').to_s
    hash['display_name'] = row.delete('display_name').to_s
    hash['description'] = row.delete('description').to_s

    domain["options"] << hash
  end

  domains.each do |domain_name, domain_hash|
    folder = domain_hash["folder"]
    FileUtils.mkpath folder

    file_name = File.join(folder, domain_name.downcase + '.json')

    File.open(file_name, 'w') do |file|
      file.write(JSON.pretty_generate(domain_hash["options"]) + "\n")
    end
    puts "      create".colorize( :green ) + "  #{file_name}"
  end
end
|
246
|
+
|
247
|
+
# Returns the help footer appended to CSV import error messages: a short
# pointer to the README section on CSV column headers, with the link
# highlighted in light cyan.
def additional_csv_info
  readme_link = "https://github.com/sleepepi/spout#generate-a-new-repository-from-an-existing-csv-file"
  intro = "\n\nFor additional information on specifying CSV column headers before import see:\n\n "
  intro + readme_link.colorize( :light_cyan ) + "\n\n"
end
|
data/lib/spout/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: spout
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Remo Mueller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-06-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|