etna 0.1.27 → 0.1.32

Sign up to get free protection for your applications and to get access to all the features.
data/lib/etna.rb CHANGED
@@ -20,6 +20,7 @@ require_relative './etna/csvs'
20
20
  require_relative './etna/environment_scoped'
21
21
  require_relative './etna/filesystem'
22
22
  require_relative './etna/formatting'
23
+ require_relative './etna/cwl'
23
24
 
24
25
  class EtnaApp
25
26
  include Etna::Application
@@ -91,6 +91,10 @@ module Etna::Application
91
91
  (ENV["#{self.class.name.upcase}_ENV"] || :development).to_sym
92
92
  end
93
93
 
94
# Identifier for this application.
#
# Prefers the APP_NAME environment variable; otherwise derives the id from
# the snake-cased class name, keeping only the segment after the last `::`.
def id
  configured_name = ENV["APP_NAME"]
  return configured_name if configured_name

  snake_cased = self.class.name.snake_case
  snake_cased.split(/::/).last
end
97
+
94
98
  def find_descendents(klass)
95
99
  ObjectSpace.each_object(Class).select do |k|
96
100
  k < klass
data/lib/etna/client.rb CHANGED
@@ -17,6 +17,29 @@ module Etna
17
17
 
18
18
  attr_reader :routes
19
19
 
20
# Builds the request path for +route+, HMAC-signing it when a signatory is given.
#
# @param route [Hash] route description; reads :method, :params (list of
#   route parameter names) and whatever #route_path needs.
# @param params [Hash] request parameters. A :signatory entry, if present, is
#   REMOVED from this hash (the caller goes on to send +params+ as the request
#   body, so the signatory must not travel with it).
# @return [String] the plain route path when there is no signatory, otherwise
#   "<path>?<query>" as produced by Etna::Hmac#url_params.
def signed_route_path(route, params)
  path = route_path(route, params)

  signatory = params.delete(:signatory)
  return path unless signatory

  # Everything that is not a route parameter is signed as a header. Compare on
  # the stringified key so that both symbol- and string-keyed route params are
  # excluded, matching how missing params are detected for the route methods.
  route_param_names = route[:params].map(&:to_s)
  signed_headers = params.reject { |key, _value| route_param_names.include?(key.to_s) }

  hmac = Etna::Hmac.new(
    signatory,
    method: route[:method],
    host: URI(@host).host,
    path: path,
    # NOTE(review): DateTime + 10 adds ten DAYS; confirm that is the intended
    # lifetime of a signed URL.
    expiration: (DateTime.now + 10).iso8601,
    id: signatory.id,
    nonce: SecureRandom.hex,
    headers: signed_headers
  )

  url_params = hmac.url_params(route[:method] == 'GET')

  url_params[:path] + '?' + url_params[:query]
end
42
+
20
43
  def route_path(route, params)
21
44
  Etna::Route.path(route[:route], params)
22
45
  end
@@ -60,14 +83,19 @@ module Etna
60
83
  @routes.each do |route|
61
84
  next unless route[:name]
62
85
  self.define_singleton_method(route[:name]) do |params = {}|
63
-
64
86
  missing_params = (route[:params] - params.keys.map(&:to_s))
87
+
65
88
  unless missing_params.empty?
66
89
  raise ArgumentError, "Missing required #{missing_params.size > 1 ?
67
90
  'params' : 'param'} #{missing_params.join(', ')}"
68
91
  end
69
92
 
70
- response = send(route[:method].downcase, route_path(route, params), params)
93
+ response = send(
94
+ route[:method].downcase,
95
+ signed_route_path(route, params),
96
+ params
97
+ )
98
+
71
99
  if block_given?
72
100
  yield response
73
101
  else
@@ -1 +1,2 @@
1
- require_relative 'formatting/models_csv'
1
+ require_relative 'formatting/models_csv'
2
+ require_relative 'formatting/models_odm_xml'
@@ -0,0 +1,293 @@
1
+ require 'nokogiri'
2
+
3
module Etna
  module Clients
    class Magma
      # Renders a project's Magma models as a CDISC ODM XML document carrying
      # REDCap extension elements/attributes (the `redcap:` namespace).
      module ModelsOdmXml
        # Name-formatting helpers mixed into the exporter.
        module Prettify
          # "some_model_name" -> "Some Model Name"
          def prettify(name)
            name.split('_').map(&:capitalize).join(' ')
          end

          # "some_model_name" -> "Somemodelname"
          def shorten(name)
            name.delete('_').capitalize
          end
        end

        # Builds the ODM document for one project.
        #
        # The write_* / global_variables / metadata methods append nodes to the
        # Nokogiri builder they are handed; they are called for that side
        # effect, not for their return value.
        class Exporter
          include Prettify

          attr_reader :project_name, :models

          # @param project_name [String] used for OIDs, names and descriptions
          # @param models [Object] Magma models collection; must respond to
          #   #all and #model(name) (see #clinical_dictionaries, #write_item_def)
          def initialize(project_name:, models:)
            @project_name = project_name
            @models = models
          end

          # Magma attribute type => ODM ItemDef DataType.
          # Types not listed fall back to 'text' at the call site.
          def data_type_map
            @data_type_map ||= {
              AttributeType::STRING => 'text',
              AttributeType::DATE_TIME => 'date',
              AttributeType::BOOLEAN => 'text',
              AttributeType::FLOAT => 'float',
              AttributeType::INTEGER => 'integer'
            }
          end

          # Magma attribute type => redcap:FieldType.
          # Types not listed fall back to 'text' at the call site.
          def redcap_field_type_map
            @redcap_field_type_map ||= {
              AttributeType::STRING => 'textarea',
              AttributeType::DATE_TIME => 'text',
              AttributeType::BOOLEAN => 'radio',
              AttributeType::FLOAT => 'text',
              AttributeType::INTEGER => 'text'
            }
          end

          # Magma attribute type => redcap:TextValidationType.
          # Types not listed get no validation attribute at all.
          def redcap_text_validation_map
            @redcap_text_validation_map ||= {
              AttributeType::DATE_TIME => 'date_mdy',
              AttributeType::FLOAT => 'float',
              AttributeType::INTEGER => 'int'
            }
          end

          # Builds the document and returns it as an XML string.
          #
          # @param output_io accepted but currently unused
          # @param filename accepted but currently unused
          # @return [String] the serialized ODM document
          def write_models(output_io: nil, filename: nil)
            # The outer block must take |xml|: with a block argument Nokogiri
            # yields the builder instead of instance_eval-ing the block, so
            # `self` stays this exporter inside the nested blocks.
            @document = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
              xml.ODM(odm_headers) do
                # NOTE(review): Study is emitted empty and GlobalVariables /
                # MetaDataVersion become its siblings. ODM normally nests them
                # inside Study -- confirm this layout is what REDCap expects.
                xml.Study(OID: "Project.#{shorten(project_name)}")
                global_variables(xml)
                metadata(xml)
              end
            end

            @document.to_xml
          end

          # Attributes for the root ODM element.
          # A single timestamp is captured so that all time-valued attributes
          # of one document agree with each other.
          def odm_headers
            now = DateTime.now

            {
              xmlns: "http://www.cdisc.org/ns/odm/v1.3",
              'xmlns:ds': "http://www.w3.org/2000/09/xmldsig#",
              'xmlns:xsi': "http://www.w3.org/2001/XMLSchema-instance",
              'xmlns:redcap': "https://projectredcap.org",
              'xsi:schemaLocation': "http://www.cdisc.org/ns/odm/v1.3 schema/odm/ODM1-3-1.xsd",
              ODMVersion: "1.3.2",
              FileOID: "000-00-0000",
              FileType: "Snapshot",
              Description: project_name,
              AsOfDateTime: now,
              CreationDateTime: now,
              SourceSystem: "Magma",
              SourceSystemVersion: now
            }
          end

          # Writes general metadata about the project, plus the declarations
          # of all repeating instruments.
          #
          # <redcap:Purpose> codes:
          #   0 = Practice / just for fun
          #   1 = Operational Support
          #   2 = Research
          #   3 = Quality Improvement
          #   4 = Other
          def global_variables(xml)
            xml.GlobalVariables do
              xml.StudyName "#{project_name}"
              xml.StudyDescription "#{project_name} - Data Library integration project"
              xml.ProtocolName "#{project_name}"
              # Namespaced element names are not valid Ruby method names, so
              # they are dispatched to the builder through #send.
              xml.send('redcap:RecordAutonumberingEnabled', 1)
              xml.send('redcap:CustomRecordLabel')
              xml.send('redcap:SecondaryUniqueField')
              xml.send('redcap:SchedulingEnabled', 0)
              xml.send('redcap:SurveysEnabled', 0)
              xml.send('redcap:SurveyInvitationEmailField')
              xml.send('redcap:Purpose', 2) # 2 == research
              xml.send('redcap:PurposeOther')
              xml.send('redcap:ProjectNotes', "Used to easily ingest clinical data for #{project_name} into the Data Library.")
              xml.send('redcap:MissingDataCodes')

              repeating_instruments(xml)
            end
          end

          # Collection-typed attributes of the 'subject' model.
          #
          # There is no direct indicator of what counts as a repeating
          # attribute for REDCap, so collections hanging off 'subject' are used.
          # Open question from the original author: does this work for models
          # without timepoint, that go straight from subject -> sample?
          def repeating_attribute_types
            models.model('subject').template.attributes.all.select do |attribute|
              AttributeType::COLLECTION == attribute.attribute_type
            end
          end

          # Dictionaries of every model that has one; a model with a
          # dictionary is taken to need a REDCap form.
          def clinical_dictionaries
            with_dictionary = models.all.select { |model| model.template.raw['dictionary'] }
            with_dictionary.map { |model| model.template.dictionary }
          end

          # Writes the repeating instruments and events. From a Magma model
          # perspective this should be Timepoint hanging off a Subject model.
          def repeating_instruments(xml)
            xml.send('redcap:RepeatingInstrumentsAndEvents') do
              repeating_attribute_types.each do |repeating_attribute|
                write_repeating_instrument_xml(xml, repeating_attribute)
              end
            end
          end

          # Writes one redcap:RepeatingInstrument element for +instrument+
          # (an attribute) and returns the builder node.
          def write_repeating_instrument_xml(xml, instrument)
            node = xml.send('redcap:RepeatingInstrument')
            node['redcap:UniqueEventName'] = 'event_1_arm_1'
            node['redcap:RepeatInstrument'] = instrument.attribute_name
            node['redcap:CustomLabel'] = instrument.display_name || instrument.attribute_name.capitalize

            node
          end

          # Writes the MetaDataVersion element: form and field definitions.
          #
          # Each Magma dictionary needs a FormDef, with ItemGroupRef children
          # for each form page (?).
          # Each ItemGroupRef requires a corresponding ItemGroupDef with
          # ItemRef children for each input (?).
          # Each ItemRef requires a corresponding ItemDef that defines the
          # label and type, and includes <Question> as a label (?).
          # Option validations are present as a CodeList (with CodeListItem
          # children).
          def metadata(xml)
            xml.MetaDataVersion(
              OID: "Metadata.#{shorten(project_name)}_#{DateTime.now}",
              Name: project_name,
              'redcap:RecordIdField': 'record_id'
            ) do
              clinical_dictionaries.each do |dictionary|
                write_form_def(xml, dictionary)
                write_item_group_def(xml, dictionary)
                write_item_def(xml, dictionary)
                write_code_list(xml, dictionary)
              end
            end
          end

          # Writes the FormDef for a dictionary, assuming a single item group.
          def write_form_def(xml, dictionary)
            xml.FormDef(
              OID: "Form.#{dictionary.model_name}",
              Name: dictionary.model_name.capitalize,
              Repeating: "No",
              'redcap:FormName': dictionary.model_name
            ) do
              xml.ItemGroupRef(
                ItemGroupOID: "#{dictionary.model_name}.attributes",
                Mandatory: "No"
              )
            end
          end

          # Writes the ItemGroupDef for a dictionary, with one ItemRef per
          # dictionary attribute.
          def write_item_group_def(xml, dictionary)
            xml.ItemGroupDef(
              OID: "#{dictionary.model_name}.attributes",
              Name: "#{dictionary.model_name.capitalize} Attributes",
              Repeating: "No"
            ) do
              dictionary.attributes.keys.each do |attribute_name|
                xml.ItemRef(
                  ItemOID: "#{dictionary.model_name}.#{attribute_name}", # Does this need to be unique across all items?
                  Mandatory: "No",
                  'redcap:Variable': attribute_name # Does this need to be unique?
                )
              end
            end
          end

          # Writes one ItemDef per dictionary attribute, typed from the
          # matching attribute on the dictionary's model.
          def write_item_def(xml, dictionary)
            model_attributes = models.model(dictionary.model_name).template.attributes

            dictionary.attributes.keys.each do |attribute_name|
              attribute = model_attributes.attribute(attribute_name)
              attribute_type = attribute.attribute_type

              params = {
                OID: "#{dictionary.model_name}.#{attribute_name}", # Does this need to be unique across all items?
                Name: attribute_name,
                DataType: data_type_map[attribute_type] || 'text',
                'redcap:Variable': attribute_name,
                'redcap:FieldType': redcap_field_type_map[attribute_type] || 'text',
                Length: '999' # How could we infer shorter values?
              }

              text_validation = redcap_text_validation_map[attribute_type]
              params['redcap:TextValidationType'] = text_validation if text_validation
              params['redcap:FieldNote'] = attribute.desc if attribute.desc

              xml.ItemDef(params) do
                xml.Question do
                  xml.TranslatedText(attribute_name.capitalize)
                end

                if array_validation?(attribute)
                  xml.CodeListRef(
                    CodeListOID: "#{dictionary.model_name}.#{attribute_name}.choices"
                  )
                end
              end
            end
          end

          # Writes a CodeList (one CodeListItem per allowed value) for every
          # dictionary attribute whose model attribute has an array validation.
          def write_code_list(xml, dictionary)
            model_attributes = models.model(dictionary.model_name).template.attributes

            dictionary.attributes.keys.each do |attribute_name|
              attribute = model_attributes.attribute(attribute_name)
              next unless array_validation?(attribute)

              xml.CodeList(
                OID: "#{dictionary.model_name}.#{attribute_name}.choices",
                Name: "#{attribute_name}",
                DataType: "text",
                'redcap:Variable': attribute_name
              ) do
                attribute.validation['value'].each do |option|
                  xml.CodeListItem(CodedValue: option) do
                    xml.Decode do
                      xml.TranslatedText(option)
                    end
                  end
                end
              end
            end
          end

          private

          # True when +attribute+ carries a validation of the array type,
          # i.e. a fixed list of allowed values.
          def array_validation?(attribute)
            validation = attribute.validation
            !!validation && AttributeValidationType::ARRAY == validation['type']
          end
        end
      end
    end
  end
end
@@ -519,6 +519,18 @@ module Etna
519
519
  @raw['desc'] = val
520
520
  end
521
521
 
522
# `description` / `description=` mirror `desc` / `desc=`: both pairs read and
# write the 'desc' entry of the raw attribute hash. They are needed so that
# UpdateAttribute actions work for desc in the model_synchronization_workflow.
def description
  raw["desc"]
end
529
+
530
# Writer counterpart of #description: stores +val+ under the 'desc' key of
# the raw attribute hash.
def description=(val)
  @raw.store('desc', val)
end
533
+
522
534
  def display_name
523
535
  raw['display_name']
524
536
  end
@@ -589,7 +601,7 @@ module Etna
589
601
  COPYABLE_ATTRIBUTE_ATTRIBUTES = [
590
602
  :attribute_name, :attribute_type, :desc, :display_name, :format_hint,
591
603
  :hidden, :link_model_name, :read_only, :attribute_group, :unique, :validation,
592
- :restricted
604
+ :restricted, :description
593
605
  ]
594
606
 
595
607
  EDITABLE_ATTRIBUTE_ATTRIBUTES = UpdateAttributeAction.members & COPYABLE_ATTRIBUTE_ATTRIBUTES
@@ -70,7 +70,7 @@ module Etna
70
70
  puts "Creating Janus project."
71
71
  create_janus_project!
72
72
  puts "Done! Adding you as an administrator on the project."
73
- add_janus_user(user['email'], "#{user['first']} #{user['last']}", 'editor')
73
+ add_janus_user(user['email'], "#{user['name']}", 'editor')
74
74
  promote_to_administrator(user['email'])
75
75
  update_magma_client_token!
76
76
 
@@ -36,11 +36,28 @@ module Etna
36
36
  documents = Documents.new({})
37
37
  last_page = nil
38
38
  while last_page.nil? || last_page.models.model_keys.map { |k| last_page.models.model(k).documents.raw.length }.sum > 0
39
+ attempts = 0
39
40
  begin
41
+ attempts += 1
40
42
  last_page = magma_client.retrieve(request)
43
+ # Unfortunately, paging in magma is not great and times out from time to time.
44
+ rescue Net::ReadTimeout => e
45
+ if attempts > 5
46
+ raise e
47
+ end
48
+
49
+ retry
41
50
  rescue Etna::Error => e
42
- raise e unless e.message.include?('not found')
43
- break
51
+ if e.status === 502
52
+ if attempts > 5
53
+ raise e
54
+ end
55
+
56
+ retry
57
+ else
58
+ raise e unless e.message.include?('not found')
59
+ break
60
+ end
44
61
  end
45
62
 
46
63
  documents += last_page.models.model(model_name).documents unless block_given?
@@ -1,5 +1,4 @@
1
1
  require 'ostruct'
2
- require 'digest'
3
2
  require 'fileutils'
4
3
  require 'tempfile'
5
4
 
@@ -9,11 +8,11 @@ module Etna
9
8
  class MaterializeDataWorkflow < Struct.new(
10
9
  :metis_client, :magma_client, :project_name,
11
10
  :model_name, :model_filters, :model_attributes_mask,
12
- :filesystem, :logger, :stub_files,
13
- :skip_tmpdir, keyword_init: true)
11
+ :filesystem, :logger, :stub_files, :concurrency,
12
+ :record_names, keyword_init: true)
14
13
 
15
14
  def initialize(**kwds)
16
- super(**({filesystem: Etna::Filesystem.new}.update(kwds)))
15
+ super(**({filesystem: Etna::Filesystem.new, concurrency: 10, record_names: "all"}.update(kwds)))
17
16
  end
18
17
 
19
18
  def magma_crud
@@ -25,31 +24,47 @@ module Etna
25
24
  end
26
25
 
27
26
  def materialize_all(dest)
28
- tmpdir = skip_tmpdir ? nil : filesystem.tmpdir
27
+ templates = {}
28
+
29
+ semaphore = Concurrent::Semaphore.new(concurrency)
30
+ errors = Queue.new
31
+
32
+ model_walker.walk_from(
33
+ model_name,
34
+ record_names,
35
+ model_attributes_mask: model_attributes_mask,
36
+ model_filters: model_filters,
37
+ page_size: 20,
38
+ ) do |template, document|
39
+ logger&.info("Materializing #{template.name}##{document[template.identifier]}")
40
+ templates[template.name] = template
41
+
42
+ begin
43
+ if (error = errors.pop(true))
44
+ raise error
45
+ end
46
+ rescue ThreadError
47
+ end
29
48
 
30
- begin
31
- model_walker.walk_from(
32
- model_name,
33
- model_attributes_mask: model_attributes_mask,
34
- model_filters: model_filters,
35
- ) do |template, document|
36
- logger&.info("Materializing #{template.name}##{document[template.identifier]}")
37
- materialize_record(dest, tmpdir, template, document)
49
+ semaphore.acquire
50
+ Thread.new do
51
+ begin
52
+ materialize_record(dest, template, document)
53
+ rescue => e
54
+ errors << e
55
+ ensure
56
+ semaphore.release
57
+ end
38
58
  end
39
- ensure
40
- filesystem.rm_rf(tmpdir) unless skip_tmpdir
41
59
  end
42
- end
43
60
 
44
- def each_root_record
45
- request = RetrievalRequest.new(project_name: project_name, model_name: model_name, record_names: "all",
46
- filter: filter, page_size: 100, page: 1)
47
- magma_crud.page_records(model_name, request) do |response|
48
- model = response.models.model(model_name)
49
- template = model.template
50
- model.documents.document_keys.each do |key|
51
- yield template, model.documents.document(key)
61
+ semaphore.acquire(concurrency)
62
+
63
+ begin
64
+ if (error = errors.pop(true))
65
+ raise error
52
66
  end
67
+ rescue ThreadError
53
68
  end
54
69
  end
55
70
 
@@ -78,11 +93,10 @@ module Etna
78
93
  @sync_metis_data_workflow ||= Etna::Clients::Metis::SyncMetisDataWorkflow.new(
79
94
  metis_client: metis_client,
80
95
  logger: logger,
81
- skip_tmpdir: skip_tmpdir,
82
96
  filesystem: filesystem)
83
97
  end
84
98
 
85
- def materialize_record(dest_dir, tmpdir, template, record)
99
+ def materialize_record(dest_dir, template, record)
86
100
  record_to_serialize = record.dup
87
101
 
88
102
  each_file(template, record) do |attr_name, url, filename, idx|
@@ -91,14 +105,16 @@ module Etna
91
105
  end
92
106
 
93
107
  dest_file = File.join(dest_dir, metadata_file_name(record_name: record[template.identifier], record_model_name: template.name, ext: "_#{attr_name}_#{idx}#{File.extname(filename)}"))
94
- sync_metis_data_workflow.copy_file(bin_root_dir: dest_dir, tmpdir: tmpdir, dest: dest_file, url: url, stub: stub_files)
95
- record_to_serialize[attr_name] << { file: dest_file, original_filename: filename }
108
+ sync_metis_data_workflow.copy_file(dest: dest_file, url: url, stub: stub_files)
109
+ record_to_serialize[attr_name] << {file: dest_file, original_filename: filename}
96
110
  end
97
111
 
98
112
  dest_file = File.join(dest_dir, metadata_file_name(record_name: record[template.identifier], record_model_name: template.name, ext: '.json'))
99
113
  filesystem.mkdir_p(File.dirname(dest_file))
100
- filesystem.with_writeable(dest_file, "w") do |io|
101
- io.write(record_to_serialize.to_json)
114
+ json = record_to_serialize.to_json
115
+
116
+ filesystem.with_writeable(dest_file, "w", size_hint: json.bytes.length) do |io|
117
+ io.write(json)
102
118
  end
103
119
  end
104
120