etna 0.1.27 → 0.1.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/etna.rb CHANGED
@@ -20,6 +20,7 @@ require_relative './etna/csvs'
20
20
  require_relative './etna/environment_scoped'
21
21
  require_relative './etna/filesystem'
22
22
  require_relative './etna/formatting'
23
+ require_relative './etna/cwl'
23
24
 
24
25
  class EtnaApp
25
26
  include Etna::Application
@@ -91,6 +91,10 @@ module Etna::Application
91
91
  (ENV["#{self.class.name.upcase}_ENV"] || :development).to_sym
92
92
  end
93
93
 
94
# Identifier for this application.
#
# Returns the APP_NAME environment variable when it is set; otherwise the
# last `::`-separated segment of the snake-cased class name.
def id
  app_name = ENV["APP_NAME"]
  return app_name if app_name

  self.class.name.snake_case.split(/::/).last
end
97
+
94
98
  def find_descendents(klass)
95
99
  ObjectSpace.each_object(Class).select do |k|
96
100
  k < klass
data/lib/etna/client.rb CHANGED
@@ -17,6 +17,29 @@ module Etna
17
17
 
18
18
  attr_reader :routes
19
19
 
20
# Builds the request path for +route+, HMAC-signing it when +params+
# carries a :signatory entry.
#
# The :signatory entry is removed from +params+ (the hash is mutated in
# place). Without a signatory the plain route path is returned. With one,
# the result is "<path>?<query>" taken from Etna::Hmac#url_params.
def signed_route_path(route, params)
  plain_path = route_path(route, params)

  # Pull the signatory out of the hash the caller handed in.
  signer = params.delete(:signatory)

  if signer
    http_method = route[:method]

    signature = Etna::Hmac.new(
      signer,
      method: http_method,
      host: URI(@host).host,
      path: plain_path,
      # DateTime#+ adds days, so this is ten days from now.
      expiration: (DateTime.now + 10).iso8601,
      id: signer.id,
      nonce: SecureRandom.hex,
      # Everything that is not one of the route's own params.
      headers: params.except(*route[:params].map(&:to_sym))
    )

    # NOTE(review): the boolean presumably selects GET-style parameters;
    # confirm against Etna::Hmac#url_params.
    signed = signature.url_params(http_method == 'GET')

    signed[:path] + '?' + signed[:query]
  else
    plain_path
  end
end
42
+
20
43
  def route_path(route, params)
21
44
  Etna::Route.path(route[:route], params)
22
45
  end
@@ -60,14 +83,19 @@ module Etna
60
83
  @routes.each do |route|
61
84
  next unless route[:name]
62
85
  self.define_singleton_method(route[:name]) do |params = {}|
63
-
64
86
  missing_params = (route[:params] - params.keys.map(&:to_s))
87
+
65
88
  unless missing_params.empty?
66
89
  raise ArgumentError, "Missing required #{missing_params.size > 1 ?
67
90
  'params' : 'param'} #{missing_params.join(', ')}"
68
91
  end
69
92
 
70
- response = send(route[:method].downcase, route_path(route, params), params)
93
+ response = send(
94
+ route[:method].downcase,
95
+ signed_route_path(route, params),
96
+ params
97
+ )
98
+
71
99
  if block_given?
72
100
  yield response
73
101
  else
@@ -1 +1,2 @@
1
- require_relative 'formatting/models_csv'
1
+ require_relative 'formatting/models_csv'
2
+ require_relative 'formatting/models_odm_xml'
@@ -0,0 +1,293 @@
1
+ require 'nokogiri'
2
+
3
# Declares the (initially empty) Etna::Clients::Magma::ModelsOdmXml namespace.
module Etna
  module Clients
    class Magma
      module ModelsOdmXml; end
    end
  end
end
11
+
12
+ module Etna
13
+ module Clients
14
+ class Magma
15
+ module ModelsOdmXml
16
# String helpers that turn snake_case identifiers into display text.
module Prettify
  # Title-cases each underscore-separated word and joins them with spaces,
  # e.g. "data_library" => "Data Library".
  #
  # +name+ is coerced with #to_s, so Symbols are accepted as well.
  def prettify(name)
    name.to_s.split('_').map(&:capitalize).join(' ')
  end

  # Removes every underscore and capitalizes the result,
  # e.g. "data_library" => "Datalibrary".
  #
  # +name+ is coerced with #to_s, so Symbols are accepted as well.
  def shorten(name)
    name.to_s.delete('_').capitalize
  end
end
25
+
26
+ class Exporter
27
+ include Prettify
28
+
29
+ attr_reader :project_name, :models
30
+
31
# Stores the project name and the models collection used by the exporter.
def initialize(project_name:, models:)
  @models = models
  @project_name = project_name
end
35
+
36
# Memoized lookup from Magma attribute type to the value used for the
# ItemDef DataType attribute. Types without an entry return nil.
def data_type_map
  @data_type_map ||= {
    Etna::Clients::Magma::AttributeType::STRING => 'text',
    Etna::Clients::Magma::AttributeType::DATE_TIME => 'date',
    Etna::Clients::Magma::AttributeType::BOOLEAN => 'text',
    Etna::Clients::Magma::AttributeType::FLOAT => 'float',
    Etna::Clients::Magma::AttributeType::INTEGER => 'integer'
  }
end
48
+
49
# Memoized lookup from Magma attribute type to the value used for the
# redcap:FieldType attribute. Types without an entry return nil.
def redcap_field_type_map
  @redcap_field_type_map ||= {
    Etna::Clients::Magma::AttributeType::STRING => 'textarea',
    Etna::Clients::Magma::AttributeType::DATE_TIME => 'text',
    Etna::Clients::Magma::AttributeType::BOOLEAN => 'radio',
    Etna::Clients::Magma::AttributeType::FLOAT => 'text',
    Etna::Clients::Magma::AttributeType::INTEGER => 'text'
  }
end
61
+
62
# Memoized lookup from Magma attribute type to the value used for the
# redcap:TextValidationType attribute. Only date-time, float and integer
# types have an entry; every other type returns nil.
def redcap_text_validation_map
  @redcap_text_validation_map ||= {
    Etna::Clients::Magma::AttributeType::DATE_TIME => 'date_mdy',
    Etna::Clients::Magma::AttributeType::FLOAT => 'float',
    Etna::Clients::Magma::AttributeType::INTEGER => 'int'
  }
end
72
+
73
# Builds the ODM XML document for the project and returns it as a String.
#
# The built Nokogiri builder is kept in @document.
#
# +output_io+ and +filename+ are accepted but not used by this method.
# NOTE(review): presumably kept for parity with other exporters; confirm.
def write_models(output_io: nil, filename: nil)
  @document = Nokogiri::XML::Builder.new(encoding: 'UTF-8') do |xml|
    xml.ODM(odm_headers) do
      # In the ODM 1.3 content model GlobalVariables and MetaDataVersion
      # are children of Study, so they are emitted inside the Study block
      # rather than next to an empty Study element.
      xml.Study(OID: "Project.#{shorten(project_name)}") do
        global_variables(xml)
        metadata(xml)
      end
    end
  end

  @document.to_xml
end
85
+
86
# Attributes for the root ODM element: XML namespaces, schema location,
# ODM/file identification, and timestamps.
#
# The current time is captured once so AsOfDateTime, CreationDateTime and
# SourceSystemVersion always carry the same value within one document.
def odm_headers
  generated_at = DateTime.now

  {
    xmlns: "http://www.cdisc.org/ns/odm/v1.3",
    'xmlns:ds': "http://www.w3.org/2000/09/xmldsig#",
    'xmlns:xsi': "http://www.w3.org/2001/XMLSchema-instance",
    'xmlns:redcap': "https://projectredcap.org",
    'xsi:schemaLocation': "http://www.cdisc.org/ns/odm/v1.3 schema/odm/ODM1-3-1.xsd",
    ODMVersion: "1.3.2",
    FileOID: "000-00-0000",
    FileType: "Snapshot",
    Description: project_name,
    AsOfDateTime: generated_at,
    CreationDateTime: generated_at,
    SourceSystem: "Magma",
    # The generation timestamp doubles as the source system version.
    SourceSystemVersion: generated_at
  }
end
103
+
104
# Emits the GlobalVariables element: study name/description/protocol,
# a fixed set of redcap:* project settings, and the repeating-instrument
# declarations (via repeating_instruments).
#
# redcap:Purpose codes, as noted by the original author:
#   0 = Practice / just for fun
#   1 = Operational Support
#   2 = Research
#   3 = Quality Improvement
#   4 = Other
def global_variables(xml)
  # Each entry is the element name followed by its optional content.
  redcap_settings = [
    ['redcap:RecordAutonumberingEnabled', 1],
    ['redcap:CustomRecordLabel'],
    ['redcap:SecondaryUniqueField'],
    ['redcap:SchedulingEnabled', 0],
    ['redcap:SurveysEnabled', 0],
    ['redcap:SurveyInvitationEmailField'],
    ['redcap:Purpose', 2], # 2 == research
    ['redcap:PurposeOther'],
    ['redcap:ProjectNotes', "Used to easily ingest clinical data for #{project_name} into the Data Library."],
    ['redcap:MissingDataCodes']
  ]

  xml.GlobalVariables do
    xml.StudyName("#{project_name}")
    xml.StudyDescription("#{project_name} - Data Library integration project")
    xml.ProtocolName("#{project_name}")

    redcap_settings.each do |element_name, *content|
      xml.send(element_name, *content)
    end

    repeating_instruments(xml)
  end
end
134
+
135
# Returns the attributes of the 'subject' model whose type is COLLECTION.
#
# These stand in for REDCap repeating instruments because there is no
# better indicator available.
# Open question from the original author: will this work for models
# without a timepoint, that go straight from subject -> sample?
def repeating_attribute_types
  subject_attributes = models.model('subject').template.attributes.all

  subject_attributes.select do |candidate|
    Etna::Clients::Magma::AttributeType::COLLECTION == candidate.attribute_type
  end
end
144
+
145
# Returns the dictionary of every model whose raw template has a truthy
# 'dictionary' entry. Having a dictionary is the indicator that a model
# needs a REDCap form.
def clinical_dictionaries
  models.all.each_with_object([]) do |model, dictionaries|
    next unless model.template.raw['dictionary']

    dictionaries << model.template.dictionary
  end
end
154
+
155
# Emits redcap:RepeatingInstrumentsAndEvents with one repeating-instrument
# entry per attribute returned by repeating_attribute_types.
#
# From a Magma model perspective these should be the Timepoint collection
# hanging off of the Subject model.
def repeating_instruments(xml)
  xml.send('redcap:RepeatingInstrumentsAndEvents') do
    repeating_attribute_types.each do |collection_attribute|
      write_repeating_instrument_xml(xml, collection_attribute)
    end
  end
end
166
+
167
# Adds one redcap:RepeatingInstrument element for +instrument+ and returns
# the value produced by the builder call.
#
# The event name is fixed to 'event_1_arm_1'. The label is the attribute's
# display name, falling back to its capitalized attribute name.
def write_repeating_instrument_xml(xml, instrument)
  element = xml.send('redcap:RepeatingInstrument')

  element['redcap:UniqueEventName'] = 'event_1_arm_1'
  element['redcap:RepeatInstrument'] = instrument.attribute_name
  element['redcap:CustomLabel'] =
    instrument.display_name || instrument.attribute_name.capitalize

  element
end
175
+
176
# Emits the MetaDataVersion element holding the form and field
# definitions for every clinical dictionary.
#
# Layout, as understood by the original author (items marked (?) were
# unconfirmed):
# - each Magma dictionary needs a FormDef, with ItemGroupRef children for
#   each form page (?);
# - each ItemGroupRef requires a corresponding ItemGroupDef with ItemRef
#   children for each input (?);
# - each ItemRef requires a corresponding ItemDef that defines the label
#   and type, and includes <Question> as a label (?);
# - option validations are present as a CodeList (with CodeListItem
#   children).
def metadata(xml)
  version_attributes = {
    OID: "Metadata.#{shorten(project_name)}_#{DateTime.now}",
    Name: project_name,
    'redcap:RecordIdField': 'record_id'
  }

  xml.MetaDataVersion(version_attributes) do
    clinical_dictionaries.each do |dictionary|
      write_form_def(xml, dictionary)
      write_item_group_def(xml, dictionary)
      write_item_def(xml, dictionary)
      write_code_list(xml, dictionary)
    end
  end
end
200
+
201
# Emits the FormDef for +dictionary+'s model, containing a single
# ItemGroupRef that points at "<model_name>.attributes".
def write_form_def(xml, dictionary)
  form_name = dictionary.model_name

  form_attributes = {
    OID: "Form.#{form_name}",
    Name: form_name.capitalize,
    Repeating: "No",
    'redcap:FormName': form_name
  }

  xml.FormDef(form_attributes) do
    # Assume a single item group
    xml.ItemGroupRef(ItemGroupOID: "#{form_name}.attributes", Mandatory: "No") { }
  end
end
216
+
217
# Emits the ItemGroupDef "<model_name>.attributes" with one ItemRef per
# attribute name in +dictionary+.
def write_item_group_def(xml, dictionary)
  group_owner = dictionary.model_name

  group_attributes = {
    OID: "#{group_owner}.attributes",
    Name: "#{group_owner.capitalize} Attributes",
    Repeating: "No"
  }

  xml.ItemGroupDef(group_attributes) do
    dictionary.attributes.keys.each do |field|
      # Open questions from the original author: does ItemOID need to be
      # unique across all items, and does redcap:Variable need to be unique?
      xml.ItemRef(
        ItemOID: "#{group_owner}.#{field}",
        Mandatory: "No",
        'redcap:Variable': field
      ) { }
    end
  end
end
233
+
234
# Emits one ItemDef per attribute name in +dictionary+.
#
# Data type and REDCap field type come from the lookup maps, defaulting to
# 'text'. A text validation type and a field note are added only when
# available. Attributes with an ARRAY validation also get a CodeListRef to
# "<model_name>.<attribute_name>.choices".
def write_item_def(xml, dictionary)
  template_attributes = models.model(dictionary.model_name).template.attributes

  dictionary.attributes.keys.each do |field|
    attribute = template_attributes.attribute(field)
    type = attribute.attribute_type

    item_attributes = {
      OID: "#{dictionary.model_name}.#{field}", # Does this need to be unique across all items?
      Name: field,
      DataType: data_type_map[type] || 'text',
      'redcap:Variable': field,
      'redcap:FieldType': redcap_field_type_map[type] || 'text',
      Length: '999' # How could we infer shorter values?
    }

    text_validation = redcap_text_validation_map[type]
    item_attributes['redcap:TextValidationType'] = text_validation if text_validation

    field_note = attribute.desc
    item_attributes['redcap:FieldNote'] = field_note if field_note

    xml.ItemDef(item_attributes) do
      xml.Question do
        xml.send('TranslatedText', field.capitalize)
      end

      has_choices = attribute.validation &&
        Etna::Clients::Magma::AttributeValidationType::ARRAY == attribute.validation['type']

      if has_choices
        xml.CodeListRef(CodeListOID: "#{dictionary.model_name}.#{field}.choices") { }
      end
    end
  end
end
264
+
265
# Emits a CodeList "<model_name>.<attribute_name>.choices" for every
# dictionary attribute whose validation is of type ARRAY, with one
# CodeListItem per entry of the validation's 'value'.
def write_code_list(xml, dictionary)
  template_attributes = models.model(dictionary.model_name).template.attributes

  dictionary.attributes.keys.each do |field|
    attribute = template_attributes.attribute(field)

    # Only ARRAY validations describe a fixed set of choices.
    next unless attribute.validation &&
      Etna::Clients::Magma::AttributeValidationType::ARRAY == attribute.validation['type']

    list_attributes = {
      OID: "#{dictionary.model_name}.#{field}.choices",
      Name: "#{field}",
      DataType: "text",
      'redcap:Variable': field
    }

    xml.CodeList(list_attributes) do
      attribute.validation['value'].each do |choice|
        xml.CodeListItem(CodedValue: choice) do
          xml.Decode do
            xml.send('TranslatedText', choice)
          end
        end
      end
    end
  end
end
289
+ end
290
+ end
291
+ end
292
+ end
293
+ end
@@ -519,6 +519,18 @@ module Etna
519
519
  @raw['desc'] = val
520
520
  end
521
521
 
522
+ # description and description= are needed
523
+ # to make UpdateAttribute actions
524
+ # work in the model_synchronization_workflow for
525
+ # desc.
526
# Reader for the 'desc' entry of the raw attribute data, exposed under the
# longer name.
def description
  raw['desc']
end
529
+
530
# Writer counterpart of #description: stores +val+ under the 'desc' key of
# the raw attribute data.
def description=(val)
  @raw['desc'] = val
end
533
+
522
534
  def display_name
523
535
  raw['display_name']
524
536
  end
@@ -589,7 +601,7 @@ module Etna
589
601
  COPYABLE_ATTRIBUTE_ATTRIBUTES = [
590
602
  :attribute_name, :attribute_type, :desc, :display_name, :format_hint,
591
603
  :hidden, :link_model_name, :read_only, :attribute_group, :unique, :validation,
592
- :restricted
604
+ :restricted, :description
593
605
  ]
594
606
 
595
607
  EDITABLE_ATTRIBUTE_ATTRIBUTES = UpdateAttributeAction.members & COPYABLE_ATTRIBUTE_ATTRIBUTES
@@ -70,7 +70,7 @@ module Etna
70
70
  puts "Creating Janus project."
71
71
  create_janus_project!
72
72
  puts "Done! Adding you as an administrator on the project."
73
- add_janus_user(user['email'], "#{user['first']} #{user['last']}", 'editor')
73
+ add_janus_user(user['email'], "#{user['name']}", 'editor')
74
74
  promote_to_administrator(user['email'])
75
75
  update_magma_client_token!
76
76
 
@@ -36,11 +36,28 @@ module Etna
36
36
  documents = Documents.new({})
37
37
  last_page = nil
38
38
  while last_page.nil? || last_page.models.model_keys.map { |k| last_page.models.model(k).documents.raw.length }.sum > 0
39
+ attempts = 0
39
40
  begin
41
+ attempts += 1
40
42
  last_page = magma_client.retrieve(request)
43
+ # Unfortunately, paging in magma is not great and times out from time to time.
44
+ rescue Net::ReadTimeout => e
45
+ if attempts > 5
46
+ raise e
47
+ end
48
+
49
+ retry
41
50
  rescue Etna::Error => e
42
- raise e unless e.message.include?('not found')
43
- break
51
+ if e.status === 502
52
+ if attempts > 5
53
+ raise e
54
+ end
55
+
56
+ retry
57
+ else
58
+ raise e unless e.message.include?('not found')
59
+ break
60
+ end
44
61
  end
45
62
 
46
63
  documents += last_page.models.model(model_name).documents unless block_given?
@@ -1,5 +1,4 @@
1
1
  require 'ostruct'
2
- require 'digest'
3
2
  require 'fileutils'
4
3
  require 'tempfile'
5
4
 
@@ -9,11 +8,11 @@ module Etna
9
8
  class MaterializeDataWorkflow < Struct.new(
10
9
  :metis_client, :magma_client, :project_name,
11
10
  :model_name, :model_filters, :model_attributes_mask,
12
- :filesystem, :logger, :stub_files,
13
- :skip_tmpdir, keyword_init: true)
11
+ :filesystem, :logger, :stub_files, :concurrency,
12
+ :record_names, keyword_init: true)
14
13
 
15
14
  def initialize(**kwds)
16
- super(**({filesystem: Etna::Filesystem.new}.update(kwds)))
15
+ super(**({filesystem: Etna::Filesystem.new, concurrency: 10, record_names: "all"}.update(kwds)))
17
16
  end
18
17
 
19
18
  def magma_crud
@@ -25,31 +24,47 @@ module Etna
25
24
  end
26
25
 
27
26
  def materialize_all(dest)
28
- tmpdir = skip_tmpdir ? nil : filesystem.tmpdir
27
+ templates = {}
28
+
29
+ semaphore = Concurrent::Semaphore.new(concurrency)
30
+ errors = Queue.new
31
+
32
+ model_walker.walk_from(
33
+ model_name,
34
+ record_names,
35
+ model_attributes_mask: model_attributes_mask,
36
+ model_filters: model_filters,
37
+ page_size: 20,
38
+ ) do |template, document|
39
+ logger&.info("Materializing #{template.name}##{document[template.identifier]}")
40
+ templates[template.name] = template
41
+
42
+ begin
43
+ if (error = errors.pop(true))
44
+ raise error
45
+ end
46
+ rescue ThreadError
47
+ end
29
48
 
30
- begin
31
- model_walker.walk_from(
32
- model_name,
33
- model_attributes_mask: model_attributes_mask,
34
- model_filters: model_filters,
35
- ) do |template, document|
36
- logger&.info("Materializing #{template.name}##{document[template.identifier]}")
37
- materialize_record(dest, tmpdir, template, document)
49
+ semaphore.acquire
50
+ Thread.new do
51
+ begin
52
+ materialize_record(dest, template, document)
53
+ rescue => e
54
+ errors << e
55
+ ensure
56
+ semaphore.release
57
+ end
38
58
  end
39
- ensure
40
- filesystem.rm_rf(tmpdir) unless skip_tmpdir
41
59
  end
42
- end
43
60
 
44
- def each_root_record
45
- request = RetrievalRequest.new(project_name: project_name, model_name: model_name, record_names: "all",
46
- filter: filter, page_size: 100, page: 1)
47
- magma_crud.page_records(model_name, request) do |response|
48
- model = response.models.model(model_name)
49
- template = model.template
50
- model.documents.document_keys.each do |key|
51
- yield template, model.documents.document(key)
61
+ semaphore.acquire(concurrency)
62
+
63
+ begin
64
+ if (error = errors.pop(true))
65
+ raise error
52
66
  end
67
+ rescue ThreadError
53
68
  end
54
69
  end
55
70
 
@@ -78,11 +93,10 @@ module Etna
78
93
  @sync_metis_data_workflow ||= Etna::Clients::Metis::SyncMetisDataWorkflow.new(
79
94
  metis_client: metis_client,
80
95
  logger: logger,
81
- skip_tmpdir: skip_tmpdir,
82
96
  filesystem: filesystem)
83
97
  end
84
98
 
85
- def materialize_record(dest_dir, tmpdir, template, record)
99
+ def materialize_record(dest_dir, template, record)
86
100
  record_to_serialize = record.dup
87
101
 
88
102
  each_file(template, record) do |attr_name, url, filename, idx|
@@ -91,14 +105,16 @@ module Etna
91
105
  end
92
106
 
93
107
  dest_file = File.join(dest_dir, metadata_file_name(record_name: record[template.identifier], record_model_name: template.name, ext: "_#{attr_name}_#{idx}#{File.extname(filename)}"))
94
- sync_metis_data_workflow.copy_file(bin_root_dir: dest_dir, tmpdir: tmpdir, dest: dest_file, url: url, stub: stub_files)
95
- record_to_serialize[attr_name] << { file: dest_file, original_filename: filename }
108
+ sync_metis_data_workflow.copy_file(dest: dest_file, url: url, stub: stub_files)
109
+ record_to_serialize[attr_name] << {file: dest_file, original_filename: filename}
96
110
  end
97
111
 
98
112
  dest_file = File.join(dest_dir, metadata_file_name(record_name: record[template.identifier], record_model_name: template.name, ext: '.json'))
99
113
  filesystem.mkdir_p(File.dirname(dest_file))
100
- filesystem.with_writeable(dest_file, "w") do |io|
101
- io.write(record_to_serialize.to_json)
114
+ json = record_to_serialize.to_json
115
+
116
+ filesystem.with_writeable(dest_file, "w", size_hint: json.bytes.length) do |io|
117
+ io.write(json)
102
118
  end
103
119
  end
104
120