ncs_mdes_warehouse 0.9.0 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG.md CHANGED
@@ -1,6 +1,15 @@
1
1
  NCS Navigator MDES Warehouse History
2
2
  ====================================
3
3
 
4
+ 0.10.0
5
+ ------
6
+
7
+ - Add MDES CSV transformer. (#2710)
8
+
9
+ - Enable use in Rails 3.2.7+ applications. ActiveSupport 3.2.4-3.2.6 had [an
10
+ issue](https://github.com/rails/rails/pull/6857) which caused a conflict with
11
+ DataMapper. This issue was fixed in 3.2.7.
12
+
4
13
  0.9.0
5
14
  -----
6
15
 
data/Gemfile CHANGED
@@ -6,7 +6,7 @@ gemspec
6
6
  # activesupport, and builder, bundler takes infinite time to resolve unless you
7
7
  # lock it down a little.
8
8
  group :resolver_hint do
9
- gem 'actionpack', '~> 3.1.8'
9
+ gem 'actionpack', '~> 3.2.8'
10
10
  end
11
11
 
12
12
  group :development do
@@ -75,10 +75,12 @@ reporting schema is wiped and replaced with the results.
75
75
  DESC
76
76
  method_option 'force', :type => 'boolean',
77
77
  :desc => 'Copy the working schema to production even if there are errors'
78
+ method_option 'preserve', :type => 'boolean',
79
+ :desc => 'Do not wipe the working database before beginning ETL (for debugging)'
78
80
  def etl
79
81
  db = DatabaseInitializer.new(configuration)
80
82
  db.set_up_repository(:both)
81
- db.replace_schema
83
+ db.replace_schema unless options[:preserve]
82
84
 
83
85
  success = TransformLoad.new(configuration).run
84
86
  if success || options['force']
@@ -18,6 +18,7 @@ module NcsNavigator::Warehouse
18
18
  autoload :EventStartFromContactTransformer, 'ncs_navigator/warehouse/transformers/event_start_from_contact_transformer'
19
19
  autoload :Filters, 'ncs_navigator/warehouse/transformers/filters'
20
20
  autoload :ForeignKeyIndex, 'ncs_navigator/warehouse/transformers/foreign_key_index'
21
+ autoload :MdesCsv, 'ncs_navigator/warehouse/transformers/mdes_csv'
21
22
  autoload :NoBlankForeignKeysFilter, 'ncs_navigator/warehouse/transformers/no_blank_foreign_keys_filter'
22
23
  autoload :NoSsuOutreachAllSsusFilter, 'ncs_navigator/warehouse/transformers/no_ssu_outreach_all_ssus_filter'
23
24
  autoload :NoSsuOutreachPlaceholderFilter, 'ncs_navigator/warehouse/transformers/no_ssu_outreach_placeholder_filter'
@@ -0,0 +1,26 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ module NcsNavigator::Warehouse::Transformers
4
+ module MdesCsv
5
+ autoload :MultipleTableReader, 'ncs_navigator/warehouse/transformers/mdes_csv/multiple_table_reader'
6
+ autoload :TableReader, 'ncs_navigator/warehouse/transformers/mdes_csv/table_reader'
7
+
8
+ ##
9
+ # Creates a transformer that loads a directory full of CSV files,
10
+ # interpreting each of them using {TableReader}. Files that do not have the
11
+ # extension `csv` are ignored; so are subdirectories.
12
+ #
13
+ # @return [#transform]
14
+ def self.from_directory(configuration, directory, options={})
15
+ readers = Dir["#{directory}/*.csv"].collect do |csv_file|
16
+ model = File.basename(csv_file).sub(/\.csv$/, '')
17
+ MdesCsv::TableReader.new(configuration, model, csv_file)
18
+ end
19
+
20
+ EnumTransformer.new(
21
+ configuration,
22
+ MdesCsv::MultipleTableReader.new(configuration, readers),
23
+ options)
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,32 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ module NcsNavigator::Warehouse::Transformers::MdesCsv
4
+ ##
5
+ # An enumerable that joins the results for several {TableReader}s into a single
6
+ # {#each} call.
7
+ class MultipleTableReader
8
+ include Enumerable
9
+
10
+ attr_reader :configuration, :table_readers
11
+
12
+ ##
13
+ # @param [Configuration] configuration
14
+ # @param [Array<TableReader>] table_readers the table readers to concatenate
15
+ def initialize(configuration, table_readers)
16
+ @configuration = configuration
17
+ @table_readers = table_readers.sort_by { |r| configuration.models_module.mdes_order.index(r.model) }
18
+ end
19
+
20
+ def each
21
+ table_readers.each do |reader|
22
+ reader.each do |record|
23
+ yield record
24
+ end
25
+ end
26
+ end
27
+
28
+ def name
29
+ table_readers.collect(&:name).join(', ')
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,64 @@
1
+ require 'ncs_navigator/warehouse'
2
+
3
+ require 'csv'
4
+
5
+ module NcsNavigator::Warehouse::Transformers::MdesCsv
6
+ ##
7
+ # A streaming reader for a CSV containing one table's worth of MDES data.
8
+ #
9
+ # The CSV is converted into warehouse model instances, one record per row. The
10
+ # CSV MUST have a header row whose cells indicate the variable contained in
11
+ # each column.
12
+ class TableReader
13
+ include Enumerable
14
+
15
+ attr_reader :configuration, :model, :filename
16
+
17
+ ##
18
+ # @param [Configuration] configuration
19
+ # @param [Class,#to_s] model_designator a name for the expected record type
20
+ # from the current CSV. It can be a table name or unqualified warehouse
21
+ # model name, or an actual warehouse model class.
22
+ # @param [String] filename the file containing the CSV
23
+ def initialize(configuration, model_designator, filename)
24
+ @configuration = configuration
25
+ @model =
26
+ if Class === model_designator
27
+ model_designator
28
+ else
29
+ configuration.model(model_designator.to_s)
30
+ end
31
+ @filename = filename
32
+ end
33
+
34
+ def name
35
+ "#{filename} => #{model.mdes_table_name} table"
36
+ end
37
+
38
+ def each
39
+ CSV.foreach(filename, :headers => true, :header_converters => [:downcase]) do |row|
40
+ yield create_instance_for_row(row)
41
+ end
42
+ end
43
+
44
+ protected
45
+
46
+ def create_instance_for_row(row)
47
+ model.new.tap do |instance|
48
+ row.each do |header, value|
49
+ next if header == 'transaction_type'
50
+
51
+ setter = "#{header}="
52
+ unless instance.respond_to?(setter)
53
+ fail "Unknown attribute #{header.inspect} for #{model}."
54
+ end
55
+ instance.send(setter, value)
56
+ end
57
+
58
+ unless instance.key && instance.key.first
59
+ fail "Missing key (#{model.key.first.name}) for #{model}."
60
+ end
61
+ end
62
+ end
63
+ end
64
+ end
@@ -1,5 +1,5 @@
1
1
  module NcsNavigator
2
2
  module Warehouse
3
- VERSION = '0.9.0'
3
+ VERSION = '0.10.0'
4
4
  end
5
5
  end
@@ -18,8 +18,9 @@ Gem::Specification.new do |s|
18
18
  s.add_dependency 'ncs_mdes', '~> 0.10'
19
19
  s.add_dependency 'ncs_navigator_configuration', '~> 0.2'
20
20
 
21
- # Post-3.2.4 breaks DataMapper due to https://github.com/rails/rails/pull/6857
22
- s.add_dependency 'activesupport', '~> 3.0', '< 3.2.4'
21
+ # AS 3.2.4-3.2.6 break DataMapper due to https://github.com/rails/rails/pull/6857
22
+ # TODO: simplify this once we can drop support for AS 3.0 and 3.1
23
+ s.add_dependency 'activesupport', '~> 3.0', '!= 3.2.4', '!= 3.2.5', '!= 3.2.6'
23
24
  s.add_dependency 'i18n', '~> 0.4' # required by activesupport
24
25
 
25
26
  s.add_dependency 'thor', '~> 0.14.6'
@@ -46,5 +47,5 @@ Gem::Specification.new do |s|
46
47
  s.add_development_dependency 'rake', '~> 0.9.2'
47
48
  s.add_development_dependency 'yard', '~> 0.7.2'
48
49
  s.add_development_dependency 'ci_reporter', '1.6.6'
49
- s.add_development_dependency 'fakefs', '~> 0.4.0'
50
+ s.add_development_dependency 'fakefs', '0.4.0' # FakeFS does not follow semver
50
51
  end
@@ -1,4 +1,5 @@
1
1
  require 'spec_helper'
2
+ require 'nokogiri'
2
3
 
3
4
  module NcsNavigator::Warehouse::Models
4
5
  module Spec
@@ -95,7 +96,10 @@ module NcsNavigator::Warehouse::Models
95
96
  it 'always emits a transaction_type entry last since the schema requires it, even though it is meaningless' do
96
97
  last = xml.root.elements.last
97
98
  last.name.should == 'transaction_type'
98
- last['nil'].should == 'true'
99
+ # Nokogiri using libxml2 2.7.3 vs. libxml2 2.8.0 has different behavior for this test.
100
+ # With 2.7.3, the attribute is named nil. With 2.8.0, it retains the xsi namespace: 'xsi:nil'.
101
+ attr_name = last.keys.detect { |a| a =~ /nil$/ }
102
+ last[attr_name].should == 'true'
99
103
  end
100
104
 
101
105
  it 'emits the columns according to the mdes_order' do
@@ -0,0 +1,66 @@
1
+ require 'spec_helper'
2
+
3
+ module NcsNavigator::Warehouse::Transformers::MdesCsv
4
+ describe MultipleTableReader do
5
+ def table_reader(model)
6
+ TableReader.new(spec_config, model, csv_filename(model))
7
+ end
8
+
9
+ def csv_filename(model_name)
10
+ tmpdir + "#{model_name}.csv"
11
+ end
12
+
13
+ def csv_file(model_name, row_arrays)
14
+ File.open(csv_filename(model_name), 'w') do |f|
15
+ row_arrays.each do |row_array|
16
+ f.puts row_array.join(',')
17
+ end
18
+ end
19
+ end
20
+
21
+ let(:table_readers) {
22
+ [
23
+ table_reader('LinkPersonParticipant'),
24
+ table_reader('Person'),
25
+ table_reader('Participant')
26
+ ]
27
+ }
28
+
29
+ let(:reader) {
30
+ MultipleTableReader.new(spec_config, table_readers)
31
+ }
32
+
33
+ let(:sub_enums) { reader.table_readers }
34
+
35
+ it 'is Enumerable' do
36
+ MultipleTableReader.ancestors.should include(Enumerable)
37
+ end
38
+
39
+ describe '#initialize' do
40
+ it 'orders the sub-enums by MDES order' do
41
+ sub_enums.collect { |table_reader| table_reader.model.mdes_table_name }.
42
+ should == %w(person participant link_person_participant)
43
+ end
44
+ end
45
+
46
+ describe '#each' do
47
+ it 'yields the results from each table reader in order' do
48
+ csv_file 'Participant', [
49
+ %w(p_id), %w(X90)
50
+ ]
51
+
52
+ csv_file 'Person', [
53
+ %w(person_id), %w(P34)
54
+ ]
55
+
56
+ csv_file 'LinkPersonParticipant', [
57
+ %w(person_pid_id), %w(T23)
58
+ ]
59
+
60
+ reader.to_a.collect { |rec| [rec.class.mdes_table_name, rec.key.first] }.should == [
61
+ %w(person P34), %w(participant X90), %w(link_person_participant T23)
62
+ ]
63
+ end
64
+ end
65
+ end
66
+ end
@@ -0,0 +1,118 @@
1
+ require 'spec_helper'
2
+
3
+ module NcsNavigator::Warehouse::Transformers::MdesCsv
4
+ describe TableReader, :use_mdes do
5
+ let(:model_designator) { 'person' }
6
+ let(:filename) { tmpdir + 'people.csv' }
7
+ let(:reader) { TableReader.new(spec_config, model_designator, filename) }
8
+
9
+ it 'is enumerable' do
10
+ TableReader.ancestors.should include(Enumerable)
11
+ end
12
+
13
+ describe '#initialize' do
14
+ describe 'model_designator parameter' do
15
+ it 'accepts a table name to find the model' do
16
+ TableReader.new(spec_config, 'address', filename).model.
17
+ should == spec_config.model(:Address)
18
+ end
19
+
20
+ it 'accepts a model name to find the model' do
21
+ TableReader.new(spec_config, 'Telephone', filename).model.
22
+ should == spec_config.model(:Telephone)
23
+ end
24
+
25
+ it 'accepts a model class directly' do
26
+ TableReader.new(spec_config, spec_config.model(:Telephone), filename).model.
27
+ should == spec_config.model(:Telephone)
28
+ end
29
+ end
30
+ end
31
+
32
+ describe '#name' do
33
+ it 'is the filename and table name' do
34
+ reader.name.should == "#{filename} => person table"
35
+ end
36
+ end
37
+
38
+ describe '#each' do
39
+ let(:records) { reader.to_a }
40
+ let(:first_record) { records.first }
41
+
42
+ def csv_file(row_arrays)
43
+ File.open(filename, 'w') do |f|
44
+ row_arrays.each do |row_array|
45
+ f.puts row_array.join(',')
46
+ end
47
+ end
48
+ end
49
+
50
+ it 'produces one record per line' do
51
+ csv_file [
52
+ %w(person_id),
53
+ %w(A3),
54
+ %w(B7),
55
+ %w(C8)
56
+ ]
57
+
58
+ records.collect { |rec| rec.person_id }.should == %w(A3 B7 C8)
59
+ end
60
+
61
+ it 'maps record attributes via the headers' do
62
+ csv_file [
63
+ %w(last_name first_name person_id),
64
+ %w(MacMurray Fred FM36)
65
+ ]
66
+
67
+ first_record.person_id.should == 'FM36'
68
+ first_record.first_name.should == 'Fred'
69
+ first_record.last_name.should == 'MacMurray'
70
+ end
71
+
72
+ it 'produces model instances' do
73
+ csv_file [
74
+ %w(last_name first_name person_id),
75
+ %w(MacMurray Fred FM36)
76
+ ]
77
+
78
+ first_record.should be_a spec_config.model('person')
79
+ end
80
+
81
+ it 'interprets headers case-insensitively' do
82
+ csv_file [
83
+ %w(PERSON_ID FiRST_NaMe),
84
+ %w(BS34 Barbara)
85
+ ]
86
+
87
+ first_record.first_name.should == 'Barbara'
88
+ end
89
+
90
+ it 'fails for an unknown attribute' do
91
+ csv_file [
92
+ %w(person_id helicopter_model),
93
+ %w(TR900 Huey)
94
+ ]
95
+
96
+ expect { records }.to raise_error(/Unknown attribute "helicopter_model" for NcsNavigator::Warehouse::Models::\w+::Person\./)
97
+ end
98
+
99
+ it 'ignores transaction_type' do
100
+ csv_file [
101
+ %w(person_id transaction_type),
102
+ %w(TR900 NA)
103
+ ]
104
+
105
+ expect { records }.to_not raise_error
106
+ end
107
+
108
+ it 'fails if the key is not provided' do
109
+ csv_file [
110
+ %w(first_name last_name),
111
+ %w(Medford Man)
112
+ ]
113
+
114
+ expect { records }.to raise_error(/Missing key \(person_id\) for NcsNavigator::Warehouse::Models::\w+::Person\./)
115
+ end
116
+ end
117
+ end
118
+ end
@@ -0,0 +1,55 @@
1
+ require 'spec_helper'
2
+
3
+ module NcsNavigator::Warehouse::Transformers
4
+ describe MdesCsv, :use_mdes do
5
+ let(:options) { { :filters => lambda { |x| x } } }
6
+
7
+ shared_examples 'an MDES CSV transformer' do
8
+ it 'returns a transformer' do
9
+ transformer.should respond_to(:transform)
10
+ end
11
+
12
+ it 'passes the options to the transformer' do
13
+ transformer.filters.to_a.size.should == 1
14
+ end
15
+ end
16
+
17
+ describe '.from_directory' do
18
+ let(:dir) { tmpdir('mdes_csv') }
19
+ let(:transformer) { MdesCsv.from_directory(spec_config, dir.to_s, options) }
20
+
21
+ before do
22
+ %w(
23
+ person.csv
24
+ contact.csv
25
+ event.csv
26
+ readme.txt
27
+ ).each do |fn|
28
+ FileUtils.touch(dir + fn)
29
+ end
30
+ end
31
+
32
+ include_examples 'an MDES CSV transformer'
33
+
34
+ it 'uses a MultipleTableReader' do
35
+ transformer.enum.should be_a(MdesCsv::MultipleTableReader)
36
+ end
37
+
38
+ it 'uses all the *.csv files in the directory' do
39
+ transformer.enum.table_readers.collect(&:filename).should == [
40
+ (dir + 'person.csv').to_s,
41
+ (dir + 'event.csv').to_s,
42
+ (dir + 'contact.csv').to_s
43
+ ]
44
+ end
45
+
46
+ it 'uses the file name (sans ext) as the model designator' do
47
+ transformer.enum.table_readers.collect(&:model).should == [
48
+ spec_config.model('person'),
49
+ spec_config.model('event'),
50
+ spec_config.model('contact'),
51
+ ]
52
+ end
53
+ end
54
+ end
55
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ncs_mdes_warehouse
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.0
4
+ version: 0.10.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-05 00:00:00.000000000 Z
12
+ date: 2012-11-18 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ncs_mdes
@@ -51,9 +51,15 @@ dependencies:
51
51
  - - ~>
52
52
  - !ruby/object:Gem::Version
53
53
  version: '3.0'
54
- - - <
54
+ - - ! '!='
55
55
  - !ruby/object:Gem::Version
56
56
  version: 3.2.4
57
+ - - ! '!='
58
+ - !ruby/object:Gem::Version
59
+ version: 3.2.5
60
+ - - ! '!='
61
+ - !ruby/object:Gem::Version
62
+ version: 3.2.6
57
63
  type: :runtime
58
64
  prerelease: false
59
65
  version_requirements: !ruby/object:Gem::Requirement
@@ -62,9 +68,15 @@ dependencies:
62
68
  - - ~>
63
69
  - !ruby/object:Gem::Version
64
70
  version: '3.0'
65
- - - <
71
+ - - ! '!='
66
72
  - !ruby/object:Gem::Version
67
73
  version: 3.2.4
74
+ - - ! '!='
75
+ - !ruby/object:Gem::Version
76
+ version: 3.2.5
77
+ - - ! '!='
78
+ - !ruby/object:Gem::Version
79
+ version: 3.2.6
68
80
  - !ruby/object:Gem::Dependency
69
81
  name: i18n
70
82
  requirement: !ruby/object:Gem::Requirement
@@ -422,7 +434,7 @@ dependencies:
422
434
  requirement: !ruby/object:Gem::Requirement
423
435
  none: false
424
436
  requirements:
425
- - - ~>
437
+ - - '='
426
438
  - !ruby/object:Gem::Version
427
439
  version: 0.4.0
428
440
  type: :development
@@ -430,7 +442,7 @@ dependencies:
430
442
  version_requirements: !ruby/object:Gem::Requirement
431
443
  none: false
432
444
  requirements:
433
- - - ~>
445
+ - - '='
434
446
  - !ruby/object:Gem::Version
435
447
  version: 0.4.0
436
448
  description:
@@ -2313,6 +2325,9 @@ files:
2313
2325
  - lib/ncs_navigator/warehouse/transformers/foreign_key_index.rb
2314
2326
  - lib/ncs_navigator/warehouse/transformers/foreign_key_index/database_key_provider.rb
2315
2327
  - lib/ncs_navigator/warehouse/transformers/foreign_key_index/static_key_provider.rb
2328
+ - lib/ncs_navigator/warehouse/transformers/mdes_csv.rb
2329
+ - lib/ncs_navigator/warehouse/transformers/mdes_csv/multiple_table_reader.rb
2330
+ - lib/ncs_navigator/warehouse/transformers/mdes_csv/table_reader.rb
2316
2331
  - lib/ncs_navigator/warehouse/transformers/no_blank_foreign_keys_filter.rb
2317
2332
  - lib/ncs_navigator/warehouse/transformers/no_ssu_outreach_all_ssus_filter.rb
2318
2333
  - lib/ncs_navigator/warehouse/transformers/no_ssu_outreach_placeholder_filter.rb
@@ -2351,6 +2366,9 @@ files:
2351
2366
  - spec/ncs_navigator/warehouse/transformers/filters_spec.rb
2352
2367
  - spec/ncs_navigator/warehouse/transformers/foreign_key_index/database_key_provider_spec.rb
2353
2368
  - spec/ncs_navigator/warehouse/transformers/foreign_key_index_spec.rb
2369
+ - spec/ncs_navigator/warehouse/transformers/mdes_csv/multiple_table_reader_spec.rb
2370
+ - spec/ncs_navigator/warehouse/transformers/mdes_csv/table_reader_spec.rb
2371
+ - spec/ncs_navigator/warehouse/transformers/mdes_csv_spec.rb
2354
2372
  - spec/ncs_navigator/warehouse/transformers/no_blank_foreign_keys_filter_spec.rb
2355
2373
  - spec/ncs_navigator/warehouse/transformers/no_ssu_outreach_all_ssus_filter_spec.rb
2356
2374
  - spec/ncs_navigator/warehouse/transformers/no_ssu_outreach_placeholder_filter_spec.rb
@@ -2381,7 +2399,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
2381
2399
  version: '0'
2382
2400
  segments:
2383
2401
  - 0
2384
- hash: -1614550889051803335
2402
+ hash: -1567355274626065010
2385
2403
  required_rubygems_version: !ruby/object:Gem::Requirement
2386
2404
  none: false
2387
2405
  requirements:
@@ -2390,7 +2408,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
2390
2408
  version: '0'
2391
2409
  segments:
2392
2410
  - 0
2393
- hash: -1614550889051803335
2411
+ hash: -1567355274626065010
2394
2412
  requirements: []
2395
2413
  rubyforge_project:
2396
2414
  rubygems_version: 1.8.24
@@ -2423,6 +2441,9 @@ test_files:
2423
2441
  - spec/ncs_navigator/warehouse/transformers/filters_spec.rb
2424
2442
  - spec/ncs_navigator/warehouse/transformers/foreign_key_index/database_key_provider_spec.rb
2425
2443
  - spec/ncs_navigator/warehouse/transformers/foreign_key_index_spec.rb
2444
+ - spec/ncs_navigator/warehouse/transformers/mdes_csv/multiple_table_reader_spec.rb
2445
+ - spec/ncs_navigator/warehouse/transformers/mdes_csv/table_reader_spec.rb
2446
+ - spec/ncs_navigator/warehouse/transformers/mdes_csv_spec.rb
2426
2447
  - spec/ncs_navigator/warehouse/transformers/no_blank_foreign_keys_filter_spec.rb
2427
2448
  - spec/ncs_navigator/warehouse/transformers/no_ssu_outreach_all_ssus_filter_spec.rb
2428
2449
  - spec/ncs_navigator/warehouse/transformers/no_ssu_outreach_placeholder_filter_spec.rb