daengine 0.5.13 → 0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/Rakefile +5 -0
  3. data/app/controllers/digital_assets_controller.rb +29 -46
  4. data/app/helpers/digital_assets_helper.rb +11 -2
  5. data/app/models/content_service_resource.rb +15 -15
  6. data/app/models/digital_asset.rb +83 -94
  7. data/app/models/teamsite_digital_asset_shim.rb +253 -0
  8. data/app/service/digital_asset_lookup_service.rb +47 -47
  9. data/bin/process_assets +6 -7
  10. data/bin/process_availability +7 -8
  11. data/bin/process_taxonomy +7 -8
  12. data/config/routes.rb +7 -7
  13. data/lib/daengine.rb +1 -13
  14. data/lib/daengine/content_service_processor.rb +24 -24
  15. data/lib/daengine/digital_asset_processor.rb +4 -0
  16. data/lib/daengine/teamsite_metadata_parser.rb +3 -2
  17. data/lib/daengine/version.rb +1 -1
  18. data/lib/tasks/daengine_tasks.rake +0 -9
  19. data/spec/acceptance/digital_assets_spec.rb +116 -0
  20. data/spec/controllers/digital_assets_controller_spec.rb +14 -99
  21. data/spec/dummy/config/environments/test.rb +1 -1
  22. data/spec/dummy/log/development.log +0 -0
  23. data/spec/dummy/log/test.log +8802 -0
  24. data/spec/factories.rb +8 -32
  25. data/spec/lib/content_service_processor_spec.rb +30 -30
  26. data/spec/lib/teamsite_metadata_parser_spec.rb +26 -19
  27. data/spec/mock_data/bulk-ssc_deploy.xml +900 -900
  28. data/spec/mock_data/daengine.yml +1 -1
  29. data/spec/mock_data/digitalAssets/GK-66261382-96be-4a92-839a-73467e89e1b4.txt +289 -289
  30. data/spec/mock_data/digitalAssets/TEST_FINRA_DOC.doc +0 -0
  31. data/spec/mock_data/digitalAssets/m_gyt-709d8ae1-31f1-46ea-b448-33f43dd5140f.html +1008 -1008
  32. data/spec/mock_data/selective_new_package.xml +55 -2
  33. data/spec/mock_data/taxonomy/taxonomyengine.yml +1 -1
  34. data/spec/models/digital_asset_spec.rb +23 -28
  35. data/spec/service/digital_asset_lookup_service_spec.rb +75 -79
  36. data/spec/spec_helper.rb +18 -1
  37. metadata +23 -7
  38. data/lib/daengine/digital_asset_extension_processor.rb +0 -28
  39. data/spec/lib/digital_asset_extension_processor_spec.rb +0 -32
  40. data/spec/mock_data/merrill_lynch_extension.json +0 -5
@@ -0,0 +1,253 @@
1
+ require 'mongoid'
2
+
3
+ # shim class to allow old teamsite parser to create DigitalAssets in the new format
4
+
5
# Shim model that exposes the legacy, document-centric DigitalAsset interface
# (an asset that embeds a list of documents) so the old TeamSite parser can keep
# building assets, while the data is actually written through DigitalAsset,
# which carries the primary document's metadata as flat attributes.
class TeamsiteDigitalAssetShim
  include Mongoid::Document
  include Mongoid::Timestamps

  field :title, type: String
  field :changed_at, type: Time
  field :audiences, type: Array, default: []
  field :sami_code, type: String
  field :product_ids, type: Array, default: []
  field :published_at, type: Time
  field :unpublished_at, type: Time
  field :expires_at, type: Time
  field :guid, type: String
  # field :fund_ids, type: Array, default: []
  field :business_owner, type: String
  field :summary, type: String
  field :content_organization_ids, type: Array, default: []
  field :program_ids, type: Array, default: []

  field :omniture_codes, type: Array, default: []
  field :orderable, :type => Boolean, default: false
  key :guid

  # field :documents, type: Hash

  embeds_many :documents, :class_name => 'DigitalAsset::Document'

  accepts_nested_attributes_for :documents

  # Exclude XBRL documents from all queries
  default_scope excludes(:'documents.content_type' => "LDJDCMAIK") # Had to use static value instead of a Constant

  scope :title_is, ->(title) { where(:title => title) }
  scope :business_owner_is, ->(business_owner) { where(:business_owner => business_owner) }
  scope :guid_is, ->(guid) { where(:guid => guid) }
  # scope :funds_in, ->(fund_id) { where(:fund_ids.in => fund_id)}
  # NOTE: audience_in used to be declared twice with identical bodies; one declaration is enough.
  scope :audience_in, ->(audience_id) { where(:audiences.in => audience_id) }
  # Fully qualified: a bare `Audience` is not defined inside this class, and the
  # predicates below already use DigitalAsset::Audience.
  scope :audience_investor_approved, -> { where(:audiences.in => [DigitalAsset::Audience::INVESTOR_APPROVED]) }

  scope :content_organization_in, ->(content_organization_id) { where(:content_organization_ids.in => content_organization_id) }
  scope :program_id_in, ->(program_id) { where(:program_ids.in => program_id) }
  scope :sami_is, ->(sami_code) { where(:sami_code => sami_code) }
  scope :sami_in, ->(sami_codes) { where(:sami_code.in => sami_codes) }
  scope :path_is, ->(path) { where(:'documents.path' => path) }
  scope :doctype_in, ->(types) { where(:'documents.content_type'.in => types) }
  scope :content_type_in, ->(types) { where(:'documents.content_type'.in => types) }
  scope :product_in, ->(types) { where(:product_ids.in => types) }
  scope :stale, -> { where(:updated_at.lte => 2.minutes.ago) }
  scope :orderable_is, ->(orderable) { where(:orderable => orderable) }
  scope :orderable, -> { where(orderable: true) }
  scope :has_finra, -> { where(:'documents.content_type' => DigitalAsset::ContentType::FINRA) }
  scope :alphabetical, order_by(:title => :asc)
  scope :not_xbrl, -> { excludes(:'documents.content_type' => DigitalAsset::ContentType::XBRL_DOCUMENT) }

  #scope :order_by_fund, order_by[[:product_ids, :asc]]
  #default_scope {not_in(:'documents.content_type' => ["LDJDCMAIK"])}

  # validations
  validates_presence_of :guid, :title, :changed_at, :published_at,
                        :expires_at, :audiences, :documents

  validate :validate_future_expiration

  # validates_uniqueness_of :guid

  # Builds a shim for the DigitalAsset matching +attrs+ and re-creates the
  # embedded documents (primary document, plus a FINRA document when the asset
  # has a finra_path) from the DigitalAsset's flat metadata.
  #
  # attrs - Hash of attributes handed to DigitalAsset.find_or_initialize_by.
  #
  # Returns an unsaved TeamsiteDigitalAssetShim.
  def self.find_or_initialize_by(attrs = {})
    # get the base DA if it exists
    da = DigitalAsset.find_or_initialize_by(attrs)
    # a populated updated_at is treated as "the DA already exists": start from
    # its stored attributes, otherwise from the attributes we were given
    tda = TeamsiteDigitalAssetShim.new(da.updated_at.present? ? da.attributes : attrs)
    # move metadata from the new spots to the old spots
    tda.documents.build(path: da.path, doc_changed_at: da.doc_changed_at,
                        content_type: da.content_type, pages: da.pages, size: da.size,
                        mime_type: da.mime_type, subject: da.subject,
                        keywords: da.keywords, author: da.author)
    # make a finra if there is one
    if da.finra_path.present?
      tda.documents.build(content_type: DigitalAsset::ContentType::FINRA, path: da.finra_path)
    end
    tda
  end

  # Applies +atts+ to this shim, then converts the document-based metadata back
  # into DigitalAsset's flat attributes and saves a DigitalAsset built from +atts+.
  #
  # atts - Hash of attributes (passed both to this shim and to DigitalAsset.new).
  #
  # Returns the result of DigitalAsset#save!, which raises when the save fails.
  def update_attributes!(atts = {})
    # deconvert metadata from old to new
    self.attributes = atts
    da = DigitalAsset.new(atts)
    doc = first_non_finra
    da.path = doc.try(:path)
    da.finra_path = finra_document.try(:path)
    # Only copy document metadata when a primary document exists; without this
    # guard a missing document raised NoMethodError on nil before save! could
    # report anything useful.
    if doc
      da.doc_changed_at = doc.doc_changed_at
      da.content_type = doc.content_type
      da.pages = doc.pages
      da.size = doc.size
      da.mime_type = doc.mime_type
      da.subject = doc.subject
      da.keywords = doc.keywords
      da.author = doc.author
    end
    da.digital_asset_id = guid
    # save the new metadata format
    da.save!
  end

  # JSON representation extended with the derived fund code, the primary
  # content type id and the most recent document change time.
  def as_json(opts = {})
    super(opts).merge({:comp_fundcode => fund_code, :content_type_id => content_type_id, :latest_doc_changed_at => latest_doc_changed_at})
  end

  # Destroys stale DigitalAsset records, but only when bulk_processed? is true.
  #
  # NOTE(review): the destroy targets DigitalAsset while bulk_processed? counts
  # records of this shim class - confirm that mixing the two is intended.
  def self.purge!
    # last_update = DigitalAsset.desc(:updated_at).try(:first).try :updated_at
    DigitalAsset.stale.destroy_all if bulk_processed?
  end

  # Most recent doc_changed_at among the embedded documents, ignoring documents
  # of content type '549' and documents that carry no change date.
  # NOTE(review): '549' is a bare literal here - presumably a content type id; confirm.
  #
  # Returns a Time, or nil when no document qualifies.
  def latest_doc_changed_at
    documents.reject { |d| d.content_type == '549' }.map(&:doc_changed_at).compact.max
  end

  # Validation: expires_at must be set and lie more than one minute in the future.
  def validate_future_expiration
    in_future = expires_at && expires_at > 1.minute.from_now
    errors.add(:expires_at, "Expiration date must be at least 1 minute from now") unless in_future
  end

  # True when at most 5% of this class's records are stale.
  # With zero records the ratio is 0.0 / 0 (NaN), so the result is false.
  def self.bulk_processed?
    (stale.count.to_f / self.count) <= 0.05
  end

  # First embedded document stored at +path+, or nil.
  def path_is(path)
    documents.where(path: path).first unless documents.blank?
  end

  # Change time of the embedded document stored at +path+, or nil.
  def doc_changed_at(path)
    path_is(path).try(:doc_changed_at)
  end

  # Size of the primary (first non-FINRA) document, or nil.
  def doc_size
    first_non_finra.try(:size)
  end

  # Content type of every embedded document, in document order.
  def content_type_ids
    (documents || []).map(&:content_type)
  end
  alias :doctype_ids :content_type_ids

  # Whether one of the embedded documents is a FINRA document.
  def has_finra?
    !finra_document.nil?
  end

  # Whether expires_at lies in the past.
  # NOTE(review): raises when expires_at is nil - confirm callers only use this on validated records.
  def expired?
    expires_at < Time.now
  end

  # First embedded document whose content type is FINRA, or nil.
  def finra_document
    documents.detect { |d| d.content_type == DigitalAsset::ContentType::FINRA }
  end

  # Position of the INVESTOR_APPROVED audience in audiences (truthy, even for 0), or nil.
  def is_investor_approved?
    audiences.index(DigitalAsset::Audience::INVESTOR_APPROVED)
  end
  alias :investor_approved? :is_investor_approved?

  # Position of the INSTITUTIONAL_USE audience in audiences (truthy, even for 0), or nil.
  def is_institutional_use?
    audiences.index(DigitalAsset::Audience::INSTITUTIONAL_USE)
  end
  alias :institutional_use? :is_institutional_use?

  # Taxonomy label of the first product id.
  def product
    TaxonomyTerm.label_for_term(product_ids.first)
  end

  # Taxonomy label of the first program id.
  def program
    TaxonomyTerm.label_for_term(program_ids.first)
  end

  # Taxonomy label of the first content organization id.
  def content_org
    TaxonomyTerm.label_for_term(content_organization_ids.first)
  end

  # Fund code of the first product whose taxonomy term has one, left-padded
  # with zeros to five characters; nil when no product has a fund code.
  def fund_code
    product_ids.each do |product_id|
      # look each term up once and reuse the result
      code = TaxonomyTerm.term_id_is(product_id)[0].try(:fund_code)
      return code.try(:rjust, 5, '0') if code
    end
    nil
  end

  # Taxonomy label of the primary document's content type.
  def content_type
    TaxonomyTerm.label_for_term(content_type_id)
  end

  # Content type id of the primary (first non-FINRA) document, or nil.
  def content_type_id
    first_non_finra.try(:content_type)
  end

  # Page count of the primary document, or nil when there is none
  # (nil-safe like doc_size and primary_path).
  def pages
    first_non_finra.try(:pages)
  end

  # Taxonomy label of the first audience.
  def audience
    TaxonomyTerm.label_for_term(audiences.first)
  end

  # Path of the primary (first non-FINRA) document, or nil.
  def primary_path
    first_non_finra.try(:path)
  end

  # Upper-cased text after the last '.' of the primary document's path, or nil.
  def primary_extension
    primary_path.try(:split, '.').try(:last).try(:upcase)
  end

  private

  # First embedded document that is not a FINRA document, or nil.
  def first_non_finra
    documents.try(:detect) { |d| d.content_type != DigitalAsset::ContentType::FINRA }
  end

end
230
+
231
# One file belonging to a digital asset. Instances are stored embedded inside
# their parent record rather than in a collection of their own, and are keyed
# by their path.
class DigitalAsset::Document
  include Mongoid::Document

  # Location and identity of the file
  field :path,           :type => String
  field :doc_changed_at, :type => Time
  field :content_type,   :type => String

  # Descriptive metadata
  field :pages,     :type => Integer, :default => 1
  field :size,      :type => String
  field :mime_type, :type => String
  field :subject,   :type => String
  field :keywords,  :type => Array, :default => []
  field :author,    :type => String

  embedded_in :digital_asset

  key :path

  # A path is mandatory and must be unique.
  validates_uniqueness_of :path
  validates_presence_of :path #, :doc_changed_at, :content_type
  # Reject manifest files: any path containing "/manifest" or "archives/".
  validates_format_of :path, :without => /\/manifest|archives\//

end
@@ -1,57 +1,57 @@
1
1
  class DigitalAssetLookupService
2
2
 
3
- def self.find_documents_by_fund_code(fund_codes)
4
- ids = fund_codes.collect {|c| term_id_from_fund_code(c)}
5
- DigitalAsset.unscoped.content_type_in(DigitalAsset::ContentType::FUND_DOC_TYPES).product_in ids
6
- end
3
+ # def self.find_documents_by_fund_code(fund_codes)
4
+ # ids = fund_codes.collect {|c| term_id_from_fund_code(c)}
5
+ # DigitalAsset.unscoped.content_type_in(DigitalAsset::ContentType::FUND_DOC_TYPES).product_in ids
6
+ # end
7
7
 
8
- def self.find_documents_by_fund_code_and_content_type(fund_codes,content_type)
9
- assets = DigitalAssetLookupService.find_documents_by_fund_code(fund_codes)
10
- found_asset = nil
11
- assets.each {|da|
12
- da.documents.each {|doc|
13
- found_asset = da if doc.content_type == content_type
14
- }
15
- }
16
- found_asset
17
- end
8
+ # def self.find_documents_by_fund_code_and_content_type(fund_codes,content_type)
9
+ # assets = DigitalAssetLookupService.find_documents_by_fund_code(fund_codes)
10
+ # found_asset = nil
11
+ # assets.each {|da|
12
+ # da.documents.each {|doc|
13
+ # found_asset = da if doc.content_type == content_type
14
+ # }
15
+ # }
16
+ # found_asset
17
+ # end
18
18
 
19
- def self.fund_code_from_id(taxonomy_id)
20
- TaxonomyTerm.term_id_is(taxonomy_id).try(:term_type).try([], 'FUND_CODE')
21
- end
19
+ # def self.fund_code_from_id(taxonomy_id)
20
+ # TaxonomyTerm.term_id_is(taxonomy_id).try(:term_type).try([], 'FUND_CODE')
21
+ # end
22
22
 
23
- def self.term_id_from_fund_code(code)
24
- term = TaxonomyTerm.fund_code_is(code)
25
- term.try(:first).try(:term_id)
26
- end
23
+ # def self.term_id_from_fund_code(code)
24
+ # term = TaxonomyTerm.fund_code_is(code)
25
+ # term.try(:first).try(:term_id)
26
+ # end
27
27
 
28
- def self.fund_code_from_asset_path(path)
29
- da = DigitalAsset.path_is(item.path).first
30
- fund_code = fund_for_digital_asset da
31
- end
32
- def self.find_documents_by_query(arg_product_ids=nil, arg_content_type_ids=nil, arg_audience_ids=nil, finra=nil)
33
- product_ids = [] << arg_product_ids
34
- product_ids.compact!
35
- product_ids.flatten!
36
- content_type_ids = [] << arg_content_type_ids
37
- content_type_ids.compact!
38
- content_type_ids.flatten!
39
- audience_ids = [] << arg_audience_ids
40
- audience_ids.compact!
41
- audience_ids.flatten!
42
- finra = finra || false
43
- query = {}
44
- query[:product_ids.in] = product_ids if !product_ids.blank?
45
- query[:'documents.content_type'.in] = content_type_ids if !content_type_ids.blank?
46
- query[:audiences.in] = audience_ids if !audience_ids.blank?
28
+ # def self.fund_code_from_asset_path(path)
29
+ # da = DigitalAsset.path_is(item.path).first
30
+ # fund_code = fund_for_digital_asset da
31
+ # end
32
+ # def self.find_documents_by_query(arg_product_ids=nil, arg_content_type_ids=nil, arg_audience_ids=nil, finra=nil)
33
+ # product_ids = [] << arg_product_ids
34
+ # product_ids.compact!
35
+ # product_ids.flatten!
36
+ # content_type_ids = [] << arg_content_type_ids
37
+ # content_type_ids.compact!
38
+ # content_type_ids.flatten!
39
+ # audience_ids = [] << arg_audience_ids
40
+ # audience_ids.compact!
41
+ # audience_ids.flatten!
42
+ # finra = finra || false
43
+ # query = {}
44
+ # query[:product_ids.in] = product_ids if !product_ids.blank?
45
+ # query[:'documents.content_type'.in] = content_type_ids if !content_type_ids.blank?
46
+ # query[:audiences.in] = audience_ids if !audience_ids.blank?
47
47
 
48
- digital_assets = DigitalAsset.where(query)
49
- digital_assets = digital_assets.select {|d| d.has_finra?} if finra
50
- digital_assets
51
- end
48
+ # digital_assets = DigitalAsset.where(query)
49
+ # digital_assets = digital_assets.select {|d| d.has_finra?} if finra
50
+ # digital_assets
51
+ # end
52
52
 
53
- def self.find_documents_by_sami_code(sami_codes)
54
- DigitalAsset.sami_in sami_codes
55
- end
53
+ # def self.find_documents_by_sami_code(sami_codes)
54
+ # DigitalAsset.sami_in sami_codes
55
+ # end
56
56
 
57
57
  end
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env jruby
1
+ #!/usr/bin/env ruby
2
2
  #
3
3
  # This file was generated by Bundler.
4
4
  #
@@ -6,11 +6,10 @@
6
6
  # this file is here to facilitate running it.
7
7
  #
8
8
 
9
- require 'pathname'
10
- ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
- Pathname.new(__FILE__).realpath)
9
+ require 'daengine'
12
10
 
13
- require 'rubygems'
14
- require 'bundler/setup'
11
+ config = YAML.load_file(ARGV[0])
15
12
 
16
- load Gem.bin_path('daengine', 'process_assets')
13
+ t = Daengine.execute(config)
14
+
15
+ puts t
@@ -1,16 +1,15 @@
1
- #!/usr/bin/env jruby
1
+ #!/usr/bin/env ruby
2
2
  #
3
3
  # This file was generated by Bundler.
4
4
  #
5
- # The application 'process_availability' is installed as part of a gem, and
5
+ # The application 'process_availability' is installed as part of a gem, and
6
6
  # this file is here to facilitate running it.
7
7
  #
8
8
 
9
- require 'pathname'
10
- ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
- Pathname.new(__FILE__).realpath)
9
+ require 'daengine'
12
10
 
13
- require 'rubygems'
14
- require 'bundler/setup'
11
+ config = YAML.load_file(ARGV[0])
15
12
 
16
- load Gem.bin_path('daengine', 'process_availability')
13
+ t = Daengine.execute_content_service(config)
14
+
15
+ puts t
@@ -1,16 +1,15 @@
1
- #!/usr/bin/env jruby
1
+ #!/usr/bin/env ruby
2
2
  #
3
3
  # This file was generated by Bundler.
4
4
  #
5
- # The application 'process_taxonomy' is installed as part of a gem, and
5
+ # The application 'process_taxonomy' is installed as part of a gem, and
6
6
  # this file is here to facilitate running it.
7
7
  #
8
8
 
9
- require 'pathname'
10
- ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
- Pathname.new(__FILE__).realpath)
9
+ require 'daengine'
12
10
 
13
- require 'rubygems'
14
- require 'bundler/setup'
11
+ config = YAML.load_file(ARGV[0])
15
12
 
16
- load Gem.bin_path('daengine', 'process_taxonomy')
13
+ t = Daengine.execute_taxonomy(config)
14
+
15
+ puts t
@@ -1,9 +1,9 @@
1
1
  Rails.application.routes.draw do
2
- match 'digital_assets/search' => 'digital_assets#search'
3
- match 'digital_assets/fund_docs' => 'digital_assets#fund_docs'
4
- # post "digital_assets/sync" => 'digital_assets#sync_assets'
5
- # match 'digital_assets/:id' => 'digital_assets#sami' #, :id => /\w{4,8}.\d*/
6
- resources :digital_assets, :only => [:index, :show]
7
- #resources :digital_assets
8
- #get 'digital_assets' => 'digital_asset#index'
2
+ resources :digital_assets, except: [:new, :edit], defaults: {format: 'json'} do
3
+ collection do
4
+ get 'fund_docs'
5
+ get 'search', to: 'digital_assets#search'
6
+ post 'updated_time'
7
+ end
8
+ end
9
9
  end
@@ -2,6 +2,7 @@ require "daengine/version"
2
2
  require 'daengine/engine'
3
3
  require 'daengine/railtie' if defined?(Rails)
4
4
  require File.expand_path('../../app/models/digital_asset',__FILE__)
5
+ require File.expand_path('../../app/models/teamsite_digital_asset_shim',__FILE__)
5
6
  require File.expand_path('../../app/models/taxonomy_term',__FILE__)
6
7
  require File.expand_path('../../app/models/content_service_resource',__FILE__)
7
8
  require 'mini_exiftool'
@@ -10,7 +11,6 @@ require 'daengine/digital_asset_processor'
10
11
  require 'daengine/taxonomy_processor'
11
12
  require 'daengine/content_service_processor'
12
13
  require 'daengine/taxonomy_parser'
13
- require 'daengine/digital_asset_extension_processor'
14
14
  require 'mongoid'
15
15
  require 'logger'
16
16
 
@@ -97,18 +97,6 @@ module Daengine
97
97
  end
98
98
  end
99
99
 
100
- def self.execute_digital_asset_extension(config_options)
101
- self.configure(config_options)
102
- self.set_logger(config[:digital_asset_extension_logfile_location])
103
- self.log("Daengine: ### Loading application config ###", "info")
104
- config_options.each { |key, value| self.log("CONFIG #{key}\t#{value}", "info")}
105
- begin
106
- DigitalAssetExtensionProcessor.process_extension_file
107
- rescue Exception => e
108
- self.log(e.message, "error")
109
- end
110
- end
111
-
112
100
  def self.set_logger(logfile)
113
101
  @logger = Logger.new(logfile, 'daily')
114
102
  end