bulk_ops 0.1.23 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,123 @@
1
# Interprets the relationship columns of a spreadsheet row ("parent",
# "collection", "order") and records the result on the row's work proxy
# and metadata.
#
# The including class is expected to provide @raw_row, @proxy, @metadata,
# reference_identifier, raw_data, row_number, operation and the class method
# normalize_relationship_field_name.
module BulkOps::InterpretRelationshipsBehavior
  extend ActiveSupport::Concern

  # Walks every column of @raw_row and applies the ones that name a known
  # relationship type. Columns that are blank, or whose value merely repeats
  # the header, are ignored.
  def interpret_relationship_fields
    @raw_row.each do |field, value|
      next if value.blank? || field.blank? || value == field

      # the default identifier type is the reference identifier of the proxy
      id_type = reference_identifier

      # Correctly interpret the notation "parent:id", "parent id" etc in a column header
      header_parts = field.split(/[:_\-\s]/)
      field, id_type = header_parts if header_parts.count == 2

      # skip to next field unless it's a known relationship field
      next unless (relationship_type = self.class.normalize_relationship_field_name(field))

      case relationship_type
      when "order"
        # The field specifies the object's order among siblings
        @proxy.update(order: value.to_f)
      when "collection"
        # The field specifies the name or ID of a collection:
        # find or create the collection and update the metadata to match
        col = find_or_create_collection(value)
        (@metadata[:member_of_collection_ids] ||= []) << col.id if col
      when "parent"
        # Correctly interpret the notation "row:349", "id:s8df4j32w" etc in a cell.
        # "-" is deliberately not a separator here so negative row offsets survive.
        cell_parts = value.split(/[:_\s]/)
        id_type, value = cell_parts if cell_parts.count == 2

        parent = find_parent_proxy(value, field, id_type)
        # find_parent_proxy returns false/nil when no parent can be located;
        # there is nothing to attach to in that case.
        next unless parent

        proxy_updates = { parent_id: parent.id }
        siblings = parent.ordered_children
        if siblings.present? && @proxy.previous_sibling_id.nil?
          proxy_updates[:previous_sibling_id] = siblings.last.id
        end
        @proxy.update(proxy_updates)
      end
    end
  end

  private

  # Returns the row number of the most recent preceding row that does not
  # itself have a parent defined in +field+, or nil when there is none.
  def find_previous_parent_row(field = "parent")
    # Stop at index 0: a negative index would wrap around to the end of an Array.
    (row_number - 1).downto(0) do |index|
      prev_row = raw_data[index]
      return nil unless prev_row
      return index if prev_row[field].blank?
    end
    nil
  end

  # Locates the work proxy of the parent referenced by +parent_id+.
  # The id_type determines what kind of identifier we expect in parent_id:
  # "id" (an existing work), "proxy_id" (a WorkProxy primary key) or
  # "row" (a spreadsheet row, absolute, negative-relative or "previous").
  #
  # Returns the proxy, false when an "id" reference does not exist in Solr,
  # or nil when nothing matches or the id_type is not recognised.
  def find_parent_proxy(parent_id, field, id_type)
    case id_type.to_s.downcase
    when "id"
      # Expect a reference to an existing work in the DAMS
      return false unless BulkOps::SolrService.record_exists?(parent_id.to_s)

      # Pull the work proxy for that work, preferring one from this operation
      parent_proxy = BulkOps::WorkProxy.find_by(work_id: parent_id.to_s, operation_id: @proxy.operation.id) ||
                     BulkOps::WorkProxy.find_by(work_id: parent_id.to_s)
      return parent_proxy if parent_proxy.present?

      # If no work proxy exists for this work, create one just to keep track of this task
      BulkOps::WorkProxy.create(status: "awaiting_children",
                                operation_id: 0,
                                last_event: DateTime.now,
                                work_id: parent_id.to_s)
    when "proxy_id"
      BulkOps::WorkProxy.find(parent_id)
    when "row"
      if parent_id =~ /\A[-+]?[0-9]+\z/
        if parent_id.to_i < 0
          # if given a negative integer, count backwards from the current row
          # (remember that parent_id.to_i is negative)
          parent_id = @proxy.row_number.to_i + parent_id.to_i
        elsif parent_id.to_i > 0
          # if given a positive integer, just remove the row offset
          parent_id = parent_id.to_i - BulkOps::ROW_OFFSET
        end
      elsif parent_id.to_s.downcase.include?("prev")
        # if given any variation of the word "previous", get the first
        # preceding row with no parent of its own
        parent_id = find_previous_parent_row(field)
      end

      BulkOps::WorkProxy.find_by(operation_id: @proxy.operation_id,
                                 row_number: parent_id.to_i)
    end
  end

  # Looks a collection up by title or by id.
  # Returns the last match (id matches rank after title matches) or false.
  # NOTE(review): the puts calls write debug output to stdout; consider a logger.
  def find_collection(collection)
    puts "FINDING COLLECTION: #{collection}"
    title_matches = Collection.where(title: collection).to_a
    first_title_matches = title_matches.select { |col| col.title.first == collection }
    cols = title_matches + first_title_matches + Collection.where(id: collection).to_a
    puts "COLLECTION: #{cols.last}"
    cols.empty? ? false : cols.last
  end

  # Returns the collection found by find_collection, or creates a new one
  # titled after +collection+ and deposited by the operation's user.
  def find_or_create_collection(collection)
    find_collection(collection) ||
      Collection.create(title: [collection.to_s],
                        depositor: operation.user.email,
                        collection_type: Hyrax::CollectionType.find_by(title: "User Collection"))
  end
end
@@ -0,0 +1,21 @@
1
# Copies the plain (non-controlled) metadata columns of a spreadsheet row
# into @metadata, one array of cleaned values per field name.
module BulkOps::InterpretScalarBehavior
  extend ActiveSupport::Concern

  # Iterates over @raw_row; each usable cell is split into individual values,
  # which are stripped, re-encoded as UTF-8, CSV-unescaped and appended to
  # @metadata under the resolved field name.
  def interpret_scalar_fields
    @raw_row.each do |column, cell|
      # Skip empty cells, missing headers and cells that repeat the header
      next if cell.blank? || column.nil? || column == cell

      # Only columns that map to a metadata field are of interest
      field_name = find_field_name(column.to_s)
      next unless field_name

      # Controlled fields are ignored here
      next if schema.get_field(field_name).controlled?

      BulkOps::Parser.split_values(cell).each do |raw_value|
        next if raw_value.blank?

        cleaned = raw_value.strip.encode('utf-8', :invalid => :replace, :undef => :replace, :replace => '_')
        cleaned = BulkOps::Parser.unescape_csv(cleaned)
        (@metadata[field_name] ||= []) << cleaned
      end
    end
  end
end
@@ -0,0 +1,80 @@
1
# Search-builder mixin that narrows a Solr query by collection, admin set
# and workflow state, and optionally applies a keyword query.
module BulkOps::SearchBuilderBehavior
  extend ActiveSupport::Concern

  included do
    attr_reader :collection, :admin_set, :workflow_state

    class_attribute :collection_field, :collection_id_field,
                    :admin_set_field, :admin_set_id_field,
                    :workflow_state_field, :workflow_state_id_field,
                    :keyword_field

    # Solr field names used by the filter steps below
    self.collection_field = 'member_of_collections_ssim'
    self.collection_id_field = 'member_of_collection_ids_ssim'
    self.admin_set_field = 'admin_set_tesim'
    self.admin_set_id_field = 'isPartOf_ssim'
    self.workflow_state_field = 'workflow_state_name_ssim'
    self.keyword_field = 'all_fields'

    # Register the filter steps defined in this module
    self.default_processor_chain += %i[member_of_collection member_of_admin_set in_workflow_state with_keyword_query]
  end

  # @param [scope] Typically the controller object
  # The remaining keywords are optional filter values; blank ones are not stored.
  def initialize(scope: {}, collection: nil, collection_id: nil,
                 admin_set: nil, admin_set_id: nil,
                 workflow_state: nil, keyword_query: nil)
    @collection = collection if collection.present?
    @collection_id = collection_id if collection_id.present?
    @admin_set = admin_set if admin_set.present?
    @admin_set_id = admin_set_id if admin_set_id.present?
    @workflow_state = workflow_state if workflow_state.present?
    @keyword_query = keyword_query if keyword_query.present?
    super(scope)
  end

  # Model classes this builder searches over
  def models
    [Work, Course, Lecture]
  end

  # Adds filter queries for the collection name and/or collection id, when given
  def member_of_collection(solr_parameters)
    solr_parameters[:fq] ||= []
    filters = solr_parameters[:fq]
    filters << "#{collection_field}:#{@collection}" if @collection
    filters << "#{collection_id_field}:#{@collection_id}" if @collection_id
  end

  # Adds filter queries for the admin set name and/or admin set id, when given
  def member_of_admin_set(solr_parameters)
    solr_parameters[:fq] ||= []
    filters = solr_parameters[:fq]
    filters << "#{admin_set_field}:#{@admin_set}" if @admin_set
    filters << "#{admin_set_id_field}:#{@admin_set_id}" if @admin_set_id
  end

  # Adds a filter query for the workflow state, when given
  def in_workflow_state(solr_parameters)
    solr_parameters[:fq] ||= []
    filters = solr_parameters[:fq]
    filters << "#{workflow_state_field}:#{@workflow_state}" if @workflow_state
  end

  # Appends the keyword query to :q and sets the :qf field list.
  # Always returns solr_parameters.
  def with_keyword_query(solr_parameters)
    return solr_parameters unless @keyword_query

    solr_parameters[:q] ||= []
    solr_parameters[:q] << @keyword_query
    solr_parameters[:qf] = "title_tesim titleAlternative_tesim subseries_tesim creator_label_tesim contributor_label_tesim originalPublisher_tesim publisher_tesim publisherHomepage_tesim resourceType_label_tesim rightsHolder_label_tesim scale_tesim series_tesim source_tesim staffNote_tesim coordinates_tesim subjectName_label_tesim subjectPlace_label_tesim subjectTemporal_label_tesim subjectTopic_label_tesim dateCreated_tesim dateCreatedDisplay_tesim dateDigitized_tesim datePublished_tesim description_tesim physicalFormat_label_tesim keyword_tesim language_label_tesim license_tesim masterFilename_tesim physicalDescription_tesim accessRights_tesim itemCallNumber_tesim collectionCallNumber_tesim donorProvenance_tesim genre_label_tesim boxFolder_tesim subject_label_tesim file_format_tesim all_text_timv"
    solr_parameters
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bulk_ops
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.23
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ned Henry, UCSC Library Digital Initiatives
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-06 00:00:00.000000000 Z
11
+ date: 2020-01-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -97,7 +97,9 @@ files:
97
97
  - config/routes.rb
98
98
  - db/migrate/20180926190757_create_github_credentials.rb
99
99
  - db/migrate/20181017180436_create_bulk_ops_tables.rb
100
+ - db/migrate/20200122234235_remove_relationships_ammend_work_proxy.rb
100
101
  - lib/bulk_ops.rb
102
+ - lib/bulk_ops/apply_operation_job.rb
101
103
  - lib/bulk_ops/create_spreadsheet_job.rb
102
104
  - lib/bulk_ops/create_work_job.rb
103
105
  - lib/bulk_ops/delete_file_set_job.rb
@@ -108,8 +110,9 @@ files:
108
110
  - lib/bulk_ops/operation.rb
109
111
  - lib/bulk_ops/parser.rb
110
112
  - lib/bulk_ops/queue_work_ingests_job.rb
111
- - lib/bulk_ops/relationship.rb
113
+ - lib/bulk_ops/resolve_children_job.rb
112
114
  - lib/bulk_ops/search_builder_behavior.rb
115
+ - lib/bulk_ops/solr_service.rb
113
116
  - lib/bulk_ops/templates/configuration.yml
114
117
  - lib/bulk_ops/templates/readme.md
115
118
  - lib/bulk_ops/update_work_job.rb
@@ -118,6 +121,12 @@ files:
118
121
  - lib/bulk_ops/version.rb
119
122
  - lib/bulk_ops/work_job.rb
120
123
  - lib/bulk_ops/work_proxy.rb
124
+ - lib/concerns/interpret_controlled_behavior.rb
125
+ - lib/concerns/interpret_files_behavior.rb
126
+ - lib/concerns/interpret_options_behavior.rb
127
+ - lib/concerns/interpret_relationships_behavior.rb
128
+ - lib/concerns/interpret_scalar_behavior.rb
129
+ - lib/concerns/search_builder_behavior.rb
121
130
  - lib/generators/bulk_ops/install/install_generator.rb
122
131
  - lib/generators/bulk_ops/install/templates/config/github.yml.example
123
132
  homepage: http://UCSCLibrary.github.org
@@ -1,117 +0,0 @@
1
# A pending relationship between the work of a WorkProxy (the subject) and
# another object identified by identifier_type / object_identifier.
class BulkOps::Relationship < ActiveRecord::Base
  RELATIONSHIP_FIELDS = ['parent','child','order','next','collection']

  self.table_name = "bulk_ops_relationships"
  belongs_to :work_proxy, class_name: "BulkOps::WorkProxy", foreign_key: "work_proxy_id"
  delegate :operation, :operation_id, to: :work_proxy

  def initialize(*args)
    # Bare super forwards the arguments and any block unchanged
    super

    # Attempt to resolve the relationship immediately
    # which might work in the case of updates
    # resolve!
  end

  # Finds the object side of the relationship according to identifier_type.
  # Returns the object, false when it cannot be found (yet), or nil for an
  # unrecognised identifier type.
  def findObject
    case (identifier_type || "").downcase
    when "id"
      begin
        object = ActiveFedora::Base.find(object_identifier)
      rescue Ldp::Gone
        return false
      end
      object || false
    when "title"
      # TODO clean up solr query and add work type to it
      objects = solr_docs_matching("title_tesim")
      if objects.present?
        ActiveFedora::Base.find(objects.first["id"])
      elsif (relationship_type || "").downcase == "collection"
        Collection.create(title: [object_identifier])
      else
        false
      end
    when "identifier"
      objects = solr_docs_matching("identifier_tesim")
      return false if objects.blank?
      ActiveFedora::Base.find(objects.first["id"])
    when "row"
      object_proxy = BulkOps::WorkProxy.find_by(operation_id: work_proxy.operation_id,
                                                row_number: object_identifier.to_i)
      # The referenced row may have no proxy or no ingested work yet;
      # report "not found" so resolve! can wait instead of raising.
      return false unless object_proxy && object_proxy.work_id.present?
      ActiveFedora::Base.find(object_proxy.work_id)
    when "proxy_id"
      # NOTE(review): proxy_id is not defined in this class; the other branches
      # use object_identifier - confirm which is intended.
      return false unless (proxy = BulkOps::WorkProxy.find(proxy_id))
      return false unless proxy.work_id.present?
      ActiveFedora::Base.find(proxy.work_id)
    end
  end

  # Implements the relationship if both sides exist, otherwise marks it pending.
  def resolve!
    subject = work_proxy.work
    object = subject && findObject
    unless subject && object
      wait!
      return
    end
    implement_relationship! relationship_type, subject, object
  end

  # Returns the ordered members of +object+ with +new_member+ inserted after
  # its nearest previous sibling that is already attached, or at the front
  # when no such sibling exists. Returns nil for non-"parent" relationships.
  def insert_among_children(object, new_member)
    return nil unless ["parent"].include?((relationship_type || "").downcase)

    # This is the id of the WorkProxy associated with the most recent sibling work
    # that might be fully ingested. If it is not fully ingested, we will move on
    # to the preceding sibling.
    prev_sib_id = previous_sibling
    while prev_sib_id.present?
      prev_sib_proxy = BulkOps::WorkProxy.find(prev_sib_id)

      # Check if the previous sibling is fully ingested and get its index among
      # its siblings (if it has been successfully attached to the parent)
      prev_sib_index = nil
      if prev_sib_proxy.work_id.present?
        prev_sib_index = object.ordered_member_ids.index(prev_sib_proxy.work_id)
      end

      # Insert the new member among its siblings if we found the right place
      unless prev_sib_index.nil?
        return object.ordered_members.to_a.insert(prev_sib_index + 1, new_member)
      end

      # Otherwise, pull up the sibling's relationship to the same parent to
      # check whether that sibling has a sibling before it. findObject can
      # return false/nil, so guard before comparing ids.
      sib_relationship = prev_sib_proxy.relationships.find do |rel|
        related = rel.findObject
        related && related.id == object.id
      end

      # If we can't find an ingested sibling among the ordered members,
      # break this loop and make this work the first member.
      break unless sib_relationship.present?
      prev_sib_id = sib_relationship.previous_sibling
    end

    # If we never found an existing previous sibling already attached, put this one at the front
    [new_member] + object.ordered_members.to_a
  end

  # Applies the relationship of the given type between subject and object,
  # then marks this record complete.
  def implement_relationship!(type, subject, object)
    case (type || "").downcase
    when "parent"
      unless object.member_ids.include? subject.id
        object.reload
        object.save
        object.ordered_members = insert_among_children(object, subject)
        object.save
      end
    when "child"
      # CAVEAT ordering not fully implemented in this case
      unless subject.member_ids.include? object.id
        subject.ordered_members << object
        subject.save
      end
    when "order"
      # TODO - implement this - related to ordering of filesets
    end
    update(status: "complete")
  end

  private

  # Runs a Solr field query for object_identifier against +solr_field+
  # and returns the matching documents (at most 100).
  def solr_docs_matching(solr_field)
    query = "{!field f=#{solr_field}}#{object_identifier}"
    ActiveFedora::SolrService.instance.conn.get(ActiveFedora::SolrService.select_path,
                                                params: { fq: query, rows: 100 })["response"]["docs"]
  end

  def fail!
    update(status: "failed")
  end

  def wait!
    update(status: "pending")
  end
end