bulk_ops 0.1.23 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,123 @@
1
+ module BulkOps::InterpretRelationshipsBehavior
2
+ extend ActiveSupport::Concern
3
+
4
+ def interpret_relationship_fields
5
+ @raw_row.each do |field,value|
6
+ next if value.blank? or field.blank? or value == field
7
+
8
+ #the default identifier type is the reference identifier of the proxy
9
+ id_type = reference_identifier
10
+
11
+ # Correctly interpret the notation "parent:id", "parent id" etc in a column header
12
+ if (split = field.split(/[:_\-\s]/)).count == 2
13
+ id_type = split.last
14
+ field = split.first
15
+ end
16
+
17
+ # skip to next field unless it's a known relationship field
18
+ next unless (relationship_type = self.class.normalize_relationship_field_name(field))
19
+
20
+ case relationship_type
21
+ when "order"
22
+ # If the field specifies the object's order among siblings
23
+ @proxy.update(order: value.to_f)
24
+ next
25
+ when "collection"
26
+ # If the field specifies the name or ID of a collection,
27
+ # find or create the collection and update the metadata to match
28
+ col = find_or_create_collection(value)
29
+ ( @metadata[:member_of_collection_ids] ||= [] ) << col.id if col
30
+ next
31
+ when "parent"
32
+ # Correctly interpret the notation "row:349", "id:s8df4j32w" etc in a cell
33
+ if (split = value.split(/[:_\\s]/)).count == 2
34
+ id_type = split.first
35
+ value = split.last
36
+ end
37
+ parent = find_parent_proxy(value, field, id_type)
38
+ proxy_updates = { parent_id: parent.id}
39
+ siblings = parent.ordered_children
40
+ if siblings.present? && @proxy.previous_sibling_id.nil?
41
+ proxy_updates[:previous_sibling_id] = siblings.last.id
42
+ end
43
+ @proxy.update(proxy_updates)
44
+ end
45
+ end
46
+ end
47
+
48
+ private
49
+
50
+ def find_previous_parent_row field="parent"
51
+ #Return the row number of the most recent preceding row that does
52
+ # not itself have a parent defined
53
+ i = 1;
54
+ while (prev_row = raw_data[row_number - i])
55
+ return (row_number - i) if prev_row[field].blank?
56
+ i += 1
57
+ end
58
+ end
59
+
60
+ def find_parent_proxy parent_id, field, id_type
61
+ #The id_type determines what kind of identifier we expect in parent_id
62
+ case id_type.downcase
63
+ when "id"
64
+ # Expect a reference to an existing work in the DAMS
65
+ return false unless BulkOps::SolrService.record_exists?(parent_id.to_s)
66
+ # Pull the work proxy for that work, if it exists
67
+ parent_proxy = BulkOps::WorkProxy.find_by(work_id: parent_id.to_s, operation_id: @proxy.operation.id) || BulkOps::WorkProxy.find_by(work_id: parent_id.to_s)
68
+ # If no work proxy exists for this work, create one just to keep track of this task
69
+ return parent_proxy if proxy.present?
70
+ return BulkOps::WorkProxies.create(status: "awaiting_children",
71
+ operation_id: 0,
72
+ last_event: DateTime.now,
73
+ work_id: parent_id.to_s)
74
+
75
+ when "proxy_id"
76
+ return BulkOps::WorkProxy.find(parent_id)
77
+ when "row"
78
+ if parent_id =~ /\A[-+]?[0-9]+\z/
79
+ if parent_id.to_i < 0
80
+ # if given a negative integer, count backwards from the current row (remember that parent_id.to_i is negative)
81
+ parent_id = @proxy.row_number.to_i + parent_id.to_i
82
+ elsif parent_id.to_i > 0
83
+ # if given a positive integer, just remove the row offset
84
+ parent_id = parent_id.to_i - BulkOps::ROW_OFFSET
85
+ end
86
+ elsif parent_id.to_s.downcase.include?("prev")
87
+ # if given any variation of the word "previous", get the first preceding row with no parent of its own
88
+ parent_id = find_previous_parent_row(field)
89
+ end
90
+
91
+ return BulkOps::WorkProxy.find_by(operation_id: @proxy.operation_id,
92
+ row_number: parent_id.to_i)
93
+ # when "title"
94
+ # # TODO clean up solr query and add work type to it
95
+ # query = "{!field f=title_tesim}#{object_identifier}"
96
+ # objects = ActiveFedora::SolrService.instance.conn.get(ActiveFedora::SolrService.select_path,
97
+ # params: { fq: query, rows: 1})["response"]["docs"]
98
+ # return ActiveFedora::Base.find(objects.first["id"]) if objects.present?
99
+ # return false
100
+ # when "identifier"
101
+ # query = "{!field f=identifier_tesim}#{object_identifier}"
102
+ # objects = ActiveFedora::SolrService.instance.conn.get(ActiveFedora::SolrService.select_path,params: { fq: query, rows: 100})["response"]["docs"]
103
+ # return false if objects.blank?
104
+ # return ActiveFedora::Base.find(objects.first["id"])
105
+ end
106
+ end
107
+
108
+ def find_collection(collection)
109
+ puts "FINDING COLLECTION: #{collection}"
110
+ cols = Collection.where(title: collection)
111
+ cols += Collection.where(title: collection).select{|col| col.title.first == collection}
112
+ cols += Collection.where(id: collection)
113
+ puts "COLLECTION: #{cols.last}"
114
+ return cols.last unless cols.empty?
115
+ return false
116
+ end
117
+
118
+ def find_or_create_collection(collection)
119
+ find_collection(collection) || Collection.create(title: [collection.to_s], depositor: operation.user.email, collection_type: Hyrax::CollectionType.find_by(title:"User Collection"))
120
+ end
121
+
122
+
123
+ end
@@ -0,0 +1,21 @@
1
+ module BulkOps::InterpretScalarBehavior
2
+ extend ActiveSupport::Concern
3
+
4
+ def interpret_scalar_fields
5
+ @raw_row.each do |field, values|
6
+ next if values.blank? or field.nil? or field == values
7
+ # get the field name, if this column is a metadata field
8
+ next unless field_name = find_field_name(field.to_s)
9
+ field = schema.get_field(field_name)
10
+ # Ignore controlled fields
11
+ next if field.controlled?
12
+ BulkOps::Parser.split_values(values).each do |value|
13
+ next if value.blank?
14
+ value = value.strip.encode('utf-8', :invalid => :replace, :undef => :replace, :replace => '_') unless value.blank?
15
+ value = BulkOps::Parser.unescape_csv(value)
16
+ (@metadata[field_name] ||= []) << value
17
+ end
18
+ end
19
+ end
20
+
21
+ end
@@ -0,0 +1,80 @@
1
+ module BulkOps::SearchBuilderBehavior
2
+ extend ActiveSupport::Concern
3
+ included do
4
+ attr_reader :collection,
5
+ :admin_set,
6
+ :workflow_state
7
+ class_attribute :collection_field,
8
+ :collection_id_field,
9
+ :admin_set_field,
10
+ :admin_set_id_field,
11
+ :workflow_state_field,
12
+ :workflow_state_id_field,
13
+ :keyword_field
14
+ self.collection_field = 'member_of_collections_ssim'
15
+ self.collection_id_field = 'member_of_collection_ids_ssim'
16
+ self.admin_set_field = 'admin_set_tesim'
17
+ self.admin_set_id_field = 'isPartOf_ssim'
18
+ self.workflow_state_field = 'workflow_state_name_ssim'
19
+ self.keyword_field = 'all_fields'
20
+
21
+ self.default_processor_chain += [:member_of_collection,
22
+ :member_of_admin_set,
23
+ :in_workflow_state,
24
+ :with_keyword_query]
25
+ end
26
+
27
+ # @param [scope] Typically the controller object
28
+ def initialize(scope: {},
29
+ collection: nil,
30
+ collection_id: nil,
31
+ admin_set: nil,
32
+ admin_set_id: nil,
33
+ workflow_state: nil,
34
+ keyword_query: nil)
35
+
36
+ @collection = collection unless collection.blank?
37
+ @admin_set = admin_set unless admin_set.blank?
38
+ @admin_set_id = admin_set_id unless admin_set_id.blank?
39
+ @workflow_state = workflow_state unless workflow_state.blank?
40
+ @collection_id = collection_id unless collection_id.blank?
41
+ @workflow_state = workflow_state unless workflow_state.blank?
42
+ @keyword_query = keyword_query unless keyword_query.blank?
43
+ super(scope)
44
+ end
45
+
46
+ def models
47
+ [Work,Course,Lecture]
48
+ end
49
+
50
+ # include filters into the query to only include the collection memebers
51
+ def member_of_collection(solr_parameters)
52
+ solr_parameters[:fq] ||= []
53
+ solr_parameters[:fq] << "#{collection_field}:#{@collection}" if @collection
54
+ solr_parameters[:fq] << "#{collection_id_field}:#{@collection_id}" if @collection_id
55
+ end
56
+
57
+ # include filters into the query to only include the collection memebers
58
+ def member_of_admin_set(solr_parameters)
59
+ solr_parameters[:fq] ||= []
60
+ solr_parameters[:fq] << "#{admin_set_field}:#{@admin_set}" if @admin_set
61
+ solr_parameters[:fq] << "#{admin_set_id_field}:#{@admin_set_id}" if @admin_set_id
62
+ end
63
+
64
+ # include filters into the query to only include the collection memebers
65
+ def in_workflow_state(solr_parameters)
66
+ solr_parameters[:fq] ||= []
67
+ solr_parameters[:fq] << "#{workflow_state_field}:#{@workflow_state}" if @workflow_state
68
+ end
69
+
70
+ def with_keyword_query(solr_parameters)
71
+ if @keyword_query
72
+ solr_parameters[:q] ||= []
73
+ # solr_parameters[:q] << "#{keyword_field}:#{@keyword_query}" if @keyword_query
74
+ solr_parameters[:q] << @keyword_query
75
+ solr_parameters[:qf] = "title_tesim titleAlternative_tesim subseries_tesim creator_label_tesim contributor_label_tesim originalPublisher_tesim publisher_tesim publisherHomepage_tesim resourceType_label_tesim rightsHolder_label_tesim scale_tesim series_tesim source_tesim staffNote_tesim coordinates_tesim subjectName_label_tesim subjectPlace_label_tesim subjectTemporal_label_tesim subjectTopic_label_tesim dateCreated_tesim dateCreatedDisplay_tesim dateDigitized_tesim datePublished_tesim description_tesim physicalFormat_label_tesim keyword_tesim language_label_tesim license_tesim masterFilename_tesim physicalDescription_tesim accessRights_tesim itemCallNumber_tesim collectionCallNumber_tesim donorProvenance_tesim genre_label_tesim boxFolder_tesim subject_label_tesim file_format_tesim all_text_timv"
76
+ end
77
+ solr_parameters
78
+ end
79
+
80
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bulk_ops
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.23
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ned Henry, UCSC Library Digital Initiatives
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-06 00:00:00.000000000 Z
11
+ date: 2020-01-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rails
@@ -97,7 +97,9 @@ files:
97
97
  - config/routes.rb
98
98
  - db/migrate/20180926190757_create_github_credentials.rb
99
99
  - db/migrate/20181017180436_create_bulk_ops_tables.rb
100
+ - db/migrate/20200122234235_remove_relationships_ammend_work_proxy.rb
100
101
  - lib/bulk_ops.rb
102
+ - lib/bulk_ops/apply_operation_job.rb
101
103
  - lib/bulk_ops/create_spreadsheet_job.rb
102
104
  - lib/bulk_ops/create_work_job.rb
103
105
  - lib/bulk_ops/delete_file_set_job.rb
@@ -108,8 +110,9 @@ files:
108
110
  - lib/bulk_ops/operation.rb
109
111
  - lib/bulk_ops/parser.rb
110
112
  - lib/bulk_ops/queue_work_ingests_job.rb
111
- - lib/bulk_ops/relationship.rb
113
+ - lib/bulk_ops/resolve_children_job.rb
112
114
  - lib/bulk_ops/search_builder_behavior.rb
115
+ - lib/bulk_ops/solr_service.rb
113
116
  - lib/bulk_ops/templates/configuration.yml
114
117
  - lib/bulk_ops/templates/readme.md
115
118
  - lib/bulk_ops/update_work_job.rb
@@ -118,6 +121,12 @@ files:
118
121
  - lib/bulk_ops/version.rb
119
122
  - lib/bulk_ops/work_job.rb
120
123
  - lib/bulk_ops/work_proxy.rb
124
+ - lib/concerns/interpret_controlled_behavior.rb
125
+ - lib/concerns/interpret_files_behavior.rb
126
+ - lib/concerns/interpret_options_behavior.rb
127
+ - lib/concerns/interpret_relationships_behavior.rb
128
+ - lib/concerns/interpret_scalar_behavior.rb
129
+ - lib/concerns/search_builder_behavior.rb
121
130
  - lib/generators/bulk_ops/install/install_generator.rb
122
131
  - lib/generators/bulk_ops/install/templates/config/github.yml.example
123
132
  homepage: http://UCSCLibrary.github.org
@@ -1,117 +0,0 @@
1
- class BulkOps::Relationship < ActiveRecord::Base
2
- RELATIONSHIP_FIELDS = ['parent','child','order','next','collection']
3
-
4
- self.table_name = "bulk_ops_relationships"
5
- belongs_to :work_proxy, class_name: "BulkOps::WorkProxy", foreign_key: "work_proxy_id"
6
- delegate :operation, :operation_id, to: :work_proxy
7
-
8
- def initialize *args
9
- super *args
10
-
11
- # Attempt to resolve the relationship immediately
12
- # which might work in the case of updates
13
- # resolve!
14
- end
15
-
16
- def findObject
17
- case (identifier_type || "").downcase
18
- when "id"
19
- begin
20
- object = ActiveFedora::Base.find(object_identifier)
21
- rescue Ldp::Gone
22
- return false
23
- end
24
- return object || false
25
- when "title"
26
- # TODO clean up solr query and add work type to it
27
- query = "{!field f=title_tesim}#{object_identifier}"
28
- objects = ActiveFedora::SolrService.instance.conn.get(ActiveFedora::SolrService.select_path,
29
- params: { fq: query, rows: 100})["response"]["docs"]
30
- if objects.present?
31
- return ActiveFedora::Base.find(objects.first["id"])
32
- elsif (relationship_type || "").downcase == "collection"
33
- return Collection.create(title: [object_identifier])
34
- else
35
- return false
36
- end
37
- when "identifier"
38
- query = "{!field f=identifier_tesim}#{object_identifier}"
39
- objects = ActiveFedora::SolrService.instance.conn.get(ActiveFedora::SolrService.select_path,params: { fq: query, rows: 100})["response"]["docs"]
40
- return false if objects.blank?
41
- return ActiveFedora::Base.find(objects.first["id"])
42
- when "row"
43
- object_proxy = BulkOps::WorkProxy.find_by(operation_id: work_proxy.operation_id,
44
- row_number: (object_identifier.to_i))
45
- ActiveFedora::Base.find(object_proxy.work_id)
46
- when "proxy_id"
47
- return false unless (proxy = BulkOps::WorkProxy.find(proxy_id))
48
- return false unless proxy.work_id.present?
49
- ActiveFedora::Base.find(proxy.work_id)
50
- end
51
- end
52
-
53
- def resolve!
54
- unless subject = work_proxy.work and object = self.findObject
55
- wait!
56
- return
57
- end
58
- implement_relationship! relationship_type, subject, object
59
- end
60
-
61
- def insert_among_children(object,new_member)
62
- return nil unless ["parent"].include?((relationship_type || "").downcase)
63
- prev_sib_id = previous_sibling
64
- # This is the id of the WorkProxy associate with the most recent sibling work
65
- # that might be fully ingested. If is it not fully ingested, we will move on
66
- # to the preceding sibling.
67
- while prev_sib_id.present?
68
- prev_sib_proxy = BulkOps::WorkProxy.find(prev_sib_id)
69
- # Check if the previous sibling is fully ingested
70
- # and get its index among its siblings (if it has been successfully attached to the parent)
71
- prev_sib_index = object.ordered_member_ids.index(prev_sib_proxy.work_id) if prev_sib_proxy.work_id.present?
72
- # Insert the new member among its siblings if we found the right place
73
- return object.ordered_members.to_a.insert(prev_sib_index+1, new_member) if prev_sib_index.present?
74
- # Otherwise, pull up the sibling's relationship field to check if it sibling has a sibling before it
75
- sib_relationship = prev_sib_proxy.relationships.find{|rel| rel.findObject.id == object.id }
76
- # If we can't find an ingested sibling among the ordered members,
77
- # break this loop and make this work the first member.
78
- break unless sib_relationship.present?
79
- prev_sib_id = sib_relationship.previous_sibling
80
- end
81
- #If we never found an existing previous sibling already attached, put this one at the front
82
- return [new_member]+object.ordered_members.to_a
83
- end
84
-
85
- def implement_relationship!(type,subject,object)
86
- case (type || "").downcase
87
- when "parent"
88
- unless object.member_ids.include? subject.id
89
- object.reload
90
- object.save
91
- object.ordered_members = insert_among_children(object, subject)
92
- object.save
93
- end
94
- when "child"
95
- #CAVEAT ordering not fully implemented in this case
96
- unless subject.member_ids.include? object.id
97
- subject.ordered_members << object
98
- subject.save
99
- end
100
- when "order"
101
- #TODO - implement this - related to ordering of filesets
102
-
103
- end
104
- update(status: "complete")
105
- end
106
-
107
- private
108
-
109
- def fail!
110
- update(status: "failed")
111
- end
112
-
113
- def wait!
114
- update(status: "pending")
115
- end
116
-
117
- end