bulk_ops 0.1.23 → 0.2.0
- checksums.yaml +4 -4
- data/db/migrate/20200122234235_remove_relationships_ammend_work_proxy.rb +14 -0
- data/lib/bulk_ops.rb +3 -2
- data/lib/bulk_ops/apply_operation_job.rb +8 -0
- data/lib/bulk_ops/create_work_job.rb +1 -1
- data/lib/bulk_ops/github_access.rb +1 -1
- data/lib/bulk_ops/operation.rb +57 -49
- data/lib/bulk_ops/parser.rb +50 -414
- data/lib/bulk_ops/resolve_children_job.rb +14 -0
- data/lib/bulk_ops/solr_service.rb +13 -0
- data/lib/bulk_ops/update_work_job.rb +1 -1
- data/lib/bulk_ops/verification.rb +2 -10
- data/lib/bulk_ops/version.rb +1 -1
- data/lib/bulk_ops/work_job.rb +20 -13
- data/lib/bulk_ops/work_proxy.rb +18 -2
- data/lib/concerns/interpret_controlled_behavior.rb +140 -0
- data/lib/concerns/interpret_files_behavior.rb +82 -0
- data/lib/concerns/interpret_options_behavior.rb +59 -0
- data/lib/concerns/interpret_relationships_behavior.rb +123 -0
- data/lib/concerns/interpret_scalar_behavior.rb +21 -0
- data/lib/concerns/search_builder_behavior.rb +80 -0
- metadata +12 -3
- data/lib/bulk_ops/relationship.rb +0 -117
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: f9a35abfb31034307e62c7b7483b215447b848e1bb57f08d7200ae5293924e96
+  data.tar.gz: 77f0dd2a02a8343f945da3cd3f0a19cf9c590556ceff90bb86118032a2ed9192
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 5e0490d9f81743bbfbf87848654cff4fba774f2c5b218903535e732e42e72908382598741d452a6de16017cb43a9c19b3283c07d2ff5b03008b16afc8163cda0
+  data.tar.gz: 9123a66a499e37ef944854f70f49aa7d0de8f73af6461efe1949a7dbe75cead327e3b7fd1edd266965254758bb38d89d70cfd212fc90408c54a14da77f506b38
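The digests above cover the metadata.gz and data.tar.gz members inside the packaged .gem (which is a plain tar archive), not the .gem file as a whole. A minimal verification sketch, assuming the gem has already been fetched locally (the path is hypothetical):

require 'digest'
require 'rubygems/package'

# Read the two digested members out of the .gem tarball and hash them.
File.open("bulk_ops-0.2.0.gem", "rb") do |io|
  Gem::Package::TarReader.new(io) do |tar|
    tar.each do |entry|
      next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
      puts "#{entry.full_name}: #{Digest::SHA256.hexdigest(entry.read)}"
    end
  end
end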
data/db/migrate/20200122234235_remove_relationships_ammend_work_proxy.rb
ADDED
@@ -0,0 +1,14 @@
+class RemoveRelationshipsAmmendWorkProxy < ActiveRecord::Migration[5.0]
+  def change
+
+    drop_table :bulk_ops_relationships
+
+    change_table :bulk_ops_work_proxies do |t|
+      t.integer :parent_id
+      t.integer :previous_sibling_id
+    end
+
+    remove_column :bulk_ops_operations, :operation_type
+
+  end
+end
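Note that this migration cannot be rolled back automatically: drop_table with no block and remove_column with no column type both raise ActiveRecord::IrreversibleMigration on db:rollback. A reversible sketch for local use follows; the bulk_ops_relationships column list in down is an assumption inferred from how BulkOps::Relationship was used in the removed parser code, since the dropped table's definition does not appear in this diff:

class RemoveRelationshipsAmmendWorkProxy < ActiveRecord::Migration[5.0]
  def up
    drop_table :bulk_ops_relationships
    change_table :bulk_ops_work_proxies do |t|
      t.integer :parent_id
      t.integer :previous_sibling_id
    end
    remove_column :bulk_ops_operations, :operation_type
  end

  def down
    # Columns assumed from the relationship_parameters hash in the old parser;
    # adjust to the real schema history before relying on this.
    create_table :bulk_ops_relationships do |t|
      t.integer :work_proxy_id
      t.string  :identifier_type, :relationship_type, :object_identifier, :status
      t.integer :previous_sibling
      t.timestamps
    end
    remove_column :bulk_ops_work_proxies, :parent_id
    remove_column :bulk_ops_work_proxies, :previous_sibling_id
    add_column :bulk_ops_operations, :operation_type, :string
  end
end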
data/lib/bulk_ops.rb
CHANGED
@@ -34,8 +34,9 @@ module BulkOps
   OPTIONS_FILENAME = 'configuration.yml'
   ROW_OFFSET = 2
 
-  dirstring = File.join( File.dirname(__FILE__), '
-
+  dirstring = File.join( File.dirname(__FILE__), 'concerns/*.rb')
+  dirstring2 = File.join( File.dirname(__FILE__), 'bulk_ops/**/*.rb')
+  ((Dir[dirstring] || []) + Dir[dirstring2]).uniq.each do |file|
     begin
       require file
     rescue Exception => e
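The new loader eager-requires the concern modules listed in the summary above alongside everything under bulk_ops/. One nit for readers: Dir[] always returns an Array, never nil, so the || [] guard is purely defensive. A minimal equivalent sketch:

# Dir.[] accepts multiple glob patterns, so the two globs can be combined;
# sorting gives a deterministic load order.
base = File.dirname(__FILE__)
Dir[File.join(base, 'concerns/*.rb'), File.join(base, 'bulk_ops/**/*.rb')].sort.each do |file|
  require file
end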
data/lib/bulk_ops/create_work_job.rb
CHANGED
@@ -12,7 +12,7 @@ class BulkOps::CreateWorkJob < BulkOps::WorkJob
   end
 
   def define_work workClass
-    if record_exists?(@work_proxy.work_id)
+    if BulkOps::SolrService.record_exists?(@work_proxy.work_id)
       report_error "trying to ingest a work proxy that already has a work attached. Work id: #{@work_proxy.work_id} Proxy id: #{@work_proxy.id}"
       return false
     end
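Existence checks now go through the new BulkOps::SolrService (data/lib/bulk_ops/solr_service.rb, +13 lines, not shown in this section). A hypothetical sketch of what such a check typically looks like in a Hyrax app; this is an assumption about the new file, not its actual contents:

module BulkOps
  class SolrService
    # Ask Solr whether a document with this id exists, without loading it from Fedora.
    def self.record_exists?(id)
      return false if id.blank?
      ActiveFedora::SolrService.query("id:#{RSolr.solr_escape(id.to_s)}", rows: 1).present?
    rescue RSolr::Error::Http
      false
    end
  end
end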
data/lib/bulk_ops/github_access.rb
CHANGED
@@ -196,7 +196,7 @@ class BulkOps::GithubAccess
 
   def create_pull_request message: false
     begin
-      message ||= "Apply
+      message ||= "Apply operation #{name} through Hyrax browser interface"
       pull = client.create_pull_request(repo, "master", name, message)
       pull["number"]
     rescue Octokit::UnprocessableEntity
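For context, Octokit raises Octokit::UnprocessableEntity on any 422 from the GitHub API, most commonly when a pull request already exists for the branch. A sketch (not the gem's code) of a fallback that reuses the existing PR, assuming repo is an "owner/name" string and the branch name matches the operation name:

# Hypothetical helper: create the PR, or fall back to the one that already exists.
def find_or_create_pull_request(client, repo, branch, message)
  pull = client.create_pull_request(repo, "master", branch, message)
  pull["number"]
rescue Octokit::UnprocessableEntity
  # The head filter takes the "owner:branch" form.
  existing = client.pull_requests(repo, head: "#{repo.split('/').first}:#{branch}").first
  existing && existing["number"]
end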
data/lib/bulk_ops/operation.rb
CHANGED
@@ -38,10 +38,6 @@ module BulkOps
       states
     end
 
-    def type
-      operation_type
-    end
-
     def self.schema
       ScoobySnacks::METADATA_SCHEMA
     end
@@ -62,45 +58,73 @@ module BulkOps
       update(stage: new_stage)
     end
 
-    def
-
-
-
-
-
-
-
-
-
+    def destroy_all_works_and_proxies
+      work_proxies.each do |proxy|
+        if BulkOps::SolrService.record_exists?(proxy.work_id)
+          ActiveFedora::Base.find(proxy.work_id).destroy
+        end
+        proxy.destroy
+      end
+      update(stage: "waiting",
+             status: "reverted changes")
+
+    end
 
-
-
+    def destroy_all_works
+      work_proxies.each do |proxy|
+        if BulkOps::SolrService.record_exists?(proxy.work_id)
+          ActiveFedora::Base.find(proxy.work_id).destroy
+        end
+        proxy.update(status: "destroyed", message: "The work created by this proxy was destroyed by the user")
+      end
+      update(stage: "waiting",
+             status: "reverted changes")
     end
 
-    def
-
-
+    def destroy_all_proxies
+      work_proxies.each do |proxy|
+        proxy.destroy
+      end
+      update(stage: "waiting",
+             status: "reverted changes")
+    end
 
-
-
-
+    def apply!
+      update({stage: "running",
+              status: "OK",
+              message: "Bulk operation initiated by #{user.name || user.email}"})
+      # We should now be on the master branch. Make sure the correct spreadsheet version is loaded
+      final_spreadsheet
 
-
+      # In case this operation has run before, gather all work proxies that are completed and exclude them from the application
+      complete_proxies = work_proxies.select{|proxy| proxy.status == "complete" && proxy.work_id.present?}
+      incomplete_row_numbers = Array(0..@metadata.length-1) - complete_proxies.map(&:row_number)
 
-
+      # Destroy all proxies corresponding to incomplete rows
+      (work_proxies - complete_proxies).each{|proxy| proxy.destroy!}
+
+      # Create a new work proxy for each incomplete row
+      # All the proxies need to exist before parsing in order to correctly recognize relationships
+      incomplete_row_numbers.each do |row_number|
+        values = @metadata[row_number]
+        next if values.to_s.gsub(',','').blank?
+        next if BulkOps::Parser.is_file_set? @metadata, row_number
+        work_proxies.create(status: "new",
                             last_event: DateTime.now,
-
+                            work_type: work_type,
+                            row_number: row_number,
                             visibility: options['visibility'],
                             message: "created during ingest initiated by #{user.name || user.email}")
+
       end
-
-      # make sure the work proxies we just created are loaded in memory
+      # Reload the operation so that it can recognize its new proxies
       reload
-      #
-
+      # Parse each spreadsheet row and create a background job for each proxy we just created
+      incomplete_row_numbers.each do |row_number|
+        values = @metadata[row_number]
         proxy = work_proxies.find_by(row_number: row_number)
         proxy.update(message: "interpreted at #{DateTime.now.strftime("%d/%m/%Y %H:%M")} " + proxy.message)
-        data = BulkOps::Parser.new(proxy, @metadata).interpret_data(raw_row: values)
+        data = BulkOps::Parser.new(proxy, @metadata, options).interpret_data(raw_row: values)
         next unless proxy.proxy_errors.blank?
         BulkOps::WorkJob.perform_later(proxy.work_type || "Work",
                                        user.email,
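The 0.1.x delete_all (removed in the next hunk) is replaced by three teardown methods with different scopes. A rough usage sketch, where op stands for any persisted BulkOps::Operation:

op = BulkOps::Operation.find(op_id)  # op_id assumed

op.destroy_all_proxies             # discard row bookkeeping; leave created works alone
op.destroy_all_works               # destroy created works; keep proxies, marked "destroyed"
op.destroy_all_works_and_proxies   # full revert: destroy works and their proxies

All three finish by resetting the operation to stage "waiting" with status "reverted changes".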
@@ -112,13 +136,6 @@ module BulkOps
       report_errors!
     end
 
-    def delete_all
-      work_proxies.each do |proxy|
-        ActiveFedora::Base.find(proxy.work_id).destroy
-        proxy.update(status: "destroyed", message: "The work created by this proxy was destroyed by the user")
-      end
-    end
-
     def check_if_finished
       return unless stage == "running" && !busy?
 
@@ -208,7 +225,7 @@ module BulkOps
 
     def report_errors!
       error_file_name = BulkOps::Error.write_errors!(accumulated_errors, git)
-      notify!(subject: "Errors initializing bulk
+      notify!(subject: "Errors initializing bulk operation in Hycruz", message: "Hycruz encountered some errors while it was setting up your operation and preparing to begin. For most types of errors, the individual rows of the spreadsheet with errors will be ignored and the rest will proceed. Please consult the operation summary for real time information on the status of the operation. Details about these initialization errors can be seen on Github at the following url: https://github.com/#{git.repo}/blob/#{git.name}/#{git.name}/errors/#{error_file_name}") if error_file_name
     end
 
     def create_pull_request message: false
@@ -222,7 +239,7 @@ module BulkOps
       update(stage: "pending")
     end
 
-    def create_branch(fields: nil, work_ids: nil, options: nil
+    def create_branch(fields: nil, work_ids: nil, options: nil)
       git.create_branch!
       bulk_ops_dir = Gem::Specification.find_by_name("bulk_ops").gem_dir
 
@@ -238,13 +255,12 @@ module BulkOps
         options.each { |option, value| full_options[option] = value }
 
         full_options[name] = name
-        full_options[type] = type
         full_options[status] = status
 
         git.update_options full_options
       end
 
-       create_new_spreadsheet(fields: fields, work_ids: work_ids)
+      create_new_spreadsheet(fields: fields, work_ids: work_ids)
     end
 
     def get_spreadsheet return_headers: false
@@ -298,14 +314,6 @@ module BulkOps
       return false
     end
 
-    def ingest?
-      type == "ingest"
-    end
-
-    def update?
-      type == "update"
-    end
-
     def delete_branch
       git.delete_branch!
     end
data/lib/bulk_ops/parser.rb
CHANGED
@@ -1,4 +1,3 @@
-
 class BulkOps::Parser
   require 'uri'
 
@@ -6,6 +5,27 @@ class BulkOps::Parser
 
   delegate :relationships, :operation, :row_number, :work_id, :visibility, :work_type, :reference_identifier, :order, to: :proxy
 
+  include BulkOps::InterpretRelationshipsBehavior
+  include BulkOps::InterpretFilesBehavior
+  include BulkOps::InterpretScalarBehavior
+  include BulkOps::InterpretOptionsBehavior
+  include BulkOps::InterpretControlledBehavior
+
+  def self.unescape_csv(value)
+    value.gsub(/\\(['";,])/,'\1')
+  end
+
+  def self.split_values value_string
+    # Split values on all un-escaped separator characters (escape character is '\')
+    # Then replace all escaped separator characters with un-escaped versions
+    value_string.split(/(?<!\\)#{BulkOps::SEPARATOR}/).map{|val| val.gsub("\\#{BulkOps::SEPARATOR}",BulkOps::SEPARATOR).strip}
+  end
+
+  def self.normalize_relationship_field_name field
+    normfield = field.to_s.downcase.parameterize.gsub(/[_\s-]/,'')
+    BulkOps::RELATIONSHIP_FIELDS.find{|rel_field| normfield == rel_field }
+  end
+
   def self.is_file_set? metadata, row_number
     return false unless metadata[row_number].present?
     # If the work type is explicitly specified, use that
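Because unescape_csv, split_values, and normalize_relationship_field_name are now class methods, the interpret_* concerns (and tests) can call them without a Parser instance. Assuming BulkOps::SEPARATOR is ';' (the constant's value isn't shown in this diff), they behave like:

BulkOps::Parser.split_values('dogs;cats;mice\; and rats')
#=> ["dogs", "cats", "mice; and rats"]   # escaped separator survives as a literal

BulkOps::Parser.unescape_csv("O\\'Connor")
#=> "O'Connor"                           # backslash stripped from escaped ' " ; ,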
@@ -23,12 +43,32 @@ class BulkOps::Parser
     return true
   end
 
-
+  def self.get_negating_metadata(work_id, metadata={})
+    return false unless BulkOps::SolrService.record_exists?(work_id)
+    work = ActiveFedora::Base.find(work_id)
+    schema = ScoobySnacks::METADATA_SCHEMA
+    schema.all_fields.each do |field|
+      field_key = field.controlled? ? "#{field.name}_attributes" : field.name
+      metadata[field_key] ||= (field.multiple? ? [] : nil)
+      if field.controlled?
+        values = Array(work.send(field.name)).map{|value| {id: value.id, _destroy: true} }
+        if field.multiple?
+          metadata[field_key] += values
+        else
+          metadata[field_key] = values.first
+        end
+      end
+    end
+    return metadata
+  end
+
+  def initialize prx, metadata_sheet=nil, options={}
     @proxy = prx
-    @raw_data = (metadata_sheet ||
+    @raw_data = (metadata_sheet || operation.metadata)
     @raw_row = @raw_data[@proxy.row_number]
     @metadata = {}
     @parsing_errors = []
+    @options = options || operation.options
   end
 
   def interpret_data raw_row: nil, raw_data: nil, proxy: nil
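get_negating_metadata builds an attributes hash that blanks out every schema field on an existing work: plain fields become [] (or nil if single-valued) and controlled fields get a _destroy row for each current value. Illustratively (the work id and field names here are hypothetical, not from the gem's schema):

BulkOps::Parser.get_negating_metadata("qf85nb30d")
#=> {"title" => [],
#    "description" => [],
#    "subject_attributes" => [{id: "http://id.loc.gov/authorities/subjects/sh85101653", _destroy: true}],
#    ...}

interpret_data deep-merges this hash under the new discard_existing_metadata option (next hunk) before the scalar, controlled, and file fields are parsed, so values supplied in the spreadsheet are appended afterwards and any field not re-supplied stays cleared.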
@@ -42,6 +82,9 @@ class BulkOps::Parser
     interpret_relationship_fields
     setMetadataInheritance
     interpret_option_fields
+    if @proxy.work_id.present? && @options['discard_existing_metadata']
+      @metadata.deep_merge!(self.class.get_negating_metadata(@proxy.work_id))
+    end
     interpret_file_fields
     interpret_controlled_fields
     interpret_scalar_fields
@@ -66,7 +109,7 @@ class BulkOps::Parser
   end
 
   def connect_existing_work
-    return unless (column_name =
+    return unless (column_name = @options["update_identifier"])
     return unless (key = @raw_row.to_h.keys.find{|key| key.to_s.parameterize.downcase.gsub("_","") == column_name.to_s.parameterize.downcase.gsub("_","")})
     return unless (value = @raw_row[key]).present?
     return unless (work_id = find_work_id_from_unique_metadata(key, value))
@@ -83,351 +126,6 @@ class BulkOps::Parser
     return response["docs"][0]["id"]
   end
 
-  def interpret_controlled_fields
-
-    # The labels array tracks the contents of columns marked as labels,
-    # which may require special validation
-    labels = {}
-
-    # This hash is populated with relevant data as we loop through the fields
-    controlled_data = {}
-
-    @raw_row.each do |field_name, value|
-      next if value.blank? or field_name.blank?
-      field_name = field_name.to_s
-
-      #If our CSV interpreter is feeding us the headers as a line, ignore it.
-      next if field_name == value
-
-      #check if they are using the 'field_name.authority' syntax
-      authority = nil
-      if ((split=field_name.split('.')).count == 2)
-        authority = split.last
-        field_name = split.first
-      end
-
-      # get the field name, if this column is a metadata field
-      field_name_norm = find_field_name(field_name)
-      field = schema.get_field(field_name_norm)
-
-      # Ignore anything that isn't a controlled field
-      next unless field.present? && field.controlled?
-
-      # Keep track of label fields
-      if field_name.downcase.ends_with?("label")
-        next if operation.options["ignore_labels"]
-        labels[field_name_norm] ||= []
-        labels[field_name_norm] += split_values value
-        next unless operation.options["import_labels"]
-      end
-
-      remove = field_name.downcase.starts_with?("remove") || field_name.downcase.starts_with?("delete")
-
-      # handle multiple values
-      value_array = split_values(value)
-      controlled_data[field_name_norm] ||= [] unless value_array.blank?
-      value_array.each do |value|
-        # Decide of we're dealing with a label or url
-        # It's an ID if it's a URL and the name doesn't end in 'label'
-        value.strip!
-        if value =~ /^#{URI::regexp}$/ and !field_name.downcase.ends_with?("label")
-          value_id = value
-          # label = WorkIndexer.fetch_remote_label(value)
-          # error_message = "cannot fetch remote label for url: #{value}"
-          # report_error( :cannot_retrieve_label , error_message, url: value, row_number: row_number) unless label
-        else
-          # It's a label, so unescape it and get the id
-          value = unescape_csv(value)
-          value_id = get_remote_id(value, property: field_name_norm, authority: authority) || localAuthUrl(field_name_norm, value)
-          # label = value
-          report_error(:cannot_retrieve_url,
-                       message: "cannot find or create url for controlled vocabulary label: #{value}",
-                       url: value,
-                       row_number: row_number) unless value_id
-        end
-        controlled_data[field_name_norm] << {id: value_id, remove: field_name.downcase.starts_with?("remove")}
-      end
-    end
-
-    # Actually add all the data
-    controlled_data.each do |property_name, data|
-      @metadata["#{property_name}_attributes"] ||= [] unless data.blank?
-      data.uniq.each do |datum|
-        atts = {"id" => datum[:id]}
-        atts["_delete"] = true if datum[:remove]
-        @metadata["#{property_name}_attributes"] << atts
-      end
-    end
-  end
-
-  def interpret_scalar_fields
-    @raw_row.each do |field, values|
-      next if values.blank? or field.nil? or field == values
-      # get the field name, if this column is a metadata field
-      next unless field_name = find_field_name(field.to_s)
-      field = schema.get_field(field_name)
-      # Ignore controlled fields
-      next if field.controlled?
-      split_values(values).each do |value|
-        next if value.blank?
-        value = value.strip.encode('utf-8', :invalid => :replace, :undef => :replace, :replace => '_') unless value.blank?
-        value = unescape_csv(value)
-        (@metadata[field_name] ||= []) << value
-      end
-    end
-  end
-
-  def interpret_file_fields
-    # This method handles file additions and deletions from the spreadsheet
-    # if additional files need to be deleted because the update is set to replace
-    # some or all existing files, those replacement-related deletions are handled
-    # by the BulkOps::Operation.
-    #
-
-    @raw_row.each do |field, value|
-      next if value.blank? or field.blank?
-      field = field.to_s
-      #If our CSV interpreter is feeding us the headers as a line, ignore it.
-      next if field == value
-
-      # Check if this is a file field, and whether we are removing or adding a file
-      next unless (action = BulkOps::Verification.is_file_field?(field))
-
-      # Move on if this field is the name of another property (e.g. masterFilename)
-      next if find_field_name(field)
-
-      # Check if we are removing a file
-      if action == "remove"
-        get_removed_filesets(value).each { |fileset_id| delete_file_set(file_set_id) }
-      else
-        # Add a file
-        operation.get_file_paths(value).each do |filepath|
-          begin
-            uploaded_file = Hyrax::UploadedFile.create(file: File.open(filepath), user: operation.user)
-            (@metadata[:uploaded_files] ||= []) << uploaded_file.id unless uploaded_file.id.nil?
-          rescue Exception => e
-            report_error(:upload_error,
-                         message: "Error opening file: #{ filepath } -- #{e}",
-                         file: File.join(BulkOps::INGEST_MEDIA_PATH,filename),
-                         row_number: row_number)
-          end
-        end
-      end
-
-      # Check if any of the upcoming rows are child filesets
-      i = 1
-      while self.class.is_file_set?(@metadata,row_number+i)
-        child_row.each do |field,value|
-          next if value.blank?
-          title = value if ["title","label"].include?(field.downcase.strip)
-          if BulkOps::Verification.is_file_field?(field)
-            operation.get_file_paths(value).each do |filepath|
-              uploaded_file = Hyrax::UploadedFile.create(file: File.open(filepath), user: operation.user)
-            end
-          end
-        end
-        i+=1
-      end
-
-    end
-  end
-
-  def interpret_option_fields
-    @raw_row.each do |field,value|
-      next if value.blank? or field.blank?
-      field = field.to_s
-      next if value == field
-
-      normfield = field.downcase.parameterize.gsub(/[_\s-]/,'')
-      if ["visibility", "public"].include?(normfield)
-        @proxy.update(visibility: format_visibility(value))
-
-      end
-      if ["worktype","model","type"].include?(normfield)
-        @proxy.update(work_type: format_worktype(value) )
-      end
-      if ["referenceidentifier",
-          "referenceid",
-          "refid",
-          "referenceidentifiertype",
-          "referenceidtype",
-          "refidtype",
-          "relationshipidentifier",
-          "relationshipid",
-          "relationshipidentifiertype",
-          "relationshipidtype",
-          "relid",
-          "relidtype"].include?(normfield)
-        @proxy.update(reference_identifier: format_reference_id(value))
-      end
-    end
-  end
-
-  def interpret_relationship_fields
-    @raw_row.each do |field,value|
-      next if value.blank? or field.blank?
-      field = field.to_s
-      value = unescape_csv(value)
-      identifer_type = reference_identifier
-
-      next if value == field
-
-      # Correctly interpret the notation "parent:id", "parent id" etc in a column header
-      if (split = field.split(/[:_\-\s]/)).count == 2
-        identifier_type = split.last
-        relationship_type = split.first.to_s
-      else
-        relationship_type = field
-      end
-
-      relationship_type = self.class.normalize_relationship_field_name(relationship_type)
-      case relationship_type
-      when "order"
-        # If the field specifies the object's order among siblings
-        @proxy.update(order: value.to_f)
-        next
-      when "collection"
-        # If the field specifies the name or ID of a collection,
-        # find or create the collection and update the metadata to match
-        col = find_or_create_collection(value)
-        ( @metadata[:member_of_collection_ids] ||= [] ) << col.id if col
-        next
-      when "parent", "child"
-
-        # correctly interpret the notation "id:a78C2d81"
-        identifier_type, object_identifier = interpret_relationship_value(identifier_type, value)
-
-        relationship_parameters = { work_proxy_id: @proxy.id,
-                                    identifier_type: identifier_type,
-                                    relationship_type: relationship_type,
-                                    object_identifier: object_identifier,
-                                    status: "new"}
-
-        #add previous sibling link if necessary
-        previous_value = @raw_data[row_number-1][field]
-        # Check if this is a parent relationship, and the previous row also has one
-        if previous_value.present? && (relationship_type == "parent")
-          # Check if the previous row has the same parent as this row
-          if object_identifier == interpret_relationship_value(identifier_type, previous_value, field).last
-            # If so, set the previous sibling parameter on the relationshp
-            # to the id for the proxy associated with the previous row
-            relationship_parameters[:previous_sibling] = operation.work_proxies.find_by(row_number: row_number-1).id
-          end
-        end
-        BulkOps::Relationship.create(relationship_parameters)
-      end
-    end
-  end
-
-  def self.normalize_relationship_field_name field
-    normfield = field.downcase.parameterize.gsub(/[_\s-]/,'')
-    BulkOps::RELATIONSHIP_FIELDS.find{|rel_field| normfield == rel_field }
-  end
-
-  def find_previous_parent field="parent"
-    #Return the row number of the most recent preceding row that does
-    # not itself have a parent defined
-    i = 1;
-    while (prev_row = raw_data[row_number - i])
-      return (row_number - i) if prev_row[field].blank?
-      i += 1
-    end
-  end
-
-  def interpret_relationship_value id_type, value, field="parent"
-    #Handle "id:20kj4259" syntax if it hasn't already been handled
-    if (split = value.to_s.split(":")).count == 2
-      id_type, value = split.first
-      value = split.last
-    end
-    #Handle special shorthand syntax for refering to relative row numbers
-    if id_type == "row"
-      #if the value is an integer
-      if value =~ /\A[-+]?[0-9]+\z/
-        if value.to_i < 0
-          # if given a negative integer, count backwards from the current row (remember that value.to_i is negative)
-          return [id_type,row_number + value.to_i]
-        elsif value.to_i > 0
-          # if given a positive integer, remove the row offset
-          value = (value.to_i - BulkOps::ROW_OFFSET).to_s
-        end
-      elsif value.to_s.downcase.include?("prev")
-        # if given any variation of the word "previous", get the first preceding row with no parent of its own
-        return [id_type,find_previous_parent(field)]
-      end
-    end
-    return [id_type,value]
-  end
-
-  def unescape_csv(value)
-    value.gsub(/\\(['";,])/,'\1')
-  end
-
-
-  def format_worktype(value)
-    # format the value like a class name
-    type = value.titleize.gsub(/[-_\s]/,'')
-    # reject it if it isn't a defined class
-    type = false unless Object.const_defined? type
-    # fall back to the work type defined by the operation, or a standard "Work"
-    return type ||= work_type || operation.work_type || "Work"
-  end
-
-  def format_visibility(value)
-    case value.downcase
-    when "public", "open", "true"
-      return "open"
-    when "campus", "ucsc", "institution"
-      return "ucsc"
-    when "restricted", "private", "closed", "false"
-      return "restricted"
-    end
-  end
-
-
-  def mintLocalAuthUrl(auth_name, value)
-    value.strip!
-    id = value.parameterize
-    auth = Qa::LocalAuthority.find_or_create_by(name: auth_name)
-    entry = Qa::LocalAuthorityEntry.create(local_authority: auth,
-                                           label: value,
-                                           uri: id)
-    return localIdToUrl(id,auth_name)
-  end
-
-  def findAuthUrl(auth, value)
-    value.strip!
-    return nil if auth.nil?
-    return nil unless (entries = Qa::Authorities::Local.subauthority_for(auth).search(value))
-    entries.each do |entry|
-      #require exact match
-      next unless entry["label"].force_encoding('UTF-8') == value.force_encoding('UTF-8')
-      url = entry["url"] || entry["id"]
-      # url = localIdToUrl(url,auth) unless url =~ URI::regexp
-      return url
-    end
-    return nil
-  end
-
-  def localIdToUrl(id,auth_name)
-    root_urls = {production: "https://digitalcollections.library.ucsc.edu",
-                 staging: "http://digitalcollections-staging.library.ucsc.edu",
-                 development: "http://#{Socket.gethostname}",
-                 test: "http://#{Socket.gethostname}"}
-    return "#{root_urls[Rails.env.to_sym]}/authorities/show/local/#{auth_name}/#{id}"
-  end
-
-  def getLocalAuth(field_name)
-    field = schema.get_property(field_name)
-    # There is only ever one local authority per field, so just pick the first you find
-    if vocs = field.vocabularies
-      vocs.each do |voc|
-        return voc["subauthority"] if voc["authority"].downcase == "local"
-      end
-    end
-    return nil
-  end
-
   def setAdminSet
     return if @metadata[:admin_set_id]
     asets = AdminSet.where({title: "Bulk Ingest Set"})
@@ -437,7 +135,7 @@ class BulkOps::Parser
 
   def setMetadataInheritance
     return if @metadata[:metadataInheritance].present?
-    @metadata[:metadataInheritance] =
+    @metadata[:metadataInheritance] = @options["metadataInheritance"] unless @options["metadataInheritance"].blank?
   end
 
   def report_error type, message, **args
@@ -447,75 +145,13 @@ class BulkOps::Parser
     (@parsing_errors ||= []) << BulkOps::Error.new(**args)
   end
 
-  def get_removed_filesets(filestring)
-    file_ids = split_values(filestring)
-    file_ids.select{|file_id| record_exists?(file_id)}
-
-    # This part handles filenames in addition to file ids. It doesn't work yet!
-    # file_ids.map do |file_id|
-    # If the filename is the id of an existing record, keep that
-    # next(file_id) if (record_exists?(file_id))
-    # If this is the label (i.e.filename) of an existing fileset, use that fileset id
-    # TODO MAKE THIS WORK!!
-    # next(filename) if (filename_exists?(filename))
-    # File.join(BulkOps::INGEST_MEDIA_PATH, filename_prefix, filename)
-    # end
-  end
-
-  def delete_file_set fileset_id
-    BulkOps::DeleteFileSetJob.perform_later(fileset_id, operation.user.email )
-  end
-
-  def record_exists? id
-    operation.record_exists? id
-  end
-
-  def localAuthUrl(property, value)
-    return value if (auth = getLocalAuth(property)).nil?
-    url = findAuthUrl(auth, value) || mintLocalAuthUrl(auth,value)
-    return url
-  end
-
-  def find_collection(collection)
-    cols = Collection.where(id: collection)
-    cols += Collection.where(title: collection).select{|col| col.title.first == collection}
-    return cols.last unless cols.empty?
-    return false
-  end
-
-  def find_or_create_collection(collection)
-    col = find_collection(collection)
-    return col if col
-    return false if collection.to_i > 0
-    col = Collection.create(title: [collection.to_s], depositor: operation.user.email, collection_type: Hyrax::CollectionType.find_by(title:"User Collection"))
-  end
-
-  def get_remote_id(value, authority: nil, property: nil)
-    return false
-    #TODO retrieve URL for this value from the specified remote authr
-  end
-
-  def format_param_name(name)
-    name.titleize.gsub(/\s+/, "").camelcase(:lower)
-  end
-
-  def schema
-    ScoobySnacks::METADATA_SCHEMA
-  end
-
   def find_field_name(field)
     operation.find_field_name(field)
   end
 
-  def
-
-    str[0].downcase + str[1..-1]
+  def schema
+    ScoobySnacks::METADATA_SCHEMA
   end
 
-  def split_values value_string
-    # Split values on all un-escaped separator character (escape character is '\')
-    # Then replace all escaped separator charactors with un-escaped versions
-    value_string.split(/(?<!\\)#{BulkOps::SEPARATOR}/).map{|val| val.gsub("\\#{BulkOps::SEPARATOR}",BulkOps::SEPARATOR).strip}
-  end
 
 end