RubyGems - bulk_ops - Versions diffs - 0.1.14 → 0.1.15 - Mend

bulk_ops 0.1.14 → 0.1.15

Files changed (10) hide show

checksums.yaml +4 -4
data/lib/bulk_ops.rb +34 -3
data/lib/bulk_ops/create_spreadsheet_job.rb +1 -1
data/lib/bulk_ops/github_access.rb +7 -11
data/lib/bulk_ops/operation.rb +10 -29
data/lib/bulk_ops/parser.rb +485 -0
data/lib/bulk_ops/verification.rb +9 -9
data/lib/bulk_ops/version.rb +1 -1
data/lib/bulk_ops/work_proxy.rb +0 -459
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 27b9b67583cbf4ca808867661196e5bb8a6b95490b3a377dd85d11a91d0a41fb
-  data.tar.gz: 508dbf4a72146f7a893851aec847bb9bb82a399765135dfde6daa6eec0a4d121
+  metadata.gz: fea513373c0ae0267f9302311300b8f4ba03b9fa632db168aec201c2f8359182
+  data.tar.gz: baa0fe9b67bfbe7d2f8283ff7949cb8ec46e268c7e15ef17c7b73b9c3a80ef19
 SHA512:
-  metadata.gz: da715c7235ae2044b2354653b382825078a63f466e542642d995c81b6dd3bb8d8336c13ac84dd811ecefbff4e9e2422c45f1dc39e10b0ea1a4119a6736397ee2
-  data.tar.gz: 9bd37e6481170e1da5ba4494888fb16a1cfa65cc9869edcee87353228a1eb78eefe5f32c87bb7c14f4fc2b6e3ead0ad068901273d8c100e3afbce0d5268e4486
+  metadata.gz: 33810a935cc44ee6de4448a12e37d4c0889b6a4c7d409011fc5dd9d0bddc18e1a53f0f18337c933ab3dd6903d4112b0a968579f20e7e204d4278220c0dbb0315
+  data.tar.gz: b7ff43aed578a7aba0cb59d0862af6d1ffe7f50eccce6715171063a09e1edf2670e3d23333b4e506ff3d473ff6dbed56f672ff66b2f43209e58e67950706072a

data/lib/bulk_ops.rb CHANGED Viewed

@@ -1,6 +1,39 @@
 require "bulk_ops/version"
 module BulkOps
+  OPTION_FIELDS = ['visibility','work type']
+  RELATIONSHIP_FIELDS = ['parent','child','collection','order']
+  REFERENCE_IDENTIFIER_FIELDS = ['Reference Identifier','ref_id','Reference ID','Relationship ID','Relationship Identifier','Reference Identifier Type','Reference ID Type','Ref ID Type','relationship_identifier_type','relationship_id_type']
+  FILE_FIELDS = ['file','files','filename','filenames']
+  FILE_ACTIONS = ['add','upload','remove','delete']
+  SEPARATOR = ';'
+  DEFAULT_ADMIN_SET_TITLE = "Bulk Ingest Set"
+  INGEST_MEDIA_PATH = "/dams_ingest"
+  TEMPLATE_DIR = "lib/bulk_ops/templates"
+  RELATIONSHIP_COLUMNS = ["parent","child","next"]
+  SPECIAL_COLUMNS = ["parent",
+                     "child",
+                     "order",
+                     "next",
+                     "work_type",
+                     "collection",
+                     "collection_title",
+                     "collection_id",
+                     "visibility",
+                     "relationship_identifier_type",
+                     "id",
+                     "filename",
+                     "file"]
+  IGNORED_COLUMNS = ["ignore","offline_notes"]
+  OPTION_REQUIREMENTS = {type: {required: true,
+                                values:[:ingest,:update]},
+                         file_method: {required: :true,
+                                       values: [:replace_some,:add_remove,:replace_all]},
+                         notifications: {required: true}}
+  SPREADSHEET_FILENAME = 'metadata.csv'
+  OPTIONS_FILENAME = 'configuration.yml'
+  ROW_OFFSET = 2
   dirstring = File.join( File.dirname(__FILE__), 'bulk_ops/**/*.rb')
   Dir[dirstring].each  do |file|
     begin
@@ -9,7 +42,5 @@ module BulkOps
       puts "ERROR LOADING #{File.basename(file)}: #{e}"
     end
   end
-#  require 'bulk_ops/verification'
-#  require 'bulk_ops/verification'
-#  require 'bulk_ops/work_proxy'
 end

data/lib/bulk_ops/create_spreadsheet_job.rb CHANGED Viewed

@@ -36,7 +36,7 @@ class BulkOps::CreateSpreadsheetJob < ActiveJob::Base
         next if value.is_a? DateTime
         value = (label ? WorkIndexer.fetch_remote_label(value.id) : value.id) unless value.is_a? String
         value.gsub("\"","\"\"")
-      end.join(BulkOps::WorkProxy::SEPARATOR).prepend('"').concat('"')
+      end.join(BulkOps::SEPARATOR).prepend('"').concat('"')
     end.join(',')
   end

data/lib/bulk_ops/github_access.rb CHANGED Viewed

@@ -5,10 +5,6 @@ require 'base64'
 class BulkOps::GithubAccess
-  ROW_OFFSET = 2
-  SPREADSHEET_FILENAME = 'metadata.csv'
-  OPTIONS_FILENAME = 'configuration.yml'
   attr_accessor :name
   def self.auth_url user
@@ -142,11 +138,11 @@ class BulkOps::GithubAccess
   def add_new_spreadsheet file, message=false
     if file.is_a? Tempfile
       file.close
-      add_file file.path, SPREADSHEET_FILENAME, message: message
+      add_file file.path, BulkOps::SPREADSHEET_FILENAME, message: message
     elsif file.is_a?(String) && File.file?(file)
-      add_file file, SPREADSHEET_FILENAME, message: message
+      add_file file, BulkOps::SPREADSHEET_FILENAME, message: message
     elsif file.is_a? String
-      add_contents(spreadsheet_path, SPREADSHEET_FILENAME, message: message)
+      add_contents(spreadsheet_path, BulkOps::SPREADSHEET_FILENAME, message: message)
     end
   end
@@ -218,12 +214,12 @@ class BulkOps::GithubAccess
   def get_metadata_row row_number
     @current_metadata ||= load_metadata
-    @current_metadata[row_number - ROW_OFFSET]
+    @current_metadata[row_number - BulkOps::ROW_OFFSET]
   end
   def get_past_metadata_row commit_sha, row_number
     past_metadata = Base64.decode64( client.contents(repo, path: filename, ref: commit_sha) )
-    past_metadata[row_number - ROW_OFFSET]
+    past_metadata[row_number - BulkOps::ROW_OFFSET]
   end
   def get_file filename
@@ -244,13 +240,13 @@ class BulkOps::GithubAccess
   end
   def spreadsheet_path
-    "#{name}/#{SPREADSHEET_FILENAME}"
+    "#{name}/#{BulkOps::SPREADSHEET_FILENAME}"
   end
   private
   def options_path
-    "#{name}/#{OPTIONS_FILENAME}"
+    "#{name}/#{BulkOps::OPTIONS_FILENAME}"
   end
   def current_master_commit_sha

data/lib/bulk_ops/operation.rb CHANGED Viewed

@@ -7,33 +7,10 @@ module BulkOps
     include BulkOps::Verification
-    attr_accessor :work_type, :visibility, :reference_identifier
+    attr_accessor :work_type, :visibility, :reference_identifier, :metadata
     delegate  :can_merge?, :merge_pull_request, to: :git
-    INGEST_MEDIA_PATH = "/dams_ingest"
-    TEMPLATE_DIR = "lib/bulk_ops/templates"
-    RELATIONSHIP_COLUMNS = ["parent","child","next"]
-    SPECIAL_COLUMNS = ["parent",
-                       "child",
-                       "order",
-                       "next",
-                       "work_type",
-                       "collection",
-                       "collection_title",
-                       "collection_id",
-                       "visibility",
-                       "relationship_identifier_type",
-                       "id",
-                       "filename",
-                       "file"]
-    IGNORED_COLUMNS = ["ignore","offline_notes"]
-    OPTION_REQUIREMENTS = {type: {required: true,
-                                  values:[:ingest,:update]},
-                           file_method: {required: :true,
-                                           values: [:replace_some,:add_remove,:replace_all]},
-                           notifications: {required: true}}
     def self.unique_name name, user
       while  BulkOps::Operation.find_by(name: name) || BulkOps::GithubAccess.list_branch_names(user).include?(name) do
         if ['-','_'].include?(name[-2]) && name[-1].to_i > 0
@@ -119,7 +96,7 @@ module BulkOps
       @metadata.each_with_index do |values,row_number|
         proxy = work_proxies.find_by(row_number: row_number)
         proxy.update(message: "interpreted at #{DateTime.now.strftime("%d/%m/%Y %H:%M")} " + proxy.message)
-        data = proxy.interpret_data values
+        data = BulkOps::Parser.new(proxy, @metadata).interpret_data(raw_row: values)
         next unless proxy.proxy_errors.blank?
         BulkOps::CreateWorkJob.perform_later(proxy.work_type || "Work",
                                              user.email,
@@ -202,7 +179,7 @@ module BulkOps
       #loop through the work proxies to create a job for each work
       work_proxies.each do |proxy|
-        data = proxy.interpret_data final_spreadsheet[proxy.row_number]
+        data = BulkOps::Parser.new(proxy,final_spreadsheet).interpret_data(raw_row: final_spreadsheet[proxy.row_number])
         BulkOps::UpdateWorkJob.perform_later(proxy.work_type || "",
                                              user.email,
                                              data,
@@ -238,13 +215,13 @@ module BulkOps
       bulk_ops_dir = Gem::Specification.find_by_name("bulk_ops").gem_dir
       #copy template files
-      Dir["#{bulk_ops_dir}/#{TEMPLATE_DIR}/*"].each do |file|
+      Dir["#{bulk_ops_dir}/#{BulkOps::TEMPLATE_DIR}/*"].each do |file|
         git.add_file file
       end
       #update configuration options
       unless options.blank?
-        full_options = YAML.load_file(File.join(bulk_ops_dir,TEMPLATE_DIR, BulkOps::GithubAccess::OPTIONS_FILENAME))
+        full_options = YAML.load_file(File.join(bulk_ops_dir,BulkOps::TEMPLATE_DIR, BulkOps::OPTIONS_FILENAME))
         options.each { |option, value| full_options[option] = value }
@@ -278,6 +255,10 @@ module BulkOps
       git.update_options(options, message: message)
     end
+    def metadata
+      @metadata ||= git.load_metadata
+    end
     def options
       return {} if name.nil?
       return @options if @options
@@ -332,7 +313,7 @@ module BulkOps
     end
     def ignored_fields
-      (options['ignored headers'] || []) + IGNORED_COLUMNS
+      (options['ignored headers'] || []) + BulkOps::IGNORED_COLUMNS
     end

data/lib/bulk_ops/parser.rb ADDED Viewed

@@ -0,0 +1,485 @@
+class BulkOps::Parser
+  require 'uri'
+  attr_accessor :proxy, :raw_data, :raw_row
+  delegate :relationships, :operation, :row_number, :work_id, :visibility, :work_type, :reference_identifier, :order, to: :proxy
+  # Set up a parser for the spreadsheet row that belongs to a work proxy.
+  #
+  # prx            - the work proxy whose row is to be parsed
+  # metadata_sheet - the whole spreadsheet; when nil, the metadata of the
+  #                  proxy's operation is used instead
+  def initialize(prx, metadata_sheet = nil)
+    @parsing_errors = []
+    @metadata = {}
+    @proxy = prx
+    @raw_data = metadata_sheet || @proxy.operation.metadata
+    # Keep a private copy of the row that matches this proxy
+    @raw_row = @raw_data[@proxy.row_number].dup
+  end
+  # Run every interpretation step over the current row and return the
+  # metadata hash built up by those steps.
+  #
+  # raw_row, raw_data and proxy optionally replace the values captured by
+  # the constructor. Parsing errors collected along the way are appended to
+  # the proxy's proxy_errors, and the proxy is marked with an "ERROR" status
+  # when there are any.
+  def interpret_data raw_row: nil, raw_data: nil, proxy: nil
+    @raw_data = raw_data if raw_data.present?
+    @proxy = proxy if proxy.present?
+    @raw_row = raw_row if raw_row.present?
+    # The steps run in this exact order; several of them consume columns
+    # from @raw_row as they go.
+    parsing_steps = [:setAdminSet,
+                     :setMetadataInheritance,
+                     :interpret_option_fields,
+                     :interpret_relationship_fields,
+                     :disambiguate_columns,
+                     :interpret_file_fields,
+                     :interpret_controlled_fields,
+                     :interpret_scalar_fields]
+    parsing_steps.each { |step| send(step) }
+    unless @parsing_errors.blank?
+      @proxy.update(status: "ERROR", message: "error parsing spreadsheet line")
+    end
+    @proxy.proxy_errors = (@proxy.proxy_errors || []) + @parsing_errors
+    @metadata
+  end
+  # Merge columns that share the same header into a single entry whose
+  # values are joined with BulkOps::SEPARATOR.
+  #
+  # Does nothing (and returns nil) unless the row exposes #headers and at
+  # least one header occurs more than once. Otherwise the merged hash
+  # replaces @raw_row, so that the later interpretation steps actually see
+  # the merged values, and is also returned.
+  def disambiguate_columns
+    #do nothing unless there are columns with the same header
+    return unless (@raw_row.respond_to?(:headers) && (@raw_row.headers.uniq.length < @raw_row.length) )
+    row = {}
+    @raw_row.headers.each_with_index do |header, i|
+      # separate values in identical columns using the separator,
+      # leaving out empty cells so no stray separators are produced
+      cells = [row[header], @raw_row[i]].reject { |cell| cell.nil? || cell.to_s.empty? }
+      row[header] = cells.join(BulkOps::SEPARATOR)
+    end
+    # keep the merged row for the remaining steps; the caller ignores the
+    # return value
+    @raw_row = row
+    return row
+  end
+  # Collect the values of controlled-vocabulary columns in the current row
+  # and store them in @metadata under "<property>_attributes" as hashes of
+  # the form {"id" => url} (plus "_delete" => true for removals).
+  #
+  # Columns may use the "field_name.authority" header syntax. Headers that
+  # end in "label" are treated as labels and are skipped or imported
+  # according to the operation's "ignore_labels" / "import_labels" options.
+  # Headers that start with "remove" or "delete" mark values for removal.
+  # Columns that were consumed are dropped from @raw_row.
+  def interpret_controlled_fields
+    # The labels hash tracks the contents of columns marked as labels,
+    # which may require special validation
+    labels = {}
+    # This hash is populated with relevant data as we loop through the fields
+    controlled_data = {}
+    row = @raw_row.dup
+    @raw_row.each do |header, value|
+      next if value.blank? || header.blank?
+      field_name = header.to_s
+      #If our CSV interpreter is feeding us the headers as a line, ignore it.
+      next if field_name == value
+      #check if they are using the 'field_name.authority' syntax
+      authority = nil
+      if ((split=field_name.split('.')).count == 2)
+        authority = split.last
+        field_name = split.first
+      end
+      # get the field name, if this column is a metadata field
+      field_name_norm = find_field_name(field_name)
+      field = schema.get_field(field_name_norm)
+      # Ignore anything that isn't a controlled field
+      next unless field.present? && field.controlled?
+      is_label = field_name.downcase.ends_with?("label")
+      # Keep track of label fields
+      if is_label
+        next if operation.options["ignore_labels"]
+        labels[field_name_norm] ||= []
+        labels[field_name_norm] += split_values value
+        next unless operation.options["import_labels"]
+      end
+      # Both "remove..." and "delete..." headers request removal of the value
+      remove = field_name.downcase.starts_with?("remove") || field_name.downcase.starts_with?("delete")
+      # handle multiple values
+      value_array = split_values(value)
+      next if value_array.blank?
+      controlled_data[field_name_norm] ||= []
+      value_array.each do |entry|
+        # Decide if we're dealing with a label or url
+        # It's an ID if it's a URL and the name doesn't end in 'label'
+        entry = entry.strip
+        if entry =~ /^#{URI::regexp}$/ && !is_label
+          value_id = entry
+        else
+          # It's a label, so unescape it and get the id
+          entry = unescape_csv(entry)
+          value_id = get_remote_id(entry, property: field_name_norm, authority: authority) || localAuthUrl(field_name_norm, entry)
+          # The message is passed positionally, as report_error expects
+          report_error(:cannot_retrieve_url,
+                       "cannot find or create url for controlled vocabulary label: #{entry}",
+                       url: entry,
+                       row_number: row_number) unless value_id
+        end
+        controlled_data[field_name_norm] << {id: value_id, remove: remove}
+      end
+      # Delete by the original header: field_name may have lost its
+      # ".authority" suffix and would then no longer match the column
+      row.delete(header)
+    end
+    @raw_row = row
+    # Actually add all the data
+    controlled_data.each do |property_name, data|
+      @metadata["#{property_name}_attributes"] ||= [] unless data.blank?
+      data.uniq.each do |datum|
+        atts = {"id" => datum[:id]}
+        atts["_delete"] = true if datum[:remove]
+        @metadata["#{property_name}_attributes"] << atts
+      end
+    end
+  end
+  # Copy the values of plain (non-controlled) metadata columns from the
+  # current row into @metadata, one array of strings per field name.
+  #
+  # Multi-valued cells are split with split_values; each value is stripped,
+  # re-encoded as UTF-8 (invalid bytes become "_") and CSV-unescaped.
+  # Columns that were consumed are dropped from @raw_row.
+  def interpret_scalar_fields
+    row = @raw_row.dup
+    @raw_row.each do |header, values|
+      next if values.blank? || header.nil? || header == values
+      # get the field name, if this column is a metadata field
+      next unless (field_name = find_field_name(header.to_s))
+      # Kept in its own variable so the column header is still available
+      # for row.delete below
+      schema_field = schema.get_field(field_name)
+      # Ignore controlled fields
+      # NOTE(review): names the schema does not know are skipped here
+      # instead of raising on nil -- confirm that is the desired handling
+      next if schema_field.nil? || schema_field.controlled?
+      split_values(values).each do |value|
+        next if value.blank?
+        value = value.strip.encode('utf-8', :invalid => :replace, :undef => :replace, :replace => '_')
+        value = unescape_csv(value)
+        (@metadata[field_name] ||= []) << value
+        row.delete(header)
+      end
+    end
+    @raw_row = row
+  end
+  # This method handles file additions and deletions from the spreadsheet
+  # if additional files need to be deleted because the update is set to replace
+  # some or all existing files, those replacement-related deletions are handled
+  # by the BulkOps::Operation.
+  #
+  # For "remove" columns a delete job is queued for every existing file set
+  # listed in the cell. For all other file columns each path is uploaded
+  # and the id of the upload is appended to @metadata[:uploaded_files].
+  # Upload failures are recorded through report_error instead of aborting
+  # the row.
+  #
+  # TODO: THIS DOES NOT YET MANAGE THE ORDER OF INGESTED FILESETS
+  def interpret_file_fields
+    row = @raw_row.dup
+    @raw_row.each do |header, value|
+      next if value.blank? || header.blank?
+      field = header.to_s
+      #If our CSV interpreter is feeding us the headers as a line, ignore it.
+      next if field == value
+      # Check if this is a file field, and whether we are removing or adding a file
+      next unless (action = is_file_field?(field))
+      # Move on if this field is the name of another property (e.g. masterFilename)
+      next if find_field_name(field)
+      # Check if we are removing a file
+      if action == "remove"
+        get_removed_filesets(value).each { |fileset_id| delete_file_set(fileset_id) }
+      else
+        # Add a file
+        operation.get_file_paths(value).each do |filepath|
+          begin
+            # The block form closes the file handle once the upload exists.
+            # NOTE(review): assumes the uploader copies the file during
+            # create rather than reading it lazily later -- confirm
+            uploaded_file = File.open(filepath) do |file|
+              Hyrax::UploadedFile.create(file: file, user: operation.user)
+            end
+            (@metadata[:uploaded_files] ||= []) << uploaded_file.id unless uploaded_file.id.nil?
+            row.delete(header)
+          rescue StandardError => e
+            # filepath already includes the ingest media path and prefix
+            report_error(:upload_error,
+                         "Error opening file: #{ filepath } -- #{e}",
+                         file: filepath,
+                         row_number: row_number)
+          end
+        end
+      end
+    end
+    @raw_row = row
+  end
+  # Apply the columns that set options on the proxy itself (visibility,
+  # work type, reference identifier) and drop them from @raw_row.
+  #
+  # Headers are compared after lower-casing, parameterizing and removing
+  # underscores, whitespace and dashes.
+  # NOTE(review): format_reference_id is not defined in this class as shown
+  # here -- confirm it is provided elsewhere.
+  def interpret_option_fields
+    reference_id_names = ["referenceidentifier",
+                          "referenceid",
+                          "refid",
+                          "referenceidentifiertype",
+                          "referenceidtype",
+                          "refidtype",
+                          "relationshipidentifier",
+                          "relationshipid",
+                          "relationshipidentifiertype",
+                          "relationshipidtype",
+                          "relid",
+                          "relidtype"]
+    remaining = @raw_row.dup
+    @raw_row.each do |field,value|
+      next if value.blank? || field.blank?
+      field = field.to_s
+      # Skip a header line that was fed to us as data
+      next if value == field
+      case field.downcase.parameterize.gsub(/[_\s-]/,'')
+      when "visibility", "public"
+        @proxy.update(visibility: format_visibility(value))
+      when "worktype", "model", "type"
+        @proxy.update(work_type: format_worktype(value) )
+      when *reference_id_names
+        @proxy.update(reference_identifier: format_reference_id(value))
+      else
+        # not an option column; leave it for the other steps
+        next
+      end
+      remaining.delete(field)
+    end
+    @raw_row = remaining
+  end
+  # Interpret the relationship columns of the current row ("order",
+  # "collection", "parent" and "child") and drop them from @raw_row.
+  #
+  # * "order" updates the proxy's order with the cell value as a float.
+  # * "collection" finds or creates the collection and records its id in
+  #   @metadata[:member_of_collection_ids].
+  # * "parent" / "child" create a BulkOps::Relationship. For a parent
+  #   relationship whose preceding row names the same parent, the preceding
+  #   row's proxy is recorded as the previous sibling.
+  #
+  # Headers may carry an identifier type ("parent:id", "parent id", ...);
+  # otherwise the proxy's reference identifier is used as the type.
+  def interpret_relationship_fields
+    row = @raw_row.dup
+    @raw_row.each do |header,value|
+      next if value.blank? || header.blank?
+      field = header.to_s
+      value = unescape_csv(value)
+      # Default identifier type, used when the header does not name one
+      identifier_type = reference_identifier
+      next if value == field
+      # Correctly interpret the notation "parent:id", "parent id" etc in a column header
+      if (split = field.split(/[:_\-\s]/)).count == 2
+        identifier_type = split.last
+        relationship_type = split.first.to_s
+      else
+        relationship_type = field
+      end
+      relationship_type = normalize_relationship_field_name(relationship_type)
+      case relationship_type
+      when "order"
+        # If the field specifies the object's order among siblings
+        @proxy.update(order: value.to_f)
+        row.delete(header)
+      when "collection"
+        # If the field specifies the name or ID of a collection,
+        # find or create the collection and update the metadata to match
+        col = find_or_create_collection(value)
+        ( @metadata[:member_of_collection_ids] ||= [] ) << col.id if col
+        row.delete(header)
+      when "parent", "child"
+        # correctly interpret the notation "id:a78C2d81"
+        identifier_type, object_identifier = interpret_relationship_value(identifier_type, value)
+        relationship_parameters =  { work_proxy_id: @proxy.id,
+                                     identifier_type: identifier_type,
+                                     relationship_type: relationship_type,
+                                     object_identifier: object_identifier,
+                                     status: "new"}
+        #add previous sibling link if necessary
+        # Only look back when a preceding row exists; a negative index
+        # would silently wrap around to the end of the sheet
+        previous_row = row_number > 0 ? @raw_data[row_number - 1] : nil
+        previous_value = previous_row ? previous_row[header] : nil
+        # Check if this is a parent relationship, and the previous row also has one
+        if previous_value.present? && (relationship_type == "parent")
+          # Check if the previous row has the same parent as this row
+          if object_identifier == interpret_relationship_value(identifier_type, previous_value, field).last
+            # If so, set the previous sibling parameter on the relationship
+            #    to the id for the proxy associated with the previous row
+            previous_proxy = operation.work_proxies.find_by(row_number: row_number-1)
+            relationship_parameters[:previous_sibling] = previous_proxy.id if previous_proxy
+          end
+        end
+        BulkOps::Relationship.create(relationship_parameters)
+        row.delete(header)
+      end
+    end
+    @raw_row = row
+  end
+  # Map a column header onto one of BulkOps::RELATIONSHIP_FIELDS.
+  # The header is lower-cased, parameterized and stripped of underscores,
+  # whitespace and dashes before the comparison. Returns nil when nothing
+  # matches.
+  def normalize_relationship_field_name field
+    squashed = field.downcase.parameterize.gsub(/[_\s-]/,'')
+    BulkOps::RELATIONSHIP_FIELDS.find { |known| squashed == known }
+  end
+  # Return the row number of the most recent preceding row that does
+  # not itself have a value in the given field, or nil when there is no
+  # such row.
+  #
+  # The search walks backwards from the row before the current one and
+  # stops at row 0 (or at the first missing row), so it can never wrap
+  # around to the end of the sheet through a negative index.
+  def find_previous_parent field="parent"
+    (row_number - 1).downto(0) do |index|
+      prev_row = raw_data[index]
+      break if prev_row.nil?
+      return index if prev_row[field].blank?
+    end
+    nil
+  end
+  # Resolve a relationship cell into an [identifier_type, identifier] pair.
+  #
+  # A value of the form "type:identifier" overrides id_type. When the type
+  # is "row", a negative number is taken relative to the current row, and
+  # any value containing "prev" resolves to the row found by
+  # find_previous_parent(field).
+  def interpret_relationship_value id_type, value, field="parent"
+    #Handle "id:20kj4259" syntax if it hasn't already been handled
+    parts = value.to_s.split(":")
+    id_type, value = parts if parts.count == 2
+    # Everything below is shorthand for referring to relative row numbers
+    return [id_type,value] unless id_type == "row"
+    offset = value.to_i
+    # a negative integer counts backwards from the current row
+    return [id_type,row_number + offset] if offset < 0
+    # any variation of the word "previous" means the first preceding row
+    # with no parent of its own
+    return [id_type,find_previous_parent(field)] if value.to_s.downcase.include?("prev")
+    [id_type,value]
+  end
+  # Remove the backslash in front of an escaped quote, semicolon or comma.
+  def unescape_csv(value)
+    value.gsub(/\\(['";,])/) { Regexp.last_match(1) }
+  end
+  # Turn a spreadsheet value into the name of a work class.
+  #
+  # The value is formatted like a class name and used only if a constant of
+  # that name is defined. Otherwise -- including when the formatted value is
+  # not a legal constant name at all -- it falls back to the proxy's work
+  # type, then the operation's work type, then "Work".
+  def format_worktype(value)
+    # format the value like a class name
+    type = value.titleize.gsub(/[-_\s]/,'')
+    # reject it if it isn't a defined class; const_defined? raises
+    # NameError for strings that cannot be constant names
+    known = begin
+      Object.const_defined?(type)
+    rescue NameError
+      false
+    end
+    return type if known
+    # fall back to the work type defined by the operation, or a standard "Work"
+    work_type || operation.work_type || "Work"
+  end
+  # Translate a spreadsheet visibility value into "open", "ucsc" or
+  # "restricted". Matching ignores case and surrounding whitespace.
+  # Returns nil for values that are not recognized.
+  def format_visibility(value)
+    case value.to_s.strip.downcase
+    when "public", "open", "true"
+      return "open"
+    when "campus", "ucsc", "institution"
+      return "ucsc"
+    when "restricted", "private", "closed", "false"
+      return "restricted"
+    end
+  end
+  # Create a local authority entry for the label and return its url.
+  # The label is stripped in place; its parameterized form is the entry id.
+  def mintLocalAuthUrl(auth_name, value)
+    value.strip!
+    local_id = value.parameterize
+    authority = Qa::LocalAuthority.find_or_create_by(name: auth_name)
+    Qa::LocalAuthorityEntry.create(local_authority: authority,
+                                   label: value,
+                                   uri: local_id)
+    localIdToUrl(local_id,auth_name)
+  end
+  # Look up the label in a local subauthority and return the url (or id)
+  # of the first entry whose label matches exactly. Returns nil when no
+  # authority is given or nothing matches. The label is stripped in place.
+  def findAuthUrl(auth, value)
+    value.strip!
+    return nil if auth.nil?
+    entries = Qa::Authorities::Local.subauthority_for(auth).search(value)
+    return nil unless entries
+    entries.each do |entry|
+      #require exact match
+      if entry["label"].force_encoding('UTF-8') == value.force_encoding('UTF-8')
+        return entry["url"] || entry["id"]
+      end
+    end
+    nil
+  end
+  # Build the url of a local authority entry for the current Rails
+  # environment. Development and test use this machine's host name.
+  def localIdToUrl(id,auth_name)
+    local_root = "http://#{Socket.gethostname}"
+    roots_by_env = {production: "https://digitalcollections.library.ucsc.edu",
+                    staging: "http://digitalcollections-staging.library.ucsc.edu",
+                    development: local_root,
+                    test: local_root}
+    "#{roots_by_env[Rails.env.to_sym]}/authorities/show/local/#{auth_name}/#{id}"
+  end
+  # Return the subauthority of the first vocabulary of the field whose
+  # authority is "local", or nil when the field has none.
+  def getLocalAuth(field_name)
+    vocabularies = schema.get_property(field_name).vocabularies
+    return nil unless vocabularies
+    # There is only ever one local authority per field, so just pick the first you find
+    vocabularies.each do |vocabulary|
+      return vocabulary["subauthority"] if vocabulary["authority"].downcase == "local"
+    end
+    nil
+  end
+  # Fill in @metadata[:admin_set_id] unless it is already set, preferring
+  # an admin set titled "Bulk Ingest Set" and otherwise the one found under
+  # 'admin_set/default'.
+  def setAdminSet
+    return if @metadata[:admin_set_id]
+    candidates = AdminSet.where({title: "Bulk Ingest Set"})
+    candidates = AdminSet.find('admin_set/default') if candidates.blank?
+    return if candidates.blank?
+    @metadata[:admin_set_id] = Array(candidates).first.id
+  end
+  # Copy the operation's "metadataInheritance" option into @metadata
+  # unless the metadata already has a value for it.
+  def setMetadataInheritance
+    return if @metadata[:metadataInheritance].present?
+    inherited = operation.options["metadataInheritance"]
+    @metadata[:metadataInheritance] = inherited unless inherited.blank?
+  end
+  # Record a parsing error for the current row.
+  #
+  # type    - symbol identifying the kind of error
+  # message - human readable description; may be given positionally or as
+  #           a `message:` keyword (callers in this class use both forms)
+  # args    - further attributes handed to BulkOps::Error.new
+  #
+  # Prints the message, marks the proxy with an "error" status and appends
+  # a BulkOps::Error to @parsing_errors.
+  def report_error type, message = nil, **args
+    # Take the message out of the keywords so it never ends up as a Hash
+    # in the proxy's message column
+    keyword_message = args.delete(:message)
+    message ||= keyword_message
+    puts "ERROR MESSAGE: #{message}"
+    @proxy.update(status: "error", message: message)
+    args[:type]=type
+    (@parsing_errors ||= []) <<  BulkOps::Error.new(**args)
+  end
+  # Split the cell into ids and keep those that belong to existing records.
+  #
+  # TODO: handle filenames in addition to file ids. The earlier attempt
+  # mapped each entry to itself when it was an existing record id, to the
+  # file set carrying that filename as its label, or else to a path under
+  # BulkOps::INGEST_MEDIA_PATH -- it did not work yet.
+  def get_removed_filesets(filestring)
+    split_values(filestring).select { |candidate_id| record_exists?(candidate_id) }
+  end
+  # Queue a job that deletes the file set on behalf of the operation's user.
+  def delete_file_set(fileset_id)
+    BulkOps::DeleteFileSetJob.perform_later(fileset_id, operation.user.email)
+  end
+  # Ask the operation whether the header names a file column.
+  def is_file_field?(field)
+    operation.is_file_field?(field)
+  end
+  # Ask the operation whether a record with this id exists.
+  def record_exists?(id)
+    operation.record_exists?(id)
+  end
+  # Return the local authority url for a label of the given property,
+  # minting a new entry when none exists yet. When the property has no
+  # local authority the value is returned unchanged.
+  def localAuthUrl(property, value)
+    auth = getLocalAuth(property)
+    return value if auth.nil?
+    findAuthUrl(auth, value) || mintLocalAuthUrl(auth,value)
+  end
+  # Find a collection by id or by exact first title.
+  # Returns the last match, or false when there is none.
+  def find_collection(collection)
+    matches = Collection.where(id: collection)
+    matches += Collection.where(title: collection).select { |candidate| candidate.title.first == collection }
+    matches.empty? ? false : matches.last
+  end
+  # Return the collection with this id or title, creating a "User
+  # Collection" with that title when none exists. Returns false instead of
+  # creating one when the value converts to a positive integer.
+  def find_or_create_collection(collection)
+    existing = find_collection(collection)
+    return existing if existing
+    return false if collection.to_i > 0
+    user_collection_type = Hyrax::CollectionType.find_by(title:"User Collection")
+    Collection.create(title: [collection.to_s], depositor: operation.user.email, collection_type: user_collection_type)
+  end
+  # Placeholder for looking up a value in a remote authority.
+  # Currently always returns false.
+  def get_remote_id(value, authority: nil, property: nil)
+    #TODO retrieve URL for this value from the specified remote authority
+    false
+  end
+  # Titleize the name, remove all whitespace and lower-camel-case the result.
+  def format_param_name(name)
+    squeezed = name.titleize.gsub(/\s+/, "")
+    squeezed.camelcase(:lower)
+  end
+  # The metadata schema used to look up fields (ScoobySnacks constant).
+  def schema
+    ScoobySnacks::METADATA_SCHEMA
+  end
+  # Ask the operation which metadata field, if any, the header refers to.
+  def find_field_name(field)
+    matched_name = operation.find_field_name(field)
+    matched_name
+  end
+  # Return the string with its first character lower-cased.
+  # nil, false and the empty string all give "".
+  def downcase_first_letter(str)
+    return "" unless str
+    # str[0] is nil for an empty string, so handle that case up front
+    return "" if str.empty?
+    str[0].downcase + str[1..-1]
+  end
+  # Split a cell into its individual values.
+  #
+  # The cell is split on every un-escaped separator character (escape
+  # character is '\'); escaped separators are then replaced with un-escaped
+  # ones and each value is stripped. nil gives an empty array, and other
+  # non-String cells are converted with to_s first.
+  def split_values value_string
+    separator = BulkOps::SEPARATOR
+    # Escape the separator so it is always matched literally in the pattern
+    unescaped_separator = /(?<!\\)#{Regexp.escape(separator)}/
+    value_string.to_s.split(unescaped_separator).map do |val|
+      val.gsub("\\#{separator}", separator).strip
+    end
+  end
+end

data/lib/bulk_ops/verification.rb CHANGED Viewed

@@ -35,7 +35,7 @@ module BulkOps
       return false if fieldname.blank?
       return false if schema.get_field(fieldname)
       field_parts = fieldname.underscore.humanize.downcase.gsub(/[-_]/,' ').split(" ")
-      return false unless field_parts.any?{ |field_type| BulkOps::WorkProxy::FILE_FIELDS.include?(field_type) }
+      return false unless field_parts.any?{ |field_type| BulkOps::FILE_FIELDS.include?(field_type) }
       return "remove" if field_parts.any?{ |field_type| ['remove','delete'].include?(field_type) }
       return "add"
     end
@@ -46,7 +46,7 @@ module BulkOps
       name.gsub!(/[_\s-]?[lL]abel$/,'')
       name.gsub!(/^[rR]emove[_\s-]?/,'')
       name.gsub!(/^[dD]elete[_\s-]?/,'')
-      possible_fields = Work.attribute_names + schema.all_field_names
+      possible_fields = (Work.attribute_names + schema.all_field_names).uniq
       matching_fields = possible_fields.select{|pfield| pfield.gsub(/[_\s-]/,'').parameterize == name.gsub(/[_\s-]/,'').parameterize }
       return false if matching_fields.blank?
       #      raise Exception "Ambiguous metadata fields!" if matching_fields.uniq.count > 1
@@ -55,8 +55,8 @@ module BulkOps
     def get_file_paths(filestring)
       return [] if filestring.blank?
-      filenames = filestring.split(BulkOps::WorkProxy::SEPARATOR)
-      filenames.map { |filename| File.join(BulkOps::Operation::INGEST_MEDIA_PATH, options['file_prefix'] || "", filename) }
+      filenames = filestring.split(BulkOps::SEPARATOR)
+      filenames.map { |filename| File.join(BulkOps::INGEST_MEDIA_PATH, options['file_prefix'] || "", filename) }
     end
     def record_exists? id
@@ -85,7 +85,7 @@ module BulkOps
     end
     def verify_configuration
-      BulkOps::Operation::OPTION_REQUIREMENTS.each do |option_name, option_info|
+      BulkOps::OPTION_REQUIREMENTS.each do |option_name, option_info|
         # Make sure it's present if required
         if (option_info["required"].to_s == "true") || (option_info["required"].to_s == type)
           if options[option_name].blank?
@@ -120,7 +120,7 @@ module BulkOps
         # Ignore everything marked as a label
         next if column_name_redux.ends_with? "label"
         # Ignore any column names with special meaning in hyrax
-        next if BulkOps::Operation::SPECIAL_COLUMNS.any?{|col| col.downcase.parameterize.gsub(/[_\s-]/,"") == column_name_redux }
+        next if BulkOps::SPECIAL_COLUMNS.any?{|col| col.downcase.parameterize.gsub(/[_\s-]/,"") == column_name_redux }
         # Ignore any columns speficied to be ignored in the configuration
         ignored = options["ignored headers"] || []
         next if ignored.any?{|col| col.downcase.parameterize.gsub(/[_\s-]/,"") == column_name_redux }
@@ -131,7 +131,7 @@ module BulkOps
     end
     def verify_remote_urls
-      row_offset = BulkOps::GithubAccess::ROW_OFFSET.present? ? BulkOps::GithubAccess::ROW_OFFSET : 2
+      row_offset = BulkOps::ROW_OFFSET.present? ? BulkOps::ROW_OFFSET : 2
       get_spreadsheet.each_with_index do |row, row_num|
         update(message: "verifying controlled vocab urls (row number #{row_num})")
         next if row_num.nil?
@@ -173,7 +173,7 @@ module BulkOps
     def get_ref_id row
       row.each do |field,value|
         next if field.blank? or value.blank? or field === value
-        next unless BulkOps::WorkProxy::REFERENCE_IDENTIFIER_FIELDS.any?{ |ref_field| normalize_field(ref_field) ==  normalize_field(field) }
+        next unless BulkOps::REFERENCE_IDENTIFIER_FIELDS.any?{ |ref_field| normalize_field(ref_field) ==  normalize_field(field) }
         return value
       end
       # No reference identifier specified in the row. Use the default for the operation.
@@ -190,7 +190,7 @@ module BulkOps
       # This is sketchy. Redo it.
       (metadata = get_spreadsheet).each do |row,row_num|
         ref_id = get_ref_id(row)
-        BulkOps::Operation::RELATIONSHIP_COLUMNS.each do |relationship|
+        BulkOps::RELATIONSHIP_COLUMNS.each do |relationship|
           next unless (obj_id = row[relationship])
           if (split = obj_id.split(':')).present? && split.count == 2
             ref_id = split[0].downcase

data/lib/bulk_ops/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module BulkOps
-  VERSION = "0.1.14"
+  VERSION = "0.1.15"
 end

data/lib/bulk_ops/work_proxy.rb CHANGED Viewed

@@ -1,12 +1,5 @@
 class BulkOps::WorkProxy < ActiveRecord::Base
-  require 'uri'
-  OPTION_FIELDS = ['visibility','work type']
-  RELATIONSHIP_FIELDS = ['parent','child','collection','order']
-  REFERENCE_IDENTIFIER_FIELDS = ['Reference Identifier','ref_id','Reference ID','Relationship ID','Relationship Identifier','Reference Identifier Type','Reference ID Type','Ref ID Type','relationship_identifier_type','relationship_id_type']
-  FILE_FIELDS = ['file','files','filename','filenames']
-  FILE_ACTIONS = ['add','upload','remove','delete']
-  SEPARATOR = ';'
   self.table_name = "bulk_ops_work_proxies"
   belongs_to :operation, class_name: "BulkOps::Operation", foreign_key: "operation_id"
   has_many :relationships, class_name: "BulkOps::Relationship"
@@ -40,462 +33,10 @@ class BulkOps::WorkProxy < ActiveRecord::Base
     # TODO make it so people can edit the work again
   end
-  def interpret_data raw_data
-    admin_set = AdminSet.where(title: "Bulk Ingest Set").first || AdminSet.find(AdminSet.find_or_create_default_admin_set_id)
-    metadata = {admin_set_id: admin_set.id}
-    metadata.merge! interpret_file_fields(raw_data)
-    metadata.merge! interpret_controlled_fields(raw_data)
-    metadata.merge! interpret_scalar_fields(raw_data)
-    metadata.merge! interpret_relationship_fields(raw_data)
-    metadata.merge! interpret_option_fields(raw_data)
-    metadata = setAdminSet(metadata)
-    metadata = setMetadataInheritance(metadata)
-    return metadata
-  end
   def proxy_errors
     @proxy_errors ||= []
   end
-  private
-  def is_file_field? field
-    operation.is_file_field? field
-  end
-  def record_exists? id
-    operation.record_exists? id
-  end
-  def localAuthUrl(property, value)
-    return value if (auth = getLocalAuth(property)).nil?
-    url =   findAuthUrl(auth, value) ||  mintLocalAuthUrl(auth,value)
-    return url
-  end
-  def find_collection(collection)
-    cols = Collection.where(id: collection)
-    cols += Collection.where(title: collection).select{|col| col.title.first == collection}
-    return cols.last unless cols.empty?
-    return false
-  end
-  def find_or_create_collection(collection)
-    col = find_collection(collection)
-    return col if col
-    return false if collection.to_i > 0
-    col = Collection.create(title: [collection.to_s], depositor: operation.user.email, collection_type: Hyrax::CollectionType.find_by(title:"User Collection"))
-  end
-  def get_remote_id(value, authority: nil, property: nil)
-    return false
-    #TODO retrieve URL for this value from the specified remote authr
-  end
-  def format_param_name(name)
-    name.titleize.gsub(/\s+/, "").camelcase(:lower)
-  end
-  def schema
-    ScoobySnacks::METADATA_SCHEMA
-  end
-  def find_field_name(field)
-    operation.find_field_name(field)
-  end
-  def downcase_first_letter(str)
-    return "" unless str
-    str[0].downcase + str[1..-1]
-  end
-  def split_values value_string
-    # Split values on all un-escaped separator character (escape character is '\')
-    # Then replace all escaped separator charactors with un-escaped versions
-    value_string.split(/(?<!\\)#{SEPARATOR}/).map{|val| val.gsub("\\#{SEPARATOR}",SEPARATOR).strip}
-  end
-  def interpret_controlled_fields raw_data
-    # The labels array tracks the contents of columns marked as labels,
-    # which may require special validation
-    labels = {}
-    # This hash is populated with relevant data as we loop through the fields
-    controlled_data = {}
-    raw_data.each do |field_name, value|
-      next if value.blank?  or field_name.blank?
-      field_name = field_name.to_s
-      #If our CSV interpreter is feeding us the headers as a line, ignore it.
-      next if field_name == value
-      #check if they are using the 'field_name.authority' syntax
-      authority = nil
-      if ((split=field_name.split('.')).count == 2)
-        authority = split.last
-        field_name = split.first
-      end
-      # get the field name, if this column is a metadata field
-      field_name_norm = find_field_name(field_name)
-      field = schema.get_field(field_name_norm)
-      # Ignore anything that isn't a controlled field
-      next unless field.present? && field.controlled?
-      # Keep track of label fields
-      if field_name.downcase.ends_with?("label")
-        next if operation.options["ignore_labels"]
-        labels[field_name_norm] ||= []
-        labels[field_name_norm] += split_values value
-        next unless operation.options["import_labels"]
-      end
-      remove = field_name.downcase.starts_with?("remove") || field_name.downcase.starts_with?("delete")
-      # handle multiple values
-      value_array = split_values(value)
-      controlled_data[field_name_norm] ||= [] unless value_array.blank?
-      value_array.each do |value|
-        # Decide of we're dealing with a label or url
-        # It's an ID if it's a URL and the name doesn't end in 'label'
-        value.strip!
-        if value =~ /^#{URI::regexp}$/ and !field_name.downcase.ends_with?("label")
-          id = value
-#          label = WorkIndexer.fetch_remote_label(value)
-#          error_message =  "cannot fetch remote label for url: #{value}"
-#          report_error( :cannot_retrieve_label , error_message, url: value, row_number: row_number) unless label
-        else
-          # It's a label, so unescape it and get the id
-          value = unescape_csv(value)
-          id = get_remote_id(value, property: field_name_norm, authority: authority) || localAuthUrl(field_name_norm, value)
-#          label = value
-          report_error(:cannot_retrieve_url,
-                       message: "cannot find or create url for controlled vocabulary label: #{value}",
-                       url: value,
-                       row_number: row_number) unless id
-        end
-        controlled_data[field_name_norm] << {id: id, remove: field_name.downcase.starts_with?("remove")}
-      end
-    end
-    #delete any duplicates (if someone listed a url and also its label, or the same url twice)
-    controlled_data.each{|field_name, values| controlled_data[field_name] = values.uniq }
-    # Actually add all the data
-    metadata = {}
-    leftover_data = raw_data.dup.to_hash
-    controlled_data.each do |property_name, data|
-      metadata["#{property_name}_attributes"] ||= [] unless data.blank?
-      data.each do |datum|
-        atts = {"id" => datum[:id]}
-        atts["_delete"] = true if datum[:remove]
-        metadata["#{property_name}_attributes"] << atts
-        leftover_data.except! property_name
-      end
-    end
-    #return [metadata, leftover_data]
-    return metadata
-  end
-  def interpret_scalar_fields raw_data
-    metadata = {}
-    raw_data.each do |field, values|
-      next if values.blank? or field.nil? or field == values
-      # get the field name, if this column is a metadata field
-      next unless field_name = find_field_name(field.to_s)
-      field = schema.get_field(field_name)
-      # Ignore controlled fields
-      next if field.controlled?
-      split_values(values).each do |value|
-        next if value.blank?
-        value = value.strip.encode('utf-8', :invalid => :replace, :undef => :replace, :replace => '_') unless value.blank?
-        value = unescape_csv(value)
-        (metadata[field_name] ||= []) << value
-      end
-    end
-    return metadata
-  end
-  def interpret_file_fields raw_data
-    # This method handles file additions and deletions from the spreadsheet
-    # if additional files need to be deleted because the update is set to replace
-    # some or all existing files, those replacement-related deletions are handled
-    # by the BulkOps::Operation.
-    #
-    # TODO: THIS DOES NOT YET MANAGE THE ORDER OF INGESTED FILESETS
-    metadata = {}
-    raw_data.each do |field, value|
-      next if value.blank?  or field.blank?
-      field = field.to_s
-      #If our CSV interpreter is feeding us the headers as a line, ignore it.
-      next if field == value
-      # Check if this is a file field, and whether we are removing or adding a file
-      next unless (action = is_file_field?(field))
-      # Move on if this field is the name of another property (e.g. masterFilename)
-      next if find_field_name(field)
-      # Check if we are removing a file
-      if action == "remove"
-        get_removed_filesets(value).each { |fileset_id| delete_file_set(file_set_id) }
-      else
-        # Add a file
-        operation.get_file_paths(value).each do |filepath|
-          begin
-            uploaded_file = Hyrax::UploadedFile.create(file:  File.open(filepath), user: operation.user)
-            (metadata[:uploaded_files] ||= []) << uploaded_file.id unless uploaded_file.id.nil?
-          rescue Exception => e
-            report_error(:upload_error,
-                         message: "Error opening file: #{ filepath } -- #{e}",
-                         file: File.join(BulkOps::Operation::INGEST_MEDIA_PATH,filename),
-                         row_number: row_number)
-          end
-        end
-      end
-    end
-    return metadata
-  end
-  def interpret_option_fields raw_data
-    raw_data.each do |field,value|
-      next if value.blank? or field.blank?
-      field = field.to_s
-      next if value == field
-      normfield = field.downcase.parameterize.gsub(/[_\s-]/,'')
-      if ["visibility", "public"].include?(normfield)
-        update(visibility: format_visibility(value))
-      end
-      if ["worktype","model","type"].include?(normfield)
-        update(work_type: format_worktype(value) )
-      end
-      if ["referenceidentifier",
-          "referenceid",
-          "refid",
-          "referenceidentifiertype",
-          "referenceidtype",
-          "refidtype",
-          "relationshipidentifier",
-          "relationshipid",
-          "relationshipidentifiertype",
-          "relationshipidtype",
-          "relid",
-          "relidtype"].include?(normfield)
-        update(reference_identifier: format_reference_id(value))
-      end
-    end
-    return {}
-  end
-  def interpret_relationship_fields(raw_data)
-    metadata = {}
-    raw_data.each do |field,value|
-      next if value.blank?  or field.blank?
-      field = field.to_s
-      value = unescape_csv(value)
-      identifer_type = reference_identifier
-      next if value == field
-      if (split = field.split(":")).count == 2
-        identifier_type = split.last
-        relationship_type = split.first.to_s
-      else
-        relationship_type = field
-      end
-      relationship_type = normalize_relationship_field_name(relationship_type)
-      case relationship_type
-      when "order"
-         # If the field specifies the object's order among siblings
-        update(order: value.to_f)
-        next
-      when "collection"
-        # If the field specifies the name or ID of a collection,
-        # find or create the collection and update the metadata to match
-        col = find_or_create_collection(value)
-        ( metadata[:member_of_collection_ids] ||= [] ) << col.id if col
-        next
-      when "parent", "child"
-        # correctly interpret the notation "id:a78C2d81"
-        identifier_type, object_identifier = interpret_relationship_value(identifier_type, value)
-        relationship_parameters =  { work_proxy_id: id,
-                                     identifier_type: identifier_type,
-                                     relationship_type: relationship_type,
-                                     object_identifier: object_identifier,
-                                     status: "new"}
-        #add previous sibling link if necessary
-        previous_value = operation.final_spreadsheet[row_number-1][field]
-        # Check if this is a parent relationship, and the previous row also has one
-        if previous_value.present? && (relationship_type == "parent")
-          # Check if the previous row has the same parent as this row
-          if object_identifier == interpret_relationship_value(identifier_type, previous_value, field).last
-            # If so, set the previous sibling parameter on the relationshp
-            #    to the id for the proxy associated with the previous row
-            relationship_parameters[:previous_sibling] = operation.work_proxies.find_by(row_number: row_number-1).id
-          end
-        end
-        BulkOps::Relationship.create(relationship_parameters)
-      end
-      return metadata
-    end
-  end
-  def normalize_relationship_field_name field
-    normfield = field.downcase.parameterize.gsub(/[_\s-]/,'')
-    RELATIONSHIP_FIELDS.find{|field| normfield.include?(field) }
-  end
-  def find_previous_parent field="parent"
-    #Return the row number of the most recent preceding row that does
-    # not itself have a parent defined
-    i = 0;
-    while (prev_row = operation.final_spreadsheet[row_number - i])
-      return (row_number - i) if prev_row[field].blank?
-    end
-  end
-  def interpret_relationship_value id_type, value, field="parent"
-    #Handle "id:20kj4259" syntax if it hasn't already been handled
-    if (split = value.to_s.split(":")).count == 2
-      id_type = split.first
-      value = split.last
-    end
-    #Handle special shorthand syntax for refering to relative row numbers
-    if id_type == "row"
-      if value.to_i < 0
-        # if given a negative integer, count backwards from the current row
-        return [id_type,row_number - value]
-      elsif value.to_s.downcase.include?("prev")
-        # if given any variation of the word "previous", get the first preceding row with no parent of its own
-        return [id_type,find_previous_parent(field)]
-      end
-    end
-    return [id_type,value]
-  end
-  def unescape_csv(value)
-    value.gsub(/\\(['";,])/,'\1')
-  end
-  def format_worktype(value)
-    # format the value like a class name
-    type = value.titleize.gsub(/[-_\s]/,'')
-    # reject it if it isn't a defined class
-    type = false unless Object.const_defined? type
-    # fall back to the work type defined by the operation, or a standard "Work"
-    return type ||= operation.work_type || "Work"
-  end
-  def format_visibility(value)
-    case value.downcase
-    when "public", "open", "true"
-      return "open"
-    when "campus", "ucsc", "institution"
-      return "ucsc"
-    when "restricted", "private", "closed", "false"
-      return "restricted"
-    end
-  end
-  def mintLocalAuthUrl(auth_name, value)
-    value.strip!
-    id = value.parameterize
-    auth = Qa::LocalAuthority.find_or_create_by(name: auth_name)
-    entry = Qa::LocalAuthorityEntry.create(local_authority: auth,
-                                           label: value,
-                                           uri: id)
-    return localIdToUrl(id,auth_name)
-  end
-  def findAuthUrl(auth, value)
-    value.strip!
-    return nil if auth.nil?
-    return nil unless (entries = Qa::Authorities::Local.subauthority_for(auth).search(value))
-    entries.each do |entry|
-      #require exact match
-      next unless entry["label"].force_encoding('UTF-8') == value.force_encoding('UTF-8')
-      url = entry["url"] || entry["id"]
-#      url = localIdToUrl(url,auth) unless url =~ URI::regexp
-      return url
-    end
-    return nil
-  end
-  def localIdToUrl(id,auth_name)
-    root_urls = {production: "https://digitalcollections.library.ucsc.edu",
-                 staging: "http://digitalcollections-staging.library.ucsc.edu",
-                 development: "http://#{Socket.gethostname}",
-                 test: "http://#{Socket.gethostname}"}
-    return "#{root_urls[Rails.env.to_sym]}/authorities/show/local/#{auth_name}/#{id}"
-  end
-  def getLocalAuth(field_name)
-    field =  schema.get_property(field_name)
-    # There is only ever one local authority per field, so just pick the first you find
-    if vocs = field.vocabularies
-      vocs.each do |voc|
-        return voc["subauthority"] if voc["authority"].downcase == "local"
-      end
-    end
-    return nil
-  end
-  def setAdminSet metadata
-    return metadata if metadata[:admin_set_id]
-    asets = AdminSet.where({title: "Bulk Ingest Set"})
-    asets = AdminSet.find('admin_set/default') if asets.blank?
-    metadata[:admin_set_id] = Array(asets).first.id unless asets.blank?
-    return metadata
-  end
-  def setMetadataInheritance metadata
-    return metadata if metadata[:metadataInheritance].present?
-    metadata[:metadataInheritance] = operation.options["metadataInheritance"] unless operation.options["metadataInheritance"].blank?
-    return metadata
-  end
-  def report_error type, message, **args
-    puts "ERROR MESSAGE: #{message}"
-    update(status: "error", message: message)
-    args[:type]=type
-    (@proxy_errors ||= []) <<  BulkOps::Error.new(**args)
-  end
-  def filename_prefix
-    @filename_prefix ||= operation.filename_prefix
-  end
-  def record_exists?
-    operation.record_exists? work_id
-  end
-  def get_removed_filesets(filestring)
-    file_ids = split_values(filestring)
-    file_ids.select{|file_id| record_exists?(file_id)}
-# This part handles filenames in addition to file ids. It doesn't work yet!
-#    file_ids.map do |file_id|
-      # If the filename is the id of an existing record, keep that
-#      next(file_id) if (record_exists?(file_id))
-      # If this is the label (i.e.filename) of an existing fileset, use that fileset id
-      # TODO MAKE THIS WORK!!
-#      next(filename) if (filename_exists?(filename))
-#      File.join(BulkOps::Operation::INGEST_MEDIA_PATH, filename_prefix, filename)
-#    end
-  end
-  def delete_file_set fileset_id
-    BulkOps::DeleteFileSetJob.perform_later(fileset_id, operation.user.email )
-  end
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: bulk_ops
 version: !ruby/object:Gem::Version
-  version: 0.1.14
+  version: 0.1.15
 platform: ruby
 authors:
 - Ned Henry, UCSC Library Digital Initiatives
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-10-02 00:00:00.000000000 Z
+date: 2019-10-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rails
@@ -106,6 +106,7 @@ files:
 - lib/bulk_ops/github_access.rb
 - lib/bulk_ops/github_credential.rb
 - lib/bulk_ops/operation.rb
+- lib/bulk_ops/parser.rb
 - lib/bulk_ops/queue_work_ingests_job.rb
 - lib/bulk_ops/relationship.rb
 - lib/bulk_ops/search_builder_behavior.rb