bulk_ops 0.1.14 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/bulk_ops.rb +34 -3
- data/lib/bulk_ops/create_spreadsheet_job.rb +1 -1
- data/lib/bulk_ops/github_access.rb +7 -11
- data/lib/bulk_ops/operation.rb +10 -29
- data/lib/bulk_ops/parser.rb +485 -0
- data/lib/bulk_ops/verification.rb +9 -9
- data/lib/bulk_ops/version.rb +1 -1
- data/lib/bulk_ops/work_proxy.rb +0 -459
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: fea513373c0ae0267f9302311300b8f4ba03b9fa632db168aec201c2f8359182
+  data.tar.gz: baa0fe9b67bfbe7d2f8283ff7949cb8ec46e268c7e15ef17c7b73b9c3a80ef19
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 33810a935cc44ee6de4448a12e37d4c0889b6a4c7d409011fc5dd9d0bddc18e1a53f0f18337c933ab3dd6903d4112b0a968579f20e7e204d4278220c0dbb0315
+  data.tar.gz: b7ff43aed578a7aba0cb59d0862af6d1ffe7f50eccce6715171063a09e1edf2670e3d23333b4e506ff3d473ff6dbed56f672ff66b2f43209e58e67950706072a
data/lib/bulk_ops.rb
CHANGED
@@ -1,6 +1,39 @@
 require "bulk_ops/version"
 
 module BulkOps
+  OPTION_FIELDS = ['visibility','work type']
+  RELATIONSHIP_FIELDS = ['parent','child','collection','order']
+  REFERENCE_IDENTIFIER_FIELDS = ['Reference Identifier','ref_id','Reference ID','Relationship ID','Relationship Identifier','Reference Identifier Type','Reference ID Type','Ref ID Type','relationship_identifier_type','relationship_id_type']
+  FILE_FIELDS = ['file','files','filename','filenames']
+  FILE_ACTIONS = ['add','upload','remove','delete']
+  SEPARATOR = ';'
+  DEFAULT_ADMIN_SET_TITLE = "Bulk Ingest Set"
+  INGEST_MEDIA_PATH = "/dams_ingest"
+  TEMPLATE_DIR = "lib/bulk_ops/templates"
+  RELATIONSHIP_COLUMNS = ["parent","child","next"]
+  SPECIAL_COLUMNS = ["parent",
+                     "child",
+                     "order",
+                     "next",
+                     "work_type",
+                     "collection",
+                     "collection_title",
+                     "collection_id",
+                     "visibility",
+                     "relationship_identifier_type",
+                     "id",
+                     "filename",
+                     "file"]
+  IGNORED_COLUMNS = ["ignore","offline_notes"]
+  OPTION_REQUIREMENTS = {type: {required: true,
+                                values:[:ingest,:update]},
+                         file_method: {required: :true,
+                                       values: [:replace_some,:add_remove,:replace_all]},
+                         notifications: {required: true}}
+  SPREADSHEET_FILENAME = 'metadata.csv'
+  OPTIONS_FILENAME = 'configuration.yml'
+  ROW_OFFSET = 2
+
   dirstring = File.join( File.dirname(__FILE__), 'bulk_ops/**/*.rb')
   Dir[dirstring].each do |file|
     begin
@@ -9,7 +42,5 @@ module BulkOps
       puts "ERROR LOADING #{File.basename(file)}: #{e}"
     end
   end
-
-  # require 'bulk_ops/verification'
-  # require 'bulk_ops/work_proxy'
+
 end
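Note: the constants that were previously redefined inside BulkOps::Operation, BulkOps::GithubAccess, and BulkOps::WorkProxy now live once at the module level, so every call site reaches them through the BulkOps:: namespace. A minimal sketch of the new lookup style (illustrative only; not code from the release):

    require 'bulk_ops'

    BulkOps::SEPARATOR             #=> ';'
    BulkOps::SPREADSHEET_FILENAME  #=> 'metadata.csv'
    BulkOps::ROW_OFFSET            #=> 2
    # multi-valued spreadsheet cells split on the shared separator:
    'dogs;cats'.split(BulkOps::SEPARATOR)  #=> ["dogs", "cats"]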
data/lib/bulk_ops/create_spreadsheet_job.rb
CHANGED
@@ -36,7 +36,7 @@ class BulkOps::CreateSpreadsheetJob < ActiveJob::Base
         next if value.is_a? DateTime
         value = (label ? WorkIndexer.fetch_remote_label(value.id) : value.id) unless value.is_a? String
         value.gsub("\"","\"\"")
-      end.join(BulkOps::
+      end.join(BulkOps::SEPARATOR).prepend('"').concat('"')
     end.join(',')
   end
 
data/lib/bulk_ops/github_access.rb
CHANGED
@@ -5,10 +5,6 @@ require 'base64'
 
 class BulkOps::GithubAccess
 
-  ROW_OFFSET = 2
-  SPREADSHEET_FILENAME = 'metadata.csv'
-  OPTIONS_FILENAME = 'configuration.yml'
-
   attr_accessor :name
 
   def self.auth_url user
@@ -142,11 +138,11 @@ class BulkOps::GithubAccess
   def add_new_spreadsheet file, message=false
     if file.is_a? Tempfile
       file.close
-      add_file file.path, SPREADSHEET_FILENAME, message: message
+      add_file file.path, BulkOps::SPREADSHEET_FILENAME, message: message
     elsif file.is_a?(String) && File.file?(file)
-      add_file file, SPREADSHEET_FILENAME, message: message
+      add_file file, BulkOps::SPREADSHEET_FILENAME, message: message
     elsif file.is_a? String
-      add_contents(spreadsheet_path, SPREADSHEET_FILENAME, message: message)
+      add_contents(spreadsheet_path, BulkOps::SPREADSHEET_FILENAME, message: message)
     end
   end
 
@@ -218,12 +214,12 @@ class BulkOps::GithubAccess
 
   def get_metadata_row row_number
     @current_metadata ||= load_metadata
-    @current_metadata[row_number - ROW_OFFSET]
+    @current_metadata[row_number - BulkOps::ROW_OFFSET]
   end
 
   def get_past_metadata_row commit_sha, row_number
     past_metadata = Base64.decode64( client.contents(repo, path: filename, ref: commit_sha) )
-    past_metadata[row_number - ROW_OFFSET]
+    past_metadata[row_number - BulkOps::ROW_OFFSET]
   end
 
   def get_file filename
@@ -244,13 +240,13 @@ class BulkOps::GithubAccess
   end
 
   def spreadsheet_path
-    "#{name}/#{SPREADSHEET_FILENAME}"
+    "#{name}/#{BulkOps::SPREADSHEET_FILENAME}"
   end
 
   private
 
   def options_path
-    "#{name}/#{OPTIONS_FILENAME}"
+    "#{name}/#{BulkOps::OPTIONS_FILENAME}"
   end
 
   def current_master_commit_sha
data/lib/bulk_ops/operation.rb
CHANGED
@@ -7,33 +7,10 @@ module BulkOps
 
   include BulkOps::Verification
 
-  attr_accessor :work_type, :visibility, :reference_identifier
+  attr_accessor :work_type, :visibility, :reference_identifier, :metadata
 
   delegate :can_merge?, :merge_pull_request, to: :git
 
-  INGEST_MEDIA_PATH = "/dams_ingest"
-  TEMPLATE_DIR = "lib/bulk_ops/templates"
-  RELATIONSHIP_COLUMNS = ["parent","child","next"]
-  SPECIAL_COLUMNS = ["parent",
-                     "child",
-                     "order",
-                     "next",
-                     "work_type",
-                     "collection",
-                     "collection_title",
-                     "collection_id",
-                     "visibility",
-                     "relationship_identifier_type",
-                     "id",
-                     "filename",
-                     "file"]
-  IGNORED_COLUMNS = ["ignore","offline_notes"]
-  OPTION_REQUIREMENTS = {type: {required: true,
-                                values:[:ingest,:update]},
-                         file_method: {required: :true,
-                                       values: [:replace_some,:add_remove,:replace_all]},
-                         notifications: {required: true}}
-
   def self.unique_name name, user
     while BulkOps::Operation.find_by(name: name) || BulkOps::GithubAccess.list_branch_names(user).include?(name) do
       if ['-','_'].include?(name[-2]) && name[-1].to_i > 0
@@ -119,7 +96,7 @@ module BulkOps
     @metadata.each_with_index do |values,row_number|
       proxy = work_proxies.find_by(row_number: row_number)
       proxy.update(message: "interpreted at #{DateTime.now.strftime("%d/%m/%Y %H:%M")} " + proxy.message)
-      data = proxy.interpret_data values
+      data = BulkOps::Parser.new(proxy, @metadata).interpret_data(raw_row: values)
       next unless proxy.proxy_errors.blank?
       BulkOps::CreateWorkJob.perform_later(proxy.work_type || "Work",
                                            user.email,
@@ -202,7 +179,7 @@ module BulkOps
 
     #loop through the work proxies to create a job for each work
     work_proxies.each do |proxy|
-      data = proxy.interpret_data final_spreadsheet[proxy.row_number]
+      data = BulkOps::Parser.new(proxy,final_spreadsheet).interpret_data(raw_row: final_spreadsheet[proxy.row_number])
       BulkOps::UpdateWorkJob.perform_later(proxy.work_type || "",
                                            user.email,
                                            data,
@@ -238,13 +215,13 @@ module BulkOps
     bulk_ops_dir = Gem::Specification.find_by_name("bulk_ops").gem_dir
 
     #copy template files
-    Dir["#{bulk_ops_dir}/#{TEMPLATE_DIR}/*"].each do |file|
+    Dir["#{bulk_ops_dir}/#{BulkOps::TEMPLATE_DIR}/*"].each do |file|
       git.add_file file
     end
 
     #update configuration options
     unless options.blank?
-      full_options = YAML.load_file(File.join(bulk_ops_dir,TEMPLATE_DIR, BulkOps::
+      full_options = YAML.load_file(File.join(bulk_ops_dir,BulkOps::TEMPLATE_DIR, BulkOps::OPTIONS_FILENAME))
 
       options.each { |option, value| full_options[option] = value }
 
@@ -278,6 +255,10 @@ module BulkOps
     git.update_options(options, message: message)
   end
 
+  def metadata
+    @metadata ||= git.load_metadata
+  end
+
   def options
     return {} if name.nil?
     return @options if @options
@@ -332,7 +313,7 @@ module BulkOps
   end
 
   def ignored_fields
-    (options['ignored headers'] || []) + IGNORED_COLUMNS
+    (options['ignored headers'] || []) + BulkOps::IGNORED_COLUMNS
   end
 
 
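As the operation.rb hunks above show, per-row interpretation moved off of the work proxy and into the new BulkOps::Parser class. A hedged sketch of the new calling convention, reusing the names from the diff (proxy and metadata stand in for a real work proxy and a loaded spreadsheet):

    # each spreadsheet row is interpreted by a parser bound to its proxy;
    # the second argument defaults to proxy.operation.metadata when omitted
    parser = BulkOps::Parser.new(proxy, metadata)
    data   = parser.interpret_data(raw_row: metadata[proxy.row_number])
    # data is the attribute hash handed to CreateWorkJob / UpdateWorkJob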
data/lib/bulk_ops/parser.rb
ADDED
@@ -0,0 +1,485 @@
+class BulkOps::Parser
+  require 'uri'
+
+  attr_accessor :proxy, :raw_data, :raw_row
+
+  delegate :relationships, :operation, :row_number, :work_id, :visibility, :work_type, :reference_identifier, :order, to: :proxy
+
+  def initialize prx, metadata_sheet=nil
+    @proxy = prx
+    @raw_data = (metadata_sheet || proxy.operation.metadata)
+    @raw_row = @raw_data[@proxy.row_number].dup
+    @metadata = {}
+    @parsing_errors = []
+  end
+
+  def interpret_data raw_row: nil, raw_data: nil, proxy: nil
+    @raw_row = raw_row if raw_row.present?
+    @proxy = proxy if proxy.present?
+    @raw_data = raw_data if raw_data.present?
+    setAdminSet
+    setMetadataInheritance
+    interpret_option_fields
+    interpret_relationship_fields
+    disambiguate_columns
+    interpret_file_fields
+    interpret_controlled_fields
+    interpret_scalar_fields
+    @proxy.update(status: "ERROR", message: "error parsing spreadsheet line") if @parsing_errors.present?
+    @proxy.proxy_errors = (@proxy.proxy_errors || []) + @parsing_errors
+    return @metadata
+  end
+
+  def disambiguate_columns
+    #do nothing unless there are columns with the same header
+    return unless (@raw_row.respond_to?(:headers) && (@raw_row.headers.uniq.length < @raw_row.length) )
+    row = {}
+    (0...@raw_row.length).each do |i|
+      header = @raw_row.headers[i]
+      value = @raw_row[i]
+      # separate values in identical columns using the separator
+      row[header] = (Array(row[header]) << value).join(BulkOps::SEPARATOR)
+    end
+    #return a hash with identical columns merged
+    return row
+  end
+
+  def interpret_controlled_fields
+
+    # The labels array tracks the contents of columns marked as labels,
+    # which may require special validation
+    labels = {}
+
+    # This hash is populated with relevant data as we loop through the fields
+    controlled_data = {}
+
+    row = @raw_row.dup
+    @raw_row.each do |field_name, value|
+      next if value.blank? or field_name.blank?
+      field_name = field_name.to_s
+
+      #If our CSV interpreter is feeding us the headers as a line, ignore it.
+      next if field_name == value
+
+      #check if they are using the 'field_name.authority' syntax
+      authority = nil
+      if ((split=field_name.split('.')).count == 2)
+        authority = split.last
+        field_name = split.first
+      end
+
+      # get the field name, if this column is a metadata field
+      field_name_norm = find_field_name(field_name)
+      field = schema.get_field(field_name_norm)
+
+      # Ignore anything that isn't a controlled field
+      next unless field.present? && field.controlled?
+
+      # Keep track of label fields
+      if field_name.downcase.ends_with?("label")
+        next if operation.options["ignore_labels"]
+        labels[field_name_norm] ||= []
+        labels[field_name_norm] += split_values value
+        next unless operation.options["import_labels"]
+      end
+
+      remove = field_name.downcase.starts_with?("remove") || field_name.downcase.starts_with?("delete")
+
+      # handle multiple values
+      value_array = split_values(value)
+      controlled_data[field_name_norm] ||= [] unless value_array.blank?
+      value_array.each do |value|
+        # Decide of we're dealing with a label or url
+        # It's an ID if it's a URL and the name doesn't end in 'label'
+        value.strip!
+        if value =~ /^#{URI::regexp}$/ and !field_name.downcase.ends_with?("label")
+          value_id = value
+          # label = WorkIndexer.fetch_remote_label(value)
+          # error_message = "cannot fetch remote label for url: #{value}"
+          # report_error( :cannot_retrieve_label , error_message, url: value, row_number: row_number) unless label
+        else
+          # It's a label, so unescape it and get the id
+          value = unescape_csv(value)
+          value_id = get_remote_id(value, property: field_name_norm, authority: authority) || localAuthUrl(field_name_norm, value)
+          # label = value
+          report_error(:cannot_retrieve_url,
+                       message: "cannot find or create url for controlled vocabulary label: #{value}",
+                       url: value,
+                       row_number: row_number) unless value_id
+        end
+        controlled_data[field_name_norm] << {id: value_id, remove: field_name.downcase.starts_with?("remove")}
+        row.delete(field_name)
+      end
+    end
+    @raw_row = row
+
+    # Actually add all the data
+    controlled_data.each do |property_name, data|
+      @metadata["#{property_name}_attributes"] ||= [] unless data.blank?
+      data.uniq.each do |datum|
+        atts = {"id" => datum[:id]}
+        atts["_delete"] = true if datum[:remove]
+        @metadata["#{property_name}_attributes"] << atts
+      end
+    end
+  end
+
+  def interpret_scalar_fields
+    row = @raw_row.dup
+    @raw_row.each do |field, values|
+      next if values.blank? or field.nil? or field == values
+      # get the field name, if this column is a metadata field
+      next unless field_name = find_field_name(field.to_s)
+      field = schema.get_field(field_name)
+      # Ignore controlled fields
+      next if field.controlled?
+      split_values(values).each do |value|
+        next if value.blank?
+        value = value.strip.encode('utf-8', :invalid => :replace, :undef => :replace, :replace => '_') unless value.blank?
+        value = unescape_csv(value)
+        (@metadata[field_name] ||= []) << value
+        row.delete(field)
+      end
+    end
+    @raw_row = row
+  end
+
+  def interpret_file_fields
+    # This method handles file additions and deletions from the spreadsheet
+    # if additional files need to be deleted because the update is set to replace
+    # some or all existing files, those replacement-related deletions are handled
+    # by the BulkOps::Operation.
+    #
+    # TODO: THIS DOES NOT YET MANAGE THE ORDER OF INGESTED FILESETS
+
+    row = @raw_row.dup
+    @raw_row.each do |field, value|
+      next if value.blank? or field.blank?
+      field = field.to_s
+      #If our CSV interpreter is feeding us the headers as a line, ignore it.
+      next if field == value
+
+
+      # Check if this is a file field, and whether we are removing or adding a file
+      next unless (action = is_file_field?(field))
+
+      # Move on if this field is the name of another property (e.g. masterFilename)
+      next if find_field_name(field)
+
+      # Check if we are removing a file
+      if action == "remove"
+        get_removed_filesets(value).each { |fileset_id| delete_file_set(file_set_id) }
+      else
+        # Add a file
+        operation.get_file_paths(value).each do |filepath|
+          begin
+            uploaded_file = Hyrax::UploadedFile.create(file: File.open(filepath), user: operation.user)
+            (@metadata[:uploaded_files] ||= []) << uploaded_file.id unless uploaded_file.id.nil?
+            row.delete(field)
+          rescue Exception => e
+            report_error(:upload_error,
+                         message: "Error opening file: #{ filepath } -- #{e}",
+                         file: File.join(BulkOps::INGEST_MEDIA_PATH,filename),
+                         row_number: row_number)
+          end
+        end
+      end
+    end
+    @raw_row = row
+  end
+
+  def interpret_option_fields
+    row = @raw_row.dup
+    @raw_row.each do |field,value|
+      next if value.blank? or field.blank?
+      field = field.to_s
+      next if value == field
+
+      normfield = field.downcase.parameterize.gsub(/[_\s-]/,'')
+      if ["visibility", "public"].include?(normfield)
+        @proxy.update(visibility: format_visibility(value))
+        row.delete(field)
+      end
+      if ["worktype","model","type"].include?(normfield)
+        @proxy.update(work_type: format_worktype(value) )
+        row.delete(field)
+      end
+      if ["referenceidentifier",
+          "referenceid",
+          "refid",
+          "referenceidentifiertype",
+          "referenceidtype",
+          "refidtype",
+          "relationshipidentifier",
+          "relationshipid",
+          "relationshipidentifiertype",
+          "relationshipidtype",
+          "relid",
+          "relidtype"].include?(normfield)
+        @proxy.update(reference_identifier: format_reference_id(value))
+        row.delete(field)
+      end
+    end
+    @raw_row = row
+  end
+
+  def interpret_relationship_fields
+    row = @raw_row.dup
+    @raw_row.each do |field,value|
+      next if value.blank? or field.blank?
+      field = field.to_s
+      value = unescape_csv(value)
+      identifer_type = reference_identifier
+
+      next if value == field
+
+      # Correctly interpret the notation "parent:id", "parent id" etc in a column header
+      if (split = field.split(/[:_\-\s]/)).count == 2
+        identifier_type = split.last
+        relationship_type = split.first.to_s
+      else
+        relationship_type = field
+      end
+
+      relationship_type = normalize_relationship_field_name(relationship_type)
+      case relationship_type
+      when "order"
+        # If the field specifies the object's order among siblings
+        @proxy.update(order: value.to_f)
+        row.delete(field)
+        next
+      when "collection"
+        # If the field specifies the name or ID of a collection,
+        # find or create the collection and update the metadata to match
+        col = find_or_create_collection(value)
+        ( @metadata[:member_of_collection_ids] ||= [] ) << col.id if col
+        row.delete field
+        next
+      when "parent", "child"
+
+        # correctly interpret the notation "id:a78C2d81"
+        identifier_type, object_identifier = interpret_relationship_value(identifier_type, value)
+
+        relationship_parameters = { work_proxy_id: @proxy.id,
+                                    identifier_type: identifier_type,
+                                    relationship_type: relationship_type,
+                                    object_identifier: object_identifier,
+                                    status: "new"}
+
+        #add previous sibling link if necessary
+        previous_value = @raw_data[row_number-1][field]
+        # Check if this is a parent relationship, and the previous row also has one
+        if previous_value.present? && (relationship_type == "parent")
+          # Check if the previous row has the same parent as this row
+          if object_identifier == interpret_relationship_value(identifier_type, previous_value, field).last
+            # If so, set the previous sibling parameter on the relationshp
+            # to the id for the proxy associated with the previous row
+            relationship_parameters[:previous_sibling] = operation.work_proxies.find_by(row_number: row_number-1).id
+          end
+        end
+        BulkOps::Relationship.create(relationship_parameters)
+        row.delete field
+      end
+    end
+    @raw_row = row
+  end
+
+  def normalize_relationship_field_name field
+    normfield = field.downcase.parameterize.gsub(/[_\s-]/,'')
+    BulkOps::RELATIONSHIP_FIELDS.find{|rel_field| normfield == rel_field }
+  end
+
+  def find_previous_parent field="parent"
+    #Return the row number of the most recent preceding row that does
+    # not itself have a parent defined
+    i = 1;
+    while (prev_row = raw_data[row_number - i])
+      return (row_number - i) if prev_row[field].blank?
+      i += 1
+    end
+  end
+
+  def interpret_relationship_value id_type, value, field="parent"
+    #Handle "id:20kj4259" syntax if it hasn't already been handled
+    if (split = value.to_s.split(":")).count == 2
+      id_type = split.first
+      value = split.last
+    end
+    #Handle special shorthand syntax for refering to relative row numbers
+    if id_type == "row"
+      if value.to_i < 0
+        # if given a negative integer, count backwards from the current row (remember that value.to_i is negative)
+        return [id_type,row_number + value.to_i]
+      elsif value.to_s.downcase.include?("prev")
+        # if given any variation of the word "previous", get the first preceding row with no parent of its own
+        return [id_type,find_previous_parent(field)]
+      end
+    end
+    return [id_type,value]
+  end
+
+  def unescape_csv(value)
+    value.gsub(/\\(['";,])/,'\1')
+  end
+
+
+  def format_worktype(value)
+    # format the value like a class name
+    type = value.titleize.gsub(/[-_\s]/,'')
+    # reject it if it isn't a defined class
+    type = false unless Object.const_defined? type
+    # fall back to the work type defined by the operation, or a standard "Work"
+    return type ||= work_type || operation.work_type || "Work"
+  end
+
+  def format_visibility(value)
+    case value.downcase
+    when "public", "open", "true"
+      return "open"
+    when "campus", "ucsc", "institution"
+      return "ucsc"
+    when "restricted", "private", "closed", "false"
+      return "restricted"
+    end
+  end
+
+
+  def mintLocalAuthUrl(auth_name, value)
+    value.strip!
+    id = value.parameterize
+    auth = Qa::LocalAuthority.find_or_create_by(name: auth_name)
+    entry = Qa::LocalAuthorityEntry.create(local_authority: auth,
+                                           label: value,
+                                           uri: id)
+    return localIdToUrl(id,auth_name)
+  end
+
+  def findAuthUrl(auth, value)
+    value.strip!
+    return nil if auth.nil?
+    return nil unless (entries = Qa::Authorities::Local.subauthority_for(auth).search(value))
+    entries.each do |entry|
+      #require exact match
+      next unless entry["label"].force_encoding('UTF-8') == value.force_encoding('UTF-8')
+      url = entry["url"] || entry["id"]
+      # url = localIdToUrl(url,auth) unless url =~ URI::regexp
+      return url
+    end
+    return nil
+  end
+
+  def localIdToUrl(id,auth_name)
+    root_urls = {production: "https://digitalcollections.library.ucsc.edu",
+                 staging: "http://digitalcollections-staging.library.ucsc.edu",
+                 development: "http://#{Socket.gethostname}",
+                 test: "http://#{Socket.gethostname}"}
+    return "#{root_urls[Rails.env.to_sym]}/authorities/show/local/#{auth_name}/#{id}"
+  end
+
+  def getLocalAuth(field_name)
+    field = schema.get_property(field_name)
+    # There is only ever one local authority per field, so just pick the first you find
+    if vocs = field.vocabularies
+      vocs.each do |voc|
+        return voc["subauthority"] if voc["authority"].downcase == "local"
+      end
+    end
+    return nil
+  end
+
+  def setAdminSet
+    return if @metadata[:admin_set_id]
+    asets = AdminSet.where({title: "Bulk Ingest Set"})
+    asets = AdminSet.find('admin_set/default') if asets.blank?
+    @metadata[:admin_set_id] = Array(asets).first.id unless asets.blank?
+  end
+
+  def setMetadataInheritance
+    return if @metadata[:metadataInheritance].present?
+    @metadata[:metadataInheritance] = operation.options["metadataInheritance"] unless operation.options["metadataInheritance"].blank?
+  end
+
+  def report_error type, message, **args
+    puts "ERROR MESSAGE: #{message}"
+    @proxy.update(status: "error", message: message)
+    args[:type]=type
+    (@parsing_errors ||= []) << BulkOps::Error.new(**args)
+  end
+
+  def get_removed_filesets(filestring)
+    file_ids = split_values(filestring)
+    file_ids.select{|file_id| record_exists?(file_id)}
+
+    # This part handles filenames in addition to file ids. It doesn't work yet!
+    # file_ids.map do |file_id|
+    # If the filename is the id of an existing record, keep that
+    # next(file_id) if (record_exists?(file_id))
+    # If this is the label (i.e.filename) of an existing fileset, use that fileset id
+    # TODO MAKE THIS WORK!!
+    # next(filename) if (filename_exists?(filename))
+    # File.join(BulkOps::INGEST_MEDIA_PATH, filename_prefix, filename)
+    # end
+  end
+
+  def delete_file_set fileset_id
+    BulkOps::DeleteFileSetJob.perform_later(fileset_id, operation.user.email )
+  end
+
+
+  def is_file_field? field
+    operation.is_file_field? field
+  end
+
+  def record_exists? id
+    operation.record_exists? id
+  end
+
+  def localAuthUrl(property, value)
+    return value if (auth = getLocalAuth(property)).nil?
+    url = findAuthUrl(auth, value) || mintLocalAuthUrl(auth,value)
+    return url
+  end
+
+  def find_collection(collection)
+    cols = Collection.where(id: collection)
+    cols += Collection.where(title: collection).select{|col| col.title.first == collection}
+    return cols.last unless cols.empty?
+    return false
+  end
+
+  def find_or_create_collection(collection)
+    col = find_collection(collection)
+    return col if col
+    return false if collection.to_i > 0
+    col = Collection.create(title: [collection.to_s], depositor: operation.user.email, collection_type: Hyrax::CollectionType.find_by(title:"User Collection"))
+  end
+
+  def get_remote_id(value, authority: nil, property: nil)
+    return false
+    #TODO retrieve URL for this value from the specified remote authr
+  end
+
+  def format_param_name(name)
+    name.titleize.gsub(/\s+/, "").camelcase(:lower)
+  end
+
+  def schema
+    ScoobySnacks::METADATA_SCHEMA
+  end
+
+  def find_field_name(field)
+    operation.find_field_name(field)
+  end
+
+  def downcase_first_letter(str)
+    return "" unless str
+    str[0].downcase + str[1..-1]
+  end
+
+  def split_values value_string
+    # Split values on all un-escaped separator character (escape character is '\')
+    # Then replace all escaped separator charactors with un-escaped versions
+    value_string.split(/(?<!\\)#{BulkOps::SEPARATOR}/).map{|val| val.gsub("\\#{BulkOps::SEPARATOR}",BulkOps::SEPARATOR).strip}
+  end
+
+end
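Taken together, the parser's helpers define the spreadsheet conventions the gem accepts. A few illustrative header and cell forms, inferred from the code above (the specific values are hypothetical):

    # 'field_name.authority' headers pick a vocabulary authority:
    #   header 'subject.lcsh'  -> field 'subject', authority 'lcsh'
    # relationship cells accept an explicit identifier type:
    #   'id:a78C2d81'   -> identifier_type 'id', object_identifier 'a78C2d81'
    #   'row:-1'        -> the work defined one row above this one
    #   'row:previous'  -> nearest preceding row with no parent of its own
    # multiple values share one cell, separated by an unescaped ';':
    #   'Dogs;Cats'     -> ["Dogs", "Cats"]   (see split_values)
    #   'AC\;DC'        -> ["AC;DC"]          (escaped separator preserved)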
data/lib/bulk_ops/verification.rb
CHANGED
@@ -35,7 +35,7 @@ module BulkOps
     return false if fieldname.blank?
     return false if schema.get_field(fieldname)
     field_parts = fieldname.underscore.humanize.downcase.gsub(/[-_]/,' ').split(" ")
-    return false unless field_parts.any?{ |field_type| BulkOps::
+    return false unless field_parts.any?{ |field_type| BulkOps::FILE_FIELDS.include?(field_type) }
     return "remove" if field_parts.any?{ |field_type| ['remove','delete'].include?(field_type) }
     return "add"
   end
@@ -46,7 +46,7 @@ module BulkOps
     name.gsub!(/[_\s-]?[lL]abel$/,'')
     name.gsub!(/^[rR]emove[_\s-]?/,'')
     name.gsub!(/^[dD]elete[_\s-]?/,'')
-    possible_fields = Work.attribute_names + schema.all_field_names
+    possible_fields = (Work.attribute_names + schema.all_field_names).uniq
     matching_fields = possible_fields.select{|pfield| pfield.gsub(/[_\s-]/,'').parameterize == name.gsub(/[_\s-]/,'').parameterize }
     return false if matching_fields.blank?
     # raise Exception "Ambiguous metadata fields!" if matching_fields.uniq.count > 1
@@ -55,8 +55,8 @@ module BulkOps
 
   def get_file_paths(filestring)
     return [] if filestring.blank?
-    filenames = filestring.split(BulkOps::
-    filenames.map { |filename| File.join(BulkOps::
+    filenames = filestring.split(BulkOps::SEPARATOR)
+    filenames.map { |filename| File.join(BulkOps::INGEST_MEDIA_PATH, options['file_prefix'] || "", filename) }
   end
 
   def record_exists? id
@@ -85,7 +85,7 @@ module BulkOps
   end
 
   def verify_configuration
-    BulkOps::
+    BulkOps::OPTION_REQUIREMENTS.each do |option_name, option_info|
       # Make sure it's present if required
       if (option_info["required"].to_s == "true") || (option_info["required"].to_s == type)
         if options[option_name].blank?
@@ -120,7 +120,7 @@ module BulkOps
       # Ignore everything marked as a label
       next if column_name_redux.ends_with? "label"
      # Ignore any column names with special meaning in hyrax
-      next if BulkOps::
+      next if BulkOps::SPECIAL_COLUMNS.any?{|col| col.downcase.parameterize.gsub(/[_\s-]/,"") == column_name_redux }
       # Ignore any columns speficied to be ignored in the configuration
       ignored = options["ignored headers"] || []
       next if ignored.any?{|col| col.downcase.parameterize.gsub(/[_\s-]/,"") == column_name_redux }
@@ -131,7 +131,7 @@ module BulkOps
   end
 
   def verify_remote_urls
-    row_offset = BulkOps::
+    row_offset = BulkOps::ROW_OFFSET.present? ? BulkOps::ROW_OFFSET : 2
     get_spreadsheet.each_with_index do |row, row_num|
       update(message: "verifying controlled vocab urls (row number #{row_num})")
       next if row_num.nil?
@@ -173,7 +173,7 @@ module BulkOps
   def get_ref_id row
     row.each do |field,value|
       next if field.blank? or value.blank? or field === value
-      next unless BulkOps::
+      next unless BulkOps::REFERENCE_IDENTIFIER_FIELDS.any?{ |ref_field| normalize_field(ref_field) == normalize_field(field) }
       return value
     end
     # No reference identifier specified in the row. Use the default for the operation.
@@ -190,7 +190,7 @@ module BulkOps
     # This is sketchy. Redo it.
     (metadata = get_spreadsheet).each do |row,row_num|
       ref_id = get_ref_id(row)
-      BulkOps::
+      BulkOps::RELATIONSHIP_COLUMNS.each do |relationship|
         next unless (obj_id = row[relationship])
         if (split = obj_id.split(':')).present? && split.count == 2
           ref_id = split[0].downcase
data/lib/bulk_ops/version.rb
CHANGED
data/lib/bulk_ops/work_proxy.rb
CHANGED
@@ -1,12 +1,5 @@
 class BulkOps::WorkProxy < ActiveRecord::Base
 
-  require 'uri'
-  OPTION_FIELDS = ['visibility','work type']
-  RELATIONSHIP_FIELDS = ['parent','child','collection','order']
-  REFERENCE_IDENTIFIER_FIELDS = ['Reference Identifier','ref_id','Reference ID','Relationship ID','Relationship Identifier','Reference Identifier Type','Reference ID Type','Ref ID Type','relationship_identifier_type','relationship_id_type']
-  FILE_FIELDS = ['file','files','filename','filenames']
-  FILE_ACTIONS = ['add','upload','remove','delete']
-  SEPARATOR = ';'
   self.table_name = "bulk_ops_work_proxies"
   belongs_to :operation, class_name: "BulkOps::Operation", foreign_key: "operation_id"
   has_many :relationships, class_name: "BulkOps::Relationship"
@@ -40,462 +33,10 @@ class BulkOps::WorkProxy < ActiveRecord::Base
     # TODO make it so people can edit the work again
   end
 
-  def interpret_data raw_data
-    admin_set = AdminSet.where(title: "Bulk Ingest Set").first || AdminSet.find(AdminSet.find_or_create_default_admin_set_id)
-    metadata = {admin_set_id: admin_set.id}
-    metadata.merge! interpret_file_fields(raw_data)
-    metadata.merge! interpret_controlled_fields(raw_data)
-    metadata.merge! interpret_scalar_fields(raw_data)
-    metadata.merge! interpret_relationship_fields(raw_data)
-    metadata.merge! interpret_option_fields(raw_data)
-    metadata = setAdminSet(metadata)
-    metadata = setMetadataInheritance(metadata)
-    return metadata
-  end
 
   def proxy_errors
     @proxy_errors ||= []
   end
 
-  private
-
-  def is_file_field? field
-    operation.is_file_field? field
-  end
-
-  def record_exists? id
-    operation.record_exists? id
-  end
-
-  def localAuthUrl(property, value)
-    return value if (auth = getLocalAuth(property)).nil?
-    url = findAuthUrl(auth, value) || mintLocalAuthUrl(auth,value)
-    return url
-  end
-
-  def find_collection(collection)
-    cols = Collection.where(id: collection)
-    cols += Collection.where(title: collection).select{|col| col.title.first == collection}
-    return cols.last unless cols.empty?
-    return false
-  end
-
-  def find_or_create_collection(collection)
-    col = find_collection(collection)
-    return col if col
-    return false if collection.to_i > 0
-    col = Collection.create(title: [collection.to_s], depositor: operation.user.email, collection_type: Hyrax::CollectionType.find_by(title:"User Collection"))
-  end
-
-  def get_remote_id(value, authority: nil, property: nil)
-    return false
-    #TODO retrieve URL for this value from the specified remote authr
-  end
-
-  def format_param_name(name)
-    name.titleize.gsub(/\s+/, "").camelcase(:lower)
-  end
-
-  def schema
-    ScoobySnacks::METADATA_SCHEMA
-  end
-
-  def find_field_name(field)
-    operation.find_field_name(field)
-  end
-
-  def downcase_first_letter(str)
-    return "" unless str
-    str[0].downcase + str[1..-1]
-  end
-
-  def split_values value_string
-    # Split values on all un-escaped separator character (escape character is '\')
-    # Then replace all escaped separator charactors with un-escaped versions
-    value_string.split(/(?<!\\)#{SEPARATOR}/).map{|val| val.gsub("\\#{SEPARATOR}",SEPARATOR).strip}
-  end
-
-  def interpret_controlled_fields raw_data
-
-    # The labels array tracks the contents of columns marked as labels,
-    # which may require special validation
-    labels = {}
-
-    # This hash is populated with relevant data as we loop through the fields
-    controlled_data = {}
-
-    raw_data.each do |field_name, value|
-      next if value.blank? or field_name.blank?
-      field_name = field_name.to_s
-
-      #If our CSV interpreter is feeding us the headers as a line, ignore it.
-      next if field_name == value
-
-      #check if they are using the 'field_name.authority' syntax
-      authority = nil
-      if ((split=field_name.split('.')).count == 2)
-        authority = split.last
-        field_name = split.first
-      end
-
-      # get the field name, if this column is a metadata field
-      field_name_norm = find_field_name(field_name)
-      field = schema.get_field(field_name_norm)
-
-      # Ignore anything that isn't a controlled field
-      next unless field.present? && field.controlled?
-
-      # Keep track of label fields
-      if field_name.downcase.ends_with?("label")
-        next if operation.options["ignore_labels"]
-        labels[field_name_norm] ||= []
-        labels[field_name_norm] += split_values value
-        next unless operation.options["import_labels"]
-      end
-
-      remove = field_name.downcase.starts_with?("remove") || field_name.downcase.starts_with?("delete")
-
-      # handle multiple values
-      value_array = split_values(value)
-      controlled_data[field_name_norm] ||= [] unless value_array.blank?
-      value_array.each do |value|
-        # Decide of we're dealing with a label or url
-        # It's an ID if it's a URL and the name doesn't end in 'label'
-        value.strip!
-        if value =~ /^#{URI::regexp}$/ and !field_name.downcase.ends_with?("label")
-          id = value
-          # label = WorkIndexer.fetch_remote_label(value)
-          # error_message = "cannot fetch remote label for url: #{value}"
-          # report_error( :cannot_retrieve_label , error_message, url: value, row_number: row_number) unless label
-        else
-          # It's a label, so unescape it and get the id
-          value = unescape_csv(value)
-          id = get_remote_id(value, property: field_name_norm, authority: authority) || localAuthUrl(field_name_norm, value)
-          # label = value
-          report_error(:cannot_retrieve_url,
-                       message: "cannot find or create url for controlled vocabulary label: #{value}",
-                       url: value,
-                       row_number: row_number) unless id
-        end
-        controlled_data[field_name_norm] << {id: id, remove: field_name.downcase.starts_with?("remove")}
-      end
-    end
-
-    #delete any duplicates (if someone listed a url and also its label, or the same url twice)
-    controlled_data.each{|field_name, values| controlled_data[field_name] = values.uniq }
-
-    # Actually add all the data
-    metadata = {}
-    leftover_data = raw_data.dup.to_hash
-    controlled_data.each do |property_name, data|
-      metadata["#{property_name}_attributes"] ||= [] unless data.blank?
-      data.each do |datum|
-        atts = {"id" => datum[:id]}
-        atts["_delete"] = true if datum[:remove]
-        metadata["#{property_name}_attributes"] << atts
-        leftover_data.except! property_name
-      end
-    end
-    #return [metadata, leftover_data]
-    return metadata
-  end
-
-  def interpret_scalar_fields raw_data
-    metadata = {}
-    raw_data.each do |field, values|
-      next if values.blank? or field.nil? or field == values
-      # get the field name, if this column is a metadata field
-      next unless field_name = find_field_name(field.to_s)
-      field = schema.get_field(field_name)
-      # Ignore controlled fields
-      next if field.controlled?
-      split_values(values).each do |value|
-        next if value.blank?
-        value = value.strip.encode('utf-8', :invalid => :replace, :undef => :replace, :replace => '_') unless value.blank?
-        value = unescape_csv(value)
-        (metadata[field_name] ||= []) << value
-      end
-    end
-    return metadata
-  end
-
-  def interpret_file_fields raw_data
-    # This method handles file additions and deletions from the spreadsheet
-    # if additional files need to be deleted because the update is set to replace
-    # some or all existing files, those replacement-related deletions are handled
-    # by the BulkOps::Operation.
-    #
-    # TODO: THIS DOES NOT YET MANAGE THE ORDER OF INGESTED FILESETS
-
-    metadata = {}
-    raw_data.each do |field, value|
-      next if value.blank? or field.blank?
-      field = field.to_s
-      #If our CSV interpreter is feeding us the headers as a line, ignore it.
-      next if field == value
-
-
-      # Check if this is a file field, and whether we are removing or adding a file
-      next unless (action = is_file_field?(field))
-
-      # Move on if this field is the name of another property (e.g. masterFilename)
-      next if find_field_name(field)
-
-      # Check if we are removing a file
-      if action == "remove"
-        get_removed_filesets(value).each { |fileset_id| delete_file_set(file_set_id) }
-      else
-        # Add a file
-        operation.get_file_paths(value).each do |filepath|
-          begin
-            uploaded_file = Hyrax::UploadedFile.create(file: File.open(filepath), user: operation.user)
-            (metadata[:uploaded_files] ||= []) << uploaded_file.id unless uploaded_file.id.nil?
-          rescue Exception => e
-            report_error(:upload_error,
-                         message: "Error opening file: #{ filepath } -- #{e}",
-                         file: File.join(BulkOps::Operation::INGEST_MEDIA_PATH,filename),
-                         row_number: row_number)
-          end
-        end
-      end
-    end
-    return metadata
-  end
-
-  def interpret_option_fields raw_data
-    raw_data.each do |field,value|
-      next if value.blank? or field.blank?
-      field = field.to_s
-      next if value == field
-
-      normfield = field.downcase.parameterize.gsub(/[_\s-]/,'')
-      if ["visibility", "public"].include?(normfield)
-        update(visibility: format_visibility(value))
-      end
-      if ["worktype","model","type"].include?(normfield)
-        update(work_type: format_worktype(value) )
-      end
-      if ["referenceidentifier",
-          "referenceid",
-          "refid",
-          "referenceidentifiertype",
-          "referenceidtype",
-          "refidtype",
-          "relationshipidentifier",
-          "relationshipid",
-          "relationshipidentifiertype",
-          "relationshipidtype",
-          "relid",
-          "relidtype"].include?(normfield)
-        update(reference_identifier: format_reference_id(value))
-      end
-    end
-    return {}
-  end
-
-  def interpret_relationship_fields(raw_data)
-    metadata = {}
-    raw_data.each do |field,value|
-      next if value.blank? or field.blank?
-      field = field.to_s
-      value = unescape_csv(value)
-      identifer_type = reference_identifier
-
-      next if value == field
-
-      if (split = field.split(":")).count == 2
-        identifier_type = split.last
-        relationship_type = split.first.to_s
-      else
-        relationship_type = field
-      end
-
-      relationship_type = normalize_relationship_field_name(relationship_type)
-      case relationship_type
-      when "order"
-        # If the field specifies the object's order among siblings
-        update(order: value.to_f)
-        next
-      when "collection"
-        # If the field specifies the name or ID of a collection,
-        # find or create the collection and update the metadata to match
-        col = find_or_create_collection(value)
-        ( metadata[:member_of_collection_ids] ||= [] ) << col.id if col
-        next
-      when "parent", "child"
-
-        # correctly interpret the notation "id:a78C2d81"
-        identifier_type, object_identifier = interpret_relationship_value(identifier_type, value)
-
-        relationship_parameters = { work_proxy_id: id,
-                                    identifier_type: identifier_type,
-                                    relationship_type: relationship_type,
-                                    object_identifier: object_identifier,
-                                    status: "new"}
-
-        #add previous sibling link if necessary
-        previous_value = operation.final_spreadsheet[row_number-1][field]
-        # Check if this is a parent relationship, and the previous row also has one
-        if previous_value.present? && (relationship_type == "parent")
-          # Check if the previous row has the same parent as this row
-          if object_identifier == interpret_relationship_value(identifier_type, previous_value, field).last
-            # If so, set the previous sibling parameter on the relationshp
-            # to the id for the proxy associated with the previous row
-            relationship_parameters[:previous_sibling] = operation.work_proxies.find_by(row_number: row_number-1).id
-          end
-        end
-        BulkOps::Relationship.create(relationship_parameters)
-      end
-      return metadata
-    end
-  end
-
-  def normalize_relationship_field_name field
-    normfield = field.downcase.parameterize.gsub(/[_\s-]/,'')
-    RELATIONSHIP_FIELDS.find{|field| normfield.include?(field) }
-  end
-
-  def find_previous_parent field="parent"
-    #Return the row number of the most recent preceding row that does
-    # not itself have a parent defined
-    i = 0;
-    while (prev_row = operation.final_spreadsheet[row_number - i])
-      return (row_number - i) if prev_row[field].blank?
-    end
-  end
-
-  def interpret_relationship_value id_type, value, field="parent"
-    #Handle "id:20kj4259" syntax if it hasn't already been handled
-    if (split = value.to_s.split(":")).count == 2
-      id_type = split.first
-      value = split.last
-    end
-    #Handle special shorthand syntax for refering to relative row numbers
-    if id_type == "row"
-      if value.to_i < 0
-        # if given a negative integer, count backwards from the current row
-        return [id_type,row_number - value]
-      elsif value.to_s.downcase.include?("prev")
-        # if given any variation of the word "previous", get the first preceding row with no parent of its own
-        return [id_type,find_previous_parent(field)]
-      end
-    end
-    return [id_type,value]
-  end
-
-  def unescape_csv(value)
-    value.gsub(/\\(['";,])/,'\1')
-  end
-
-  def format_worktype(value)
-    # format the value like a class name
-    type = value.titleize.gsub(/[-_\s]/,'')
-    # reject it if it isn't a defined class
-    type = false unless Object.const_defined? type
-    # fall back to the work type defined by the operation, or a standard "Work"
-    return type ||= operation.work_type || "Work"
-  end
-
-  def format_visibility(value)
-    case value.downcase
-    when "public", "open", "true"
-      return "open"
-    when "campus", "ucsc", "institution"
-      return "ucsc"
-    when "restricted", "private", "closed", "false"
-      return "restricted"
-    end
-  end
-
-  def mintLocalAuthUrl(auth_name, value)
-    value.strip!
-    id = value.parameterize
-    auth = Qa::LocalAuthority.find_or_create_by(name: auth_name)
-    entry = Qa::LocalAuthorityEntry.create(local_authority: auth,
-                                           label: value,
-                                           uri: id)
-    return localIdToUrl(id,auth_name)
-  end
-
-  def findAuthUrl(auth, value)
-    value.strip!
-    return nil if auth.nil?
-    return nil unless (entries = Qa::Authorities::Local.subauthority_for(auth).search(value))
-    entries.each do |entry|
-      #require exact match
-      next unless entry["label"].force_encoding('UTF-8') == value.force_encoding('UTF-8')
-      url = entry["url"] || entry["id"]
-      # url = localIdToUrl(url,auth) unless url =~ URI::regexp
-      return url
-    end
-    return nil
-  end
-
-  def localIdToUrl(id,auth_name)
-    root_urls = {production: "https://digitalcollections.library.ucsc.edu",
-                 staging: "http://digitalcollections-staging.library.ucsc.edu",
-                 development: "http://#{Socket.gethostname}",
-                 test: "http://#{Socket.gethostname}"}
-    return "#{root_urls[Rails.env.to_sym]}/authorities/show/local/#{auth_name}/#{id}"
-  end
-
-  def getLocalAuth(field_name)
-    field = schema.get_property(field_name)
-    # There is only ever one local authority per field, so just pick the first you find
-    if vocs = field.vocabularies
-      vocs.each do |voc|
-        return voc["subauthority"] if voc["authority"].downcase == "local"
-      end
-    end
-    return nil
-  end
-
-  def setAdminSet metadata
-    return metadata if metadata[:admin_set_id]
-    asets = AdminSet.where({title: "Bulk Ingest Set"})
-    asets = AdminSet.find('admin_set/default') if asets.blank?
-    metadata[:admin_set_id] = Array(asets).first.id unless asets.blank?
-    return metadata
-  end
-
-  def setMetadataInheritance metadata
-    return metadata if metadata[:metadataInheritance].present?
-    metadata[:metadataInheritance] = operation.options["metadataInheritance"] unless operation.options["metadataInheritance"].blank?
-    return metadata
-  end
-
-  def report_error type, message, **args
-    puts "ERROR MESSAGE: #{message}"
-    update(status: "error", message: message)
-    args[:type]=type
-    (@proxy_errors ||= []) << BulkOps::Error.new(**args)
-  end
-
-  def filename_prefix
-    @filename_prefix ||= operation.filename_prefix
-  end
-
-  def record_exists?
-    operation.record_exists? work_id
-  end
-
-  def get_removed_filesets(filestring)
-    file_ids = split_values(filestring)
-    file_ids.select{|file_id| record_exists?(file_id)}
-
-    # This part handles filenames in addition to file ids. It doesn't work yet!
-    # file_ids.map do |file_id|
-    # If the filename is the id of an existing record, keep that
-    # next(file_id) if (record_exists?(file_id))
-    # If this is the label (i.e.filename) of an existing fileset, use that fileset id
-    # TODO MAKE THIS WORK!!
-    # next(filename) if (filename_exists?(filename))
-    # File.join(BulkOps::Operation::INGEST_MEDIA_PATH, filename_prefix, filename)
-    # end
-  end
-
-  def delete_file_set fileset_id
-    BulkOps::DeleteFileSetJob.perform_later(fileset_id, operation.user.email )
-  end
 
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: bulk_ops
 version: !ruby/object:Gem::Version
-  version: 0.1.
+  version: 0.1.15
 platform: ruby
 authors:
 - Ned Henry, UCSC Library Digital Initiatives
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2019-10-
+date: 2019-10-03 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rails
@@ -106,6 +106,7 @@ files:
 - lib/bulk_ops/github_access.rb
 - lib/bulk_ops/github_credential.rb
 - lib/bulk_ops/operation.rb
+- lib/bulk_ops/parser.rb
 - lib/bulk_ops/queue_work_ingests_job.rb
 - lib/bulk_ops/relationship.rb
 - lib/bulk_ops/search_builder_behavior.rb