base_indexer 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/lib/base_indexer/version.rb +1 -1
 - data/lib/tasks/index.rake +77 -77
 - metadata +3 -9
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 835d4deacab7029147d2dd106b1cf6518ddae2a8
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 8f44888fb252805cc0695006c6833c41805e582b
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 9599ad3b0ccff641dd9d09d59cf4a3d676376b197ef48439318699a30afc32684c120e17d1a5f7ae2ec4a24d93765a0086c5034bf86a4c6e78bb9b2a16d7bd60
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 9bec426885112b59c5010ede1ab6f7c53b3ae29f1f52c20fc8d7a4eb828cd1f1acf836b0126fb8baa9b6b32ccd073e7926324c3c001322072689617e87eaced1
         
     | 
    
        data/lib/base_indexer/version.rb
    CHANGED
    
    
    
        data/lib/tasks/index.rake
    CHANGED
    
    | 
         @@ -11,13 +11,13 @@ def log(logger,message,log_type=:info) 
     | 
|
| 
       11 
11 
     | 
    
         
             
              end
         
     | 
| 
       12 
12 
     | 
    
         
             
              puts message
         
     | 
| 
       13 
13 
     | 
    
         
             
              $stdout.flush
         
     | 
| 
       14 
     | 
    
         
            -
             
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
       15 
15 
     | 
    
         
             
            end
         
     | 
| 
       16 
16 
     | 
    
         | 
| 
       17 
17 
     | 
    
         
             
            desc 'Index a specific list of druids from a pre-assembly log YAML file, a remediate log file, or a simple CSV.  Specify target to index into and log file to index from.'
         
     | 
| 
       18 
     | 
    
         
            -
            #Run me: rake log_indexer RAILS_ENV=production target=revs_prod log_file=/tmp/mailander_1.yaml log_type=preassembly 
     | 
| 
       19 
     | 
    
         
            -
            #Run me: rake log_indexer RAILS_ENV=production target=revs_prod log_file=/tmp/mailander_1_remediation.yaml log_type=remediate 
     | 
| 
       20 
     | 
    
         
            -
            #Run me: rake log_indexer RAILS_ENV=production target=revs_prod log_file=/tmp/mailander_1.csv log_type=csv # csv must contain a heading called "druid" with the druid to index 
     | 
| 
      
 18 
     | 
    
         
            +
            #Run me: rake log_indexer RAILS_ENV=production target=revs_prod log_file=/tmp/mailander_1.yaml log_type=preassembly
         
     | 
| 
      
 19 
     | 
    
         
            +
            #Run me: rake log_indexer RAILS_ENV=production target=revs_prod log_file=/tmp/mailander_1_remediation.yaml log_type=remediate
         
     | 
| 
      
 20 
     | 
    
         
            +
            #Run me: rake log_indexer RAILS_ENV=production target=revs_prod log_file=/tmp/mailander_1.csv log_type=csv # csv must contain a heading called "druid" with the druid to index
         
     | 
| 
       21 
21 
     | 
    
         | 
| 
       22 
22 
     | 
    
         
             
            # Examples:
         
     | 
| 
       23 
23 
     | 
    
         
             
            task :log_indexer => :environment do |t, args|
         
     | 
| 
         @@ -25,55 +25,55 @@ task :log_indexer => :environment do |t, args| 
     | 
|
| 
       25 
25 
     | 
    
         
             
              target = ENV['target'] # must pass in the target so specify solr core to index into
         
     | 
| 
       26 
26 
     | 
    
         
             
              log_file_path = ENV['log_file'] # must specify pre-assembly log file to index from
         
     | 
| 
       27 
27 
     | 
    
         
             
              log_type = ENV['log_type'] || 'preassembly' # log type (either preassembly, csv, or remediate), defaults to preassembly
         
     | 
| 
       28 
     | 
    
         
            -
             
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
       29 
29 
     | 
    
         
             
              raise 'You must specify a target and log file.' if target.blank? || log_file_path.blank?
         
     | 
| 
       30 
30 
     | 
    
         
             
              raise 'Log type must be preassembly, remediate or csv.' unless ['preassembly','remediate','csv'].include? log_type
         
     | 
| 
       31 
31 
     | 
    
         
             
              raise 'Log file not found.' unless File.readable? log_file_path
         
     | 
| 
       32 
     | 
    
         
            -
             
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
       33 
33 
     | 
    
         
             
              target_config=BaseIndexer.solr_configuration_class_name.constantize.instance.get_configuration_hash[target]
         
     | 
| 
       34 
     | 
    
         
            -
             
     | 
| 
      
 34 
     | 
    
         
            +
             
     | 
| 
       35 
35 
     | 
    
         
             
              raise 'Target not found.' if target_config.nil?
         
     | 
| 
       36 
     | 
    
         
            -
             
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
       37 
37 
     | 
    
         
             
              if log_type.blank? || log_type == 'preassembly'
         
     | 
| 
       38 
38 
     | 
    
         
             
                log_completed=:pre_assem_finished
         
     | 
| 
       39 
39 
     | 
    
         
             
              elsif log_type == 'remediate'
         
     | 
| 
       40 
40 
     | 
    
         
             
                log_completed=:remediate_completed
         
     | 
| 
       41 
41 
     | 
    
         
             
              end
         
     | 
| 
       42 
     | 
    
         
            -
             
     | 
| 
      
 42 
     | 
    
         
            +
             
     | 
| 
       43 
43 
     | 
    
         
             
              output_log_file_name="#{Rails.root}/log/#{File.basename(log_file_path,File.extname(log_file_path))}_indexer_#{Time.now.strftime('%Y%m%d-%H%M%S')}.log"
         
     | 
| 
       44 
44 
     | 
    
         
             
              my_logger=Logger.new(output_log_file_name) # set up a new log file
         
     | 
| 
       45 
     | 
    
         
            -
             
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
       46 
46 
     | 
    
         
             
              start_time=Time.now
         
     | 
| 
       47 
     | 
    
         
            -
             
     | 
| 
      
 47 
     | 
    
         
            +
             
     | 
| 
       48 
48 
     | 
    
         
             
              errors=0
         
     | 
| 
       49 
49 
     | 
    
         
             
              indexed=0
         
     | 
| 
       50 
50 
     | 
    
         | 
| 
       51 
51 
     | 
    
         
             
              druids=[]
         
     | 
| 
       52 
     | 
    
         
            -
             
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
       53 
53 
     | 
    
         
             
              if ['preassembly','remediate'].include? log_type
         
     | 
| 
       54 
     | 
    
         
            -
                YAML.load_stream(IO.read(log_file_path)) { |obj| druids << obj[:pid] if obj[log_completed] == true} 
     | 
| 
      
 54 
     | 
    
         
            +
                YAML.load_stream(IO.read(log_file_path)) { |obj| druids << obj[:pid] if obj[log_completed] == true}
         
     | 
| 
       55 
55 
     | 
    
         
             
              else
         
     | 
| 
       56 
56 
     | 
    
         
             
                csv = CSV.parse(IO.read(log_file_path), :headers => true)
         
     | 
| 
       57 
57 
     | 
    
         
             
                druids=csv.map { |row| row.to_hash.with_indifferent_access['druid'] }.delete_if {|druid| druid.nil?}
         
     | 
| 
       58 
58 
     | 
    
         
             
              end
         
     | 
| 
       59 
     | 
    
         
            -
             
     | 
| 
      
 59 
     | 
    
         
            +
             
     | 
| 
       60 
60 
     | 
    
         
             
              solr_server=BaseIndexer.solr_configuration_class_name.constantize.instance.get_configuration_hash[target]['url']
         
     | 
| 
       61 
     | 
    
         
            -
             
     | 
| 
      
 61 
     | 
    
         
            +
             
     | 
| 
       62 
62 
     | 
    
         
             
              log my_logger,"** Indexing #{druids.size} druids from #{log_file_path} into solr server #{solr_server} (target=#{target}).  Log file is of type #{log_type}."
         
     | 
| 
       63 
63 
     | 
    
         
             
              log my_logger,"Indexing started at #{start_time}"
         
     | 
| 
       64 
64 
     | 
    
         | 
| 
       65 
65 
     | 
    
         
             
              indexer = BaseIndexer.indexer_class.constantize.new
         
     | 
| 
       66 
66 
     | 
    
         | 
| 
       67 
67 
     | 
    
         
             
              counter=0
         
     | 
| 
       68 
     | 
    
         
            -
             
     | 
| 
      
 68 
     | 
    
         
            +
             
     | 
| 
       69 
69 
     | 
    
         
             
              druids.each do |druid|
         
     | 
| 
       70 
     | 
    
         
            -
             
     | 
| 
      
 70 
     | 
    
         
            +
             
     | 
| 
       71 
71 
     | 
    
         
             
                druid.gsub!('druid:','')
         
     | 
| 
       72 
72 
     | 
    
         
             
                counter+=1
         
     | 
| 
       73 
     | 
    
         
            -
             
     | 
| 
      
 73 
     | 
    
         
            +
             
     | 
| 
       74 
74 
     | 
    
         
             
                begin
         
     | 
| 
       75 
75 
     | 
    
         
             
                  with_retries(:max_tries => 5, :base_sleep_seconds => 3, :max_sleep_seconds => 60) do
         
     | 
| 
       76 
     | 
    
         
            -
                    indexer.index(druid, 
     | 
| 
      
 76 
     | 
    
         
            +
                    indexer.index(druid,{target=>true})
         
     | 
| 
       77 
77 
     | 
    
         
             
                    log my_logger,"#{counter} of #{druids.size}: #{druid}"
         
     | 
| 
       78 
78 
     | 
    
         
             
                    indexed += 1
         
     | 
| 
       79 
79 
     | 
    
         
             
                  end
         
     | 
| 
         @@ -83,34 +83,34 @@ task :log_indexer => :environment do |t, args| 
     | 
|
| 
       83 
83 
     | 
    
         
             
                end
         
     | 
| 
       84 
84 
     | 
    
         | 
| 
       85 
85 
     | 
    
         
             
              end
         
     | 
| 
       86 
     | 
    
         
            -
             
     | 
| 
      
 86 
     | 
    
         
            +
             
     | 
| 
       87 
87 
     | 
    
         
             
              log my_logger,"Objects indexed: #{indexed} out of #{druids.size}"
         
     | 
| 
       88 
88 
     | 
    
         
             
              log(my_logger,"ERRORS Encountered, #{errors} objects not indexed") if errors > 0
         
     | 
| 
       89 
89 
     | 
    
         
             
              log my_logger,"Completed at #{Time.now}, total time was #{'%.2f' % ((Time.now - start_time)/60.0)} minutes"
         
     | 
| 
       90 
90 
     | 
    
         
             
              puts "Logged output at #{output_log_file_name}"
         
     | 
| 
       91 
     | 
    
         
            -
             
     | 
| 
      
 91 
     | 
    
         
            +
             
     | 
| 
       92 
92 
     | 
    
         
             
            end
         
     | 
| 
       93 
     | 
    
         
            -
             
     | 
| 
      
 93 
     | 
    
         
            +
             
     | 
| 
       94 
94 
     | 
    
         
             
            desc "Delete a single druid.  It will be deleted from all targets!"
         
     | 
| 
       95 
95 
     | 
    
         
             
            #Run me: rake delete RAILS_ENV=production druid=oo000oo0001
         
     | 
| 
       96 
96 
     | 
    
         
             
            # Examples:
         
     | 
| 
       97 
97 
     | 
    
         
             
            task :delete => :environment do |t, args|
         
     | 
| 
       98 
98 
     | 
    
         | 
| 
       99 
     | 
    
         
            -
              druid = ENV['druid'] 
     | 
| 
       100 
     | 
    
         
            -
             
     | 
| 
      
 99 
     | 
    
         
            +
              druid = ENV['druid']
         
     | 
| 
      
 100 
     | 
    
         
            +
             
     | 
| 
       101 
101 
     | 
    
         
             
              raise 'You must specify a druid.' if druid.blank?
         
     | 
| 
       102 
102 
     | 
    
         | 
| 
       103 
103 
     | 
    
         
             
              print "Are you sure you wish to delete this druid from all targets? (y/n) "
         
     | 
| 
       104 
     | 
    
         
            -
              STDOUT.flush 
     | 
| 
      
 104 
     | 
    
         
            +
              STDOUT.flush
         
     | 
| 
       105 
105 
     | 
    
         
             
              answer=STDIN.gets.chomp
         
     | 
| 
       106 
     | 
    
         
            -
             
     | 
| 
      
 106 
     | 
    
         
            +
             
     | 
| 
       107 
107 
     | 
    
         
             
              raise 'STOP!' unless (answer && ['y','yes'].include?(answer.downcase))
         
     | 
| 
       108 
     | 
    
         
            -
             
     | 
| 
      
 108 
     | 
    
         
            +
             
     | 
| 
       109 
109 
     | 
    
         
             
              puts "** Delete #{druid} druid from all targets."
         
     | 
| 
       110 
110 
     | 
    
         | 
| 
       111 
111 
     | 
    
         
             
              indexer = BaseIndexer.indexer_class.constantize.new
         
     | 
| 
       112 
112 
     | 
    
         
             
              indexer.delete druid.gsub('druid:','')
         
     | 
| 
       113 
     | 
    
         
            -
             
     | 
| 
      
 113 
     | 
    
         
            +
             
     | 
| 
       114 
114 
     | 
    
         
             
            end
         
     | 
| 
       115 
115 
     | 
    
         | 
| 
       116 
116 
     | 
    
         
             
            desc 'Index a single druid.  Specify target to index into and druid to index.'
         
     | 
| 
         @@ -119,21 +119,21 @@ desc 'Index a single druid.  Specify target to index into and druid to index.' 
     | 
|
| 
       119 
119 
     | 
    
         
             
            task :index => :environment do |t, args|
         
     | 
| 
       120 
120 
     | 
    
         | 
| 
       121 
121 
     | 
    
         
             
              target = ENV['target'] # must pass in the target so specify solr core to index into
         
     | 
| 
       122 
     | 
    
         
            -
              druid = ENV['druid'] 
     | 
| 
       123 
     | 
    
         
            -
             
     | 
| 
      
 122 
     | 
    
         
            +
              druid = ENV['druid']
         
     | 
| 
      
 123 
     | 
    
         
            +
             
     | 
| 
       124 
124 
     | 
    
         
             
              raise 'You must specify a target and druid.' if target.blank? || druid.blank?
         
     | 
| 
       125 
     | 
    
         
            -
             
     | 
| 
      
 125 
     | 
    
         
            +
             
     | 
| 
       126 
126 
     | 
    
         
             
              target_config=BaseIndexer.solr_configuration_class_name.constantize.instance.get_configuration_hash[target]
         
     | 
| 
       127 
     | 
    
         
            -
             
     | 
| 
      
 127 
     | 
    
         
            +
             
     | 
| 
       128 
128 
     | 
    
         
             
              raise 'Target not found.' if target_config.nil?
         
     | 
| 
       129 
129 
     | 
    
         | 
| 
       130 
130 
     | 
    
         
             
              solr_server=BaseIndexer.solr_configuration_class_name.constantize.instance.get_configuration_hash[target]['url']
         
     | 
| 
       131 
     | 
    
         
            -
             
     | 
| 
      
 131 
     | 
    
         
            +
             
     | 
| 
       132 
132 
     | 
    
         
             
              puts "** Indexing #{druid} druid into solr server #{solr_server} (target=#{target})."
         
     | 
| 
       133 
133 
     | 
    
         | 
| 
       134 
134 
     | 
    
         
             
              indexer = BaseIndexer.indexer_class.constantize.new
         
     | 
| 
       135 
     | 
    
         
            -
              indexer.index(druid.gsub('druid:',''), 
     | 
| 
       136 
     | 
    
         
            -
             
     | 
| 
      
 135 
     | 
    
         
            +
              indexer.index(druid.gsub('druid:',''),{target=>true})
         
     | 
| 
      
 136 
     | 
    
         
            +
             
     | 
| 
       137 
137 
     | 
    
         
             
            end
         
     | 
| 
       138 
138 
     | 
    
         | 
| 
       139 
139 
     | 
    
         
             
            desc 'Index an entire collection, including the collection itself and all of its members.  Specify target to index into and collection druid to index.'
         
     | 
| 
         @@ -142,21 +142,21 @@ desc 'Index an entire collection, including the collection itself and all of its 
     | 
|
| 
       142 
142 
     | 
    
         
             
            task :collection_indexer => :environment do |t, args|
         
     | 
| 
       143 
143 
     | 
    
         | 
| 
       144 
144 
     | 
    
         
             
              target = ENV['target'] # must pass in the target so specify solr core to index into
         
     | 
| 
       145 
     | 
    
         
            -
              collection_druid = ENV['collection_druid'] 
     | 
| 
       146 
     | 
    
         
            -
             
     | 
| 
      
 145 
     | 
    
         
            +
              collection_druid = ENV['collection_druid']
         
     | 
| 
      
 146 
     | 
    
         
            +
             
     | 
| 
       147 
147 
     | 
    
         
             
              raise 'You must specify a target and collection druid.' if target.blank? || collection_druid.blank?
         
     | 
| 
       148 
     | 
    
         
            -
             
     | 
| 
      
 148 
     | 
    
         
            +
             
     | 
| 
       149 
149 
     | 
    
         
             
              target_config=BaseIndexer.solr_configuration_class_name.constantize.instance.get_configuration_hash[target]
         
     | 
| 
       150 
     | 
    
         
            -
             
     | 
| 
      
 150 
     | 
    
         
            +
             
     | 
| 
       151 
151 
     | 
    
         
             
              raise 'Target not found.' if target_config.nil?
         
     | 
| 
       152 
152 
     | 
    
         | 
| 
       153 
153 
     | 
    
         
             
              solr_server=BaseIndexer.solr_configuration_class_name.constantize.instance.get_configuration_hash[target]['url']
         
     | 
| 
       154 
154 
     | 
    
         | 
| 
       155 
155 
     | 
    
         
             
              output_log_file_name="#{Rails.root}/log/collection_#{collection_druid}_indexer_#{Time.now.strftime('%Y%m%d-%H%M%S')}.log"
         
     | 
| 
       156 
156 
     | 
    
         
             
              my_logger=Logger.new(output_log_file_name) # set up a new log file
         
     | 
| 
       157 
     | 
    
         
            -
             
     | 
| 
      
 157 
     | 
    
         
            +
             
     | 
| 
       158 
158 
     | 
    
         
             
              log my_logger,"** Indexing collection #{collection_druid} druid and all of its members into solr server #{solr_server} (target=#{target})."
         
     | 
| 
       159 
     | 
    
         
            -
             
     | 
| 
      
 159 
     | 
    
         
            +
             
     | 
| 
       160 
160 
     | 
    
         
             
              start_time=Time.now
         
     | 
| 
       161 
161 
     | 
    
         
             
              log my_logger,"Indexing started at #{start_time}"
         
     | 
| 
       162 
162 
     | 
    
         | 
| 
         @@ -165,10 +165,10 @@ task :collection_indexer => :environment do |t, args| 
     | 
|
| 
       165 
165 
     | 
    
         
             
              df = DorFetcher::Client.new({:service_url => Rails.application.config.dor_fetcher_url})
         
     | 
| 
       166 
166 
     | 
    
         | 
| 
       167 
167 
     | 
    
         
             
              collection_druid=collection_druid.gsub('druid:','')
         
     | 
| 
       168 
     | 
    
         
            -
             
     | 
| 
       169 
     | 
    
         
            -
              indexer.index(collection_druid, 
     | 
| 
      
 168 
     | 
    
         
            +
             
     | 
| 
      
 169 
     | 
    
         
            +
              indexer.index(collection_druid,{target=>true})
         
     | 
| 
       170 
170 
     | 
    
         
             
              log my_logger,"Indexed collection: #{collection_druid}"
         
     | 
| 
       171 
     | 
    
         
            -
             
     | 
| 
      
 171 
     | 
    
         
            +
             
     | 
| 
       172 
172 
     | 
    
         
             
              druids = df.druid_array(df.get_collection(collection_druid, {}))
         
     | 
| 
       173 
173 
     | 
    
         | 
| 
       174 
174 
     | 
    
         
             
              log my_logger,"** Found #{druids.size} members of the collection"
         
     | 
| 
         @@ -176,15 +176,15 @@ task :collection_indexer => :environment do |t, args| 
     | 
|
| 
       176 
176 
     | 
    
         
             
              counter=0
         
     | 
| 
       177 
177 
     | 
    
         
             
              indexed=0
         
     | 
| 
       178 
178 
     | 
    
         
             
              errors=0
         
     | 
| 
       179 
     | 
    
         
            -
             
     | 
| 
      
 179 
     | 
    
         
            +
             
     | 
| 
       180 
180 
     | 
    
         
             
              druids.each do |druid|
         
     | 
| 
       181 
     | 
    
         
            -
             
     | 
| 
      
 181 
     | 
    
         
            +
             
     | 
| 
       182 
182 
     | 
    
         
             
                druid=druid.gsub('druid:','')
         
     | 
| 
       183 
183 
     | 
    
         
             
                counter+=1
         
     | 
| 
       184 
     | 
    
         
            -
             
     | 
| 
      
 184 
     | 
    
         
            +
             
     | 
| 
       185 
185 
     | 
    
         
             
                begin
         
     | 
| 
       186 
186 
     | 
    
         
             
                  with_retries(:max_tries => 5, :base_sleep_seconds => 3, :max_sleep_seconds => 60) do
         
     | 
| 
       187 
     | 
    
         
            -
                    indexer.index(druid, 
     | 
| 
      
 187 
     | 
    
         
            +
                    indexer.index(druid,{target=>true})
         
     | 
| 
       188 
188 
     | 
    
         
             
                    log my_logger,"#{counter} of #{druids.size}: #{druid}"
         
     | 
| 
       189 
189 
     | 
    
         
             
                    indexed += 1
         
     | 
| 
       190 
190 
     | 
    
         
             
                  end
         
     | 
| 
         @@ -194,13 +194,13 @@ task :collection_indexer => :environment do |t, args| 
     | 
|
| 
       194 
194 
     | 
    
         
             
                end
         
     | 
| 
       195 
195 
     | 
    
         | 
| 
       196 
196 
     | 
    
         
             
              end
         
     | 
| 
       197 
     | 
    
         
            -
             
     | 
| 
      
 197 
     | 
    
         
            +
             
     | 
| 
       198 
198 
     | 
    
         
             
              log my_logger,"Objects indexed: #{indexed} out of #{druids.size} + 1 collection druid"
         
     | 
| 
       199 
199 
     | 
    
         
             
              log(my_logger,"ERRORS Encountered, #{errors} objects not indexed") if errors > 0
         
     | 
| 
       200 
200 
     | 
    
         
             
              log my_logger,"Completed at #{Time.now}, total time was #{'%.2f' % ((Time.now - start_time)/60.0)} minutes"
         
     | 
| 
       201 
201 
     | 
    
         
             
              puts "Logged output at #{output_log_file_name}"
         
     | 
| 
       202 
     | 
    
         
            -
             
     | 
| 
       203 
     | 
    
         
            -
            end 
     | 
| 
      
 202 
     | 
    
         
            +
             
     | 
| 
      
 203 
     | 
    
         
            +
            end
         
     | 
| 
       204 
204 
     | 
    
         | 
| 
       205 
205 
     | 
    
         
             
            desc 'ReIndex just the druids that errored out from a previous batch index run. Specify target to index into and batch errored log file to index from.'
         
     | 
| 
       206 
206 
     | 
    
         
             
            #Run me: rake reindexer RAILS_ENV=production target=revs_prod file=./log/index.log
         
     | 
| 
         @@ -209,16 +209,16 @@ task :reindexer => :environment do |t, args| 
     | 
|
| 
       209 
209 
     | 
    
         | 
| 
       210 
210 
     | 
    
         
             
              target = ENV['target'] # must pass in the target so specify solr core to index into
         
     | 
| 
       211 
211 
     | 
    
         
             
              file_path = ENV['file'] # must specify previous indexing log file to index from
         
     | 
| 
       212 
     | 
    
         
            -
             
     | 
| 
      
 212 
     | 
    
         
            +
             
     | 
| 
       213 
213 
     | 
    
         
             
              raise 'You must specify a target and file.' if target.blank? || file_path.blank?
         
     | 
| 
       214 
214 
     | 
    
         
             
              raise 'File not found.' unless File.readable? file_path
         
     | 
| 
       215 
     | 
    
         
            -
             
     | 
| 
      
 215 
     | 
    
         
            +
             
     | 
| 
       216 
216 
     | 
    
         
             
              target_config=BaseIndexer.solr_configuration_class_name.constantize.instance.get_configuration_hash[target]
         
     | 
| 
       217 
     | 
    
         
            -
             
     | 
| 
      
 217 
     | 
    
         
            +
             
     | 
| 
       218 
218 
     | 
    
         
             
              raise 'Target not found.' if target_config.nil?
         
     | 
| 
       219 
219 
     | 
    
         | 
| 
       220 
220 
     | 
    
         
             
              start_time=Time.now
         
     | 
| 
       221 
     | 
    
         
            -
             
     | 
| 
      
 221 
     | 
    
         
            +
             
     | 
| 
       222 
222 
     | 
    
         
             
              errors=0
         
     | 
| 
       223 
223 
     | 
    
         
             
              indexed=0
         
     | 
| 
       224 
224 
     | 
    
         | 
| 
         @@ -226,7 +226,7 @@ task :reindexer => :environment do |t, args| 
     | 
|
| 
       226 
226 
     | 
    
         | 
| 
       227 
227 
     | 
    
         
             
              output_log_file_name="#{Rails.root}/log/#{File.basename(file_path,File.extname(file_path))}_reindex_#{Time.now.strftime('%Y%m%d-%H%M%S')}.log"
         
     | 
| 
       228 
228 
     | 
    
         
             
              my_logger=Logger.new(output_log_file_name) # set up a new log file
         
     | 
| 
       229 
     | 
    
         
            -
             
     | 
| 
      
 229 
     | 
    
         
            +
             
     | 
| 
       230 
230 
     | 
    
         
             
              log my_logger,"** Indexing errored out druids from #{file_path} into solr server #{solr_server} (target=#{target})."
         
     | 
| 
       231 
231 
     | 
    
         
             
              log my_logger,"Indexing started at #{start_time}"
         
     | 
| 
       232 
232 
     | 
    
         | 
| 
         @@ -237,15 +237,15 @@ task :reindexer => :environment do |t, args| 
     | 
|
| 
       237 
237 
     | 
    
         
             
              IO.readlines(file_path).each do |line|
         
     | 
| 
       238 
238 
     | 
    
         | 
| 
       239 
239 
     | 
    
         
             
                downcased_line=line.downcase
         
     | 
| 
       240 
     | 
    
         
            -
             
     | 
| 
      
 240 
     | 
    
         
            +
             
     | 
| 
       241 
241 
     | 
    
         
             
                if downcased_line.include? 'error'
         
     | 
| 
       242 
242 
     | 
    
         
             
                  druid=downcased_line.scan(/[a-z][a-z][0-9][0-9][0-9][a-z][a-z][0-9][0-9][0-9][0-9]/).first
         
     | 
| 
       243 
     | 
    
         
            -
             
     | 
| 
       244 
     | 
    
         
            -
                  unless druid.blank? 
     | 
| 
      
 243 
     | 
    
         
            +
             
     | 
| 
      
 244 
     | 
    
         
            +
                  unless druid.blank?
         
     | 
| 
       245 
245 
     | 
    
         
             
                    begin
         
     | 
| 
       246 
246 
     | 
    
         
             
                      counter+=1
         
     | 
| 
       247 
247 
     | 
    
         
             
                      with_retries(:max_tries => 5, :base_sleep_seconds => 3, :max_sleep_seconds => 60) do
         
     | 
| 
       248 
     | 
    
         
            -
                        indexer.index(druid, 
     | 
| 
      
 248 
     | 
    
         
            +
                        indexer.index(druid,{target=>true})
         
     | 
| 
       249 
249 
     | 
    
         
             
                        log my_logger,"#{counter}: #{druid}"
         
     | 
| 
       250 
250 
     | 
    
         
             
                        indexed += 1
         
     | 
| 
       251 
251 
     | 
    
         
             
                      end
         
     | 
| 
         @@ -254,16 +254,16 @@ task :reindexer => :environment do |t, args| 
     | 
|
| 
       254 
254 
     | 
    
         
             
                      errors += 1
         
     | 
| 
       255 
255 
     | 
    
         
             
                    end
         
     | 
| 
       256 
256 
     | 
    
         
             
                  end
         
     | 
| 
       257 
     | 
    
         
            -
             
     | 
| 
      
 257 
     | 
    
         
            +
             
     | 
| 
       258 
258 
     | 
    
         
             
                end
         
     | 
| 
       259 
     | 
    
         
            -
             
     | 
| 
      
 259 
     | 
    
         
            +
             
     | 
| 
       260 
260 
     | 
    
         
             
              end
         
     | 
| 
       261 
     | 
    
         
            -
             
     | 
| 
      
 261 
     | 
    
         
            +
             
     | 
| 
       262 
262 
     | 
    
         
             
              log my_logger,"Objects indexed: #{indexed}"
         
     | 
| 
       263 
263 
     | 
    
         
             
              log(my_logger,"ERRORS Encountered, #{errors} objects not indexed") if errors > 0
         
     | 
| 
       264 
264 
     | 
    
         
             
              log my_logger,"Completed at #{Time.now}, total time was #{'%.2f' % ((Time.now - start_time)/60.0)} minutes"
         
     | 
| 
       265 
265 
     | 
    
         
             
              puts "Logged output at #{output_log_file_name}"
         
     | 
| 
       266 
     | 
    
         
            -
             
     | 
| 
      
 266 
     | 
    
         
            +
             
     | 
| 
       267 
267 
     | 
    
         
             
            end
         
     | 
| 
       268 
268 
     | 
    
         | 
| 
       269 
269 
     | 
    
         
             
            desc 'Delete the druids specified in the supplied text file (one druid per line, header not necessary).  Be careful!  It will delete from all targets.'
         
     | 
| 
         @@ -272,24 +272,24 @@ desc 'Delete the druids specified in the supplied text file (one druid per line, 
     | 
|
| 
       272 
272 
     | 
    
         
             
            task :delete_druids => :environment do |t, args|
         
     | 
| 
       273 
273 
     | 
    
         | 
| 
       274 
274 
     | 
    
         
             
              file_path = ENV['file'] # must specify previous indexing log file to index from
         
     | 
| 
       275 
     | 
    
         
            -
             
     | 
| 
      
 275 
     | 
    
         
            +
             
     | 
| 
       276 
276 
     | 
    
         
             
              raise 'You must specify a druid file.' if file_path.blank?
         
     | 
| 
       277 
277 
     | 
    
         
             
              raise 'File not found.' unless File.readable? file_path
         
     | 
| 
       278 
278 
     | 
    
         | 
| 
       279 
279 
     | 
    
         
             
              print "Are you sure you wish to delete all of the druids from all targets specified in #{file_path}? (y/n) "
         
     | 
| 
       280 
     | 
    
         
            -
              STDOUT.flush 
     | 
| 
      
 280 
     | 
    
         
            +
              STDOUT.flush
         
     | 
| 
       281 
281 
     | 
    
         
             
              answer=STDIN.gets.chomp
         
     | 
| 
       282 
     | 
    
         
            -
             
     | 
| 
      
 282 
     | 
    
         
            +
             
     | 
| 
       283 
283 
     | 
    
         
             
              raise 'STOP!' unless (answer && ['y','yes'].include?(answer.downcase))
         
     | 
| 
       284 
     | 
    
         
            -
             
     | 
| 
      
 284 
     | 
    
         
            +
             
     | 
| 
       285 
285 
     | 
    
         
             
              output_log_file_name="#{Rails.root}/log/#{File.basename(file_path,File.extname(file_path))}_delete_#{Time.now.strftime('%Y%m%d-%H%M%S')}.log"
         
     | 
| 
       286 
286 
     | 
    
         
             
              my_logger=Logger.new(output_log_file_name) # set up a new log file
         
     | 
| 
       287 
     | 
    
         
            -
             
     | 
| 
      
 287 
     | 
    
         
            +
             
     | 
| 
       288 
288 
     | 
    
         
             
              start_time=Time.now
         
     | 
| 
       289 
     | 
    
         
            -
             
     | 
| 
      
 289 
     | 
    
         
            +
             
     | 
| 
       290 
290 
     | 
    
         
             
              errors=0
         
     | 
| 
       291 
291 
     | 
    
         
             
              indexed=0
         
     | 
| 
       292 
     | 
    
         
            -
             
     | 
| 
      
 292 
     | 
    
         
            +
             
     | 
| 
       293 
293 
     | 
    
         
             
              log my_logger,"** Deleting druids from #{file_path} in all targets."
         
     | 
| 
       294 
294 
     | 
    
         
             
              log my_logger,"Deleting started at #{start_time}"
         
     | 
| 
       295 
295 
     | 
    
         | 
| 
         @@ -301,10 +301,10 @@ task :delete_druids => :environment do |t, args| 
     | 
|
| 
       301 
301 
     | 
    
         | 
| 
       302 
302 
     | 
    
         
             
                 downcased_line=line.downcase
         
     | 
| 
       303 
303 
     | 
    
         
             
                 druid=downcased_line.scan(/[a-z][a-z][0-9][0-9][0-9][a-z][a-z][0-9][0-9][0-9][0-9]/).first
         
     | 
| 
       304 
     | 
    
         
            -
             
     | 
| 
      
 304 
     | 
    
         
            +
             
     | 
| 
       305 
305 
     | 
    
         
             
                 unless druid.blank?
         
     | 
| 
       306 
306 
     | 
    
         
             
                   counter+=1
         
     | 
| 
       307 
     | 
    
         
            -
             
     | 
| 
      
 307 
     | 
    
         
            +
             
     | 
| 
       308 
308 
     | 
    
         
             
                    begin
         
     | 
| 
       309 
309 
     | 
    
         
             
                      with_retries(:max_tries => 5, :base_sleep_seconds => 3, :max_sleep_seconds => 60) do
         
     | 
| 
       310 
310 
     | 
    
         
             
                        indexer.delete druid
         
     | 
| 
         @@ -315,11 +315,11 @@ task :delete_druids => :environment do |t, args| 
     | 
|
| 
       315 
315 
     | 
    
         
             
                      log my_logger,"ERROR: Failed to delete #{druid}: #{e.message}",:error
         
     | 
| 
       316 
316 
     | 
    
         
             
                      errors += 1
         
     | 
| 
       317 
317 
     | 
    
         
             
                    end
         
     | 
| 
       318 
     | 
    
         
            -
                 end 
     | 
| 
      
 318 
     | 
    
         
            +
                 end
         
     | 
| 
       319 
319 
     | 
    
         
             
              end
         
     | 
| 
       320 
     | 
    
         
            -
             
     | 
| 
      
 320 
     | 
    
         
            +
             
     | 
| 
       321 
321 
     | 
    
         
             
              log my_logger,"Objects deleted: #{indexed}"
         
     | 
| 
       322 
322 
     | 
    
         
             
              log(my_logger,"ERRORS Encountered, #{errors} objects not deleted",:error) if errors > 0
         
     | 
| 
       323 
323 
     | 
    
         
             
              log my_logger,"Completed at #{Time.now}, total time was #{'%.2f' % ((Time.now - start_time)/60.0)} minutes"
         
     | 
| 
       324 
     | 
    
         
            -
             
     | 
| 
       325 
     | 
    
         
            -
            end
         
     | 
| 
      
 324 
     | 
    
         
            +
             
     | 
| 
      
 325 
     | 
    
         
            +
            end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: base_indexer
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 1.0.1
     | 
| 
      
 4 
     | 
    
         
            +
              version: 1.0.2
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Ahmed Alsum
         
     | 
| 
         @@ -17,20 +17,14 @@ dependencies: 
     | 
|
| 
       17 
17 
     | 
    
         
             
                requirements:
         
     | 
| 
       18 
18 
     | 
    
         
             
                - - "~>"
         
     | 
| 
       19 
19 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       20 
     | 
    
         
            -
                    version: '4 
     | 
| 
       21 
     | 
    
         
            -
                - - ">="
         
     | 
| 
       22 
     | 
    
         
            -
                  - !ruby/object:Gem::Version
         
     | 
| 
       23 
     | 
    
         
            -
                    version: 4.1.9
         
     | 
| 
      
 20 
     | 
    
         
            +
                    version: '4'
         
     | 
| 
       24 
21 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       25 
22 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       26 
23 
     | 
    
         
             
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
       27 
24 
     | 
    
         
             
                requirements:
         
     | 
| 
       28 
25 
     | 
    
         
             
                - - "~>"
         
     | 
| 
       29 
26 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       30 
     | 
    
         
            -
                    version: '4 
     | 
| 
       31 
     | 
    
         
            -
                - - ">="
         
     | 
| 
       32 
     | 
    
         
            -
                  - !ruby/object:Gem::Version
         
     | 
| 
       33 
     | 
    
         
            -
                    version: 4.1.9
         
     | 
| 
      
 27 
     | 
    
         
            +
                    version: '4'
         
     | 
| 
       34 
28 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       35 
29 
     | 
    
         
             
              name: discovery-indexer
         
     | 
| 
       36 
30 
     | 
    
         
             
              requirement: !ruby/object:Gem::Requirement
         
     |