preservation 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
 - data/.gitignore +18 -0
 - data/CHANGELOG.md +11 -0
 - data/Gemfile +4 -0
 - data/LICENSE.txt +22 -0
 - data/PITCHME.md +126 -0
 - data/README.md +88 -0
 - data/Rakefile +1 -0
 - data/lib/preservation.rb +24 -0
 - data/lib/preservation/configuration.rb +15 -0
 - data/lib/preservation/ingest.rb +163 -0
 - data/lib/preservation/ingest_report.rb +172 -0
 - data/lib/preservation/pure_ingest.rb +188 -0
 - data/lib/preservation/string_util.rb +19 -0
 - data/lib/preservation/version.rb +5 -0
 - data/preservation.gemspec +26 -0
 - metadata +103 -0
 
    
        checksums.yaml
    ADDED
    
    | 
         @@ -0,0 +1,7 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            ---
         
     | 
| 
      
 2 
     | 
    
         
            +
            SHA1:
         
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: dfcf307b70473079a60f2801c6bd11e4f4c289d8
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 0e13efac8904ccd96fc690644520e20f388a4fce
         
     | 
| 
      
 5 
     | 
    
         
            +
            SHA512:
         
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 939f44e9a24177232e900953f4b59b64cfe62e0bce056765444dd9a726b64b441cae7b8fc6b3cea233ef9febd732a9a27f0759992cafe4e8494213edc947fffa
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 2a86f065c50cf43c8ea7bcaa707d278f05b7b73b2be53cbebad47e50f38ffa3e8324dc88a71ded669495e6c7f719ca7a7dbd8943da3bcf43a186f6f56df40daf
         
     | 
    
        data/.gitignore
    ADDED
    
    
    
        data/CHANGELOG.md
    ADDED
    
    | 
         @@ -0,0 +1,11 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # Change Log
         
     | 
| 
      
 2 
     | 
    
         
            +
            All notable changes to this project will be documented in this file.
         
     | 
| 
      
 3 
     | 
    
         
            +
            This project adheres to [Semantic Versioning](http://semver.org/).
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            ## Unreleased
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            ## 0.1.0 - 2016-09-13
         
     | 
| 
      
 8 
     | 
    
         
            +
            ### Added
         
     | 
| 
      
 9 
     | 
    
         
            +
            - Transfer preparation.
         
     | 
| 
      
 10 
     | 
    
         
            +
            - Reporting from transfers database.
         
     | 
| 
      
 11 
     | 
    
         
            +
            - Disk space management.
         
     | 
    
        data/Gemfile
    ADDED
    
    
    
        data/LICENSE.txt
    ADDED
    
    | 
         @@ -0,0 +1,22 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            Copyright (c) 2016 Adrian Albin-Clark
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            MIT License
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            Permission is hereby granted, free of charge, to any person obtaining
         
     | 
| 
      
 6 
     | 
    
         
            +
            a copy of this software and associated documentation files (the
         
     | 
| 
      
 7 
     | 
    
         
            +
            "Software"), to deal in the Software without restriction, including
         
     | 
| 
      
 8 
     | 
    
         
            +
            without limitation the rights to use, copy, modify, merge, publish,
         
     | 
| 
      
 9 
     | 
    
         
            +
            distribute, sublicense, and/or sell copies of the Software, and to
         
     | 
| 
      
 10 
     | 
    
         
            +
            permit persons to whom the Software is furnished to do so, subject to
         
     | 
| 
      
 11 
     | 
    
         
            +
            the following conditions:
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            The above copyright notice and this permission notice shall be
         
     | 
| 
      
 14 
     | 
    
         
            +
            included in all copies or substantial portions of the Software.
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
            THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
         
     | 
| 
      
 17 
     | 
    
         
            +
            EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
         
     | 
| 
      
 18 
     | 
    
         
            +
            MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
         
     | 
| 
      
 19 
     | 
    
         
            +
            NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
         
     | 
| 
      
 20 
     | 
    
         
            +
            LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
         
     | 
| 
      
 21 
     | 
    
         
            +
            OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
         
     | 
| 
      
 22 
     | 
    
         
            +
            WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
         
     | 
    
        data/PITCHME.md
    ADDED
    
    | 
         @@ -0,0 +1,126 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #HSLIDE
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            ## Rationale
         
     | 
| 
      
 4 
     | 
    
         
            +
            Archivematica's [Automation Tools](https://github.com/artefactual/automation-tools)
         
     | 
| 
      
 5 
     | 
    
         
            +
            work with files and descriptive metadata which must be provided in a certain way.
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            #HSLIDE
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
            ## Preservation: a way to manage ingest
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
            #VSLIDE
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
            - Transfer preparation.
         
     | 
| 
      
 15 
     | 
    
         
            +
            - Reporting from transfers database. <!-- .element: class="fragment" -->
         
     | 
| 
      
 16 
     | 
    
         
            +
            - Disk space management. <!-- .element: class="fragment" -->
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
            #HSLIDE
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
            ## Preservation: ingest
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
            Create an ingestor for Pure.
         
     | 
| 
      
 23 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 24 
     | 
    
         
            +
            ingest = Preservation::PureIngest.new
         
     | 
| 
      
 25 
     | 
    
         
            +
            ```
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
            For each uuid, if necessary, fetch the metadata, prepare a directory in the
         
     | 
| 
      
 28 
     | 
    
         
            +
            ingest path and populate it with the files and JSON description file.
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 31 
     | 
    
         
            +
            ingest.prepare_dataset uuids: uuids,
         
     | 
| 
      
 32 
     | 
    
         
            +
                                   dir_name_scheme: :doi_short,
         
     | 
| 
      
 33 
     | 
    
         
            +
                                   delay: 0
         
     | 
| 
      
 34 
     | 
    
         
            +
            ```
         
     | 
| 
      
 35 
     | 
    
         
            +
             
     | 
| 
      
 36 
     | 
    
         
            +
            Free up disk space for completed transfers.
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 39 
     | 
    
         
            +
            ingest.cleanup_preserved
         
     | 
| 
      
 40 
     | 
    
         
            +
            ```
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
      
 42 
     | 
    
         
            +
            #VSLIDE
         
     | 
| 
      
 43 
     | 
    
         
            +
             
     | 
| 
      
 44 
     | 
    
         
            +
            ## Transfer-ready directory
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
      
 46 
     | 
    
         
            +
            ```
         
     | 
| 
      
 47 
     | 
    
         
            +
            .
         
     | 
| 
      
 48 
     | 
    
         
            +
            ├── 10.17635-lancaster-researchdata-6
         
     | 
| 
      
 49 
     | 
    
         
            +
            │   ├── Ebola_data_Jun15.zip
         
     | 
| 
      
 50 
     | 
    
         
            +
            │   └── metadata
         
     | 
| 
      
 51 
     | 
    
         
            +
            │       └── metadata.json
         
     | 
| 
      
 52 
     | 
    
         
            +
            ```
         
     | 
| 
      
 53 
     | 
    
         
            +
             
     | 
| 
      
 54 
     | 
    
         
            +
            #VSLIDE
         
     | 
| 
      
 55 
     | 
    
         
            +
             
     | 
| 
      
 56 
     | 
    
         
            +
            ## Transfer-ready metadata
         
     | 
| 
      
 57 
     | 
    
         
            +
             
     | 
| 
      
 58 
     | 
    
         
            +
            ```json
         
     | 
| 
      
 59 
     | 
    
         
            +
            [
         
     | 
| 
      
 60 
     | 
    
         
            +
              {
         
     | 
| 
      
 61 
     | 
    
         
            +
                "filename": "objects/Ebola_data_Jun15.zip",
         
     | 
| 
      
 62 
     | 
    
         
            +
                "dc.title": "Ebolavirus evolution 2013-2015",
         
     | 
| 
      
 63 
     | 
    
         
            +
                "dc.description": "Data used for analysis of selection and evolutionary rate in Zaire Ebolavirus variant Makona",
         
     | 
| 
      
 64 
     | 
    
         
            +
                "dcterms.created": "2015-06-04T16:11:34.713+01:00",
         
     | 
| 
      
 65 
     | 
    
         
            +
                "dcterms.available": "2015-06-04",
         
     | 
| 
      
 66 
     | 
    
         
            +
                "dc.publisher": "Lancaster University",
         
     | 
| 
      
 67 
     | 
    
         
            +
                "dc.identifier": "http://dx.doi.org/10.17635/lancaster/researchdata/6",
         
     | 
| 
      
 68 
     | 
    
         
            +
                "dcterms.spatial": [
         
     | 
| 
      
 69 
     | 
    
         
            +
                  "Guinea, Sierra Leone, Liberia"
         
     | 
| 
      
 70 
     | 
    
         
            +
                ],
         
     | 
| 
      
 71 
     | 
    
         
            +
                "dc.creator": [
         
     | 
| 
      
 72 
     | 
    
         
            +
                  "Gatherer, Derek"
         
     | 
| 
      
 73 
     | 
    
         
            +
                ],
         
     | 
| 
      
 74 
     | 
    
         
            +
                "dc.contributor": [
         
     | 
| 
      
 75 
     | 
    
         
            +
                  "Robertson, David",
         
     | 
| 
      
 76 
     | 
    
         
            +
                  "Lovell, Simon"
         
     | 
| 
      
 77 
     | 
    
         
            +
                ],
         
     | 
| 
      
 78 
     | 
    
         
            +
                "dc.subject": [
         
     | 
| 
      
 79 
     | 
    
         
            +
                  "Ebolavirus",
         
     | 
| 
      
 80 
     | 
    
         
            +
                  "evolution",
         
     | 
| 
      
 81 
     | 
    
         
            +
                  "phylogenetics",
         
     | 
| 
      
 82 
     | 
    
         
            +
                  "virulence",
         
     | 
| 
      
 83 
     | 
    
         
            +
                  "Filoviridae",
         
     | 
| 
      
 84 
     | 
    
         
            +
                  "positive selection"
         
     | 
| 
      
 85 
     | 
    
         
            +
                ],
         
     | 
| 
      
 86 
     | 
    
         
            +
                "dcterms.license": "CC BY",
         
     | 
| 
      
 87 
     | 
    
         
            +
                "related": [
         
     | 
| 
      
 88 
     | 
    
         
            +
                  {
         
     | 
| 
      
 89 
     | 
    
         
            +
                    "dc.title": "The unprecedented scale of the West African Ebola virus disease outbreak is due to environmental and sociological factors, not special attributes of the currently circulating strain of the virus",
         
     | 
| 
      
 90 
     | 
    
         
            +
                    "type": "Journal article",
         
     | 
| 
      
 91 
     | 
    
         
            +
                    "dc.identifier": "http://dx.doi.org/10.1136/ebmed-2014-110127"
         
     | 
| 
      
 92 
     | 
    
         
            +
                  },
         
     | 
| 
      
 93 
     | 
    
         
            +
                  {
         
     | 
| 
      
 94 
     | 
    
         
            +
                    "dc.title": "The 2014 Ebola virus disease outbreak in West Africa",
         
     | 
| 
      
 95 
     | 
    
         
            +
                    "type": "Journal article",
         
     | 
| 
      
 96 
     | 
    
         
            +
                    "dc.identifier": "http://dx.doi.org/10.1099/vir.0.067199-0"
         
     | 
| 
      
 97 
     | 
    
         
            +
                  }
         
     | 
| 
      
 98 
     | 
    
         
            +
                ]
         
     | 
| 
      
 99 
     | 
    
         
            +
              }
         
     | 
| 
      
 100 
     | 
    
         
            +
            ]
         
     | 
| 
      
 101 
     | 
    
         
            +
            ```
         
     | 
| 
      
 102 
     | 
    
         
            +
             
     | 
| 
      
 103 
     | 
    
         
            +
            #HSLIDE
         
     | 
| 
      
 104 
     | 
    
         
            +
             
     | 
| 
      
 105 
     | 
    
         
            +
            ## Preservation: reporting
         
     | 
| 
      
 106 
     | 
    
         
            +
             
     | 
| 
      
 107 
     | 
    
         
            +
            Can be used for scheduled monitoring of transfers.
         
     | 
| 
      
 108 
     | 
    
         
            +
             
     | 
| 
      
 109 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 110 
     | 
    
         
            +
            report = Preservation::IngestReport.new
         
     | 
| 
      
 111 
     | 
    
         
            +
            report.transfer_exception
         
     | 
| 
      
 112 
     | 
    
         
            +
            ```
         
     | 
| 
      
 113 
     | 
    
         
            +
             
     | 
| 
      
 114 
     | 
    
         
            +
            #HSLIDE
         
     | 
| 
      
 115 
     | 
    
         
            +
             
     | 
| 
      
 116 
     | 
    
         
            +
            ## Location
         
     | 
| 
      
 117 
     | 
    
         
            +
             
     | 
| 
      
 118 
     | 
    
         
            +
            <a href="https://rubygems.org/gems/preservation" target="_blank">RubyGems</a>
         
     | 
| 
      
 119 
     | 
    
         
            +
             
     | 
| 
      
 120 
     | 
    
         
            +
            <a href="https://github.com/lulibrary/preservation" target="_blank">GitHub</a>
         
     | 
| 
      
 121 
     | 
    
         
            +
             
     | 
| 
      
 122 
     | 
    
         
            +
            #HSLIDE
         
     | 
| 
      
 123 
     | 
    
         
            +
             
     | 
| 
      
 124 
     | 
    
         
            +
            ## Documentation
         
     | 
| 
      
 125 
     | 
    
         
            +
             
     | 
| 
      
 126 
     | 
    
         
            +
            <a href="http://www.rubydoc.info/gems/preservation" target="_blank">API in YARD</a>
         
     | 
    
        data/README.md
    ADDED
    
    | 
         @@ -0,0 +1,88 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
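            # Preservation [![Gem Version](https://badge.fury.io/rb/preservation.svg)](https://badge.fury.io/rb/preservation) [![GitPitch](https://gitpitch.com/assets/badge.svg)](https://gitpitch.com/lulibrary/preservation/master?grs=github&t=sky)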
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            Ingest management for Archivematica's [Automation Tools](https://github.com/artefactual/automation-tools).
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
            ## Installation
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            Add this line to your application's Gemfile:
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
                gem 'preservation'
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            And then execute:
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
                $ bundle
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
            Or install it yourself as:
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
                $ gem install preservation
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
            ## Usage
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
            ### Configuration
         
     | 
| 
      
 22 
     | 
    
         
            +
            Configure Preservation. If `log_path` is omitted, log output (via the standard library logger) goes to STDOUT.
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 25 
     | 
    
         
            +
              Preservation.configure do |config|
         
     | 
| 
      
 26 
     | 
    
         
            +
                config.db_path     = ENV['ARCHIVEMATICA_DB_PATH']
         
     | 
| 
      
 27 
     | 
    
         
            +
                config.ingest_path = ENV['ARCHIVEMATICA_INGEST_PATH']
         
     | 
| 
      
 28 
     | 
    
         
            +
                config.log_path    = ENV['PRESERVATION_LOG_PATH']
         
     | 
| 
      
 29 
     | 
    
         
            +
              end
         
     | 
| 
      
 30 
     | 
    
         
            +
            ```
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
            Configure data source.
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 35 
     | 
    
         
            +
            Puree.configure do |config|
         
     | 
| 
      
 36 
     | 
    
         
            +
              config.base_url   = ENV['PURE_BASE_URL']
         
     | 
| 
      
 37 
     | 
    
         
            +
              config.username   = ENV['PURE_USERNAME']
         
     | 
| 
      
 38 
     | 
    
         
            +
              config.password   = ENV['PURE_PASSWORD']
         
     | 
| 
      
 39 
     | 
    
         
            +
              config.basic_auth = true
         
     | 
| 
      
 40 
     | 
    
         
            +
            end
         
     | 
| 
      
 41 
     | 
    
         
            +
            ```
         
     | 
| 
      
 42 
     | 
    
         
            +
             
     | 
| 
      
 43 
     | 
    
         
            +
            ### Transfers
         
     | 
| 
      
 44 
     | 
    
         
            +
             
     | 
| 
      
 45 
     | 
    
         
            +
            Get some dataset UUIDs for preservation.
         
     | 
| 
      
 46 
     | 
    
         
            +
             
     | 
| 
      
 47 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 48 
     | 
    
         
            +
            c = Puree::Collection.new resource: :dataset
         
     | 
| 
      
 49 
     | 
    
         
            +
            minimal_metadata = c.find limit: 2,
         
     | 
| 
      
 50 
     | 
    
         
            +
                                      offset: 10,
         
     | 
| 
      
 51 
     | 
    
         
            +
                                      full: false
         
     | 
| 
      
 52 
     | 
    
         
            +
            uuids = []
         
     | 
| 
      
 53 
     | 
    
         
            +
            minimal_metadata.each do |i|
         
     | 
| 
      
 54 
     | 
    
         
            +
              uuids << i['uuid']
         
     | 
| 
      
 55 
     | 
    
         
            +
            end
         
     | 
| 
      
 56 
     | 
    
         
            +
            ```
         
     | 
| 
      
 57 
     | 
    
         
            +
             
     | 
| 
      
 58 
     | 
    
         
            +
            Create an ingestor for Pure.
         
     | 
| 
      
 59 
     | 
    
         
            +
             
     | 
| 
      
 60 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 61 
     | 
    
         
            +
            ingest = Preservation::PureIngest.new
         
     | 
| 
      
 62 
     | 
    
         
            +
            ```
         
     | 
| 
      
 63 
     | 
    
         
            +
             
     | 
| 
      
 64 
     | 
    
         
            +
            For each uuid, if necessary, fetch the metadata, prepare
         
     | 
| 
      
 65 
     | 
    
         
            +
            a directory in the ingest path and populate it with the files and JSON description file.
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
      
 67 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 68 
     | 
    
         
            +
            ingest.prepare_dataset uuids: uuids,
         
     | 
| 
      
 69 
     | 
    
         
            +
                                   dir_name_scheme: :doi_short,
         
     | 
| 
      
 70 
     | 
    
         
            +
                                   delay: 0
         
     | 
| 
      
 71 
     | 
    
         
            +
            ```
         
     | 
| 
      
 72 
     | 
    
         
            +
             
     | 
| 
      
 73 
     | 
    
         
            +
            Free up disk space for completed transfers.
         
     | 
| 
      
 74 
     | 
    
         
            +
             
     | 
| 
      
 75 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 76 
     | 
    
         
            +
            ingest.cleanup_preserved
         
     | 
| 
      
 77 
     | 
    
         
            +
            ```
         
     | 
| 
      
 78 
     | 
    
         
            +
             
     | 
| 
      
 79 
     | 
    
         
            +
            ### Reporting
         
     | 
| 
      
 80 
     | 
    
         
            +
            Can be used for scheduled monitoring of transfers.
         
     | 
| 
      
 81 
     | 
    
         
            +
             
     | 
| 
      
 82 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 83 
     | 
    
         
            +
            report = Preservation::IngestReport.new
         
     | 
| 
      
 84 
     | 
    
         
            +
            report.transfer_exception
         
     | 
| 
      
 85 
     | 
    
         
            +
            ```
         
     | 
| 
      
 86 
     | 
    
         
            +
             
     | 
| 
      
 87 
     | 
    
         
            +
            ## Documentation
         
     | 
| 
      
 88 
     | 
    
         
            +
            [API in YARD](http://www.rubydoc.info/gems/preservation)
         
     | 
    
        data/Rakefile
    ADDED
    
    | 
         @@ -0,0 +1 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require "bundler/gem_tasks"
         
     | 
    
        data/lib/preservation.rb
    ADDED
    
    | 
         @@ -0,0 +1,24 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'date'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'fileutils'
         
     | 
| 
      
 3 
     | 
    
         
            +
            require 'free_disk_space'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'logger'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require 'puree'
         
     | 
| 
      
 6 
     | 
    
         
            +
            require 'sqlite3'
         
     | 
| 
      
 7 
     | 
    
         
            +
            require 'preservation/configuration'
         
     | 
| 
      
 8 
     | 
    
         
            +
            require 'preservation/ingest_report'
         
     | 
| 
      
 9 
     | 
    
         
            +
            require 'preservation/ingest'
         
     | 
| 
      
 10 
     | 
    
         
            +
            require 'preservation/pure_ingest'
         
     | 
| 
      
 11 
     | 
    
         
            +
            require 'preservation/string_util'
         
     | 
| 
      
 12 
     | 
    
         
            +
            require 'preservation/version'
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
            # Top level namespace
         
     | 
| 
      
 15 
     | 
    
         
            +
            #
         
     | 
| 
      
 16 
     | 
    
         
            +
            module Preservation
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
              class << self
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
                include Preservation::Configuration
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
              end
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,163 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Preservation
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
              # Base class for metadata and file management
         
     | 
| 
      
 4 
     | 
    
         
            +
              #
         
     | 
| 
      
 5 
     | 
    
         
            +
              class Ingest
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
                attr_reader :logger
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
                # Prepare an ingest: check that an ingest path is configured, then
                # set up the logger and the ingest report, in that order.
                def initialize
                  check_ingest_path
                  setup_logger
                  setup_report
                end
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
                # Free up disk space for completed transfers
         
     | 
| 
      
 16 
     | 
    
         
            +
                #
         
     | 
| 
      
 17 
     | 
    
         
            +
                # Remove the ingest directories of completed transfers to reclaim
                # disk space, logging every directory that is deleted.
                def cleanup_preserved
                  completed_dirs = get_preserved
                  return if completed_dirs.nil? || completed_dirs.empty?

                  completed_dirs.each do |dir|
                    # leave alone anything whose group differs from this process
                    next unless File.stat(dir).grpowned?

                    FileUtils.remove_dir dir
                    @logger.info 'Deleted ' + dir
                  end
                end
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
                private
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
                # Build the wget command line that downloads a file using HTTP
                # authentication.
                #
                # Every caller-supplied value is shell-escaped, so credentials or
                # URLs containing spaces, quotes, `$`, `&` or `;` can neither break
                # the command nor inject additional shell commands.
                #
                # @param username [String]
                # @param password [String]
                # @param file_url [String]
                # @return [String] command line intended to be run by a shell
                def build_wget(username, password, file_url)
                  require 'shellwords'

                  [
                    'wget',
                    '--user', Shellwords.escape(username),
                    '--password', Shellwords.escape(password),
                    Shellwords.escape(file_url),
                    '--no-check-certificate'
                  ].join(' ')
                end
         
     | 
| 
      
 51 
     | 
    
         
            +
             
     | 
| 
      
 52 
     | 
    
         
            +
                # Stop the program with a message when no ingest path has been
                # configured.
                def check_ingest_path
                  return unless Preservation.ingest_path.nil?

                  puts 'Missing ingest path'
                  exit
                end
         
     | 
| 
      
 58 
     | 
    
         
            +
             
     | 
| 
      
 59 
     | 
    
         
            +
                # Create the logger if there is none yet and set its level to INFO.
                # Logs go to STDOUT when no log path is configured, otherwise to
                # the configured file, opened in append mode.
                def setup_logger
                  if @logger.nil?
                    log_path = Preservation.log_path
                    # Intended retention: today and the past 20 days.
                    # NOTE(review): the rotation arguments are passed together with
                    # an already-open File rather than a path; confirm that Logger
                    # rotates in that case.
                    @logger = if log_path.nil?
                                Logger.new STDOUT
                              else
                                Logger.new File.new(log_path, 'a'), 20, 'daily'
                              end
                  end
                  @logger.level = Logger::INFO
                end
         
     | 
| 
      
 70 
     | 
    
         
            +
             
     | 
| 
      
 71 
     | 
    
         
            +
                # Create the ingest report, or stop the program with a message when
                # no database path has been configured.
                def setup_report
                  if Preservation.db_path.nil?
                    puts 'Missing db path'
                    exit
                  end

                  @report = IngestReport.new
                end
         
     | 
| 
      
 79 
     | 
    
         
            +
             
     | 
| 
      
 80 
     | 
    
         
            +
                # Is there room on disk for a download of the given size?
                #
                # The requirement is doubled before being compared with the free
                # space reported for '/'.
                #
                # @param required_bytes [Integer]
                # @return [Boolean]
                def enough_storage_for_download?(required_bytes)
                  safety_factor = 2
                  free_bytes = FreeDiskSpace.bytes('/')
                  (required_bytes * safety_factor) < free_bytes
                end
         
     | 
| 
      
 86 
     | 
    
         
            +
             
     | 
| 
      
 87 
     | 
    
         
            +
                # Build the name of the directory for a record according to a
                # naming scheme.
                #
                # Spaces and slashes in the title, and slashes in the DOI, are
                # replaced by '-'. Multi-part names are joined with '-----'.
                # A missing title or DOI is treated as an empty string.
                #
                # @param metadata_record [Hash] read via the 'doi', 'uuid' and 'title' keys
                # @param directory_name_scheme [Symbol] e.g. :uuid_title, :date_time_uuid,
                #   :doi, :doi_short; any unrecognised value falls back to the uuid
                # @return [String] directory name ('' for the DOI schemes when there is no DOI)
                def build_directory_name(metadata_record, directory_name_scheme)
                  doi = metadata_record['doi'].to_s
                  uuid = metadata_record['uuid']
                  title = metadata_record['title'].to_s.strip.gsub(' ', '-').gsub('/', '-')
                  now = Time.new
                  date = now.strftime("%Y-%m-%d")
                  time = now.strftime("%H:%M:%S")
                  join_str = '-----'

                  case directory_name_scheme
                    when :uuid_title
                      [uuid, title].join(join_str)
                    when :title_uuid
                      [title, uuid].join(join_str)
                    when :date_uuid_title
                      [date, uuid, title].join(join_str)
                    when :date_title_uuid
                      [date, title, uuid].join(join_str)
                    when :date_time_uuid
                      [date, time, uuid].join(join_str)
                    when :date_time_title
                      [date, time, title].join(join_str)
                    when :date_time_uuid_title
                      [date, time, uuid, title].join(join_str)
                    when :date_time_title_uuid
                      [date, time, title, uuid].join(join_str)
                    when :uuid
                      uuid
                    when :doi
                      doi.gsub('/', '-')
                    when :doi_short
                      # non-destructive gsub: gsub! would return nil when the
                      # shortened DOI contains no '/'
                      doi.gsub('http://dx.doi.org/', '').gsub('/', '-')
                    else
                      uuid
                  end
                end
         
     | 
| 
      
 131 
     | 
    
         
            +
             
     | 
| 
      
 132 
     | 
    
         
            +
                # time_to_preserve?
         
     | 
| 
      
 133 
     | 
    
         
            +
                #
         
     | 
| 
      
 134 
     | 
    
         
            +
                # @param start_utc [String]
         
     | 
| 
      
 135 
     | 
    
         
            +
                # @param delay [Integer] days to wait (after modification date) before preserving
         
     | 
| 
      
 136 
     | 
    
         
            +
                # @return [Boolean]
         
     | 
| 
      
 137 
     | 
    
         
            +
                # Have at least +delay+ whole days passed since +start_utc+?
                def time_to_preserve?(start_utc, delay)
                  # subtracting two DateTimes gives the difference in days
                  elapsed = DateTime.now - DateTime.parse(start_utc)
                  elapsed.to_i >= delay
                end
         
     | 
| 
      
 143 
     | 
    
         
            +
             
     | 
| 
      
 144 
     | 
    
         
            +
                # Collect all paths from DB where preservation has been done
         
     | 
| 
      
 145 
     | 
    
         
            +
                # @return [Array<String>]
         
     | 
| 
      
 146 
     | 
    
         
            +
                # Collect the directories of completed transfers that still exist
                # inside the ingest path.
                #
                # @return [Array<String>] full paths (ingest path + '/' + stored path)
                def get_preserved
                  ingest_complete = @report.transfer_status(status_to_find: 'COMPLETE',
                                                            status_presence: true)
                  preserved = []
                  ingest_complete.each do |i|
                    # a record carries no 'path' key when its stored path is empty
                    next if i['path'].nil?

                    dir_path = Preservation.ingest_path + '/' + i['path']
                    # File.exist? rather than File.exists?, which Ruby 3.2 removed
                    preserved << dir_path if File.exist?(dir_path)
                  end

                  preserved
                end
         
     | 
| 
      
 159 
     | 
    
         
            +
             
     | 
| 
      
 160 
     | 
    
         
            +
              end
         
     | 
| 
      
 161 
     | 
    
         
            +
             
     | 
| 
      
 162 
     | 
    
         
            +
            end
         
     | 
| 
      
 163 
     | 
    
         
            +
             
     | 
| 
         @@ -0,0 +1,172 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Preservation
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
              # Ingest reporting
         
     | 
| 
      
 4 
     | 
    
         
            +
              #
         
     | 
| 
      
 5 
     | 
    
         
            +
              class IngestReport
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
                # Set up the report; connecting to the database is delegated to
                # create_db_connection.
                def initialize
                  create_db_connection
                end
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
                # Transfers based on presence (or not) of a particular status
         
     | 
| 
      
 12 
     | 
    
         
            +
                #
         
     | 
| 
      
 13 
     | 
    
         
            +
                # @param status_to_find [String]
         
     | 
| 
      
 14 
     | 
    
         
            +
                # @param status_presence [Boolean]
         
     | 
| 
      
 15 
     | 
    
         
            +
                # List the units whose status matches (or does not match) a value.
                #
                # Empty or missing column values are left out of each record.
                #
                # @return [Array<Hash>] one hash per row
                def transfer_status(status_to_find: nil, status_presence: true)
                  comparison = status_presence === true ? '=' : '<>'

                  # Archivematica stores path as BLOB, so it is read through the
                  # DB hex function and converted back afterwards
                  query = "SELECT id, uuid, hex(path) as hex_path, unit_type, status, microservice, current FROM unit WHERE status #{comparison} ?"

                  records = []
                  @db.results_as_hash = true
                  @db.execute( query, [ status_to_find ] ) do |row|
                    path = StringUtil.hex_to_bin row['hex_path']

                    record = {}
                    record['path'] = path unless path.empty?
                    %w[unit_type status microservice].each do |field|
                      record[field] = row[field] unless row[field].empty?
                    end
                    record['current'] = row['current'] if row['current']
                    record['id'] = row['id'] if row['id']
                    record['uuid'] = row['uuid'] unless row['uuid'].empty?

                    records << record
                  end

                  records
                end
         
     | 
| 
      
 50 
     | 
    
         
            +
             
     | 
| 
      
 51 
     | 
    
         
            +
                # Current transfer
         
     | 
| 
      
 52 
     | 
    
         
            +
                #
         
     | 
| 
      
 53 
     | 
    
         
            +
                # @return [Hash]
         
     | 
| 
      
 54 
     | 
    
         
            +
                # Describe the unit flagged as current (current = 1).
                #
                # Empty or missing column values are left out. If several rows
                # match, values from later rows overwrite earlier ones.
                #
                # @return [Hash] empty when no unit is current
                def transfer_current
                  query = "SELECT id, uuid, hex(path) as hex_path, unit_type, status, microservice, current FROM unit WHERE current = 1"

                  # Archivematica stores path as BLOB, so it is read through the
                  # DB hex function and converted back afterwards
                  o = {}
                  @db.results_as_hash = true
                  @db.execute( query ) do |row|
                    id = row['id']
                    uuid = row['uuid']
                    # hex_to_bin is called on StringUtil, like the other queries in this class
                    bin_path = StringUtil.hex_to_bin row['hex_path']
                    unit_type = row['unit_type']
                    status = row['status']
                    microservice = row['microservice']
                    current = row['current']
                    o['path'] = bin_path if !bin_path.empty?
                    o['unit_type'] = unit_type if !unit_type.empty?
                    o['status'] = status if !status.empty?
                    o['microservice'] = microservice if !microservice.empty?
                    o['current'] = current if current
                    o['id'] = id if id
                    o['uuid'] = uuid if !uuid.empty?
                  end
                  o
                end
         
     | 
| 
      
 79 
     | 
    
         
            +
             
     | 
| 
      
 80 
     | 
    
         
            +
                # Count of complete transfers
         
     | 
| 
      
 81 
     | 
    
         
            +
                #
         
     | 
| 
      
 82 
     | 
    
         
            +
                # @return [Integer]
         
     | 
| 
      
 83 
     | 
    
         
            +
                # Number of units whose status is 'COMPLETE', as returned by the
                # database.
                def transfer_complete_count
                  @db.results_as_hash = true
                  @db.get_first_value( 'SELECT count(*) FROM unit WHERE status = ?', ['COMPLETE'] )
                end
         
     | 
| 
      
 90 
     | 
    
         
            +
             
     | 
| 
      
 91 
     | 
    
         
            +
                # Compilation of statistics and data, with focus on exceptions
         
     | 
| 
      
 92 
     | 
    
         
            +
                #
         
     | 
| 
      
 93 
     | 
    
         
            +
                # @return [Hash]
         
     | 
| 
      
 94 
     | 
    
         
            +
                # Summarise the transfers: the current one, the failed and the
                # incomplete ones (count plus records), and the completed count.
                #
                # @return [Hash] keyed by 'current' (only when there is one),
                #   'failed', 'incomplete' and 'complete'
                def transfer_exception
                  incomplete = transfer_status(status_to_find: 'COMPLETE', status_presence: false)
                  failed = transfer_status(status_to_find: 'FAILED', status_presence: true)
                  current = transfer_current
                  complete_count = transfer_complete_count

                  summary = {}
                  summary['current'] = current unless current.empty?

                  failed_section = { 'count' => failed.count }
                  failed_section['data'] = failed unless failed.empty?
                  summary['failed'] = failed_section

                  incomplete_section = { 'count' => incomplete.count }
                  incomplete_section['data'] = incomplete unless incomplete.empty?
                  summary['incomplete'] = incomplete_section

                  complete_section = {}
                  complete_section['count'] = complete_count if complete_count
                  summary['complete'] = complete_section

                  summary
                end
         
     | 
| 
      
 111 
     | 
    
         
            +
             
     | 
| 
      
 112 
     | 
    
         
            +
                # Is it in database?
         
     | 
| 
      
 113 
     | 
    
         
            +
                # @param path_to_find [String] directory name within ingest path
         
     | 
| 
      
 114 
     | 
    
         
            +
                # @return [Boolean]
         
     | 
| 
      
 115 
     | 
    
         
            +
                # Does any unit in the database have the given path?
                def in_db?(path_to_find)
                  found = false

                  # Archivematica stores path as BLOB, so every path is fetched as
                  # a hex string and converted back before comparing
                  @db.execute( 'SELECT hex(path) FROM unit' ) do |row|
                    found = true if StringUtil.hex_to_bin(row[0]) === path_to_find
                  end

                  found
                end
         
     | 
| 
      
 132 
     | 
    
         
            +
             
     | 
| 
      
 133 
     | 
    
         
            +
                # Has preservation been done?
                #
                # Looks in the unit table for a row whose unit_type is 'ingest', whose
                # status is 'COMPLETE' and whose path is byte-for-byte equal to
                # path_to_find.
                #
                # @param path_to_find [String] directory name within ingest path
                # @return [Boolean] true if a matching completed ingest exists
                def preserved?(path_to_find)
                  # Anything other than a String can never match a stored path.
                  return false unless path_to_find.is_a?(String)

                  # 'ingest' value in unit_type and 'COMPLETE' value in status DB fields
                  # indicates completed
                  unit_type_to_find = 'ingest'
                  status_to_find = 'COMPLETE'

                  # Archivematica stores path as BLOB, so the path is fetched as a hex
                  # string (hex function in DB query) and decoded here.
                  query = 'SELECT hex(path) FROM unit WHERE unit_type = ? AND status = ?'

                  # Compare raw bytes. The decoded path is a binary string, and Ruby
                  # treats a binary string and a UTF-8 string as unequal as soon as they
                  # contain non-ASCII bytes, even when the bytes are identical.
                  wanted = path_to_find.b

                  rows = @db.execute( query, [ unit_type_to_find, status_to_find ] )
                  rows.any? { |row| StringUtil.hex_to_bin(row[0]).b == wanted }
                end
         
     | 
| 
      
 158 
     | 
    
         
            +
             
     | 
| 
      
 159 
     | 
    
         
            +
             
     | 
| 
      
 160 
     | 
    
         
            +
                private
         
     | 
| 
      
 161 
     | 
    
         
            +
             
     | 
| 
      
 162 
     | 
    
         
            +
                # Open the SQLite database at Preservation.db_path and keep the handle
                # in @db.
                #
                # If db_path has not been configured the process is terminated: the
                # message is written to standard error and the exit status is non-zero,
                # so a calling script or scheduler can tell that the run failed.
                def create_db_connection
                  abort 'Missing db_path' if Preservation.db_path.nil?

                  @db = SQLite3::Database.new Preservation.db_path
                end
         
     | 
| 
      
 169 
     | 
    
         
            +
             
     | 
| 
      
 170 
     | 
    
         
            +
              end
         
     | 
| 
      
 171 
     | 
    
         
            +
             
     | 
| 
      
 172 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,188 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Preservation

              # Ingest for Pure
              #
              # Prepares transfer directories (data files plus a JSON description file)
              # in the ingest path for datasets held in Pure.
              class PureIngest < Ingest

                def initialize
                  super
                end

                # For each uuid, if necessary, fetch the metadata,
                # prepare a directory in the ingest path and populate it with the files and
                # JSON description file.
                #
                # A dataset is skipped, and the skip is logged, unless a directory name
                # could be built, the directory is not already in the report database,
                # the dataset has a DOI and at least one file, and the delay has passed.
                # A dataset is also skipped when there is not enough disk space for its
                # files, or when its metadata file already exists in the ingest path.
                #
                # @param uuids [Array<String>] uuids to preserve
                # @param dir_name_scheme [Symbol] method to make directory name
                # @param delay [Integer] days to wait (after modification date) before preserving
                def prepare_dataset(uuids: [],
                                    dir_name_scheme: :uuid,
                                    delay: 0)
                  dir_base_path = Preservation.ingest_path

                  uuids.each do |uuid|
                    dataset = Puree::Dataset.new
                    dataset.find uuid: uuid
                    d = dataset.metadata
                    if d.empty?
                      @logger.info 'No metadata for ' + uuid
                      next
                    end
                    # configurable to become more human-readable
                    dir_name = build_directory_name(d, dir_name_scheme)

                    unless ready_to_prepare?(d, dir_name, delay)
                      # Interpolated, not concatenated: dir_name may be nil on this path
                      # and String#+ would raise a TypeError.
                      @logger.info "Skipping #{dir_name}, Pure UUID #{d['uuid']}"
                      next
                    end

                    dir_file_path = dir_base_path + '/' + dir_name
                    dir_metadata_path = dir_file_path + '/metadata/'
                    metadata_filename = dir_metadata_path + 'metadata.json'

                    # calculate total size of data files
                    download_storage_required =
                      d['file'].inject(0) { |total, i| total + i['size'].to_i }

                    # do we have enough space in filesystem to fetch data files?
                    unless enough_storage_for_download? download_storage_required
                      @logger.error 'Insufficient disk space to store files fetched from Pure. Skipping ' + dir_file_path
                      next
                    end

                    # has metadata file been created? if so, files and metadata are in place
                    # continue only if files not present in ingest location
                    next if File.size? metadata_filename

                    @logger.info 'Preparing ' + dir_name + ', Pure UUID ' + d['uuid']

                    # Related publications belong to the dataset, not to a single file,
                    # so look them up in Pure once instead of once per file.
                    related = related_publication_metadata d

                    data = []
                    d['file'].each do |f|
                      data << package_dataset_metadata(d, f, related)
                      fetch_dataset_file f, dir_file_path
                    end

                    Dir.mkdir(dir_metadata_path) unless Dir.exist?(dir_metadata_path)

                    pretty = JSON.pretty_generate( data, :indent => '  ')
                    File.write(metadata_filename, pretty)
                    @logger.info 'Created ' + metadata_filename
                  end
                end

                private

                # Decide whether a dataset should be prepared now.
                #
                # continue only if dir_name is not empty (e.g. because there was no DOI)
                # continue only if there is no DB entry
                # continue only if the dataset has a DOI
                # continue only if there are files for this resource
                # continue only if it is time to preserve
                #
                # @param d [Hash] dataset metadata
                # @param dir_name [String, nil] directory name built for the dataset
                # @param delay [Integer] days to wait (after modification date) before preserving
                # @return [Boolean]
                def ready_to_prepare?(d, dir_name, delay)
                  !dir_name.nil? &&
                    !dir_name.empty? &&
                    !@report.in_db?(dir_name) &&
                    !d['doi'].empty? &&
                    !d['file'].empty? &&
                    time_to_preserve?(d['modified'], delay)
                end

                # Download one data file into the dataset directory, creating the
                # directory first if it does not exist.
                #
                # @param f [Hash] file metadata (reads 'url' and 'name')
                # @param dir_file_path [String] directory to download into
                def fetch_dataset_file(f, dir_file_path)
                  wget_str = build_wget Puree.username,
                                        Puree.password,
                                        f['url']

                  Dir.mkdir(dir_file_path) unless Dir.exist?(dir_file_path)

                  # fetch the file
                  Dir.chdir(dir_file_path) do
                    # Drop any non-empty copy left by an earlier run before fetching again.
                    File.delete(f['name']) if File.size?(f['name'])

                    `#{wget_str}`

                    # Report a failed download instead of ignoring it. The command line
                    # itself is not logged because it is built from the Pure credentials.
                    unless $?.success?
                      @logger.error "Failed to fetch #{f['name']} from Pure (wget exit status #{$?.exitstatus})"
                    end
                  end
                end

                # Build the description of one data file for the JSON description file.
                #
                # @param d [Hash] dataset metadata
                # @param f [Hash] file metadata (reads 'name' and 'license')
                # @param related [Array<Hash>, nil] related publication entries already
                #   looked up for this dataset; fetched from Pure when not supplied
                # @return [Hash] Dublin Core style description of the file
                def package_dataset_metadata(d, f, related = nil)
                  o = {}
                  o['filename'] = 'objects/' + f['name']
                  o['dc.title'] = d['title']
                  o['dc.description'] = d['description'] unless d['description'].empty?
                  o['dcterms.created'] = d['created']
                  unless d['available']['year'].empty?
                    o['dcterms.available'] = Puree::Date.iso(d['available'])
                  end
                  o['dc.publisher'] = d['publisher']
                  o['dc.identifier'] = d['doi'] unless d['doi'].empty?
                  o['dcterms.spatial'] = d['spatial'] unless d['spatial'].empty?

                  # Temporal coverage is "start" or "start/end", and is left out
                  # altogether when there is no start year.
                  unless d['temporal']['start']['year'].empty?
                    temporal_range = [Puree::Date.iso(d['temporal']['start'])]
                    unless d['temporal']['end']['year'].empty?
                      temporal_range << Puree::Date.iso(d['temporal']['end'])
                    end
                    o['dcterms.temporal'] = temporal_range.join('/')
                  end

                  # People are listed as "Last, First" and split by their role.
                  display_name = lambda do |person|
                    person['name']['last'] + ', ' + person['name']['first']
                  end
                  creators = []
                  contributors = []
                  %w(internal external other).each do |person_type|
                    d['person'][person_type].each do |i|
                      case i['role']
                      when 'Creator'
                        creators << display_name.call(i)
                      when 'Contributor'
                        contributors << display_name.call(i)
                      end
                    end
                  end
                  o['dc.creator'] = creators
                  o['dc.contributor'] = contributors unless contributors.empty?

                  keywords = d['keyword'].to_a
                  o['dc.subject'] = keywords unless keywords.empty?

                  unless f['license']['name'].empty?
                    o['dcterms.license'] = f['license']['name']
                  end
                  # o['dc.format'] = f['mime']

                  related = related_publication_metadata(d) if related.nil?
                  o['related'] = related unless related.empty?

                  o
                end

                # Describe the publications related to a dataset. Each publication is
                # looked up in Pure to obtain its DOI.
                #
                # @param d [Hash] dataset metadata (reads 'publication')
                # @return [Array<Hash>] one entry per related publication
                def related_publication_metadata(d)
                  d['publication'].map do |i|
                    o_related = {}
                    o_related['dc.title'] = i['title']
                    o_related['type'] = i['type']
                    pub = Puree::Publication.new
                    pub.find uuid: i['uuid']
                    doi = pub.doi
                    o_related['dc.identifier'] = doi if doi
                    o_related
                  end
                end

              end

            end
         
     | 
| 
         @@ -0,0 +1,19 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            module Preservation

              # String utilities
              #
              module StringUtil
                # Binary to hexadecimal
                #
                # Every byte becomes exactly two lowercase hex digits, so the result
                # can be turned back into the original bytes with hex_to_bin.
                #
                # @param s [String] bytes to encode
                # @return [String] hex digits, two per byte
                def self.bin_to_hex(s)
                  # %02x pads bytes below 0x10 ("\n" => "0a"); without the padding
                  # the digits of neighbouring bytes run together.
                  s.each_byte.map { |b| format('%02x', b) }.join
                end

                # Hexadecimal to binary
                #
                # Reads the input two characters at a time. A trailing unpaired
                # character is ignored.
                #
                # @param s [String] hex digits, two per byte
                # @return [String] decoded bytes
                def self.hex_to_bin(s)
                  s.scan(/../).map { |x| x.hex.chr }.join
                end

              end

            end
         
     | 
| 
         @@ -0,0 +1,26 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # coding: utf-8
         
     | 
| 
      
 2 
     | 
    
         
            +
            lib = File.expand_path('../lib', __FILE__)
         
     | 
| 
      
 3 
     | 
    
         
            +
            $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'preservation/version'
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            Gem::Specification.new do |gem_spec|
              # Identity and descriptive text.
              gem_spec.name        = 'preservation'
              gem_spec.version     = Preservation::VERSION
              gem_spec.authors     = ['Adrian Albin-Clark']
              gem_spec.email       = ['a.albin-clark@lancaster.ac.uk']
              gem_spec.summary     = "Ingest management for Archivematica's Automation Tools."
              gem_spec.description = "Transfer preparation, reporting and disk space management for Archivematica's Automation Tools."
              gem_spec.homepage    = 'https://rubygems.org/gems/preservation'
              gem_spec.license     = 'MIT'

              # Package every file tracked by git. Executables and test files are
              # picked out of that list by their leading directory.
              gem_spec.files         = `git ls-files -z`.split("\x0")
              gem_spec.executables   = gem_spec.files.grep(%r{^bin/}) { |path| File.basename(path) }
              gem_spec.test_files    = gem_spec.files.grep(%r{^(test|spec|features)/})
              gem_spec.require_paths = %w[lib]

              gem_spec.required_ruby_version = '~> 2.1'

              # Runtime dependencies, as gem name => version requirement.
              {
                'free_disk_space' => '~> 1.0',
                'puree'           => '~> 0.17',
                'sqlite3'         => '~> 1.3'
              }.each do |gem_name, requirement|
                gem_spec.add_runtime_dependency gem_name, requirement
              end
            end
         
     | 
    
        metadata
    ADDED
    
    | 
         @@ -0,0 +1,103 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            --- !ruby/object:Gem::Specification
         
     | 
| 
      
 2 
     | 
    
         
            +
            name: preservation
         
     | 
| 
      
 3 
     | 
    
         
            +
            version: !ruby/object:Gem::Version
         
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.1.0
         
     | 
| 
      
 5 
     | 
    
         
            +
            platform: ruby
         
     | 
| 
      
 6 
     | 
    
         
            +
            authors:
         
     | 
| 
      
 7 
     | 
    
         
            +
            - Adrian Albin-Clark
         
     | 
| 
      
 8 
     | 
    
         
            +
            autorequire: 
         
     | 
| 
      
 9 
     | 
    
         
            +
            bindir: bin
         
     | 
| 
      
 10 
     | 
    
         
            +
            cert_chain: []
         
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2016-09-13 00:00:00.000000000 Z
         
     | 
| 
      
 12 
     | 
    
         
            +
            dependencies:
         
     | 
| 
      
 13 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 14 
     | 
    
         
            +
              name: free_disk_space
         
     | 
| 
      
 15 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 16 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 17 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 18 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 19 
     | 
    
         
            +
                    version: '1.0'
         
     | 
| 
      
 20 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 21 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 22 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 23 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 24 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 25 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 26 
     | 
    
         
            +
                    version: '1.0'
         
     | 
| 
      
 27 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 28 
     | 
    
         
            +
              name: puree
         
     | 
| 
      
 29 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 30 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 31 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 32 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 33 
     | 
    
         
            +
                    version: '0.17'
         
     | 
| 
      
 34 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 35 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 36 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 37 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 38 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 39 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 40 
     | 
    
         
            +
                    version: '0.17'
         
     | 
| 
      
 41 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 42 
     | 
    
         
            +
              name: sqlite3
         
     | 
| 
      
 43 
     | 
    
         
            +
              requirement: !ruby/object:Gem::Requirement
         
     | 
| 
      
 44 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 45 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 46 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 47 
     | 
    
         
            +
                    version: '1.3'
         
     | 
| 
      
 48 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 49 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 50 
     | 
    
         
            +
              version_requirements: !ruby/object:Gem::Requirement
         
     | 
| 
      
 51 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 52 
     | 
    
         
            +
                - - "~>"
         
     | 
| 
      
 53 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 54 
     | 
    
         
            +
                    version: '1.3'
         
     | 
| 
      
 55 
     | 
    
         
            +
            description: Transfer preparation, reporting and disk space management for Archivematica's
         
     | 
| 
      
 56 
     | 
    
         
            +
              Automation Tools.
         
     | 
| 
      
 57 
     | 
    
         
            +
            email:
         
     | 
| 
      
 58 
     | 
    
         
            +
            - a.albin-clark@lancaster.ac.uk
         
     | 
| 
      
 59 
     | 
    
         
            +
            executables: []
         
     | 
| 
      
 60 
     | 
    
         
            +
            extensions: []
         
     | 
| 
      
 61 
     | 
    
         
            +
            extra_rdoc_files: []
         
     | 
| 
      
 62 
     | 
    
         
            +
            files:
         
     | 
| 
      
 63 
     | 
    
         
            +
            - ".gitignore"
         
     | 
| 
      
 64 
     | 
    
         
            +
            - CHANGELOG.md
         
     | 
| 
      
 65 
     | 
    
         
            +
            - Gemfile
         
     | 
| 
      
 66 
     | 
    
         
            +
            - LICENSE.txt
         
     | 
| 
      
 67 
     | 
    
         
            +
            - PITCHME.md
         
     | 
| 
      
 68 
     | 
    
         
            +
            - README.md
         
     | 
| 
      
 69 
     | 
    
         
            +
            - Rakefile
         
     | 
| 
      
 70 
     | 
    
         
            +
            - lib/preservation.rb
         
     | 
| 
      
 71 
     | 
    
         
            +
            - lib/preservation/configuration.rb
         
     | 
| 
      
 72 
     | 
    
         
            +
            - lib/preservation/ingest.rb
         
     | 
| 
      
 73 
     | 
    
         
            +
            - lib/preservation/ingest_report.rb
         
     | 
| 
      
 74 
     | 
    
         
            +
            - lib/preservation/pure_ingest.rb
         
     | 
| 
      
 75 
     | 
    
         
            +
            - lib/preservation/string_util.rb
         
     | 
| 
      
 76 
     | 
    
         
            +
            - lib/preservation/version.rb
         
     | 
| 
      
 77 
     | 
    
         
            +
            - preservation.gemspec
         
     | 
| 
      
 78 
     | 
    
         
            +
            homepage: https://rubygems.org/gems/preservation
         
     | 
| 
      
 79 
     | 
    
         
            +
            licenses:
         
     | 
| 
      
 80 
     | 
    
         
            +
            - MIT
         
     | 
| 
      
 81 
     | 
    
         
            +
            metadata: {}
         
     | 
| 
      
 82 
     | 
    
         
            +
            post_install_message: 
         
     | 
| 
      
 83 
     | 
    
         
            +
            rdoc_options: []
         
     | 
| 
      
 84 
     | 
    
         
            +
            require_paths:
         
     | 
| 
      
 85 
     | 
    
         
            +
            - lib
         
     | 
| 
      
 86 
     | 
    
         
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 87 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 88 
     | 
    
         
            +
              - - "~>"
         
     | 
| 
      
 89 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 90 
     | 
    
         
            +
                  version: '2.1'
         
     | 
| 
      
 91 
     | 
    
         
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         
     | 
| 
      
 92 
     | 
    
         
            +
              requirements:
         
     | 
| 
      
 93 
     | 
    
         
            +
              - - ">="
         
     | 
| 
      
 94 
     | 
    
         
            +
                - !ruby/object:Gem::Version
         
     | 
| 
      
 95 
     | 
    
         
            +
                  version: '0'
         
     | 
| 
      
 96 
     | 
    
         
            +
            requirements: []
         
     | 
| 
      
 97 
     | 
    
         
            +
            rubyforge_project: 
         
     | 
| 
      
 98 
     | 
    
         
            +
            rubygems_version: 2.2.2
         
     | 
| 
      
 99 
     | 
    
         
            +
            signing_key: 
         
     | 
| 
      
 100 
     | 
    
         
            +
            specification_version: 4
         
     | 
| 
      
 101 
     | 
    
         
            +
            summary: Ingest management for Archivematica's Automation Tools.
         
     | 
| 
      
 102 
     | 
    
         
            +
            test_files: []
         
     | 
| 
      
 103 
     | 
    
         
            +
            has_rdoc: 
         
     |