dwcr 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +24 -0
- data/bin/dwcr +32 -0
- data/lib/cli/help.yml +66 -0
- data/lib/cli/load.rb +25 -0
- data/lib/cli/new.rb +43 -0
- data/lib/cli/shell.rb +55 -0
- data/lib/dwca_content_analyzer/column.rb +53 -0
- data/lib/dwca_content_analyzer/csv_converters.rb +16 -0
- data/lib/dwca_content_analyzer/file_contents.rb +43 -0
- data/lib/dwca_content_analyzer/file_set.rb +46 -0
- data/lib/dwcr/dynamic_model_queryable.rb +100 -0
- data/lib/dwcr/dynamic_models.rb +27 -0
- data/lib/dwcr/inflections.rb +16 -0
- data/lib/dwcr/metaschema/archive.rb +86 -0
- data/lib/dwcr/metaschema/attribute.rb +84 -0
- data/lib/dwcr/metaschema/content_file.rb +130 -0
- data/lib/dwcr/metaschema/entity.rb +195 -0
- data/lib/dwcr/metaschema/metaschema.rb +94 -0
- data/lib/dwcr/metaschema/metaschema_tables.yml +130 -0
- data/lib/dwcr/metaschema/xml_parsable.rb +113 -0
- data/lib/dwcr/schema.rb +58 -0
- data/lib/dwcr.rb +15 -0
- metadata +92 -0
| @@ -0,0 +1,113 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
            module DwCR
         | 
| 5 | 
            +
              #
         | 
| 6 | 
            +
              module Metaschema
         | 
| 7 | 
            +
                #
         | 
| 8 | 
            +
                module XMLParsable
         | 
| 9 | 
            +
                  # Validates the meta.xml file
         | 
| 10 | 
            +
                  # will raise errors if the file is not valid
         | 
| 11 | 
            +
                  # currently checks the root node name, requires exactly one core node, and rejects child nodes other than core or extension
         | 
| 12 | 
            +
                  def self.validate_meta(xml)
         | 
| 13 | 
            +
                    raise ArgumentError, 'Root is not archive' unless xml.root.name == 'archive'
         | 
| 14 | 
            +
             | 
| 15 | 
            +
                    xml_elements = xml.root.elements
         | 
| 16 | 
            +
                    xml_core = xml_elements.css 'core'
         | 
| 17 | 
            +
                    raise ArgumentError, 'Missing core node' if xml_core.empty?
         | 
| 18 | 
            +
                    raise ArgumentError, 'Multiple core nodes' if xml_core.count > 1
         | 
| 19 | 
            +
             | 
| 20 | 
            +
                    xml_elements -= xml_core
         | 
| 21 | 
            +
                    xml_xtns = xml_elements.css 'extension'
         | 
| 22 | 
            +
                    xml_elements -= xml_xtns
         | 
| 23 | 
            +
                    raise ArgumentError, 'Invalid node' unless xml_elements.empty?
         | 
| 24 | 
            +
             | 
| 25 | 
            +
                    xml
         | 
| 26 | 
            +
                  end
         | 
| 27 | 
            +
             | 
| 28 | 
            +
                  # Loads the _meta.xml_ file from path
         | 
| 29 | 
            +
                  # if _path_ is a directory, will try to locate the _meta.xml_ file in _path_
         | 
| 30 | 
            +
                  # will default to working directory if no _path_ is given
         | 
| 31 | 
            +
                  def self.load_meta(path = nil)
         | 
| 32 | 
            +
                    path ||= Dir.pwd
         | 
| 33 | 
            +
                    meta = File.directory?(path) ? File.join(path, 'meta.xml') : path
         | 
| 34 | 
            +
                    xml = File.open(meta) { |f| Nokogiri::XML(f) }
         | 
| 35 | 
            +
                    XMLParsable.validate_meta xml
         | 
| 36 | 
            +
                  end
         | 
| 37 | 
            +
             | 
| 38 | 
            +
                  # Parses the _default_ value from an xml node
         | 
| 39 | 
            +
                  # applies to _field_ nodes
         | 
| 40 | 
            +
                  def default_from(xml)
         | 
| 41 | 
            +
                    xml.attributes['default']&.value
         | 
| 42 | 
            +
                  end
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                  # Returns +true+ if the xml node represents the _core_
         | 
| 45 | 
            +
                  # +false+ otherwise
         | 
| 46 | 
            +
                  # applies to child nodes of _archive_ (entities)
         | 
| 47 | 
            +
                  def is_core_from(xml)
         | 
| 48 | 
            +
                    case xml.name
         | 
| 49 | 
            +
                    when 'core'
         | 
| 50 | 
            +
                      true
         | 
| 51 | 
            +
                    when 'extension'
         | 
| 52 | 
            +
                      false
         | 
| 53 | 
            +
                    else
         | 
| 54 | 
            +
                      raise ArgumentError, "invalid node name: '#{xml.name}'"
         | 
| 55 | 
            +
                    end
         | 
| 56 | 
            +
                  end
         | 
| 57 | 
            +
             | 
| 58 | 
            +
                  # Returns the index of a _field_ node
         | 
| 59 | 
            +
                  # or the _coreid_ of an _extension_ node
         | 
| 60 | 
            +
                  # applies to _field_ and _extension_ nodes
         | 
| 61 | 
            +
                  def index_from(xml)
         | 
| 62 | 
            +
                    key_index = xml.css('coreid')&.first
         | 
| 63 | 
            +
                    return xml.attributes['index']&.value&.to_i unless key_index
         | 
| 64 | 
            +
                    key_index.attributes['index'].value.to_i
         | 
| 65 | 
            +
                  end
         | 
| 66 | 
            +
             | 
| 67 | 
            +
                  # Returns the index of the key column in
         | 
| 68 | 
            +
                  # the _core_ (_id_)
         | 
| 69 | 
            +
                  # or an _extension_ (_coreid_)
         | 
| 70 | 
            +
                  # applies to child nodes of _archive_ (entities)
         | 
| 71 | 
            +
                  def key_column_from(xml)
         | 
| 72 | 
            +
                    key_tag = is_core_from(xml) ? 'id' : 'coreid'
         | 
| 73 | 
            +
                    xml.css(key_tag).first.attributes['index'].value.to_i
         | 
| 74 | 
            +
                  end
         | 
| 75 | 
            +
             | 
| 76 | 
            +
                  # Returns an array with the names for any files
         | 
| 77 | 
            +
                  # associated with a child node of _archive_ (_core_ or _extension_)
         | 
| 78 | 
            +
                  # applies to child nodes of _archive_ (entities)
         | 
| 79 | 
            +
                  def files_from(xml)
         | 
| 80 | 
            +
                    xml.css('files').css('location').map(&:text)
         | 
| 81 | 
            +
                  end
         | 
| 82 | 
            +
             | 
| 83 | 
            +
                  # Returns the term for an entity or attribute
         | 
| 84 | 
            +
                  # applies to _field_ nodes and child nodes of _archive_ (entities)
         | 
| 85 | 
            +
                  def term_from(xml)
         | 
| 86 | 
            +
                    term = xml.attributes['rowType'] || xml.attributes['term']
         | 
| 87 | 
            +
                    term&.value
         | 
| 88 | 
            +
                  end
         | 
| 89 | 
            +
             | 
| 90 | 
            +
                  # Updates an instance of the model class the mixin is included in
         | 
| 91 | 
            +
                  # with values parsed from xml
         | 
| 92 | 
            +
                  # applies to _field_ nodes
         | 
| 93 | 
            +
                  def update_from(xml, *fields)
         | 
| 94 | 
            +
                    update values_from(xml, *fields)
         | 
| 95 | 
            +
                    save
         | 
| 96 | 
            +
                  end
         | 
| 97 | 
            +
             | 
| 98 | 
            +
                  # Returns the XMLParsable method that corresponds to
         | 
| 99 | 
            +
                  # the method name of the class the mixin is included in
         | 
| 100 | 
            +
                  def method(method_name)
         | 
| 101 | 
            +
                    method_name.to_s + '_from'
         | 
| 102 | 
            +
                  end
         | 
| 103 | 
            +
             | 
| 104 | 
            +
                  # Returns a hash with model attributes as keys,
         | 
| 105 | 
            +
                  # values parsed from xml as values
         | 
| 106 | 
            +
                  # applies to _field_ nodes
         | 
| 107 | 
            +
                  def values_from(xml, *attrs)
         | 
| 108 | 
            +
                    values = attrs.map { |attr| send(method(attr), xml) }
         | 
| 109 | 
            +
                    attrs.zip(values).to_h.compact
         | 
| 110 | 
            +
                  end
         | 
| 111 | 
            +
                end
         | 
| 112 | 
            +
              end
         | 
| 113 | 
            +
            end
         | 
    
        data/lib/dwcr/schema.rb
    ADDED
    
    | @@ -0,0 +1,58 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            # This module provides functionality to create a
         | 
| 4 | 
            +
            # SQLite database from a DarwinCoreArchive
         | 
| 5 | 
            +
            # and provides an ORM layer using http://sequel.jeremyevans.net
         | 
| 6 | 
            +
            # Sequel::Model instances are created from the DwCA's meta.xml file
         | 
| 7 | 
            +
            module DwCR
         | 
| 8 | 
            +
              # Creates the table for +entity+ (an Entity instance)
         | 
| 9 | 
            +
              # inserts foreign key for entities
         | 
| 10 | 
            +
              # skips the _coreid_ field declared in _extensions_ in the DwCA meta.xml
         | 
| 11 | 
            +
              # (this field is redundant, because relationships are re-established upon
         | 
| 12 | 
            +
              # import using SQL primary and foreign keys)
         | 
| 13 | 
            +
              # inserts the proper SQL foreign key into _extensions_
         | 
| 14 | 
            +
              # adds columns for any +attributes+ associated with +entity+
         | 
| 15 | 
            +
              def self.create_schema_table(entity)
         | 
| 16 | 
            +
                DB.create_table? entity.table_name do
         | 
| 17 | 
            +
                  primary_key :id
         | 
| 18 | 
            +
                  foreign_key :entity_id, :entities
         | 
| 19 | 
            +
                  foreign_key entity.core.foreign_key, entity.core.table_name if entity.core
         | 
| 20 | 
            +
                  entity.attributes.each do |a|
         | 
| 21 | 
            +
                    column(*a.to_table_column) unless a.foreign_key?
         | 
| 22 | 
            +
                  end
         | 
| 23 | 
            +
                end
         | 
| 24 | 
            +
              end
         | 
| 25 | 
            +
             | 
| 26 | 
            +
              # Creates the database schema for the DwCA nodes
         | 
| 27 | 
            +
              # _options_:
         | 
| 28 | 
            +
              # - +type:+ +true+ or +false+
         | 
| 29 | 
            +
              # - +length:+ +true+ or +false+
         | 
| 30 | 
            +
              # if options are given, the schema will be updated
         | 
| 31 | 
            +
              # based on the DwCA files actual content,
         | 
| 32 | 
            +
              # analysing each column for type and length
         | 
| 33 | 
            +
              def self.create_schema(archive, **options)
         | 
| 34 | 
            +
                Metaschema.update(archive, options)
         | 
| 35 | 
            +
                archive.entities.each { |entity| DwCR.create_schema_table(entity) }
         | 
| 36 | 
            +
              end
         | 
| 37 | 
            +
             | 
| 38 | 
            +
              # Loads models for all Entity instances in the Archive instance
         | 
| 39 | 
            +
              # if no explicit Archive instance is given, it will load the first
         | 
| 40 | 
            +
              def self.load_models(archive = Metaschema::Archive.first)
         | 
| 41 | 
            +
                archive.entities.map do |entity|
         | 
| 42 | 
            +
                  entity_model = DwCR.create_model(entity)
         | 
| 43 | 
            +
                  Metaschema::Entity.associate(:one_to_many,
         | 
| 44 | 
            +
                                                   entity.table_name,
         | 
| 45 | 
            +
                                                   class: entity_model)
         | 
| 46 | 
            +
                  entity_model
         | 
| 47 | 
            +
                end
         | 
| 48 | 
            +
              end
         | 
| 49 | 
            +
             | 
| 50 | 
            +
              # Loads the contents of all CSV files associated with an archive
         | 
| 51 | 
            +
              # into the schema tables
         | 
| 52 | 
            +
              def self.load_contents_for(archive)
         | 
| 53 | 
            +
                archive.core.content_files.each(&:load)
         | 
| 54 | 
            +
                archive.extensions.each do |extension|
         | 
| 55 | 
            +
                  extension.content_files.each(&:load)
         | 
| 56 | 
            +
                end
         | 
| 57 | 
            +
              end
         | 
| 58 | 
            +
            end
         | 
    
        data/lib/dwcr.rb
    ADDED
    
    | @@ -0,0 +1,15 @@ | |
| 1 | 
            +
            require 'csv'
         | 
| 2 | 
            +
            require 'json'
         | 
| 3 | 
            +
            require 'nokogiri'
         | 
| 4 | 
            +
            require 'psych'
         | 
| 5 | 
            +
            require 'sequel'
         | 
| 6 | 
            +
            require 'sqlite3'
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            require_relative 'dwca_content_analyzer/file_set'
         | 
| 9 | 
            +
            require_relative 'dwcr/dynamic_model_queryable'
         | 
| 10 | 
            +
            require_relative 'dwcr/dynamic_models'
         | 
| 11 | 
            +
            require_relative 'dwcr/metaschema/metaschema'
         | 
| 12 | 
            +
            require_relative 'dwcr/schema'
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            Sequel.extension :inflector
         | 
| 15 | 
            +
            require_relative 'dwcr/inflections'
         | 
    
        metadata
    ADDED
    
    | @@ -0,0 +1,92 @@ | |
| 1 | 
            +
            --- !ruby/object:Gem::Specification
         | 
| 2 | 
            +
            name: dwcr
         | 
| 3 | 
            +
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            +
              version: 0.0.8
         | 
| 5 | 
            +
            platform: ruby
         | 
| 6 | 
            +
            authors:
         | 
| 7 | 
            +
            - Martin Stein
         | 
| 8 | 
            +
            autorequire: 
         | 
| 9 | 
            +
            bindir: bin
         | 
| 10 | 
            +
            cert_chain: []
         | 
| 11 | 
            +
            date: 2018-09-12 00:00:00.000000000 Z
         | 
| 12 | 
            +
            dependencies: []
         | 
| 13 | 
            +
            description: |+
         | 
| 14 | 
            +
              # DwCR
         | 
| 15 | 
            +
              DwCA in a SQLite database complete with
         | 
| 16 | 
            +
              [Sequel](https://github.com/jeremyevans/sequel) models.
         | 
| 17 | 
            +
             | 
| 18 | 
            +
              ## Command Line Tool
         | 
| 19 | 
            +
             | 
| 20 | 
            +
              ### Create a new DwCR file
         | 
| 21 | 
            +
             | 
| 22 | 
            +
              From within the directory of a DwC Archive:
         | 
| 23 | 
            +
             | 
| 24 | 
            +
              ```ruby
         | 
| 25 | 
            +
              dwcr new -t dwcr-test.db
         | 
| 26 | 
            +
              ```
         | 
| 27 | 
            +
             | 
| 28 | 
            +
              will create the file in the directory, named after the directory
         | 
| 29 | 
            +
             | 
| 30 | 
            +
              see `dwcr new --help` for options
         | 
| 31 | 
            +
             | 
| 32 | 
            +
              ### Load an existing DwCR file
         | 
| 33 | 
            +
             | 
| 34 | 
            +
              ```ruby
         | 
| 35 | 
            +
              dwcr load ~/documents/dwca/dwcr-test.db
         | 
| 36 | 
            +
              ```
         | 
| 37 | 
            +
             | 
| 38 | 
            +
            email: loveablelobster@fastmail.fm
         | 
| 39 | 
            +
            executables:
         | 
| 40 | 
            +
            - dwcr
         | 
| 41 | 
            +
            extensions: []
         | 
| 42 | 
            +
            extra_rdoc_files: []
         | 
| 43 | 
            +
            files:
         | 
| 44 | 
            +
            - LICENSE
         | 
| 45 | 
            +
            - README.md
         | 
| 46 | 
            +
            - bin/dwcr
         | 
| 47 | 
            +
            - lib/cli/help.yml
         | 
| 48 | 
            +
            - lib/cli/load.rb
         | 
| 49 | 
            +
            - lib/cli/new.rb
         | 
| 50 | 
            +
            - lib/cli/shell.rb
         | 
| 51 | 
            +
            - lib/dwca_content_analyzer/column.rb
         | 
| 52 | 
            +
            - lib/dwca_content_analyzer/csv_converters.rb
         | 
| 53 | 
            +
            - lib/dwca_content_analyzer/file_contents.rb
         | 
| 54 | 
            +
            - lib/dwca_content_analyzer/file_set.rb
         | 
| 55 | 
            +
            - lib/dwcr.rb
         | 
| 56 | 
            +
            - lib/dwcr/dynamic_model_queryable.rb
         | 
| 57 | 
            +
            - lib/dwcr/dynamic_models.rb
         | 
| 58 | 
            +
            - lib/dwcr/inflections.rb
         | 
| 59 | 
            +
            - lib/dwcr/metaschema/archive.rb
         | 
| 60 | 
            +
            - lib/dwcr/metaschema/attribute.rb
         | 
| 61 | 
            +
            - lib/dwcr/metaschema/content_file.rb
         | 
| 62 | 
            +
            - lib/dwcr/metaschema/entity.rb
         | 
| 63 | 
            +
            - lib/dwcr/metaschema/metaschema.rb
         | 
| 64 | 
            +
            - lib/dwcr/metaschema/metaschema_tables.yml
         | 
| 65 | 
            +
            - lib/dwcr/metaschema/xml_parsable.rb
         | 
| 66 | 
            +
            - lib/dwcr/schema.rb
         | 
| 67 | 
            +
            homepage: https://github.com/loveablelobster/DwCR
         | 
| 68 | 
            +
            licenses:
         | 
| 69 | 
            +
            - MIT
         | 
| 70 | 
            +
            metadata: {}
         | 
| 71 | 
            +
            post_install_message: 
         | 
| 72 | 
            +
            rdoc_options: []
         | 
| 73 | 
            +
            require_paths:
         | 
| 74 | 
            +
            - lib
         | 
| 75 | 
            +
            required_ruby_version: !ruby/object:Gem::Requirement
         | 
| 76 | 
            +
              requirements:
         | 
| 77 | 
            +
              - - ">="
         | 
| 78 | 
            +
                - !ruby/object:Gem::Version
         | 
| 79 | 
            +
                  version: '2.5'
         | 
| 80 | 
            +
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 81 | 
            +
              requirements:
         | 
| 82 | 
            +
              - - ">="
         | 
| 83 | 
            +
                - !ruby/object:Gem::Version
         | 
| 84 | 
            +
                  version: '0'
         | 
| 85 | 
            +
            requirements:
         | 
| 86 | 
            +
            - SQLite
         | 
| 87 | 
            +
            rubyforge_project: 
         | 
| 88 | 
            +
            rubygems_version: 2.7.6
         | 
| 89 | 
            +
            signing_key: 
         | 
| 90 | 
            +
            specification_version: 4
         | 
| 91 | 
            +
            summary: DwCA stored in a SQLite database, with Sequel models
         | 
| 92 | 
            +
            test_files: []
         |