dwcr 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
module DwCR
  #
  module Metaschema
    # Mixin with helpers that read values from the nodes of a DwCA _meta.xml_
    # document (as parsed by Nokogiri) and turn them into attribute hashes
    # for the class the mixin is included in.
    #
    # Every <tt>*_from</tt> reader takes an xml node and returns the parsed
    # value, or +nil+ when the node does not carry that value; +values_from+
    # drops +nil+ results.
    module XMLParsable
      # Validates the parsed _meta.xml_ document _xml_ and returns it.
      #
      # Raises ArgumentError if
      # - the document has no root or the root is not named _archive_
      # - there is no _core_ node, or more than one
      # - any element other than _core_ or _extension_ is left over
      def self.validate_meta(xml)
        # an empty document has no root node; report that as an invalid
        # archive rather than failing with NoMethodError on nil
        raise ArgumentError, 'Root is not archive' unless xml.root&.name == 'archive'

        xml_elements = xml.root.elements
        xml_core = xml_elements.css 'core'
        raise ArgumentError, 'Missing core node' if xml_core.empty?
        raise ArgumentError, 'Multiple core nodes' if xml_core.count > 1

        # whatever remains after removing core and extensions is not allowed
        xml_elements -= xml_core
        xml_xtns = xml_elements.css 'extension'
        xml_elements -= xml_xtns
        raise ArgumentError, 'Invalid node' unless xml_elements.empty?

        xml
      end

      # Loads and validates the _meta.xml_ file from _path_
      # if _path_ is a directory, will look for _meta.xml_ inside _path_
      # will default to the working directory if no _path_ is given
      # returns the validated document (see +validate_meta+ for errors raised)
      def self.load_meta(path = nil)
        path ||= Dir.pwd
        meta = File.directory?(path) ? File.join(path, 'meta.xml') : path
        xml = File.open(meta) { |f| Nokogiri::XML(f) }
        XMLParsable.validate_meta xml
      end

      # Returns the value of the _default_ attribute of an xml node
      # or +nil+ if the node has no such attribute
      # applies to _field_ nodes
      def default_from(xml)
        xml.attributes['default']&.value
      end

      # Returns +true+ if the xml node represents the _core_,
      # +false+ if it represents an _extension_
      # raises ArgumentError for any other node name
      # applies to child nodes of _archive_ (entities)
      def is_core_from(xml)
        case xml.name
        when 'core'
          true
        when 'extension'
          false
        else
          raise ArgumentError, "invalid node name: '#{xml.name}'"
        end
      end

      # Returns the _index_ of the first _coreid_ node found in the xml node
      # or, if there is none, the _index_ attribute of the node itself
      # returns +nil+ if the relevant node has no _index_ attribute
      # applies to _field_ and _extension_ nodes
      def index_from(xml)
        key_index = xml.css('coreid').first
        indexed_node = key_index || xml
        indexed_node.attributes['index']&.value&.to_i
      end

      # Returns the index of the key column in
      # the _core_ (_id_)
      # or an _extension_ (_coreid_)
      # returns +nil+ if the entity has no such node or the node has no
      # _index_ attribute (instead of failing with NoMethodError on +nil+)
      # applies to child nodes of _archive_ (entities)
      def key_column_from(xml)
        key_tag = is_core_from(xml) ? 'id' : 'coreid'
        key_node = xml.css(key_tag).first
        return unless key_node

        key_node.attributes['index']&.value&.to_i
      end

      # Returns an array with the text of every _location_ node
      # found below the _files_ nodes of the xml node
      # applies to child nodes of _archive_ (entities)
      def files_from(xml)
        xml.css('files').css('location').map(&:text)
      end

      # Returns the value of the _rowType_ attribute if present,
      # the value of the _term_ attribute otherwise, +nil+ if neither exists
      # applies to _field_ nodes and child nodes of _archive_ (entities)
      def term_from(xml)
        term = xml.attributes['rowType'] || xml.attributes['term']
        term&.value
      end

      # Updates an instance of the model class the mixin is included in
      # with the values parsed from xml for the given _fields_, then saves it
      # relies on the including class to provide +update+ and +save+
      def update_from(xml, *fields)
        update values_from(xml, *fields)
        save
      end

      # Returns the name of the XMLParsable reader that corresponds to
      # _method_name_ (<tt>:term</tt> becomes <tt>'term_from'</tt>)
      #
      # NOTE(review): this shadows Object#method in every including class;
      # kept as is because renaming would change the mixin's interface
      def method(method_name)
        "#{method_name}_from"
      end

      # Returns a hash with the given _attrs_ as keys and
      # the values parsed from xml by the matching <tt>*_from</tt> reader
      # as values; pairs whose value is +nil+ are omitted
      def values_from(xml, *attrs)
        values = attrs.map { |attr| send(method(attr), xml) }
        attrs.zip(values).to_h.compact
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This module provides functionality to create a
4
+ # SQLite database from a DarwinCoreArchive
5
+ # and provides an ORM layer using http://sequel.jeremyevans.net
6
+ # Sequel::Model instances are created from the DwCA's meta.xml file
7
module DwCR
  # Creates the table for +entity+ (an Entity instance) unless it exists
  # every table gets
  # - an +id+ primary key
  # - an +entity_id+ foreign key referencing the +entities+ table
  # - a foreign key referencing the table of the entity's core,
  #   if the entity has a core
  # - one column per attribute of +entity+, except attributes that
  #   report themselves as foreign keys (<tt>foreign_key?</tt>)
  def self.create_schema_table(entity)
    DB.create_table?(entity.table_name) do
      primary_key :id
      foreign_key :entity_id, :entities
      if entity.core
        foreign_key entity.core.foreign_key, entity.core.table_name
      end
      entity.attributes.each do |attribute|
        next if attribute.foreign_key?

        column(*attribute.to_table_column)
      end
    end
  end

  # Creates the database schema for the entities of +archive+
  # passes _options_ on to Metaschema.update before any table is created,
  # then creates one table per entity
  # _options_:
  # - +type:+ +true+ or +false+
  # - +length:+ +true+ or +false+
  def self.create_schema(archive, **options)
    Metaschema.update(archive, options)
    archive.entities.each do |entity|
      create_schema_table(entity)
    end
  end

  # Creates a model for every Entity instance in +archive+,
  # registers each model as a one-to-many association on Metaschema::Entity
  # (named after the entity's table) and returns the models as an array
  # if no explicit Archive instance is given, the first one is used
  def self.load_models(archive = Metaschema::Archive.first)
    archive.entities.map do |entity|
      create_model(entity).tap do |model|
        Metaschema::Entity.associate(:one_to_many, entity.table_name, class: model)
      end
    end
  end

  # Loads the content files of the archive's core,
  # then those of every extension, by calling +load+ on each file
  def self.load_contents_for(archive)
    load_files = ->(entity) { entity.content_files.each(&:load) }
    load_files.call(archive.core)
    archive.extensions.each(&load_files)
  end
end
data/lib/dwcr.rb ADDED
@@ -0,0 +1,15 @@
1
+ require 'csv'
2
+ require 'json'
3
+ require 'nokogiri'
4
+ require 'psych'
5
+ require 'sequel'
6
+ require 'sqlite3'
7
+
8
+ require_relative 'dwca_content_analyzer/file_set'
9
+ require_relative 'dwcr/dynamic_model_queryable'
10
+ require_relative 'dwcr/dynamic_models'
11
+ require_relative 'dwcr/metaschema/metaschema'
12
+ require_relative 'dwcr/schema'
13
+
14
+ Sequel.extension :inflector
15
+ require_relative 'dwcr/inflections'
metadata ADDED
@@ -0,0 +1,92 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dwcr
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.8
5
+ platform: ruby
6
+ authors:
7
+ - Martin Stein
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-09-12 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: |+
14
+ # DwCR
15
+ DwCA in a SQLite database complete with
16
+ [Sequel](https://github.com/jeremyevans/sequel) models.
17
+
18
+ ## Command Line Tool
19
+
20
+ ### Create a new DwCR file
21
+
22
+ From within the directory of a DwC Archive:
23
+
24
+ ```sh
25
+ dwcr new -t dwcr-test.db
26
+ ```
27
+
28
+ will create the file in the directory, named after the directory
29
+
30
+ see `dwcr new --help` for options
31
+
32
+ ### Load an existing DwCR file
33
+
34
+ ```sh
35
+ dwcr load ~/documents/dwca/dwcr-test.db
36
+ ```
37
+
38
+ email: loveablelobster@fastmail.fm
39
+ executables:
40
+ - dwcr
41
+ extensions: []
42
+ extra_rdoc_files: []
43
+ files:
44
+ - LICENSE
45
+ - README.md
46
+ - bin/dwcr
47
+ - lib/cli/help.yml
48
+ - lib/cli/load.rb
49
+ - lib/cli/new.rb
50
+ - lib/cli/shell.rb
51
+ - lib/dwca_content_analyzer/column.rb
52
+ - lib/dwca_content_analyzer/csv_converters.rb
53
+ - lib/dwca_content_analyzer/file_contents.rb
54
+ - lib/dwca_content_analyzer/file_set.rb
55
+ - lib/dwcr.rb
56
+ - lib/dwcr/dynamic_model_queryable.rb
57
+ - lib/dwcr/dynamic_models.rb
58
+ - lib/dwcr/inflections.rb
59
+ - lib/dwcr/metaschema/archive.rb
60
+ - lib/dwcr/metaschema/attribute.rb
61
+ - lib/dwcr/metaschema/content_file.rb
62
+ - lib/dwcr/metaschema/entity.rb
63
+ - lib/dwcr/metaschema/metaschema.rb
64
+ - lib/dwcr/metaschema/metaschema_tables.yml
65
+ - lib/dwcr/metaschema/xml_parsable.rb
66
+ - lib/dwcr/schema.rb
67
+ homepage: https://github.com/loveablelobster/DwCR
68
+ licenses:
69
+ - MIT
70
+ metadata: {}
71
+ post_install_message:
72
+ rdoc_options: []
73
+ require_paths:
74
+ - lib
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '2.5'
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ requirements:
86
+ - SQLite
87
+ rubyforge_project:
88
+ rubygems_version: 2.7.6
89
+ signing_key:
90
+ specification_version: 4
91
+ summary: DwCA stored in a SQLite database, with Sequel models
92
+ test_files: []