dwcr 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,113 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
module DwCR
  # Namespace that holds the XMLParsable mixin
  module Metaschema
    # Helpers to load and validate a DwCA _meta.xml_ file and to read
    # values from its nodes.
    #
    # The module functions (+validate_meta+, +load_meta+) work on a whole
    # document; the instance methods are meant to be mixed into a model class.
    # +update_from+ relies on the including class to provide +update+ and
    # +save+ (presumably a Sequel::Model - confirm against the including
    # classes).
    #
    # All <tt>*_from</tt> readers return +nil+ when the value they look for is
    # absent, so that +values_from+ can drop it from the resulting hash.
    module XMLParsable
      # Validates the parsed _meta.xml_ document +xml+.
      # Currently only the node layout directly below the root is checked.
      #
      # Returns +xml+ unchanged when it is valid.
      #
      # Raises ArgumentError if
      # - the document has no root node or the root is not _archive_
      # - there is no _core_ node
      # - there is more than one _core_ node
      # - there are nodes other than _core_ and _extension_
      def self.validate_meta(xml)
        root = xml.root
        # an empty or unparseable document has no root node at all
        raise ArgumentError, 'Root is not archive' unless root&.name == 'archive'

        xml_elements = root.elements
        xml_core = xml_elements.css 'core'
        raise ArgumentError, 'Missing core node' if xml_core.empty?
        raise ArgumentError, 'Multiple core nodes' if xml_core.count > 1

        # whatever is left after removing core and extensions is unexpected
        xml_elements -= xml_core
        xml_xtns = xml_elements.css 'extension'
        xml_elements -= xml_xtns
        raise ArgumentError, 'Invalid node' unless xml_elements.empty?

        xml
      end

      # Loads and validates the _meta.xml_ file from +path+.
      # If +path+ is a directory, the file _meta.xml_ inside it is used;
      # +path+ defaults to the working directory when not given.
      #
      # Returns the parsed document; raises ArgumentError (see
      # +validate_meta+) if it is not valid. Errors raised by File.open
      # are not rescued here.
      def self.load_meta(path = nil)
        path ||= Dir.pwd
        meta = File.directory?(path) ? File.join(path, 'meta.xml') : path
        xml = File.open(meta) { |f| Nokogiri::XML(f) }
        XMLParsable.validate_meta xml
      end

      # Returns the value of the _default_ attribute of an xml node,
      # +nil+ if the node has no such attribute.
      # Applies to _field_ nodes.
      def default_from(xml)
        xml.attributes['default']&.value
      end

      # Returns +true+ if the xml node represents the _core_,
      # +false+ if it represents an _extension_.
      # Raises ArgumentError for any other node name.
      # Applies to child nodes of _archive_ (entities).
      def is_core_from(xml)
        case xml.name
        when 'core'
          true
        when 'extension'
          false
        else
          raise ArgumentError, "invalid node name: '#{xml.name}'"
        end
      end

      # Returns the _index_ of a _field_ node
      # or the _index_ of the _coreid_ of an _extension_ node, as Integer.
      # Returns +nil+ if the relevant node has no _index_ attribute.
      # Applies to _field_ and _extension_ nodes.
      def index_from(xml)
        # a coreid child takes precedence over the node's own attribute
        node = xml.css('coreid').first || xml
        node.attributes['index']&.value&.to_i
      end

      # Returns the index of the key column in
      # the _core_ (_id_) or an _extension_ (_coreid_), as Integer.
      # Returns +nil+ if the key node or its _index_ attribute is missing.
      # Raises ArgumentError if +xml+ is neither _core_ nor _extension_.
      # Applies to child nodes of _archive_ (entities).
      def key_column_from(xml)
        key_tag = is_core_from(xml) ? 'id' : 'coreid'
        key_node = xml.css(key_tag).first
        return nil unless key_node

        key_node.attributes['index']&.value&.to_i
      end

      # Returns an array with the text of every _location_ node found
      # below the _files_ nodes of +xml+ (empty if there are none).
      # Applies to child nodes of _archive_ (entities).
      def files_from(xml)
        xml.css('files').css('location').map(&:text)
      end

      # Returns the term for an entity or attribute: the value of the
      # _rowType_ attribute if present, otherwise of the _term_ attribute,
      # +nil+ if the node has neither.
      # Applies to _field_ nodes and child nodes of _archive_ (entities).
      def term_from(xml)
        term = xml.attributes['rowType'] || xml.attributes['term']
        term&.value
      end

      # Updates the instance of the class the mixin is included in
      # with the values parsed from +xml+ for the given +fields+,
      # then saves it. Returns the result of +save+.
      def update_from(xml, *fields)
        update values_from(xml, *fields)
        save
      end

      # Returns the name of the XMLParsable reader that corresponds to
      # +method_name+ (e.g. <tt>:term</tt> gives <tt>'term_from'</tt>).
      #
      # NOTE: this shadows Ruby's built-in Object#method in every class
      # that includes the mixin.
      def method(method_name)
        method_name.to_s + '_from'
      end

      # Returns a hash with the given +attrs+ as keys and the values read
      # from +xml+ by the matching <tt>*_from</tt> readers as values.
      # Attributes for which the reader returns +nil+ are left out.
      def values_from(xml, *attrs)
        values = attrs.map { |attr| send(method(attr), xml) }
        attrs.zip(values).to_h.compact
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This module provides functionality to create a
4
+ # SQLite database from a DarwinCoreArchive
5
+ # and provides an ORM layer using http://sequel.jeremyevans.net
6
+ # Sequel::Model instances are created from the DwCA's meta.xml file
7
module DwCR
  # Creates the table for +entity+ (an Entity instance), unless it exists.
  # Every table gets a primary key _id_ and a foreign key _entity_id_
  # referencing _entities_.
  # If +entity+ has a core (i.e. it is an _extension_), a foreign key
  # referencing the core's table is added.
  # One column is added for each of the entity's +attributes+, except for
  # those flagged as foreign key: the _coreid_ field declared in
  # _extensions_ in the DwCA meta.xml is redundant, because relationships
  # are re-established upon import using SQL primary and foreign keys.
  def self.create_schema_table(entity)
    DB.create_table?(entity.table_name) do
      primary_key :id
      foreign_key :entity_id, :entities
      foreign_key(entity.core.foreign_key, entity.core.table_name) if entity.core
      entity.attributes.each do |attribute|
        next if attribute.foreign_key?

        column(*attribute.to_table_column)
      end
    end
  end

  # Creates the database schema for the DwCA nodes of +archive+:
  # passes +archive+ and the +options+ to Metaschema.update,
  # then creates one table per entity.
  # _options_:
  # - +type:+ +true+ or +false+
  # - +length:+ +true+ or +false+
  # if options are given, the schema will be updated
  # based on the DwCA files' actual content,
  # analysing each column for type and length
  def self.create_schema(archive, **options)
    Metaschema.update(archive, options)
    archive.entities.each do |entity|
      create_schema_table(entity)
    end
  end

  # Creates a model for each Entity instance in +archive+ and registers it
  # with Metaschema::Entity as a one-to-many association named after the
  # entity's table.
  # If no explicit Archive instance is given, the first one is used.
  # Returns an array with the created models.
  def self.load_models(archive = Metaschema::Archive.first)
    archive.entities.map do |entity|
      create_model(entity).tap do |model|
        Metaschema::Entity.associate(:one_to_many, entity.table_name, class: model)
      end
    end
  end

  # Loads the contents of all files associated with +archive+
  # into the schema tables: first the files of the core,
  # then those of each extension.
  def self.load_contents_for(archive)
    load_files = ->(entity) { entity.content_files.each(&:load) }
    load_files.call(archive.core)
    archive.extensions.each(&load_files)
  end
end
data/lib/dwcr.rb ADDED
@@ -0,0 +1,15 @@
1
+ require 'csv'
2
+ require 'json'
3
+ require 'nokogiri'
4
+ require 'psych'
5
+ require 'sequel'
6
+ require 'sqlite3'
7
+
8
+ require_relative 'dwca_content_analyzer/file_set'
9
+ require_relative 'dwcr/dynamic_model_queryable'
10
+ require_relative 'dwcr/dynamic_models'
11
+ require_relative 'dwcr/metaschema/metaschema'
12
+ require_relative 'dwcr/schema'
13
+
14
+ Sequel.extension :inflector
15
+ require_relative 'dwcr/inflections'
metadata ADDED
@@ -0,0 +1,92 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: dwcr
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.8
5
+ platform: ruby
6
+ authors:
7
+ - Martin Stein
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-09-12 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: |+
14
+ # DwCR
15
+ DwCA in a SQLite database complete with
16
+ [Sequel](https://github.com/jeremyevans/sequel) models.
17
+
18
+ ## Command Line Tool
19
+
20
+ ### Create a new DwCR file
21
+
22
+ From within the directory of a DwC Archive:
23
+
24
+ ```ruby
25
+ dwcr new -t dwcr-test.db
26
+ ```
27
+
28
+ will create the file in the directory, named after the directory
29
+
30
+ see `dwcr new --help` for options
31
+
32
+ ### Load an existing DwCR file
33
+
34
+ ```ruby
35
+ dwcr load ~/documents/dwca/dwcr-test.db
36
+ ```
37
+
38
+ email: loveablelobster@fastmail.fm
39
+ executables:
40
+ - dwcr
41
+ extensions: []
42
+ extra_rdoc_files: []
43
+ files:
44
+ - LICENSE
45
+ - README.md
46
+ - bin/dwcr
47
+ - lib/cli/help.yml
48
+ - lib/cli/load.rb
49
+ - lib/cli/new.rb
50
+ - lib/cli/shell.rb
51
+ - lib/dwca_content_analyzer/column.rb
52
+ - lib/dwca_content_analyzer/csv_converters.rb
53
+ - lib/dwca_content_analyzer/file_contents.rb
54
+ - lib/dwca_content_analyzer/file_set.rb
55
+ - lib/dwcr.rb
56
+ - lib/dwcr/dynamic_model_queryable.rb
57
+ - lib/dwcr/dynamic_models.rb
58
+ - lib/dwcr/inflections.rb
59
+ - lib/dwcr/metaschema/archive.rb
60
+ - lib/dwcr/metaschema/attribute.rb
61
+ - lib/dwcr/metaschema/content_file.rb
62
+ - lib/dwcr/metaschema/entity.rb
63
+ - lib/dwcr/metaschema/metaschema.rb
64
+ - lib/dwcr/metaschema/metaschema_tables.yml
65
+ - lib/dwcr/metaschema/xml_parsable.rb
66
+ - lib/dwcr/schema.rb
67
+ homepage: https://github.com/loveablelobster/DwCR
68
+ licenses:
69
+ - MIT
70
+ metadata: {}
71
+ post_install_message:
72
+ rdoc_options: []
73
+ require_paths:
74
+ - lib
75
+ required_ruby_version: !ruby/object:Gem::Requirement
76
+ requirements:
77
+ - - ">="
78
+ - !ruby/object:Gem::Version
79
+ version: '2.5'
80
+ required_rubygems_version: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
85
+ requirements:
86
+ - SQLite
87
+ rubyforge_project:
88
+ rubygems_version: 2.7.6
89
+ signing_key:
90
+ specification_version: 4
91
+ summary: DwCA stored in a SQLite database, with Sequel models
92
+ test_files: []