dwcr 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +24 -0
- data/bin/dwcr +32 -0
- data/lib/cli/help.yml +66 -0
- data/lib/cli/load.rb +25 -0
- data/lib/cli/new.rb +43 -0
- data/lib/cli/shell.rb +55 -0
- data/lib/dwca_content_analyzer/column.rb +53 -0
- data/lib/dwca_content_analyzer/csv_converters.rb +16 -0
- data/lib/dwca_content_analyzer/file_contents.rb +43 -0
- data/lib/dwca_content_analyzer/file_set.rb +46 -0
- data/lib/dwcr/dynamic_model_queryable.rb +100 -0
- data/lib/dwcr/dynamic_models.rb +27 -0
- data/lib/dwcr/inflections.rb +16 -0
- data/lib/dwcr/metaschema/archive.rb +86 -0
- data/lib/dwcr/metaschema/attribute.rb +84 -0
- data/lib/dwcr/metaschema/content_file.rb +130 -0
- data/lib/dwcr/metaschema/entity.rb +195 -0
- data/lib/dwcr/metaschema/metaschema.rb +94 -0
- data/lib/dwcr/metaschema/metaschema_tables.yml +130 -0
- data/lib/dwcr/metaschema/xml_parsable.rb +113 -0
- data/lib/dwcr/schema.rb +58 -0
- data/lib/dwcr.rb +15 -0
- metadata +92 -0
@@ -0,0 +1,113 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
module DwCR
  # Namespace of the XMLParsable mixin
  module Metaschema
    # Helpers that read values from the nodes of a DwCA _meta.xml_ file.
    # The module methods (validate_meta, load_meta) work on whole documents;
    # the instance methods are mixed into model classes and each take a single
    # xml node that responds to +name+, +attributes+ and +css+.
    module XMLParsable
      # Validates the _meta.xml_ document +xml+
      # raises ArgumentError if
      # - the document has no root or the root is not _archive_
      # - there is no _core_ node or more than one _core_ node
      # - _archive_ has children other than _core_ and _extension_
      # returns +xml+ if the document is valid
      def self.validate_meta(xml)
        # a document without a root element has +nil+ as root; report it as
        # invalid instead of failing with NoMethodError
        raise ArgumentError, 'Root is not archive' unless xml.root&.name == 'archive'

        xml_elements = xml.root.elements
        xml_core = xml_elements.css 'core'
        raise ArgumentError, 'Missing core node' if xml_core.empty?
        raise ArgumentError, 'Multiple core nodes' if xml_core.count > 1

        # anything left after removing core and extensions is not allowed
        xml_elements -= xml_core
        xml_xtns = xml_elements.css 'extension'
        xml_elements -= xml_xtns
        raise ArgumentError, 'Invalid node' unless xml_elements.empty?

        xml
      end

      # Loads the _meta.xml_ file from +path+
      # if +path+ is a directory, will try to locate the _meta.xml_ file in it
      # will default to the working directory if no +path+ is given
      # returns the parsed document once it has passed validate_meta
      def self.load_meta(path = nil)
        path ||= Dir.pwd
        meta = File.directory?(path) ? File.join(path, 'meta.xml') : path
        xml = File.open(meta) { |f| Nokogiri::XML(f) }
        XMLParsable.validate_meta xml
      end

      # Parses the _default_ value from an xml node
      # returns +nil+ if the node has no _default_ attribute
      # applies to _field_ nodes
      def default_from(xml)
        xml.attributes['default']&.value
      end

      # Returns +true+ if the xml node represents the _core_
      # +false+ if it represents an _extension_
      # raises ArgumentError for any other node name
      # applies to child nodes of _archive_ (entities)
      def is_core_from(xml)
        case xml.name
        when 'core'
          true
        when 'extension'
          false
        else
          raise ArgumentError, "invalid node name: '#{xml.name}'"
        end
      end

      # Returns the index of a _field_ node
      # or the _coreid_ of an _extension_ node
      # returns +nil+ if the relevant node has no _index_ attribute
      # applies to _field_ and _extension_ nodes
      def index_from(xml)
        # a _coreid_ child, when present, takes precedence over the node itself
        key_node = xml.css('coreid').first
        index_attribute = (key_node || xml).attributes['index']
        index_attribute&.value&.to_i
      end

      # Returns the index of the key column in
      # the _core_ (_id_)
      # or an _extension_ (_coreid_)
      # returns +nil+ if the key node or its _index_ attribute is missing
      # raises ArgumentError if the node is neither _core_ nor _extension_
      # applies to child nodes of _archive_ (entities)
      def key_column_from(xml)
        key_tag = is_core_from(xml) ? 'id' : 'coreid'
        key_node = xml.css(key_tag).first
        # entities without a key node yield nil rather than a NoMethodError
        return unless key_node

        key_node.attributes['index']&.value&.to_i
      end

      # Returns an array with the names for any files
      # associated with a child node of _archive_ (_core_ or _extension_)
      # applies to child nodes of _archive_ (entities)
      def files_from(xml)
        xml.css('files').css('location').map(&:text)
      end

      # Returns the term for an entity or attribute
      # (the _rowType_ attribute if present, the _term_ attribute otherwise)
      # returns +nil+ if the node has neither attribute
      # applies to _field_ nodes and child nodes of _archive_ (entities)
      def term_from(xml)
        term = xml.attributes['rowType'] || xml.attributes['term']
        term&.value
      end

      # Updates an instance of the model class the mixin is included in
      # with values parsed from xml (see values_from), then saves it
      # relies on +update+ and +save+ being provided by the including class
      # applies to _field_ nodes
      def update_from(xml, *fields)
        update values_from(xml, *fields)
        save
      end

      # Returns the name of the XMLParsable method that corresponds to
      # +method_name+ (an attribute name of the including class),
      # e.g. +:term+ gives <tt>'term_from'</tt>
      # NOTE(review): this shadows Object#method in every including class;
      # kept under this name because values_from and callers depend on it
      def method(method_name)
        "#{method_name}_from"
      end

      # Returns a hash with model attributes as keys,
      # values parsed from xml as values
      # attributes for which the xml yields +nil+ are left out
      # applies to _field_ nodes
      def values_from(xml, *attrs)
        values = attrs.map { |attr| send(method(attr), xml) }
        attrs.zip(values).to_h.compact
      end
    end
  end
end
|
data/lib/dwcr/schema.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# This module provides functionality to create a
|
4
|
+
# SQLite database from a DarwinCoreArchive
|
5
|
+
# and provides an ORM layer using http://sequel.jeremyevans.net
|
6
|
+
# Sequel::Model instances are created from the DwCA's meta.xml file
|
7
|
+
module DwCR
  # Creates the table for +entity+ (an Entity instance)
  # unless a table with that name already exists
  # - adds a primary key _id_ and a foreign key _entity_id_ into _entities_
  # - if +entity+ has a _core_, adds the foreign key into the table
  #   of that _core_
  # - adds a column for every attribute of +entity+, skipping attributes
  #   flagged as foreign key (the _coreid_ field declared in _extensions_
  #   in the DwCA meta.xml is redundant, because relationships are
  #   re-established upon import using SQL primary and foreign keys)
  def self.create_schema_table(entity)
    DB.create_table? entity.table_name do
      primary_key :id
      foreign_key :entity_id, :entities
      if entity.core
        foreign_key entity.core.foreign_key, entity.core.table_name
      end
      entity.attributes.each do |attribute|
        next if attribute.foreign_key?

        column(*attribute.to_table_column)
      end
    end
  end

  # Creates the database schema for the DwCA nodes
  # passes +archive+ and the _options_ to Metaschema.update first,
  # then creates one table per entity of +archive+
  # _options_:
  # - +type:+ +true+ or +false+
  # - +length:+ +true+ or +false+
  # if options are given, the schema will be updated
  # based on the DwCA files actual content,
  # analysing each column for type and length
  def self.create_schema(archive, **options)
    Metaschema.update(archive, options)
    archive.entities.each(&method(:create_schema_table))
  end

  # Loads models for all Entity instances in the Archive instance
  # if no explicit Archive instance is given, it will load the first
  # every model is also registered as a one-to-many association
  # on Metaschema::Entity, named after the table of its entity
  # returns the array of created models
  def self.load_models(archive = Metaschema::Archive.first)
    archive.entities.map do |entity|
      DwCR.create_model(entity).tap do |model|
        Metaschema::Entity.associate(:one_to_many, entity.table_name, class: model)
      end
    end
  end

  # Loads the contents of all CSV files associated with an archive
  # into the schema tables
  # the files of the _core_ are loaded before those of the _extensions_
  def self.load_contents_for(archive)
    archive.core.content_files.each(&:load)
    archive.extensions.each { |extension| extension.content_files.each(&:load) }
  end
end
|
data/lib/dwcr.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
require 'csv'
|
2
|
+
require 'json'
|
3
|
+
require 'nokogiri'
|
4
|
+
require 'psych'
|
5
|
+
require 'sequel'
|
6
|
+
require 'sqlite3'
|
7
|
+
|
8
|
+
require_relative 'dwca_content_analyzer/file_set'
|
9
|
+
require_relative 'dwcr/dynamic_model_queryable'
|
10
|
+
require_relative 'dwcr/dynamic_models'
|
11
|
+
require_relative 'dwcr/metaschema/metaschema'
|
12
|
+
require_relative 'dwcr/schema'
|
13
|
+
|
14
|
+
Sequel.extension :inflector
|
15
|
+
require_relative 'dwcr/inflections'
|
metadata
ADDED
@@ -0,0 +1,92 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: dwcr
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.8
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Martin Stein
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2018-09-12 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: |+
|
14
|
+
# DwCR
|
15
|
+
DwCA in a SQLite database complete with
|
16
|
+
[Sequel](https://github.com/jeremyevans/sequel) models.
|
17
|
+
|
18
|
+
## Command Line Tool
|
19
|
+
|
20
|
+
### Create a new DwCR file
|
21
|
+
|
22
|
+
From within the directory of a DwC Archive:
|
23
|
+
|
24
|
+
```ruby
|
25
|
+
dwcr new -t dwcr-test.db
|
26
|
+
```
|
27
|
+
|
28
|
+
will create the file in the directory, named after the directory
|
29
|
+
|
30
|
+
see `dwcr new --help` for options
|
31
|
+
|
32
|
+
### Load an existing DwCR file
|
33
|
+
|
34
|
+
```ruby
|
35
|
+
dwcr load ~/documents/dwca/dwcr-test.db
|
36
|
+
```
|
37
|
+
|
38
|
+
email: loveablelobster@fastmail.fm
|
39
|
+
executables:
|
40
|
+
- dwcr
|
41
|
+
extensions: []
|
42
|
+
extra_rdoc_files: []
|
43
|
+
files:
|
44
|
+
- LICENSE
|
45
|
+
- README.md
|
46
|
+
- bin/dwcr
|
47
|
+
- lib/cli/help.yml
|
48
|
+
- lib/cli/load.rb
|
49
|
+
- lib/cli/new.rb
|
50
|
+
- lib/cli/shell.rb
|
51
|
+
- lib/dwca_content_analyzer/column.rb
|
52
|
+
- lib/dwca_content_analyzer/csv_converters.rb
|
53
|
+
- lib/dwca_content_analyzer/file_contents.rb
|
54
|
+
- lib/dwca_content_analyzer/file_set.rb
|
55
|
+
- lib/dwcr.rb
|
56
|
+
- lib/dwcr/dynamic_model_queryable.rb
|
57
|
+
- lib/dwcr/dynamic_models.rb
|
58
|
+
- lib/dwcr/inflections.rb
|
59
|
+
- lib/dwcr/metaschema/archive.rb
|
60
|
+
- lib/dwcr/metaschema/attribute.rb
|
61
|
+
- lib/dwcr/metaschema/content_file.rb
|
62
|
+
- lib/dwcr/metaschema/entity.rb
|
63
|
+
- lib/dwcr/metaschema/metaschema.rb
|
64
|
+
- lib/dwcr/metaschema/metaschema_tables.yml
|
65
|
+
- lib/dwcr/metaschema/xml_parsable.rb
|
66
|
+
- lib/dwcr/schema.rb
|
67
|
+
homepage: https://github.com/loveablelobster/DwCR
|
68
|
+
licenses:
|
69
|
+
- MIT
|
70
|
+
metadata: {}
|
71
|
+
post_install_message:
|
72
|
+
rdoc_options: []
|
73
|
+
require_paths:
|
74
|
+
- lib
|
75
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
76
|
+
requirements:
|
77
|
+
- - ">="
|
78
|
+
- !ruby/object:Gem::Version
|
79
|
+
version: '2.5'
|
80
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
85
|
+
requirements:
|
86
|
+
- SQLite
|
87
|
+
rubyforge_project:
|
88
|
+
rubygems_version: 2.7.6
|
89
|
+
signing_key:
|
90
|
+
specification_version: 4
|
91
|
+
summary: DwCA stored in a SQLite database, with Sequel models
|
92
|
+
test_files: []
|