dwcr 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: e1c757829c2f62acf12a5ca5c7eef5a5473d60ee1ef40500adc5b3df34ca0296
4
+ data.tar.gz: f58cb0ee2250939b93ef081b3f7a0ab7d02ed02c1f426b854f91df9a0c8864be
5
+ SHA512:
6
+ metadata.gz: da0050e27a07dd812f5e0421f86681c7b6d24d2c2a8e31d8d844af163e1cd3bccde3dd53af11d86a1a0b8defe26f6e648fb88e0aa080a3492e93206ea6144f95
7
+ data.tar.gz: 5b17dc2bff6c355766edd300b04c40cdda3cbe3089cfd69cafaeca90a3965ca2342710cc4a7379c7107ce0984956a176070ca6691742d3348fb83fe7cd092b0c
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2017 Martin Stein
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,24 @@
1
+ # DwCR
2
+ DwCA in a SQLite database complete with
3
+ [Sequel](https://github.com/jeremyevans/sequel) models.
4
+
5
+ ## Command Line Tool
6
+
7
+ ### Create a new DwCR file
8
+
9
+ From within the directory of a DwC Archive:
10
+
11
+ ```ruby
12
+ dwcr new -t dwcr-test.db
13
+ ```
14
+
15
+ will create the file in the directory, named after the directory
16
+
17
+ see `dwcr new --help` for options
18
+
19
+ ### Load an existing DwCR file
20
+
21
+ ```ruby
22
+ dwcr load ~/documents/dwca/dwcr-test.db
23
+ ```
24
+
data/bin/dwcr ADDED
@@ -0,0 +1,32 @@
#!/usr/bin/env ruby

# frozen_string_literal: true

require 'getoptlong'
require 'pry'
require 'dwcr'
require 'cli/shell'

# require_relative '../lib/dwcr'
# require_relative '../lib/cli/shell'

# Subcommands that have their own script in lib/cli.
dwcr_cmds = %w[load new].freeze

# Consume the first argument only when it names a known subcommand;
# otherwise leave ARGV untouched and run without a command.
this_cmd = dwcr_cmds.include?(ARGV.first) ? ARGV.shift : nil

# Shell object for the chosen command (usage text and option parser).
# The command scripts loaded below read it through this constant.
SHELL = CLI::Shell.new this_cmd

# Run lib/cli/<command>.rb, resolved relative to this executable's directory.
load File.expand_path("../lib/cli/#{this_cmd}.rb", __dir__) if this_cmd

# Without a subcommand the only supported option is --help.
SHELL.options.each do |opt, _arg|
  SHELL.print_help if opt == '--help'
end
data/lib/cli/help.yml ADDED
@@ -0,0 +1,66 @@
1
+ ---
2
+ NULL:
3
+ usage: >
4
+ dwcr [command] [options]
5
+ options:
6
+ -
7
+ - '--help'
8
+ - '-h'
9
+ - NO_ARGUMENT
10
+ - show help
11
+ load:
12
+ usage: |
13
+ dwcr load [file] [options]
14
+ loads an existing DwCR file
15
+ options:
16
+ -
17
+ - '--help'
18
+ - '-h'
19
+ - NO_ARGUMENT
20
+ - show help
21
+ -
22
+ - '--interactive'
23
+ - '-i'
24
+ - NO_ARGUMENT
25
+ - |
26
+ starts an interactive Pry session
27
+ new:
28
+ usage: |
29
+ dwcr new [options]
30
+ creates a new DwCR
31
+ default name for the DwCR is the name of the archive directory
32
+ options:
33
+ -
34
+ - '--help'
35
+ - '-h'
36
+ - NO_ARGUMENT
37
+ - show help
38
+ -
39
+ - '--coltypes'
40
+ - '-c'
41
+ - NO_ARGUMENT
42
+ - |
43
+ set the column types based on contents from the included csv files
44
+ by default all columns will be of type 'text'
45
+ -
46
+ - '--meta'
47
+ - '-m'
48
+ - REQUIRED_ARGUMENT
49
+ - |
50
+ specify a meta.xml file for the schema
51
+ by default will search for meta.xml in the working directory
52
+ (see --path option)
53
+ -
54
+ - '--path'
55
+ - '-p'
56
+ - REQUIRED_ARGUMENT
57
+ - |
58
+ the directory where the DarwinCore Archive is located
59
+ defaults to the current directory
60
+ -
61
+ - '--target'
62
+ - '-t'
63
+ - OPTIONAL_ARGUMENT
64
+ - |
65
+ the target file to be created; if no argument is provided
66
+ this option will create an in-memory SQLite database
data/lib/cli/load.rb ADDED
@@ -0,0 +1,25 @@
# frozen_string_literal: true

# Script behind `dwcr load`: opens an existing DwCR SQLite file and loads the
# models for it. Reads its settings from the SHELL constant, which must be
# defined before this file is loaded.
module CLI
  SHELL.options.each do |option, _value|
    if option == '--help'
      SHELL.print_help
      exit true
    elsif option == '--interactive'
      SHELL.session = true
    end
  end

  # the first argument left after option parsing is the file to open
  SHELL.target = ARGV.shift

  ::DB = Sequel.sqlite(SHELL.target)

  DwCR::Metaschema.load_models

  DwCR::MODELS = DwCR.load_models

  # drop into a Pry session only when --interactive was given
  binding.pry if SHELL.session

  exit true
end
data/lib/cli/new.rb ADDED
@@ -0,0 +1,43 @@
# frozen_string_literal: true

# Script behind `dwcr new`: creates a DwCR SQLite database from a DarwinCore
# Archive. Reads its settings from the SHELL constant, which must be defined
# before this file is loaded.
module CLI
  schema_opts = {}

  # Must be declared before the option loop: a local that is first assigned
  # inside the block is block-local, so the --meta argument would be lost.
  xml = nil

  SHELL.options.each do |opt, arg|
    case opt
    when '--help'
      SHELL.print_help
      exit true
    when '--coltypes'
      schema_opts[:type] = true
    when '--meta'
      xml = arg
    when '--path'
      SHELL.path = arg
    when '--target'
      # GetoptLong yields an empty string when the optional argument is
      # omitted; nil selects an in-memory database
      SHELL.target = arg.empty? ? nil : arg
    end
  end

  # fall back to the archive directory when no meta.xml was given
  xml ||= SHELL.path

  ::DB = Sequel.sqlite(SHELL.target)

  DwCR::Metaschema.create

  archive = DwCR::Metaschema::Archive.create(path: SHELL.path)

  meta_doc = DwCR::Metaschema::XMLParsable.load_meta xml

  archive.load_nodes_from meta_doc

  DwCR.create_schema(archive, schema_opts)

  DwCR::MODELS = DwCR.load_models archive

  DwCR.load_contents_for archive

  binding.pry

  puts 'done!'
end
data/lib/cli/shell.rb ADDED
@@ -0,0 +1,55 @@
# frozen_string_literal: true

require 'getoptlong'
require 'psych'

# Command line support for the dwcr executable.
module CLI
  # Holds the usage text, the GetoptLong parser, and the path, target and
  # session settings for one dwcr command.
  class Shell
    # Column at which option descriptions start in the help text.
    HELP_INDENT = 40

    attr_accessor :path, :session
    attr_reader :options, :target

    # Loads usage and option definitions for +cmd+ from help.yml (located next
    # to this file). +cmd+ is the command name, or +nil+ for the bare
    # executable.
    # Raises ArgumentError when help.yml has no entry for +cmd+.
    def initialize(cmd)
      cmd_shell = Psych.load_file(File.join(__dir__, 'help.yml'))[cmd]
      raise ArgumentError, "unknown command: #{cmd.inspect}" unless cmd_shell
      @usage = ["Usage: #{cmd_shell['usage']}\n"]
      @options = nil
      @usage << load_options(cmd_shell['options'])
      @path = Dir.pwd
      @target = target_directory @path
      @session = false
    end

    # Prints the usage line followed by the formatted option list.
    def print_help
      puts @usage
    end

    # Sets the target file. A directory is expanded to a .db file inside that
    # directory (see #target_directory); any other path is stored unchanged;
    # +nil+ clears the target.
    def target=(target_path)
      @target = target_path ? target_directory(target_path) || target_path : nil
    end

    private

    # Builds the GetoptLong parser from +raw_opts+ (arrays of long name, short
    # name, GetoptLong argument flag name, description) and stores it in
    # @options. Returns one formatted help string per option.
    def load_options(raw_opts)
      cmd_opts = raw_opts.map do |opt|
        [opt[0], opt[1], GetoptLong.const_get(opt[2])]
      end
      @options = GetoptLong.new(*cmd_opts)
      raw_opts.map { |opt| help_entry(opt) }
    end

    # Formats a single option for the help text: the names padded to
    # HELP_INDENT, then the description, with continuation lines indented to
    # the same column. An empty description yields just the padded names.
    def help_entry(opt)
      first, *rest = opt[3].to_s.lines
      label = " #{opt[1]} #{opt[0]}".ljust(HELP_INDENT)
      indent = ' ' * HELP_INDENT
      [label + first.to_s, *rest.map { |line| indent + line }].join
    end

    # Returns the default database file inside +target_path+, named after the
    # basename of @path with a .db extension. Returns +nil+ when +target_path+
    # is not a directory.
    def target_directory(target_path)
      return nil unless File.directory? target_path
      File.join(target_path, "#{File.basename(@path)}.db")
    end
  end
end
@@ -0,0 +1,53 @@
# frozen_string_literal: true

# Detects column types and lengths in content files.
module DwCAContentAnalyzer
  # Analysis result for a single column: its index and, depending on the
  # detectors requested, the collapsed type and the maximum cell length.
  class Column
    # Detectors run when +:all+ is requested.
    DETECTORS = %i[type length].freeze

    attr_reader :index, :type, :length

    # * +index+: the Integer position of the column
    # * +contents+: the cells of the column; +nil+ cells are ignored
    # * +detectors+: any of +:type+ and +:length+, or +:all+ for both;
    #   +:none+ disables detection and takes precedence over everything else
    #
    # Raises ArgumentError unless +index+ is an Integer.
    def initialize(index, contents, *detectors)
      raise ArgumentError, 'index must be an Integer' unless index.is_a? Integer
      detectors = [] if detectors.include? :none
      # expand :all to the plain detector names; the setter suffix is added
      # exactly once below
      detectors = DETECTORS if detectors.include? :all
      @index = index
      @type = nil
      @length = nil
      analyze(contents, detectors.map { |detector| :"#{detector}=" })
    end

    private

    # Runs every detector (a private setter name) on the non-nil cells.
    def analyze(contents, detectors)
      return if detectors.empty?
      cells = contents.compact
      detectors.each { |detector| send(detector, cells) }
    end

    # Collapses all types encountered in a column into a single type:
    # +nil+ for no types, the type itself for a single type, Float for a mix
    # of Float and Integer, and String for any other mix.
    def collapse(types)
      return types.first if types.size <= 1
      return Float if float?(types)
      String
    end

    # Stores the length of the longest cell (as a string), 0 without cells.
    def length=(cells)
      @length = cells.map { |cell| cell.to_s.length }.max || 0
    end

    # Stores the collapsed class of all cells.
    def type=(cells)
      @type = collapse(cells.map(&:class).uniq)
    end

    # True when +types+ consists of exactly Float and Integer.
    def float?(types)
      types.size == 2 && types.include?(Float) && types.include?(Integer)
    end
  end
end
@@ -0,0 +1,16 @@
# frozen_string_literal: true

require 'csv'

# Converts a field to Integer or Float only when the whole field (ignoring
# surrounding whitespace) is a plain decimal number; any other field is
# returned unchanged. The patterns are anchored with \A and \z so that a
# multi-line field containing a numeric line is not mistaken for a number.
CSV::Converters[:safe_numeric] = lambda do |field|
  case field.strip
  when /\A-?[0-9]+\z/
    field.to_i
  when /\A-?[0-9]*\.[0-9]+\z/
    field.to_f
  else
    field
  end
end

# Converts empty fields to nil; nil and non-empty fields pass through.
CSV::Converters[:blank_to_nil] = lambda do |field|
  field&.empty? ? nil : field
end
@@ -0,0 +1,43 @@
# frozen_string_literal: true

require 'csv'

require_relative 'column'
require_relative 'csv_converters'

# Detects column types and lengths in content files.
module DwCAContentAnalyzer
  # Analyzes a single CSV file and exposes one Column per column in it.
  class FileContents
    attr_reader :columns

    # * +file+: path of the CSV file to analyze
    # * +detectors+: detector names passed on to every Column
    def initialize(file, detectors = %i[type length])
      @file = file
      @detectors = detectors
      @columns = analyze
    end

    private

    # Builds a Column for every column of the file. An empty file has no
    # first line to count columns from and yields no columns.
    def analyze
      return [] if File.zero?(@file)
      load_table(@file).by_col!.map do |header, contents|
        Column.new(header.to_i, contents, *@detectors)
      end
    end

    # Reads the first line of the CSV file and returns the column indices
    # as an array of strings ('0', '1', ...).
    def headers(file)
      Array.new(CSV.open(file, &:readline).size, &:to_s)
    end

    # Reads the whole file into a CSV table, using the column indices as
    # headers so that the first line is treated as data.
    def load_table(file)
      CSV.read(file,
               headers: headers(file),
               converters: %i[blank_to_nil safe_numeric date])
    end
  end
end
@@ -0,0 +1,46 @@
# frozen_string_literal: true

require_relative 'file_contents'

# Detects column types and lengths in content files.
module DwCAContentAnalyzer
  # Analyzes several files and consolidates the results per column index.
  class FileSet
    # Array of hashes with the keys +:index+, +:length+ and +:type+.
    attr_reader :columns

    # * +files+: the files to analyze
    # * +detectors+: detector names passed on to every FileContents
    def initialize(files, detectors = %i[type length])
      @detectors = detectors
      @columns = analyze files
    end

    private

    # Analyzes every file and merges the columns of all files by index.
    def analyze(files)
      consolidate(files.flat_map { |file| columns_for file })
    end

    # Merges columns that share an index into one hash holding the greatest
    # length and the common type (as an underscored string). Columns without
    # a detected type or length (nil) do not take part in the merge, so a
    # column that is empty in one file does not override the type found in
    # another file.
    def consolidate(columns)
      columns.group_by(&:index).map do |index, group|
        { index: index,
          length: group.map(&:length).compact.max,
          type: common_type(group.map(&:type).compact.uniq)&.to_s&.underscore }
      end
    end

    # Returns the analyzed columns of a single file.
    def columns_for(file)
      FileContents.new(file, @detectors).columns
    end

    # Returns +nil+ for no types, the type itself for a single type, Float
    # for a mix of Float and Integer, and String for any other mix.
    def common_type(types)
      if types.size <= 1
        types.first
      elsif types.size == 2 && types.include?(Float) && types.include?(Integer)
        Float
      else
        String
      end
    end
  end
end
@@ -0,0 +1,100 @@
# frozen_string_literal: true

# Convenience methods for the dynamic models. The instance methods rely on the
# host providing +entity+ and +to_hash+ on its instances.
module DynamicModelQueryable
  # convenience class methods for the dynamic models
  module DynamicModelClassQueryable
    # Returns the Metaschema::Entity the class belongs to.
    attr_reader :entity

    # Returns the Metaschema::Attribute for a +column_name+ (the header of the
    # column, can be passed as Symbol or String).
    def attribute_for(column_name)
      entity.attributes_dataset.first(name: column_name.to_s)
    end

    # Returns a nested array with one item per column that has an attribute.
    # Each item is an array with the attribute's value for +keys+ at index 0
    # and the entity's value for +keys+ at index 1. Columns without an
    # attribute are skipped. For the core, the template of every extension
    # model is appended as a nested array.
    def template(keys = :term)
      tmpl = columns.map do |column|
        attribute = attribute_for(column)
        next unless attribute
        [attribute.send(keys), entity.send(keys)]
      end.compact
      return tmpl unless entity.is_core
      entity.extensions.each do |xtn|
        tmpl << xtn.model_get.template(keys)
      end
      tmpl
    end
  end

  # Extends the class that DynamicModelQueryable is mixed in with
  # DynamicModelClassQueryable
  def self.included(host_class)
    host_class.extend(DynamicModelClassQueryable)
  end

  # Returns the core row for +self+. Will return +nil+ if +self+ is the core.
  def core_row
    return nil if entity.is_core
    send(entity.core.name)
  end

  # Returns an array of all related extension rows for +self+. Will return +nil+
  # if +self+ is an extension.
  def extension_rows
    return nil unless entity.is_core
    entity.extensions.map { |xtn| send(xtn.table_name) }.flatten
  end

  # Returns a value hash for +self+ without primary or foreign keys.
  # The hash returned by +to_hash+ is left untouched.
  def row_values
    # the core has no core of its own, hence no foreign key to remove
    keys_to_delete = [:id, :entity_id, entity.core&.foreign_key].compact
    to_hash.reject { |key, _| keys_to_delete.include? key }
  end

  # Returns a nested array of values only in consistent order. For the core,
  # the rows of every extension are appended as nested arrays.
  def to_a
    row_array = row_values.values
    return row_array unless entity.is_core
    entity.extensions.each_with_object(row_array) do |xtn, memo|
      memo << send(xtn.table_name).map(&:to_a)
    end
  end

  # Returns a value hash for the row without primary or foreign keys
  # where the keys in the hash can be the _term_, _baseterm_, or _name_
  # of the attributes, depending on the argument given
  def to_hash_with(keys = :term)
    return row_values if keys == :name
    row_values.transform_keys do |key|
      attribute = entity.attributes_dataset.first(name: key.to_s)
      attribute.send(keys)
    end
  end

  # Returns the full record (see #to_record) for +self+ as JSON.
  # Accepts and forwards the generator arguments, so that a row can also be
  # serialized as part of an enclosing structure (JSON.generate calls
  # +to_json+ with a state argument on nested objects).
  def to_json(*args)
    to_record.to_json(*args)
  end

  # Returns the full record (current row and all related rows) for +self+
  # as a hash with +keys+ (+:term+, +:baseterm+, or +:name+).
  # For the core, extension rows are collected in arrays keyed by the
  # extension entity; for an extension, the core row is added under the key
  # of the core entity.
  def to_record(keys: :term)
    record_hash = to_hash_with(keys)
    if entity.is_core
      extension_rows.each do |row|
        key = row.entity.send(keys)
        record_hash[key] ||= []
        record_hash[key] << row.to_hash_with(keys)
      end
    else
      record_hash[entity.core.send(keys)] = core_row.to_hash_with(keys)
    end
    record_hash
  end
end
@@ -0,0 +1,27 @@
# frozen_string_literal: true

#
module DwCR
  # Builds a Sequel::Model subclass on the table of +entity+, sets up every
  # association listed in +entity.model_associations+, registers the class as
  # a constant named +entity.class_name+, and returns the class.
  def self.create_model(entity)
    Class.new(Sequel::Model(entity.table_name)) do
      include DynamicModelQueryable
      @entity = entity

      entity.model_associations.each do |association|
        associate(*association)
        # every association other than many_to_one gets a :destroy dependency
        unless association[0] == :many_to_one
          plugin :association_dependencies
          add_association_dependencies(association[1] => :destroy)
        end
      end

      # Tears the model down again: clears the entity reference, removes the
      # instance methods defined directly on the class, and removes the
      # class constant.
      define_singleton_method(:finalize) do
        @entity = nil
        instance_methods(false).each { |method| remove_method(method) }
        Module.nesting.last.send(:remove_const, entity.class_name)
      end
    end.tap { |model_class| const_set entity.class_name, model_class }
  end
end
@@ -0,0 +1,16 @@
# frozen_string_literal: true

# Inflection rules, each given as the inflector method name followed by its
# arguments.
INFLECTIONS = [
  [:singular, /(media)$/i, '\1'],
  [:plural, /(media)$/i, '\1'],
  [:irregular, 'taxon', 'taxa'],
  [:uncountable, 'metadata']
].freeze

# Register the same rules with both inflectors, Sequel first.
[Sequel, String].each do |host|
  host.inflections do |inflect|
    INFLECTIONS.each { |rule| inflect.send(*rule) }
  end
end
@@ -0,0 +1,86 @@
# frozen_string_literal: true

#
module DwCR
  #
  module Metaschema
    # This class represents the DarwinCoreArchive's _meta.xml_ file
    # * +name+: the name for the DarwinCoreArchive
    #   the default is the directory name given in the path
    # * +path+: the full path of the directory of the DarwinCoreArchives
    # * +xmlns+: the XML Namespace
    #   default: 'http://rs.tdwg.org/dwc/text/'
    # * +xmlns__xs+: the schema namespace prefix (xmlns:xs),
    #   default: 'http://www.w3.org/2001/XMLSchema'
    # * +xmln__xsi+: schema instance namespace prefix (xmln:xsi),
    #   default: 'http://www.w3.org/2001/XMLSchema-instance'
    # * +xsi__schema_location+ (xsi:schemaLocation)
    #   default: 'http://rs.tdwg.org/dwc/text/
    #   http://rs.tdwg.org/dwc/text/tdwg_dwc_text.xsd'
    # * *#entities*:
    #   the associated Entity objects
    # * *#core*:
    #   the associated Entity object that is the core node in the DwCA
    class Archive < Sequel::Model
      include XMLParsable

      # Setter for the :core association: stores the id of the entity as the
      # core id and flags the entity as core.
      ensure_core = lambda { |entity|
        self.core_id = entity.id
        entity.is_core = true
      }

      # before_add callback for the :extensions association: refuses to add
      # an extension when there is no core, otherwise flags the added entity
      # as not core and links it to the core.
      ensure_not_core = lambda { |ent, attr|
        raise 'adding an extension without a core' unless ent.core
        attr.is_core = false
        attr.core = ent.core
      }

      one_to_many :entities
      many_to_one :core, class: 'Entity',
                         class_namespace: 'DwCR::Metaschema',
                         key: :core_id,
                         setter: ensure_core
      one_to_many :extensions, class: 'Entity',
                               class_namespace: 'DwCR::Metaschema',
                               key: :archive_id,
                               conditions: { is_core: false },
                               before_add: ensure_not_core

      # Methods to add records to the :entities association from xml

      # Reads the _core_ node and all _extension_ nodes from the xml and
      # creates an Entity for each through #add_entity_from. The first _core_
      # node becomes the core of the Archive; every extension gets an
      # additional _coreid_ attribute (the foreign key field) before it is
      # added to the extensions.
      def load_nodes_from(xml)
        core_node = xml.css('core').first
        self.core = add_entity_from(core_node)
        core.save
        xml.css('extension').each do |node|
          extension = add_entity_from(node)
          extension.add_attribute(name: 'coreid', index: index_from(node))
          add_extension(extension)
        end
        save
      end

      private

      # Sequel Model hook that sets a default +name+ from the last segment
      # of +path+ when no name has been given
      def before_create
        self.name ||= path&.split('/')&.last
        super
      end

      # Creates an Entity instance from an xml node (_core_ or _extension_),
      # adds Attribute instances for any _field_ given and
      # ContentFile instances for any child node of _files_.
      # Returns the Entity.
      def add_entity_from(xml)
        add_entity(values_from(xml, :term, :key_column)).tap do |entity|
          xml.css('field').each { |field| entity.add_attribute_from(field) }
          entity.add_files_from(xml, path: path)
        end
      end
    end
  end
end