dwcr 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: e1c757829c2f62acf12a5ca5c7eef5a5473d60ee1ef40500adc5b3df34ca0296
4
+ data.tar.gz: f58cb0ee2250939b93ef081b3f7a0ab7d02ed02c1f426b854f91df9a0c8864be
5
+ SHA512:
6
+ metadata.gz: da0050e27a07dd812f5e0421f86681c7b6d24d2c2a8e31d8d844af163e1cd3bccde3dd53af11d86a1a0b8defe26f6e648fb88e0aa080a3492e93206ea6144f95
7
+ data.tar.gz: 5b17dc2bff6c355766edd300b04c40cdda3cbe3089cfd69cafaeca90a3965ca2342710cc4a7379c7107ce0984956a176070ca6691742d3348fb83fe7cd092b0c
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2017 Martin Stein
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,24 @@
1
+ # DwCR
2
+ DwCA in a SQLite database complete with
3
+ [Sequel](https://github.com/jeremyevans/sequel) models.
4
+
5
+ ## Command Line Tool
6
+
7
+ ### Create a new DwCR file
8
+
9
+ From within the directory of a DwC Archive:
10
+
11
+ ```ruby
12
+ dwcr new -t dwcr-test.db
13
+ ```
14
+
15
+ will create the file in the directory, named after the directory
16
+
17
+ see `dwcr new --help` for options
18
+
19
+ ### Load an existing DwCR file
20
+
21
+ ```ruby
22
+ dwcr load ~/documents/dwca/dwcr-test.db
23
+ ```
24
+
data/bin/dwcr ADDED
@@ -0,0 +1,32 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # frozen_string_literal: true
4
+
5
+ require 'getoptlong'
6
+ require 'pry'
7
+ require 'dwcr'
8
+ require 'cli/shell'
9
+
10
+ # require_relative '../lib/dwcr'
11
+ # require_relative '../lib/cli/shell'
12
+
13
# Sub-commands that are implemented by a script of the same name in lib/cli.
known_commands = %w[load new]

# Take the first argument off ARGV only when it names a known sub-command;
# otherwise the shell is set up without a command.
command = ARGV.shift if known_commands.include?(ARGV.first)

SHELL = CLI::Shell.new(command)

unless command.nil?
  # The command scripts live in lib/cli, next to the directory of this file.
  gem_root = __dir__.split('/')[0...-1]
  load File.join(gem_root + ['lib', 'cli', "#{command}.rb"])
end

# Any options still unprocessed at this point are handled here.
SHELL.options.each do |option, _argument|
  SHELL.print_help if option == '--help'
end
data/lib/cli/help.yml ADDED
@@ -0,0 +1,66 @@
1
+ ---
2
+ NULL:
3
+ usage: >
4
+ dwcr [command] [options]
5
+ options:
6
+ -
7
+ - '--help'
8
+ - '-h'
9
+ - NO_ARGUMENT
10
+ - show help
11
+ load:
12
+ usage: |
13
+ dwcr load [file] [options]
14
+ loads an existing DwCR file
15
+ options:
16
+ -
17
+ - '--help'
18
+ - '-h'
19
+ - NO_ARGUMENT
20
+ - show help
21
+ -
22
+ - '--interactive'
23
+ - '-i'
24
+ - NO_ARGUMENT
25
+ - |
26
+ starts an interactive Pry session
27
+ new:
28
+ usage: |
29
+ dwcr new [options]
30
+ creates a new DwCR
31
+ default name for the DwCR is the name of the working directory with the extension .db
32
+ options:
33
+ -
34
+ - '--help'
35
+ - '-h'
36
+ - NO_ARGUMENT
37
+ - show help
38
+ -
39
+ - '--coltypes'
40
+ - '-c'
41
+ - NO_ARGUMENT
42
+ - |
43
+ set the column types based on contents from the included csv files
44
+ by default all columns will be of type 'text'
45
+ -
46
+ - '--meta'
47
+ - '-m'
48
+ - REQUIRED_ARGUMENT
49
+ - |
50
+ specify a meta.xml file for the schema
51
+ by default will search for meta.xml in the working directory
52
+ (see --path option)
53
+ -
54
+ - '--path'
55
+ - '-p'
56
+ - REQUIRED_ARGUMENT
57
+ - |
58
+ the directory where the DarwinCore Archive is located
59
+ defaults to the current directory
60
+ -
61
+ - '--target'
62
+ - '-t'
63
+ - OPTIONAL_ARGUMENT
64
+ - |
65
+ the target file to be created; if no argument is provided
66
+ this option will create an in-memory SQLite database
data/lib/cli/load.rb ADDED
@@ -0,0 +1,25 @@
1
+ # frozen_string_literal: true
2
+
3
module CLI
  # Process the options given for `dwcr load`:
  # --help prints the usage and quits, --interactive requests a Pry session.
  SHELL.options.each do |option, _argument|
    if option == '--help'
      SHELL.print_help
      exit true
    elsif option == '--interactive'
      SHELL.session = true
    end
  end

  # The next remaining command line argument is used as the target file.
  SHELL.target = ARGV.shift

  ::DB = Sequel.sqlite(SHELL.target)

  DwCR::Metaschema.load_models
  DwCR::MODELS = DwCR.load_models

  # Drop into Pry only when --interactive was given.
  binding.pry if SHELL.session

  exit true
end
data/lib/cli/new.rb ADDED
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
module CLI
  # Options handed on to DwCR.create_schema.
  schema_opts = {}

  # Must be declared before the option loop: a variable first assigned inside
  # the block would be local to the block, and the value given for --meta
  # would be lost before it is used below.
  xml = nil

  # Process the options given for `dwcr new`.
  SHELL.options.each do |opt, arg|
    case opt
    when '--help'
      SHELL.print_help
      exit true
    when '--coltypes'
      schema_opts[:type] = true
    when '--meta'
      xml = arg
    when '--path'
      SHELL.path = arg
    when '--target'
      # --target takes an optional argument; an empty string means that
      # no file name was given
      SHELL.target = arg.empty? ? nil : arg
    end
  end

  # Without --meta the archive path is passed on to load_meta.
  xml ||= SHELL.path

  ::DB = Sequel.sqlite(SHELL.target)

  DwCR::Metaschema.create

  archive = DwCR::Metaschema::Archive.create(path: SHELL.path)

  meta_doc = DwCR::Metaschema::XMLParsable.load_meta xml

  archive.load_nodes_from meta_doc

  DwCR.create_schema(archive, schema_opts)

  DwCR::MODELS = DwCR.load_models archive

  DwCR.load_contents_for archive

  binding.pry

  puts 'done!'
end
data/lib/cli/shell.rb ADDED
@@ -0,0 +1,55 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'getoptlong'
4
+ require 'psych'
5
+
6
+ #
7
module CLI
  # Holds the command line state for one dwcr command: the GetoptLong option
  # parser, the usage text, the working path, the target file and the
  # interactive session flag.
  class Shell
    # Column at which the option descriptions start in the help text.
    HELP_INDENT = 40
    private_constant :HELP_INDENT

    attr_accessor :path, :session
    attr_reader :options, :target

    # Loads the usage text and the option definitions for +cmd+ from the
    # help.yml file that sits next to this file.
    # * +cmd+: the key of the command in help.yml
    #   (presumably +nil+ selects the NULL entry; verify against help.yml)
    def initialize(cmd)
      cmd_shell = Psych.load_file(File.join(__dir__, 'help.yml'))[cmd]
      @usage = ["Usage: #{cmd_shell['usage']}\n"]
      @usage << load_options(cmd_shell['options'])
      @path = Dir.pwd
      @target = target_directory @path
      @session = false
    end

    # Prints the usage line followed by the option descriptions.
    def print_help
      puts @usage
    end

    # Sets the target file.
    # * +nil+ is stored as is
    # * the path of an existing directory is replaced by a file inside that
    #   directory (see #target_directory)
    # * any other path is stored unchanged
    def target=(target_path)
      @target = target_path ? target_directory(target_path) || target_path : nil
    end

    private

    # Builds the GetoptLong parser from +raw_opts+ and returns the formatted
    # help entries, one string per option.
    # Each item in +raw_opts+ is an array of
    # [long name, short name, name of a GetoptLong constant, help text].
    def load_options(raw_opts)
      cmd_opts = raw_opts.map do |opt|
        opt[0..1] << GetoptLong.const_get(opt[2])
      end
      @options = GetoptLong.new(*cmd_opts)
      raw_opts.map { |opt| help_entry(opt) }
    end

    # Formats one help entry: the option names padded to HELP_INDENT columns
    # followed by the first line of the help text; continuation lines of the
    # help text are indented by HELP_INDENT blanks.
    def help_entry(opt)
      first_line, *more_lines = opt[3].lines
      label = " #{opt[1]} #{opt[0]}".ljust(HELP_INDENT)
      indent = ' ' * HELP_INDENT
      # first_line is nil for an empty help text
      [label + first_line.to_s, more_lines.map { |line| indent + line }].join
    end

    # Returns the path of a file in +target_path+ that is named after the
    # basename of the current #path with the extension '.db'.
    # Returns +nil+ if +target_path+ is not an existing directory.
    def target_directory(target_path)
      return nil unless File.directory? target_path
      File.join(target_path, File.basename(@path) + '.db')
    end
  end
end
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
module DwCAContentAnalyzer
  # Analyzes the cells of a single column and records the common type and
  # the maximum length of its contents.
  class Column
    attr_reader :index, :type, :length

    # * +index+: the (Integer) index of the column
    # * +contents+: the cells of the column; +nil+ cells are ignored
    # * +detectors+: any of +:type+, +:length+; +:all+ runs both detectors,
    #   +:none+ runs no detector at all
    #
    # Raises ArgumentError if +index+ is not an Integer.
    def initialize(index, contents, *detectors)
      raise ArgumentError, 'index must be an Integer' unless index.is_a? Integer
      @index = index
      @type = nil
      @length = nil
      analyze(contents, setters_for(detectors))
    end

    private

    # Translates the detector names into the names of the private setter
    # methods that perform the detection (+:type+ becomes +:type=+).
    # +:all+ is expanded to the plain detector names first, so that the
    # '=' suffix is appended exactly once.
    def setters_for(detectors)
      return [] if detectors.include? :none
      names = detectors.include?(:all) ? %i[type length] : detectors
      names.map { |name| :"#{name}=" }
    end

    # Runs every detector on the non-nil cells of +contents+.
    def analyze(contents, detectors)
      return if detectors.empty?
      cells = contents.compact
      detectors.each { |detector| send(detector, cells) }
    end

    # Collapses all types encountered in a file's column into a single type:
    # a single type is kept, Integer mixed with Float becomes Float, any
    # other mix becomes String; returns +nil+ if there are no types.
    def collapse(types)
      return types.first if types.size == 1
      return nil if types.empty?
      return String if string?(types)
      return Float if float?(types)
      String
    end

    # Detector: stores the length of the longest cell (as string), 0 if
    # there are no cells.
    def length=(cells)
      @length = cells.map { |cell| cell.to_s.length }.max || 0
    end

    # Detector: stores the collapsed type of the cells.
    def type=(cells)
      @type = collapse(cells.map(&:class).uniq)
    end

    # Returns +true+ if +types+ consists of exactly Float and Integer.
    def float?(types)
      types.size == 2 && types.include?(Float) && types.include?(Integer)
    end

    # Returns +true+ if +types+ contains String.
    def string?(types)
      types.include?(String)
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
# Converts a field that, apart from surrounding whitespace, consists of
# nothing but an integer or a decimal number into an Integer or a Float.
# Any other field is returned unchanged.
# The patterns are anchored with \A and \z (not ^ and $) so that a field
# spanning several lines is never converted on the strength of one of its
# lines alone.
CSV::Converters[:safe_numeric] = lambda do |field|
  case field.strip
  when /\A-?[0-9]+\z/
    field.to_i
  when /\A-?[0-9]*\.[0-9]+\z/
    field.to_f
  else
    field
  end
end

# Converts an empty field to +nil+; any other field is returned unchanged.
CSV::Converters[:blank_to_nil] = lambda do |field|
  field&.empty? ? nil : field
end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'csv'
4
+
5
+ require_relative 'column'
6
+ require_relative 'csv_converters'
7
+
8
+ #
9
module DwCAContentAnalyzer
  # Reads a CSV file and analyzes each of its columns with the given
  # detectors; the resulting Column objects are available as #columns.
  class FileContents
    attr_reader :columns

    def initialize(file, detectors = %i[type length])
      @file = file
      @detectors = detectors
      @columns = analyze
    end

    private

    # Builds one Column per column of the file; the column index is
    # recovered from the generated header (see #headers).
    def analyze
      load_table(@file).by_col!.map do |col|
        header, contents = col[0], col[1]
        Column.new(header.to_i, contents, *@detectors)
      end
    end

    # Reads the first line of the CSV file and returns the indices of its
    # columns as an array of strings.
    def headers(file)
      first_row = CSV.open(file, &:readline)
      (0...first_row.size).map(&:to_s)
    end

    # Reads the whole file as a table, with the column indices as headers
    # and the blank_to_nil, safe_numeric and date converters applied.
    def load_table(file)
      index_headers = headers(file)
      CSV.read(file,
               headers: index_headers,
               converters: %i[blank_to_nil safe_numeric date])
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'file_contents'
4
+
5
+ #
6
module DwCAContentAnalyzer
  # Analyzes a set of files that share the same column layout and
  # consolidates the results per column index.
  class FileSet
    attr_reader :columns

    # * +files+: the files to analyze
    # * +detectors+: the detectors passed on to FileContents
    def initialize(files, detectors = %i[type length])
      @detectors = detectors
      @columns = analyze files
    end

    private

    # Analyzes every file and consolidates the columns of all files.
    def analyze(files)
      consolidate(files.map { |file| columns_for file }.flatten)
    end

    # Groups +columns+ by index and returns one hash per index with the
    # maximum length and the common type (as underscored string, or +nil+)
    # of all columns with that index.
    def consolidate(columns)
      columns.group_by(&:index).map do |index, group|
        { index: index,
          length: group.map(&:length).max,
          type: common_type(group.map(&:type))&.to_s&.underscore }
      end
    end

    # Returns the analyzed columns of a single file.
    def columns_for(file)
      FileContents.new(file, @detectors).columns
    end

    # Returns the one type that can represent all of +types+: a single type
    # is kept, Integer mixed with Float becomes Float, any other mix becomes
    # String.
    # +nil+ entries (columns for which no type was determined, e.g. a column
    # that is empty in one of the files) are ignored, so they do not turn an
    # otherwise uniform column into String; returns +nil+ if no type is left.
    def common_type(types)
      known = types.compact.uniq
      return nil if known.empty?
      return known.first if known.size == 1
      return Float if known.size == 2 && (known - [Float, Integer]).empty?
      String
    end
  end
end
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ # convenience methods for the dynamic models
4
module DynamicModelQueryable
  # convenience class methods for the dynamic models
  module DynamicModelClassQueryable
    # Returns the Metaschema::Attribute for a +column_name+ (the header of the
    # column, can be passed as Symbol or String).
    def attribute_for(column_name)
      entity.attributes_dataset.first(name: column_name.to_s)
    end

    # Returns the Metaschema::Entity the class belongs to.
    def entity
      @entity
    end

    # Returns a nested array of all terms in the order values
    # will be returned by #to_a. Each item in the nested array will be an array
    # with the attribute term at index 0 and the entity term at index 1.
    # Columns without a matching attribute are skipped. For the core, the
    # template of every extension is appended as a nested array.
    # +keys+ is the name of the method that is sent to the attribute and the
    # entity to obtain the values.
    def template(keys = :term)
      tmpl = columns.map do |column|
        attribute = attribute_for(column)
        next unless attribute
        [attribute.send(keys), entity.send(keys)]
      end.compact
      return tmpl unless entity.is_core
      entity.extensions.each do |xtn|
        tmpl << xtn.model_get.template(keys)
      end
      tmpl
    end
  end

  # Extends the class that DynamicModelQueryable is mixed in with
  # DynamicModelClassQueryable
  def self.included(host_class)
    host_class.extend(DynamicModelClassQueryable)
  end

  # Returns the core row for +self+. Will return +nil+ if +self+ is the core.
  def core_row
    return nil if entity.is_core
    send(entity.core.name)
  end

  # Returns an array of all related extension rows for +self+. Will return +nil+
  # if +self+ is an extension.
  def extension_rows
    return nil unless entity.is_core
    entity.extensions.map { |xtn| send(xtn.table_name) }.flatten
  end

  # Returns a value hash for +self+ without primary or foreign keys.
  # The hash is a copy; the values of +self+ are not modified.
  def row_values
    # the foreign key is only removed when the entity has a core
    keys_to_delete = [:id, :entity_id, entity.core&.foreign_key].compact
    to_hash.reject { |key, _| keys_to_delete.include? key }
  end

  # Returns a nested array of values only in consistent order.
  # For the core, the rows of every extension are appended as nested arrays.
  def to_a
    row_array = row_values.values
    return row_array unless entity.is_core
    entity.extensions.each_with_object(row_array) do |xtn, memo|
      memo << send(xtn.table_name).map(&:to_a)
    end
  end

  # Returns a value hash for the row without primary or foreign keys
  # where the keys in the hash can be the _term_, _baseterm_, or _name_
  # of the attributes, depending on the argument given
  def to_hash_with(keys = :term)
    return row_values if keys == :name
    row_values.transform_keys do |key|
      attribute = entity.attributes_dataset.first(name: key.to_s)
      attribute.send(keys)
    end
  end

  # Returns the record (see #to_record) for +self+ as JSON.
  # Accepts and passes on the optional generator arguments, so that the row
  # can also be serialized as part of a larger structure
  # (e.g. JSON.generate(rows)).
  def to_json(*args)
    to_record.to_json(*args)
  end

  # Returns the full record (current row and all related rows) for +self+
  # as a hash with +keys+ (+:term+, +:baseterm+, or +:name+).
  # For the core, the extension rows are collected in arrays under the key of
  # their entity; for an extension, the core row is added under the key of
  # the core entity.
  def to_record(keys: :term)
    record_hash = to_hash_with(keys)
    if entity.is_core
      extension_rows.each do |row|
        key = row.entity.send(keys)
        record_hash[key] ||= []
        record_hash[key] << row.to_hash_with(keys)
      end
    else
      record_hash[entity.core.send(keys)] = core_row.to_hash_with(keys)
    end
    record_hash
  end
end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
module DwCR
  # Builds an anonymous Sequel::Model class on the table of +entity+,
  # declares every association listed in +entity.model_associations+ and
  # registers the class as a constant of DwCR named +entity.class_name+.
  # Returns the new model class.
  def self.create_model(entity)
    model_class = Class.new(Sequel::Model(entity.table_name)) do
      include DynamicModelQueryable
      @entity = entity

      entity.model_associations.each do |association|
        associate(*association)
        # every association other than many_to_one destroys its associated
        # rows together with the row itself
        unless association.first == :many_to_one
          plugin :association_dependencies
          add_association_dependencies(association[1] => :destroy)
        end
      end

      # Tears the model class down again: drops the entity reference,
      # removes the instance methods defined on the class and removes the
      # constant that was set for the class.
      define_singleton_method(:finalize) do
        @entity = nil
        instance_methods(false).each { |name| remove_method(name) }
        Module.nesting.last.send(:remove_const, entity.class_name)
      end
    end
    const_set(entity.class_name, model_class)
    model_class
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
# Inflection rules; each entry is the name of an inflection method followed
# by the arguments for that method.
INFLECTIONS = [
  [:singular, /(media)$/i, '\1'],
  [:plural, /(media)$/i, '\1'],
  [:irregular, 'taxon', 'taxa'],
  [:uncountable, 'metadata']
].freeze

# Register the same rules with Sequel and with String.
[Sequel, String].each do |host|
  host.inflections do |inflect|
    INFLECTIONS.each { |rule| inflect.send(*rule) }
  end
end
@@ -0,0 +1,86 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ module DwCR
5
+ #
6
+ module Metaschema
7
+ # This class represents the DarwinCoreArchive's _meta.xml_ file
8
+ # * +name+: the name for the DarwinCoreArchive
9
+ # the default is the directory name given in the path
10
+ # * +path+: the full path of the directory of the DarwinCoreArchives
11
+ # * +xmlns+: the XML Namespace
12
+ # default: 'http://rs.tdwg.org/dwc/text/'
13
+ # * +xmlns__xs+: the schema namespace prefix (xmlns:xs),
14
+ # default: 'http://www.w3.org/2001/XMLSchema'
15
+ # * +xmln__xsi+: schema instance namespace prefix (xmln:xsi),
16
+ # default: 'http://www.w3.org/2001/XMLSchema-instance'
17
+ # * +xsi__schema_location+ (xsi:schemaLocation)
18
+ # default: 'http://rs.tdwg.org/dwc/text/
19
+ # http://rs.tdwg.org/dwc/text/tdwg_dwc_text.xsd'
20
+ # * *#entities*:
21
+ # the associated Entity objects
22
+ # * *#core*:
23
+ # the associated Entity object that is the core node in the DwCA
24
+ class Archive < Sequel::Model
25
+ include XMLParsable
26
+
27
+ ensure_core = lambda do |entity|
28
+ self.core_id = entity.id
29
+ entity.is_core = true
30
+ end
31
+
32
+ ensure_not_core = lambda do |ent, attr|
33
+ raise 'adding an extension without a core' unless ent.core
34
+ attr.is_core = false
35
+ attr.core = ent.core
36
+ end
37
+
38
+ one_to_many :entities
39
+ many_to_one :core, class: 'Entity',
40
+ class_namespace: 'DwCR::Metaschema',
41
+ key: :core_id,
42
+ setter: ensure_core
43
+ one_to_many :extensions, class: 'Entity',
44
+ class_namespace: 'DwCR::Metaschema',
45
+ key: :archive_id,
46
+ conditions: { is_core: false },
47
+ before_add: ensure_not_core
48
+
49
+ # Methods to add records to the :entities association from xml
50
+
51
+ # Gets _core_ and _extension_ nodes from the xml
52
+ # calls #add_entity_from(xml) to create
53
+ # Entity instances to the Archive for every node
54
+ # adds the foreign key field (_coreid_) to any _extension_
55
+ def load_nodes_from(xml)
56
+ self.core = add_entity_from xml.css('core').first
57
+ core.save
58
+ xml.css('extension').each do |node|
59
+ extn = add_entity_from node
60
+ extn.add_attribute(name: 'coreid', index: index_from(node))
61
+ add_extension(extn)
62
+ end
63
+ save
64
+ end
65
+
66
+ private
67
+
68
+ # Sequel Model hook that creates a default +name+
69
+ # from the last segment of +path+ unless a name has been set
70
+ def before_create
71
+ self.name ||= path&.split('/')&.last
72
+ super
73
+ end
74
+
75
+ # Creates a Entity instance from xml node (_core_ or _extension_)
76
+ # adds Attribute instances for any _field_ given
77
+ # adds ContentFile instances for any child node of _files_
78
+ def add_entity_from(xml)
79
+ entity = add_entity(values_from(xml, :term, :key_column))
80
+ xml.css('field').each { |field| entity.add_attribute_from(field) }
81
+ entity.add_files_from(xml, path: path)
82
+ entity
83
+ end
84
+ end
85
+ end
86
+ end