berkeley_library-tind 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.github/workflows/build.yml +18 -0
- data/.gitignore +388 -0
- data/.idea/inspectionProfiles/Project_Default.xml +20 -0
- data/.idea/misc.xml +4 -0
- data/.idea/modules.xml +8 -0
- data/.idea/tind.iml +138 -0
- data/.idea/vcs.xml +6 -0
- data/.rubocop.yml +334 -0
- data/.ruby-version +1 -0
- data/.simplecov +8 -0
- data/.yardopts +1 -0
- data/CHANGES.md +58 -0
- data/Dockerfile +57 -0
- data/Gemfile +3 -0
- data/Jenkinsfile +18 -0
- data/LICENSE.md +21 -0
- data/README.md +73 -0
- data/Rakefile +20 -0
- data/berkeley_library-tind.gemspec +50 -0
- data/bin/tind-export +14 -0
- data/docker-compose.yml +15 -0
- data/lib/berkeley_library/tind.rb +3 -0
- data/lib/berkeley_library/tind/api.rb +1 -0
- data/lib/berkeley_library/tind/api/api.rb +132 -0
- data/lib/berkeley_library/tind/api/api_exception.rb +131 -0
- data/lib/berkeley_library/tind/api/collection.rb +82 -0
- data/lib/berkeley_library/tind/api/date_range.rb +67 -0
- data/lib/berkeley_library/tind/api/format.rb +32 -0
- data/lib/berkeley_library/tind/api/search.rb +100 -0
- data/lib/berkeley_library/tind/config.rb +103 -0
- data/lib/berkeley_library/tind/export.rb +1 -0
- data/lib/berkeley_library/tind/export/column.rb +54 -0
- data/lib/berkeley_library/tind/export/column_group.rb +144 -0
- data/lib/berkeley_library/tind/export/column_group_list.rb +131 -0
- data/lib/berkeley_library/tind/export/column_width_calculator.rb +76 -0
- data/lib/berkeley_library/tind/export/config.rb +154 -0
- data/lib/berkeley_library/tind/export/csv_exporter.rb +29 -0
- data/lib/berkeley_library/tind/export/export.rb +47 -0
- data/lib/berkeley_library/tind/export/export_command.rb +168 -0
- data/lib/berkeley_library/tind/export/export_exception.rb +8 -0
- data/lib/berkeley_library/tind/export/export_format.rb +67 -0
- data/lib/berkeley_library/tind/export/exporter.rb +105 -0
- data/lib/berkeley_library/tind/export/filter.rb +52 -0
- data/lib/berkeley_library/tind/export/no_results_error.rb +7 -0
- data/lib/berkeley_library/tind/export/ods_exporter.rb +138 -0
- data/lib/berkeley_library/tind/export/row.rb +24 -0
- data/lib/berkeley_library/tind/export/row_metrics.rb +18 -0
- data/lib/berkeley_library/tind/export/table.rb +175 -0
- data/lib/berkeley_library/tind/export/table_metrics.rb +116 -0
- data/lib/berkeley_library/tind/marc.rb +1 -0
- data/lib/berkeley_library/tind/marc/xml_reader.rb +144 -0
- data/lib/berkeley_library/tind/module_info.rb +14 -0
- data/lib/berkeley_library/util/arrays.rb +178 -0
- data/lib/berkeley_library/util/logging.rb +1 -0
- data/lib/berkeley_library/util/ods/spreadsheet.rb +170 -0
- data/lib/berkeley_library/util/ods/xml/content_doc.rb +26 -0
- data/lib/berkeley_library/util/ods/xml/document_node.rb +57 -0
- data/lib/berkeley_library/util/ods/xml/element_node.rb +106 -0
- data/lib/berkeley_library/util/ods/xml/loext/table_protection.rb +26 -0
- data/lib/berkeley_library/util/ods/xml/manifest/file_entry.rb +42 -0
- data/lib/berkeley_library/util/ods/xml/manifest/manifest.rb +73 -0
- data/lib/berkeley_library/util/ods/xml/manifest_doc.rb +26 -0
- data/lib/berkeley_library/util/ods/xml/namespace.rb +46 -0
- data/lib/berkeley_library/util/ods/xml/office/automatic_styles.rb +181 -0
- data/lib/berkeley_library/util/ods/xml/office/body.rb +17 -0
- data/lib/berkeley_library/util/ods/xml/office/document_content.rb +98 -0
- data/lib/berkeley_library/util/ods/xml/office/document_styles.rb +39 -0
- data/lib/berkeley_library/util/ods/xml/office/font_face_decls.rb +30 -0
- data/lib/berkeley_library/util/ods/xml/office/scripts.rb +17 -0
- data/lib/berkeley_library/util/ods/xml/office/spreadsheet.rb +37 -0
- data/lib/berkeley_library/util/ods/xml/office/styles.rb +39 -0
- data/lib/berkeley_library/util/ods/xml/style/cell_style.rb +58 -0
- data/lib/berkeley_library/util/ods/xml/style/column_style.rb +36 -0
- data/lib/berkeley_library/util/ods/xml/style/default_style.rb +31 -0
- data/lib/berkeley_library/util/ods/xml/style/family.rb +85 -0
- data/lib/berkeley_library/util/ods/xml/style/font_face.rb +46 -0
- data/lib/berkeley_library/util/ods/xml/style/paragraph_properties.rb +30 -0
- data/lib/berkeley_library/util/ods/xml/style/row_style.rb +37 -0
- data/lib/berkeley_library/util/ods/xml/style/style.rb +44 -0
- data/lib/berkeley_library/util/ods/xml/style/table_cell_properties.rb +40 -0
- data/lib/berkeley_library/util/ods/xml/style/table_column_properties.rb +30 -0
- data/lib/berkeley_library/util/ods/xml/style/table_properties.rb +25 -0
- data/lib/berkeley_library/util/ods/xml/style/table_row_properties.rb +28 -0
- data/lib/berkeley_library/util/ods/xml/style/table_style.rb +27 -0
- data/lib/berkeley_library/util/ods/xml/style/text_properties.rb +52 -0
- data/lib/berkeley_library/util/ods/xml/styles_doc.rb +26 -0
- data/lib/berkeley_library/util/ods/xml/table/named_expressions.rb +17 -0
- data/lib/berkeley_library/util/ods/xml/table/repeatable.rb +38 -0
- data/lib/berkeley_library/util/ods/xml/table/table.rb +193 -0
- data/lib/berkeley_library/util/ods/xml/table/table_cell.rb +46 -0
- data/lib/berkeley_library/util/ods/xml/table/table_column.rb +43 -0
- data/lib/berkeley_library/util/ods/xml/table/table_row.rb +136 -0
- data/lib/berkeley_library/util/ods/xml/text/p.rb +118 -0
- data/lib/berkeley_library/util/paths.rb +111 -0
- data/lib/berkeley_library/util/stringios.rb +30 -0
- data/lib/berkeley_library/util/strings.rb +42 -0
- data/lib/berkeley_library/util/sys_exits.rb +15 -0
- data/lib/berkeley_library/util/times.rb +22 -0
- data/lib/berkeley_library/util/uris.rb +44 -0
- data/lib/berkeley_library/util/uris/appender.rb +162 -0
- data/lib/berkeley_library/util/uris/requester.rb +62 -0
- data/lib/berkeley_library/util/uris/validator.rb +32 -0
- data/rakelib/bundle.rake +8 -0
- data/rakelib/coverage.rake +11 -0
- data/rakelib/gem.rake +54 -0
- data/rakelib/rubocop.rake +18 -0
- data/rakelib/spec.rake +2 -0
- data/spec/.rubocop.yml +40 -0
- data/spec/berkeley_library/tind/api/api_exception_spec.rb +91 -0
- data/spec/berkeley_library/tind/api/api_spec.rb +143 -0
- data/spec/berkeley_library/tind/api/collection_spec.rb +74 -0
- data/spec/berkeley_library/tind/api/date_range_spec.rb +110 -0
- data/spec/berkeley_library/tind/api/format_spec.rb +54 -0
- data/spec/berkeley_library/tind/api/search_spec.rb +364 -0
- data/spec/berkeley_library/tind/config_spec.rb +86 -0
- data/spec/berkeley_library/tind/export/column_group_spec.rb +29 -0
- data/spec/berkeley_library/tind/export/column_spec.rb +43 -0
- data/spec/berkeley_library/tind/export/config_spec.rb +206 -0
- data/spec/berkeley_library/tind/export/export_command_spec.rb +169 -0
- data/spec/berkeley_library/tind/export/export_format_spec.rb +59 -0
- data/spec/berkeley_library/tind/export/export_matcher.rb +112 -0
- data/spec/berkeley_library/tind/export/export_spec.rb +150 -0
- data/spec/berkeley_library/tind/export/exporter_spec.rb +125 -0
- data/spec/berkeley_library/tind/export/row_spec.rb +118 -0
- data/spec/berkeley_library/tind/export/table_spec.rb +322 -0
- data/spec/berkeley_library/tind/marc/xml_reader_spec.rb +93 -0
- data/spec/berkeley_library/util/arrays_spec.rb +340 -0
- data/spec/berkeley_library/util/ods/spreadsheet_spec.rb +124 -0
- data/spec/berkeley_library/util/ods/xml/content_doc_spec.rb +121 -0
- data/spec/berkeley_library/util/ods/xml/manifest/file_entry_spec.rb +27 -0
- data/spec/berkeley_library/util/ods/xml/manifest/manifest_spec.rb +33 -0
- data/spec/berkeley_library/util/ods/xml/office/document_content_spec.rb +60 -0
- data/spec/berkeley_library/util/ods/xml/style/automatic_styles_spec.rb +37 -0
- data/spec/berkeley_library/util/ods/xml/style/family_spec.rb +57 -0
- data/spec/berkeley_library/util/ods/xml/table/table_row_spec.rb +179 -0
- data/spec/berkeley_library/util/ods/xml/table/table_spec.rb +218 -0
- data/spec/berkeley_library/util/paths_spec.rb +90 -0
- data/spec/berkeley_library/util/stringios_spec.rb +34 -0
- data/spec/berkeley_library/util/strings_spec.rb +27 -0
- data/spec/berkeley_library/util/times_spec.rb +39 -0
- data/spec/berkeley_library/util/uris_spec.rb +118 -0
- data/spec/data/collection-names.txt +438 -0
- data/spec/data/collections.json +4827 -0
- data/spec/data/disjoint-records.xml +187 -0
- data/spec/data/record-184453.xml +58 -0
- data/spec/data/record-184458.xml +63 -0
- data/spec/data/record-187888.xml +78 -0
- data/spec/data/records-api-search-cjk-p1.xml +6381 -0
- data/spec/data/records-api-search-cjk-p2.xml +5 -0
- data/spec/data/records-api-search-p1.xml +4506 -0
- data/spec/data/records-api-search-p2.xml +4509 -0
- data/spec/data/records-api-search-p3.xml +4506 -0
- data/spec/data/records-api-search-p4.xml +4509 -0
- data/spec/data/records-api-search-p5.xml +4506 -0
- data/spec/data/records-api-search-p6.xml +2436 -0
- data/spec/data/records-api-search-p7.xml +5 -0
- data/spec/data/records-api-search.xml +234 -0
- data/spec/data/records-manual-search.xml +547 -0
- data/spec/spec_helper.rb +30 -0
- data/test/profile/table_from_records_profile.rb +46 -0
- metadata +585 -0
@@ -0,0 +1 @@
|
|
1
|
+
Dir.glob(File.expand_path('marc/*.rb', __dir__)).sort.each(&method(:require))
|
@@ -0,0 +1,144 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'marc/xml_parsers'
|
3
|
+
require 'marc_extensions'
|
4
|
+
|
5
|
+
module BerkeleyLibrary
  module TIND
    module MARC
      # A customized XML reader for reading MARC records from TIND search results.
      #
      # In addition to the MARC records themselves, the reader captures the
      # `<search_id/>` and `<total/>` elements (TIND Search API) and the
      # `Search-Engine-Total-Number-Of-Results` comment (TIND Regular Search).
      class XMLReader
        include Enumerable
        include ::MARC::NokogiriReader

        # ############################################################
        # Constant

        # Matches the XML comment carrying the total result count; the count
        # is captured in group 1.
        COMMENT_TOTAL_RE = /Search-Engine-Total-Number-Of-Results: ([0-9]+)/.freeze

        # ############################################################
        # Attributes

        # @return [String, nil] the text of the `<search_id/>` element, or `nil` if none has been read
        attr_reader :search_id

        # Returns the total number of records, based on the `<total/>` tag
        # returned by the TIND Search API, or the special comment
        # `Search-Engine-Total-Number-Of-Results` returned by TIND
        # Regular Search in XML format.
        #
        # Note that the total is not guaranteed to be present, and if present,
        # may not be present unless at least some records have been parsed.
        #
        # @return [Integer, nil] the total number of records, or `nil` if the total has not been read yet
        def total
          @total&.to_i
        end

        # Returns the number of records yielded.
        #
        # @return [Integer] the number of records yielded.
        def records_yielded
          @records_yielded ||= 0
        end

        # ############################################################
        # Initializer

        # Reads MARC records from an XML datasource given either as a file path,
        # as an IO object, or as a string of XML text.
        #
        # @param source [String, Pathname, IO] the path to a file, an IO to read from directly,
        #   or a string containing XML
        # @param freeze [Boolean] whether to freeze each record after reading
        # @raise [ArgumentError] if `source` is not an IO, an existing file path, or XML text
        def initialize(source, freeze: false)
          @handle = ensure_io(source)
          @freeze = freeze
          # NOTE(review): `init` is not defined in this class; presumably it is
          # provided by ::MARC::NokogiriReader -- confirm.
          init
        end

        class << self
          include MARCExtensions::XMLReaderClassExtensions
        end

        # ############################################################
        # MARC::GenericPullParser overrides

        # Optionally freezes the current record, then hands it to the superclass
        # implementation. The yielded-record counter is incremented in an
        # `ensure` clause, i.e. even if the superclass implementation raises.
        def yield_record
          @record[:record].freeze if @freeze
          super
        ensure
          increment_records_yielded!
        end

        # ############################################################
        # Nokogiri::XML::SAX::Document overrides

        # Delegates to the superclass, then remembers the element name so that
        # {#characters} knows which element the text belongs to.
        #
        # @see Nokogiri::XML::SAX::Document#start_element_namespace
        # rubocop:disable Metrics/ParameterLists
        def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
          super

          @current_element_name = name
        end
        # rubocop:enable Metrics/ParameterLists

        # Delegates to the superclass, then clears the current element name.
        #
        # @see Nokogiri::XML::SAX::Document#end_element_namespace
        def end_element_namespace(name, prefix = nil, uri = nil)
          super

          @current_element_name = nil
        end

        # Captures the text of `<search_id/>` and `<total/>` elements; text in
        # any other element is passed to the superclass. Text outside of any
        # element is ignored.
        #
        # NOTE(review): each call overwrites the previously captured value; if
        # the parser ever delivers one text node in several chunks, only the
        # last chunk would be kept -- confirm whether that can happen here.
        #
        # @see Nokogiri::XML::SAX::Document#characters
        def characters(string)
          return unless (name = @current_element_name)

          case name
          when 'search_id'
            @search_id = string
          when 'total'
            @total = string.to_i
          else
            super
          end
        end

        # Captures the total from a `Search-Engine-Total-Number-Of-Results`
        # comment; all other comments are ignored.
        #
        # @see Nokogiri::XML::SAX::Document#comment
        def comment(string)
          return unless (md = COMMENT_TOTAL_RE.match(string))

          @total = md[1].to_i
        end

        # ############################################################
        # Private

        private

        # Converts the source into something IO-like: IO-like objects are
        # returned as-is, existing file paths are opened, and strings that
        # look like XML are wrapped in a StringIO.
        #
        # @param file [Object] the source passed to the initializer
        # @return [#read] an IO-like object
        # @raise [ArgumentError] if the source is none of the above
        def ensure_io(file)
          return file if io_like?(file)
          return File.new(file) if file_exists?(file)
          return StringIO.new(file) if xml_text?(file)

          raise ArgumentError, "Don't know how to read XML from #{file.inspect}: not an IO, file path, or XML text"
        end

        # Returns true if `obj` is close enough to an IO object for Nokogiri
        # to parse as one.
        #
        # @param obj [Object] the object that might be an IO
        # @see https://github.com/sparklemotion/nokogiri/blob/v1.11.1/lib/nokogiri/xml/sax/parser.rb#L81 Nokogiri::XML::SAX::Parser#parse
        def io_like?(obj)
          obj.respond_to?(:read) && obj.respond_to?(:close)
        end

        # Returns true if `path` is a Pathname-like object or a string-like
        # path that exists on the filesystem.
        def file_exists?(path)
          (path.respond_to?(:exist?) && path.exist?) ||
            (path.respond_to?(:to_str) && File.exist?(path))
        end

        # Returns true if `obj` is string-like and has a line starting with
        # (optional whitespace and) `<`. Only string-like objects are tested,
        # because `Object#=~` no longer exists on Ruby 3.2+ and would raise
        # NoMethodError (e.g. for a non-existent Pathname) before the intended
        # ArgumentError could be raised.
        def xml_text?(obj)
          obj.respond_to?(:to_str) && obj.to_str.match?(/^\s*</)
        end

        def increment_records_yielded!
          @records_yielded = records_yielded + 1
        end
      end
    end
  end
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
module BerkeleyLibrary
  module TIND
    # Gem metadata constants (name, author, version, etc.).
    class ModuleInfo
      NAME = 'berkeley_library-tind'.freeze
      AUTHOR = 'David Moles'.freeze
      AUTHOR_EMAIL = 'dmoles@berkeley.edu'.freeze
      SUMMARY = 'TIND DA utilities for the UC Berkeley Library'.freeze
      DESCRIPTION = 'UC Berkeley Library utility gem for working with the TIND DA digital archive.'.freeze
      LICENSE = 'MIT'.freeze
      VERSION = '0.4.0'.freeze
      HOMEPAGE = 'https://github.com/BerkeleyLibrary/tind'.freeze
    end
  end
end
|
@@ -0,0 +1,178 @@
|
|
1
|
+
module BerkeleyLibrary
  module Util
    # Utility methods for comparing, searching and merging ordered lists.
    module Arrays
      class << self
        # Clients can choose to call class methods directly, or include the module
        include Arrays
      end

      # Checks whether the specified list contains, in the same order, all
      # values in the other specified list (additional values in between
      # are fine).
      #
      # @param subset [Array] the values to look for
      # @param superset [Array] the list of values to look in
      # @return [Boolean] true if all values were found, false otherwise
      def ordered_superset?(superset:, subset:)
        !find_indices(in_array: superset, for_array: subset).nil?
      end

      # Counts how many contiguous elements from the start of a
      # sequence of values satisfy the given block.
      #
      # @overload count_while(values:)
      #   Returns an enumerator.
      #   @param values [Enumerable] the values
      #   @return [Enumerator] the enumerator.
      # @overload count_while(values:, &block)
      #   Passes elements to the block until the block returns nil or false,
      #   then stops iterating and returns the count of matching elements.
      #   @param values [Enumerable] the values
      #   @return [Integer] the count
      def count_while(values:)
        return to_enum(:count_while, values: values) unless block_given?

        values.inject(0) do |count, x|
          matched = yield x
          # stop at the first non-matching element
          break count unless matched

          count + 1
        end
      end

      # Given two lists, one of which is a superset of the other, with elements
      # in the same order (but possibly with additional elements in the superset),
      # returns an array the length of the subset, containing for each element in
      # the subset the index of the corresponding element in the superset.
      #
      # @overload find_indices(for_array:, in_array:)
      #   For each value in `for_array`, finds the index of the first equal value
      #   in `in_array` after the previously matched value.
      #   @param in_array [Array] the list of values to look in
      #   @param for_array [Array] the values to look for
      #   @return [Array<Integer>, nil] the indices in `in_array` of each value in `for_array`,
      #     or `nil` if not all values could be found
      #
      # @overload find_indices(for_array:, in_array:, &block)
      #   For each value in `for_array`, finds the index of the first value
      #   in `in_array` after the previously matched value that matches
      #   the specified match function.
      #   @param in_array [Array] the list of values to look in
      #   @param for_array [Array] the values to look for
      #   @yieldparam source [Object] the value to compare
      #   @yieldparam target [Object] the value to compare against
      #   @return [Array<Integer>, nil] the indices in `in_array` of each value in `for_array`,
      #     or `nil` if not all values could be found
      def find_indices(for_array:, in_array:, &block)
        return find_indices_matching(for_array, in_array, &block) if block_given?

        find_all_indices(for_array, in_array)
      end

      # Given a block or a value, finds the index of the first matching value
      # at or after the specified start index.
      #
      # @overload find_index(value, in_array:, start_index:)
      #   Finds the first index of the specified value.
      #   @param value [Object] the value to find
      #   @param in_array [Array] the array to search
      #   @param start_index [Integer] the index to start with
      #   @return [Integer, nil] the index, or `nil` if no value matches
      # @overload find_index(&block)
      #   Finds the index of the first value matching
      #   the specified block.
      #   @param in_array [Array] the array to search
      #   @param start_index [Integer] the index to start with
      #   @yieldreturn [Boolean] whether the element matches
      #   @return [Integer, nil] the index, or `nil` if no value matches
      # @overload find_index
      #   @param in_array [Array] the array to search
      #   @param start_index [Integer] the index to start with
      #   @return [Enumerator] a new enumerator
      # @raise [ArgumentError] if more than one positional argument is given
      def find_index(*args, in_array:, start_index: 0, &block)
        raise ArgumentError, "wrong number of arguments (given #{args.size}, expected 0..1)" if args.size > 1

        if args.empty? && !block_given?
          return Enumerator.new { |y| find_index(in_array: in_array, start_index: start_index, &y) }
        end

        # Array#[] with a range starting past the end returns nil: nothing to search
        return unless (tail = in_array[start_index..])
        return unless (relative_index = tail.find_index(*args, &block))

        relative_index + start_index
      end

      # Given an array of unique integers _a<sub>1</sub>_, returns a new array
      # _a<sub>2</sub>_ in which the value at each index _i<sub>2</sub>_ is the
      # index _i<sub>1</sub>_ at which that value was found in _a<sub>1</sub>_.
      # E.g., given `[0, 2, 3]`, returns `[0, nil, 1, 2]`. The indices need
      # not be in order but must be unique.
      #
      # @param arr [Array<Integer>, nil] the array to invert.
      # @return [Array<Integer, nil>, nil] the inverted array, or nil if the input array is nil
      # @raise TypeError if `arr` is not an array of integers
      # @raise ArgumentError if `arr` contains duplicate values
      def invert(arr)
        return unless arr

        # noinspection RubyNilAnalysis
        Array.new(arr.size).tap do |inv|
          arr.each_with_index do |v, i|
            # assigning past the end grows the result, padding with nil
            next inv[v] = i unless (prev_index = inv[v])

            raise ArgumentError, "Duplicate value #{v} at index #{i} already found at #{prev_index}"
          end
        end
      end

      # Merges two arrays in an order-preserving manner.
      #
      # @param a1 [Array] the first array
      # @param a2 [Array] the second array
      # @return [Array] a merged array that is an ordered superset of both `a1` and `a2`
      # @see Arrays#ordered_superset?
      def merge(a1, a2)
        return a1 if a2.empty?
        return a2 if a1.empty?

        shorter, longer = a1.size > a2.size ? [a2, a1] : [a1, a2]
        do_merge(shorter, longer)
      end

      private

      # Finds the first value of `shorter` that also occurs in `longer`, emits
      # everything before it from both arrays followed by the shared value, and
      # recursively merges the remainders. If no value is shared, the two
      # arrays are simply concatenated (see #sort_by_first_and_flatten).
      def do_merge(shorter, longer)
        shorter.each_with_index do |v, ix_s|
          next unless (ix_l = longer.find_index(v))

          shorter_unmatched = shorter[0...ix_s]
          longer_unmatched = longer[0...ix_l]
          all_unmatched = sort_by_first_and_flatten(shorter_unmatched, longer_unmatched)
          return [*all_unmatched, v] + merge(shorter[ix_s + 1..], longer[ix_l + 1..])
        end

        sort_by_first_and_flatten(longer, shorter)
      end

      # Concatenates two arrays, putting `a2` first only if the first element
      # of `a1` supports `>` and is greater than the first element of `a2`.
      def sort_by_first_and_flatten(a1, a2)
        return a1 if a2.empty?
        return a2 if a1.empty?
        return a2 + a1 if a1.first.respond_to?(:>) && a1.first > a2.first

        a1 + a2
      end

      # Index lookup by equality; see #find_indices.
      def find_all_indices(source, target)
        source.each_with_object([]) do |src, target_indices|
          # resume searching just after the previous match
          target_offset = target_indices.empty? ? 0 : target_indices.last + 1
          return nil unless (target_index = find_index(src, in_array: target, start_index: target_offset))

          target_indices << target_index
        end
      end

      # Index lookup using the caller's match block; see #find_indices.
      def find_indices_matching(source, target)
        source.each_with_object([]) do |src, target_indices|
          # resume searching just after the previous match
          target_offset = target_indices.empty? ? 0 : target_indices.last + 1
          return nil unless (target_index = find_index(in_array: target, start_index: target_offset) { |tgt| yield src, tgt })

          target_indices << target_index
        end
      end
    end
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
require 'berkeley_library/logging'
|
@@ -0,0 +1,170 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
require 'zip'
|
3
|
+
require 'berkeley_library/util/logging'
|
4
|
+
require 'berkeley_library/util/ods/xml/content_doc'
|
5
|
+
require 'berkeley_library/util/ods/xml/styles_doc'
|
6
|
+
require 'berkeley_library/util/ods/xml/manifest_doc'
|
7
|
+
|
8
|
+
module BerkeleyLibrary
  module Util
    module ODS
      # An OpenDocument spreadsheet, made up of a content document, a styles
      # document, and a manifest listing them, which can be written out as a
      # zip container or as a directory of pretty-printed XML files.
      class Spreadsheet
        include BerkeleyLibrary::Logging

        # ------------------------------------------------------------
        # Utility methods

        # Adds a table ('worksheet') to the spreadsheet.
        #
        # @param name [String] the table name
        # @param protected [Boolean] whether to protect the table
        # @return [BerkeleyLibrary::Util::ODS::XML::Table::Table] a new table with the specified name
        def add_table(name, protected: true)
          content.document_content.add_table(name, protected: protected)
        end

        # ------------------------------------------------------------
        # Accessors

        # Returns the content document
        # @return [XML::ContentDoc] the container root-level content document
        def content
          @content ||= XML::ContentDoc.new
        end

        # Returns the container styles
        # @return [XML::StylesDoc] the container root-level style document
        def styles
          @styles ||= XML::StylesDoc.new
        end

        # Returns the container manifest, creating it on first access with
        # one entry for each of the styles and content documents.
        # @return [XML::ManifestDoc] the container manifest document
        def manifest
          @manifest ||= XML::ManifestDoc.new.tap do |mf_doc|
            manifest = mf_doc.manifest
            manifest_docs.each { |doc| manifest.add_entry_for(doc) }
          end
        end

        # Gets the document styles
        #
        # @return [BerkeleyLibrary::Util::ODS::XML::Office::AutomaticStyles] the styles
        def auto_styles
          content.document_content.automatic_styles
        end

        # ------------------------------------------------------------
        # Output

        # @overload write_to
        #   Writes to a new string.
        #   @return [String] a binary string containing the spreadsheet data.
        # @overload write_to(out)
        #   Writes to the specified output stream.
        #   @param out [IO] the output stream
        #   @return [void]
        # @overload write_to(path)
        #   Writes to the specified file. If `path` denotes a directory, the
        #   spreadsheet will be written as exploded, pretty-printed XML.
        #   @param path [String, Pathname] the path to the output file
        #   @return [void]
        #   @see BerkeleyLibrary::Util::ODS::Spreadsheet#write_exploded_to
        # noinspection RubyYardReturnMatch
        def write_to(out = nil)
          return write_to_string unless out
          return write_to_stream(out) if io_like?(out)
          return write_exploded_to(out) if File.directory?(out)

          write_to_file(out)
        end

        # Writes to a new string.
        # @return [String] the contents of the in-memory stream written to
        def write_to_string
          # noinspection RubyYardParamTypeMatch
          StringIO.new.tap { |out| write_to_stream(out) }.string
        end

        # Writes to the specified output stream.
        #
        # Zip64 support is switched on for the duration of the write and the
        # previous setting is restored afterwards, even if writing fails.
        # NOTE(review): `Zip.write_zip64_support` is set on the `Zip` module
        # itself, so other code writing zips at the same time would presumably
        # see the changed setting -- confirm if this is used concurrently.
        #
        # @param out [IO]
        def write_to_stream(out)
          zip64_orig = Zip.write_zip64_support
          begin
            Zip.write_zip64_support = true
            write_zipfile(out)
          ensure
            Zip.write_zip64_support = zip64_orig
          end
        end

        # Writes to the specified file.
        # @param path [String, Pathname]
        def write_to_file(path)
          File.open(path, 'wb') { |f| write_to_stream(f) }
        end

        # Writes this spreadsheet as an exploded set of pretty-printed XML files.
        # NOTE: OpenOffice itself and many other tools get confused by the extra text
        # nodes in the pretty-printed files and won't read them properly; this method
        # is mostly for debugging.
        #
        # @param dir [String, Pathname] the (existing) directory to write to
        # @return [Array<String>] a list of files written.
        # @raise [ArgumentError] if `dir` is not a directory
        def write_exploded_to(dir)
          raise ArgumentError, "Not a directory: #{dir.inspect}" unless File.directory?(dir)

          [].tap do |files_written|
            each_document do |doc|
              output_path = write_exploded(doc, dir)
              files_written << File.absolute_path(output_path)
              logger.debug("Wrote #{files_written.last}")
            end
          end
        end

        # ------------------------------------------------------------
        # Private methods

        private

        # Yields the manifest first, then each document listed in it.
        def each_document(&block)
          yield manifest

          manifest_docs.each(&block)
        end

        # The documents that get a manifest entry: styles, then content.
        def manifest_docs
          [styles, content]
        end

        # Returns true if `out` is IO-like enough for {Zip::OutputStream}, false otherwise
        # @return [Boolean] whether `out` can be passed to {Zip::OutputStream#write_buffer}
        def io_like?(out)
          %i[reopen rewind <<].all? { |m| out.respond_to?(m) }
        end

        # Writes the manifest and all documents as entries of a zip archive.
        def write_zipfile(out)
          io = Zip::OutputStream.write_buffer(out) do |zip|
            each_document { |doc| write_zip_entry(doc, zip) }
          end
          # NOTE: Zip::OutputStream plays games with the stream and
          #       doesn't necessarily write everything unless flushed, see:
          #       https://github.com/rubyzip/rubyzip/issues/265
          io.flush
        end

        # Starts a new zip entry at the document's path and writes the
        # document's XML into it.
        def write_zip_entry(doc, zip)
          zip.put_next_entry(doc.path)
          doc.to_xml(zip)
        end

        # Writes the document as non-compact XML under `dir`, creating any
        # intermediate directories, and returns the path written.
        def write_exploded(doc, dir)
          output_path = File.join(dir, doc.path)
          FileUtils.mkdir_p(File.dirname(output_path))
          doc.to_xml(output_path, compact: false)
          output_path
        end
      end
    end
  end
end
|