berkeley_library-tind 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/build.yml +18 -0
- data/.gitignore +388 -0
- data/.idea/inspectionProfiles/Project_Default.xml +20 -0
- data/.idea/misc.xml +4 -0
- data/.idea/modules.xml +8 -0
- data/.idea/tind.iml +138 -0
- data/.idea/vcs.xml +6 -0
- data/.rubocop.yml +334 -0
- data/.ruby-version +1 -0
- data/.simplecov +8 -0
- data/.yardopts +1 -0
- data/CHANGES.md +58 -0
- data/Dockerfile +57 -0
- data/Gemfile +3 -0
- data/Jenkinsfile +18 -0
- data/LICENSE.md +21 -0
- data/README.md +73 -0
- data/Rakefile +20 -0
- data/berkeley_library-tind.gemspec +50 -0
- data/bin/tind-export +14 -0
- data/docker-compose.yml +15 -0
- data/lib/berkeley_library/tind.rb +3 -0
- data/lib/berkeley_library/tind/api.rb +1 -0
- data/lib/berkeley_library/tind/api/api.rb +132 -0
- data/lib/berkeley_library/tind/api/api_exception.rb +131 -0
- data/lib/berkeley_library/tind/api/collection.rb +82 -0
- data/lib/berkeley_library/tind/api/date_range.rb +67 -0
- data/lib/berkeley_library/tind/api/format.rb +32 -0
- data/lib/berkeley_library/tind/api/search.rb +100 -0
- data/lib/berkeley_library/tind/config.rb +103 -0
- data/lib/berkeley_library/tind/export.rb +1 -0
- data/lib/berkeley_library/tind/export/column.rb +54 -0
- data/lib/berkeley_library/tind/export/column_group.rb +144 -0
- data/lib/berkeley_library/tind/export/column_group_list.rb +131 -0
- data/lib/berkeley_library/tind/export/column_width_calculator.rb +76 -0
- data/lib/berkeley_library/tind/export/config.rb +154 -0
- data/lib/berkeley_library/tind/export/csv_exporter.rb +29 -0
- data/lib/berkeley_library/tind/export/export.rb +47 -0
- data/lib/berkeley_library/tind/export/export_command.rb +168 -0
- data/lib/berkeley_library/tind/export/export_exception.rb +8 -0
- data/lib/berkeley_library/tind/export/export_format.rb +67 -0
- data/lib/berkeley_library/tind/export/exporter.rb +105 -0
- data/lib/berkeley_library/tind/export/filter.rb +52 -0
- data/lib/berkeley_library/tind/export/no_results_error.rb +7 -0
- data/lib/berkeley_library/tind/export/ods_exporter.rb +138 -0
- data/lib/berkeley_library/tind/export/row.rb +24 -0
- data/lib/berkeley_library/tind/export/row_metrics.rb +18 -0
- data/lib/berkeley_library/tind/export/table.rb +175 -0
- data/lib/berkeley_library/tind/export/table_metrics.rb +116 -0
- data/lib/berkeley_library/tind/marc.rb +1 -0
- data/lib/berkeley_library/tind/marc/xml_reader.rb +144 -0
- data/lib/berkeley_library/tind/module_info.rb +14 -0
- data/lib/berkeley_library/util/arrays.rb +178 -0
- data/lib/berkeley_library/util/logging.rb +1 -0
- data/lib/berkeley_library/util/ods/spreadsheet.rb +170 -0
- data/lib/berkeley_library/util/ods/xml/content_doc.rb +26 -0
- data/lib/berkeley_library/util/ods/xml/document_node.rb +57 -0
- data/lib/berkeley_library/util/ods/xml/element_node.rb +106 -0
- data/lib/berkeley_library/util/ods/xml/loext/table_protection.rb +26 -0
- data/lib/berkeley_library/util/ods/xml/manifest/file_entry.rb +42 -0
- data/lib/berkeley_library/util/ods/xml/manifest/manifest.rb +73 -0
- data/lib/berkeley_library/util/ods/xml/manifest_doc.rb +26 -0
- data/lib/berkeley_library/util/ods/xml/namespace.rb +46 -0
- data/lib/berkeley_library/util/ods/xml/office/automatic_styles.rb +181 -0
- data/lib/berkeley_library/util/ods/xml/office/body.rb +17 -0
- data/lib/berkeley_library/util/ods/xml/office/document_content.rb +98 -0
- data/lib/berkeley_library/util/ods/xml/office/document_styles.rb +39 -0
- data/lib/berkeley_library/util/ods/xml/office/font_face_decls.rb +30 -0
- data/lib/berkeley_library/util/ods/xml/office/scripts.rb +17 -0
- data/lib/berkeley_library/util/ods/xml/office/spreadsheet.rb +37 -0
- data/lib/berkeley_library/util/ods/xml/office/styles.rb +39 -0
- data/lib/berkeley_library/util/ods/xml/style/cell_style.rb +58 -0
- data/lib/berkeley_library/util/ods/xml/style/column_style.rb +36 -0
- data/lib/berkeley_library/util/ods/xml/style/default_style.rb +31 -0
- data/lib/berkeley_library/util/ods/xml/style/family.rb +85 -0
- data/lib/berkeley_library/util/ods/xml/style/font_face.rb +46 -0
- data/lib/berkeley_library/util/ods/xml/style/paragraph_properties.rb +30 -0
- data/lib/berkeley_library/util/ods/xml/style/row_style.rb +37 -0
- data/lib/berkeley_library/util/ods/xml/style/style.rb +44 -0
- data/lib/berkeley_library/util/ods/xml/style/table_cell_properties.rb +40 -0
- data/lib/berkeley_library/util/ods/xml/style/table_column_properties.rb +30 -0
- data/lib/berkeley_library/util/ods/xml/style/table_properties.rb +25 -0
- data/lib/berkeley_library/util/ods/xml/style/table_row_properties.rb +28 -0
- data/lib/berkeley_library/util/ods/xml/style/table_style.rb +27 -0
- data/lib/berkeley_library/util/ods/xml/style/text_properties.rb +52 -0
- data/lib/berkeley_library/util/ods/xml/styles_doc.rb +26 -0
- data/lib/berkeley_library/util/ods/xml/table/named_expressions.rb +17 -0
- data/lib/berkeley_library/util/ods/xml/table/repeatable.rb +38 -0
- data/lib/berkeley_library/util/ods/xml/table/table.rb +193 -0
- data/lib/berkeley_library/util/ods/xml/table/table_cell.rb +46 -0
- data/lib/berkeley_library/util/ods/xml/table/table_column.rb +43 -0
- data/lib/berkeley_library/util/ods/xml/table/table_row.rb +136 -0
- data/lib/berkeley_library/util/ods/xml/text/p.rb +118 -0
- data/lib/berkeley_library/util/paths.rb +111 -0
- data/lib/berkeley_library/util/stringios.rb +30 -0
- data/lib/berkeley_library/util/strings.rb +42 -0
- data/lib/berkeley_library/util/sys_exits.rb +15 -0
- data/lib/berkeley_library/util/times.rb +22 -0
- data/lib/berkeley_library/util/uris.rb +44 -0
- data/lib/berkeley_library/util/uris/appender.rb +162 -0
- data/lib/berkeley_library/util/uris/requester.rb +62 -0
- data/lib/berkeley_library/util/uris/validator.rb +32 -0
- data/rakelib/bundle.rake +8 -0
- data/rakelib/coverage.rake +11 -0
- data/rakelib/gem.rake +54 -0
- data/rakelib/rubocop.rake +18 -0
- data/rakelib/spec.rake +2 -0
- data/spec/.rubocop.yml +40 -0
- data/spec/berkeley_library/tind/api/api_exception_spec.rb +91 -0
- data/spec/berkeley_library/tind/api/api_spec.rb +143 -0
- data/spec/berkeley_library/tind/api/collection_spec.rb +74 -0
- data/spec/berkeley_library/tind/api/date_range_spec.rb +110 -0
- data/spec/berkeley_library/tind/api/format_spec.rb +54 -0
- data/spec/berkeley_library/tind/api/search_spec.rb +364 -0
- data/spec/berkeley_library/tind/config_spec.rb +86 -0
- data/spec/berkeley_library/tind/export/column_group_spec.rb +29 -0
- data/spec/berkeley_library/tind/export/column_spec.rb +43 -0
- data/spec/berkeley_library/tind/export/config_spec.rb +206 -0
- data/spec/berkeley_library/tind/export/export_command_spec.rb +169 -0
- data/spec/berkeley_library/tind/export/export_format_spec.rb +59 -0
- data/spec/berkeley_library/tind/export/export_matcher.rb +112 -0
- data/spec/berkeley_library/tind/export/export_spec.rb +150 -0
- data/spec/berkeley_library/tind/export/exporter_spec.rb +125 -0
- data/spec/berkeley_library/tind/export/row_spec.rb +118 -0
- data/spec/berkeley_library/tind/export/table_spec.rb +322 -0
- data/spec/berkeley_library/tind/marc/xml_reader_spec.rb +93 -0
- data/spec/berkeley_library/util/arrays_spec.rb +340 -0
- data/spec/berkeley_library/util/ods/spreadsheet_spec.rb +124 -0
- data/spec/berkeley_library/util/ods/xml/content_doc_spec.rb +121 -0
- data/spec/berkeley_library/util/ods/xml/manifest/file_entry_spec.rb +27 -0
- data/spec/berkeley_library/util/ods/xml/manifest/manifest_spec.rb +33 -0
- data/spec/berkeley_library/util/ods/xml/office/document_content_spec.rb +60 -0
- data/spec/berkeley_library/util/ods/xml/style/automatic_styles_spec.rb +37 -0
- data/spec/berkeley_library/util/ods/xml/style/family_spec.rb +57 -0
- data/spec/berkeley_library/util/ods/xml/table/table_row_spec.rb +179 -0
- data/spec/berkeley_library/util/ods/xml/table/table_spec.rb +218 -0
- data/spec/berkeley_library/util/paths_spec.rb +90 -0
- data/spec/berkeley_library/util/stringios_spec.rb +34 -0
- data/spec/berkeley_library/util/strings_spec.rb +27 -0
- data/spec/berkeley_library/util/times_spec.rb +39 -0
- data/spec/berkeley_library/util/uris_spec.rb +118 -0
- data/spec/data/collection-names.txt +438 -0
- data/spec/data/collections.json +4827 -0
- data/spec/data/disjoint-records.xml +187 -0
- data/spec/data/record-184453.xml +58 -0
- data/spec/data/record-184458.xml +63 -0
- data/spec/data/record-187888.xml +78 -0
- data/spec/data/records-api-search-cjk-p1.xml +6381 -0
- data/spec/data/records-api-search-cjk-p2.xml +5 -0
- data/spec/data/records-api-search-p1.xml +4506 -0
- data/spec/data/records-api-search-p2.xml +4509 -0
- data/spec/data/records-api-search-p3.xml +4506 -0
- data/spec/data/records-api-search-p4.xml +4509 -0
- data/spec/data/records-api-search-p5.xml +4506 -0
- data/spec/data/records-api-search-p6.xml +2436 -0
- data/spec/data/records-api-search-p7.xml +5 -0
- data/spec/data/records-api-search.xml +234 -0
- data/spec/data/records-manual-search.xml +547 -0
- data/spec/spec_helper.rb +30 -0
- data/test/profile/table_from_records_profile.rb +46 -0
- metadata +585 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
Dir.glob(File.expand_path('marc/*.rb', __dir__)).sort.each(&method(:require))
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
require 'nokogiri'
|
|
2
|
+
require 'marc/xml_parsers'
|
|
3
|
+
require 'marc_extensions'
|
|
4
|
+
|
|
5
|
+
module BerkeleyLibrary
|
|
6
|
+
module TIND
|
|
7
|
+
module MARC
|
|
8
|
+
# A customized XML reader for reading MARC records from TIND search results.
|
|
9
|
+
class XMLReader
|
|
10
|
+
include Enumerable
|
|
11
|
+
include ::MARC::NokogiriReader
|
|
12
|
+
|
|
13
|
+
# ############################################################
|
|
14
|
+
# Constant
|
|
15
|
+
|
|
16
|
+
COMMENT_TOTAL_RE = /Search-Engine-Total-Number-Of-Results: ([0-9]+)/.freeze
|
|
17
|
+
|
|
18
|
+
# ############################################################
|
|
19
|
+
# Attributes
|
|
20
|
+
|
|
21
|
+
attr_reader :search_id
|
|
22
|
+
|
|
23
|
+
# Returns the total number of records, based on the `<total/>` tag
|
|
24
|
+
# returned by the TIND Search API, or the special comment
|
|
25
|
+
# `Search-Engine-Total-Number-Of-Results` returned by TIND
|
|
26
|
+
# Regular Search in XML format.
|
|
27
|
+
#
|
|
28
|
+
# Note that the total is not guaranteed to be present, and if present,
|
|
29
|
+
# may not be present unless at least some records have been parsed.
|
|
30
|
+
#
|
|
31
|
+
# @return [Integer, nil] the total number of records, or `nil` if the total has not been read yet
|
|
32
|
+
def total
  # Nothing has been read yet: report `nil` rather than a misleading zero
  return if @total.nil?

  @total.to_i
end
|
|
35
|
+
|
|
36
|
+
# Returns the number of records yielded.
|
|
37
|
+
#
|
|
38
|
+
# @return [Integer] the number of records yielded.
|
|
39
|
+
def records_yielded
|
|
40
|
+
@records_yielded ||= 0
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# ############################################################
|
|
44
|
+
# Initializer
|
|
45
|
+
|
|
46
|
+
# Reads MARC records from an XML datasource given either as a file path,
|
|
47
|
+
# or as an IO object.
|
|
48
|
+
#
|
|
49
|
+
# @param source [String, Pathname, IO] the path to a file, or an IO to read from directly
|
|
50
|
+
# @param freeze [Boolean] whether to freeze each record after reading
|
|
51
|
+
def initialize(source, freeze: false)
|
|
52
|
+
@handle = ensure_io(source)
|
|
53
|
+
@freeze = freeze
|
|
54
|
+
init
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
class << self
|
|
58
|
+
include MARCExtensions::XMLReaderClassExtensions
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# ############################################################
|
|
62
|
+
# MARC::GenericPullParser overrides
|
|
63
|
+
|
|
64
|
+
def yield_record
|
|
65
|
+
@record[:record].freeze if @freeze
|
|
66
|
+
super
|
|
67
|
+
ensure
|
|
68
|
+
increment_records_yielded!
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
# ############################################################
|
|
72
|
+
# Nokogiri::XML::SAX::Document overrides
|
|
73
|
+
|
|
74
|
+
# @see Nokogiri::XML::SAX::Document#start_element_namespace
|
|
75
|
+
# rubocop:disable Metrics/ParameterLists
|
|
76
|
+
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
|
|
77
|
+
super
|
|
78
|
+
|
|
79
|
+
@current_element_name = name
|
|
80
|
+
end
|
|
81
|
+
# rubocop:enable Metrics/ParameterLists
|
|
82
|
+
|
|
83
|
+
# @see Nokogiri::XML::SAX::Document#end_element_namespace
|
|
84
|
+
def end_element_namespace(name, prefix = nil, uri = nil)
|
|
85
|
+
super
|
|
86
|
+
|
|
87
|
+
@current_element_name = nil
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
# @see Nokogiri::XML::SAX::Document#characters
|
|
91
|
+
def characters(string)
  # Text seen while no element is open is ignored
  element = @current_element_name
  return unless element

  if element == 'search_id'
    @search_id = string
  elsif element == 'total'
    @total = string.to_i
  else
    # Any other element is handed to the inherited handler
    super
  end
end
|
|
103
|
+
|
|
104
|
+
# @see Nokogiri::XML::SAX::Document#comment
|
|
105
|
+
def comment(string)
  # Only the total-results comment is of interest; every other comment is ignored
  match = COMMENT_TOTAL_RE.match(string)
  @total = match[1].to_i if match
end
|
|
110
|
+
|
|
111
|
+
# ############################################################
|
|
112
|
+
# Private
|
|
113
|
+
|
|
114
|
+
private
|
|
115
|
+
|
|
116
|
+
def ensure_io(file)
  # Resolves `file` to something readable: an IO-like object is returned as-is,
  # an existing path is opened as a File, and literal XML text is wrapped in a
  # StringIO. Anything else raises ArgumentError.
  return file if io_like?(file)
  return File.new(file) if file_exists?(file)
  # Only string-like objects can be XML text. Checking `to_str` first keeps
  # non-strings (e.g. a Pathname to a missing file) on the ArgumentError path
  # below; as of Ruby 3.2, Object#=~ no longer exists, so matching them
  # directly would raise NoMethodError instead.
  return StringIO.new(file) if file.respond_to?(:to_str) && file.to_str.match?(/^\s*</)

  raise ArgumentError, "Don't know how to read XML from #{file.inspect}: not an IO, file path, or XML text"
end

# Returns true if `obj` is close enough to an IO object for Nokogiri
# to parse as one.
#
# @param obj [Object] the object that might be an IO
# @return [Boolean] true if `obj` responds to both `read` and `close`
# @see https://github.com/sparklemotion/nokogiri/blob/v1.11.1/lib/nokogiri/xml/sax/parser.rb#L81 Nokogiri::XML::SAX::Parser#parse
def io_like?(obj)
  obj.respond_to?(:read) && obj.respond_to?(:close)
end

# Returns true if `path` denotes an existing file, either because it can
# answer `exist?` itself (e.g. a Pathname) or because it is string-like
# and `File.exist?` finds it.
#
# @param path [Object] the object that might be a file path
# @return [Boolean] whether the path exists
def file_exists?(path)
  (path.respond_to?(:exist?) && path.exist?) ||
    (path.respond_to?(:to_str) && File.exist?(path))
end
|
|
137
|
+
|
|
138
|
+
def increment_records_yielded!
|
|
139
|
+
@records_yielded = records_yielded + 1
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
end
|
|
144
|
+
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
module BerkeleyLibrary
|
|
2
|
+
module TIND
|
|
3
|
+
class ModuleInfo
|
|
4
|
+
NAME = 'berkeley_library-tind'.freeze
|
|
5
|
+
AUTHOR = 'David Moles'.freeze
|
|
6
|
+
AUTHOR_EMAIL = 'dmoles@berkeley.edu'.freeze
|
|
7
|
+
SUMMARY = 'TIND DA utilities for the UC Berkeley Library'.freeze
|
|
8
|
+
DESCRIPTION = 'UC Berkeley Library utility gem for working with the TIND DA digital archive.'.freeze
|
|
9
|
+
LICENSE = 'MIT'.freeze
|
|
10
|
+
VERSION = '0.4.0'.freeze
|
|
11
|
+
HOMEPAGE = 'https://github.com/BerkeleyLibrary/tind'.freeze
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
module BerkeleyLibrary
|
|
2
|
+
module Util
|
|
3
|
+
module Arrays
|
|
4
|
+
class << self
|
|
5
|
+
# Clients can choose to call class methods directly, or include the module
|
|
6
|
+
include Arrays
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
# Recursively checks whether the specified list contains, in the
|
|
10
|
+
# same order, all values in the other specified list (additional values
|
|
11
|
+
# in between are fine)
|
|
12
|
+
#
|
|
13
|
+
# @param subset [Array] the values to look for
|
|
14
|
+
# @param superset [Array] the list of values to look in
|
|
15
|
+
# @return [Boolean] true if all values were found, false otherwise
|
|
16
|
+
def ordered_superset?(superset:, subset:)
|
|
17
|
+
!find_indices(in_array: superset, for_array: subset).nil?
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
# Counts how many contiguous elements from the start of an
|
|
21
|
+
# sequence of values satisfy the given block.
|
|
22
|
+
#
|
|
23
|
+
# @overload count_while(values:)
|
|
24
|
+
# Returns an enumerator.
|
|
25
|
+
# @param values [Enumerable] the values
|
|
26
|
+
# @return [Enumerator] the enumerator.
|
|
27
|
+
# @overload count_while(values:, &block)
|
|
28
|
+
# Passes elements to the block until the block returns nil or false,
|
|
29
|
+
# then stops iterating and returns the count of matching elements.
|
|
30
|
+
# @param values [Enumerable] the values
|
|
31
|
+
# @return [Integer] the count
|
|
32
|
+
def count_while(values:)
  # Without a block, hand back an enumerator over this same call
  return to_enum(:count_while, values: values) unless block_given?

  matched = 0
  values.each do |value|
    # Stop at the first element the block rejects; later elements are never visited
    break unless yield value

    matched += 1
  end
  matched
end
|
|
42
|
+
|
|
43
|
+
# Given two lists, one of which is a superset of the other, with elements
|
|
44
|
+
# in the same order (but possibly with additional elements in the superset),
|
|
45
|
+
# returns an array the length of the subset, containing for each element in
|
|
46
|
+
# the subset the index of the corresponding element in the superset.
|
|
47
|
+
#
|
|
48
|
+
# @overload find_indices(for_array:, in_array:)
|
|
49
|
+
# For each value in `for_array`, finds the index of the first equal value
|
|
50
|
+
# in `in_array` after the previously matched value.
|
|
51
|
+
# @param in_array [Array] the list of values to look in
|
|
52
|
+
# @param for_array [Array] the values to look for
|
|
53
|
+
# @return [Array<Integer>, nil] the indices in `in_array` of each value in `for_array`,
|
|
54
|
+
# or `nil` if not all values could be found
|
|
55
|
+
#
|
|
56
|
+
# @overload find_indices(for_array:, in_array:, &block)
|
|
57
|
+
# For each value in `for_array`, finds the index of the first value
|
|
58
|
+
# in `in_array` after the previously matched value that matches
|
|
59
|
+
# the specified match function.
|
|
60
|
+
# @param in_array [Array] the list of values to look in
|
|
61
|
+
# @param for_array [Array] the values to look for
|
|
62
|
+
# @yieldparam source [Object] the value to compare
|
|
63
|
+
# @yieldparam target [Object] the value to compare against
|
|
64
|
+
# @return [Array<Integer>, nil] the indices in `in_array` of each value in `for_array`,
|
|
65
|
+
# or `nil` if not all values could be found
|
|
66
|
+
def find_indices(for_array:, in_array:, &block)
  if block_given?
    # A block supplies the match function, called with (source value, candidate value)
    find_indices_matching(for_array, in_array, &block)
  else
    # No block: fall back to the value-based search
    find_all_indices(for_array, in_array)
  end
end
|
|
71
|
+
|
|
72
|
+
# Given a block or a value, finds the index of the first matching value
|
|
73
|
+
# at or after the specified start index.
|
|
74
|
+
#
|
|
75
|
+
# @overload find_index(value, in_array:, start_index:)
|
|
76
|
+
# Finds the first index of the specified value.
|
|
77
|
+
# @param value [Object] the value to find
|
|
78
|
+
# @param in_array [Array] the array to search
|
|
79
|
+
# @param start_index [Integer] the index to start with
|
|
80
|
+
# @return [Integer, nil] the index, or `nil` if no value matches
|
|
81
|
+
# @overload find_index(&block)
|
|
82
|
+
# Finds the index of the first value matching
|
|
83
|
+
# the specified block.
|
|
84
|
+
# @param in_array [Array] the array to search
|
|
85
|
+
# @param start_index [Integer] the index to start with
|
|
86
|
+
# @yieldreturn [Boolean] whether the element matches
|
|
87
|
+
# @return [Integer, nil] the index, or `nil` if no value matches
|
|
88
|
+
# @overload find_index
|
|
89
|
+
# @param in_array [Array] the array to search
|
|
90
|
+
# @param start_index [Integer] the index to start with
|
|
91
|
+
# @return [Enumerator] a new enumerator
|
|
92
|
+
def find_index(*args, in_array:, start_index: 0, &block)
  # Accepts the same forms as Array#find_index (a value, a block, or neither),
  # but searches only from `start_index` onward and reports the index relative
  # to the whole of `in_array`.
  raise ArgumentError, "wrong number of arguments (given #{args.size}, expected 0..1)" if args.size > 1
  return Enumerator.new { |y| find_index(in_array: in_array, start_index: start_index, &y) } if args.empty? && !block_given?

  # Slicing from beyond the end of the array yields nil, not an empty array;
  # treat that as "no match" instead of failing on nil
  return unless (relative_index = in_array[start_index..]&.find_index(*args, &block))

  # Translate the position within the slice back to a position in `in_array`
  relative_index + start_index
end
|
|
99
|
+
|
|
100
|
+
# Given an array of unique integers _a<sub>1</sub>_, returns a new array
|
|
101
|
+
# _a<sub>2</sub>_ in which the value at each index _i<sub>2</sub>_ is the
|
|
102
|
+
# index _i<sub>1</sub>_ at which that value was found in _a<sub>1</sub>_.
|
|
103
|
+
# E.g., given `[0, 2, 3]`, returns `[0, nil, 1, 2]`. The indices need
|
|
104
|
+
# not be in order but must be unique.
|
|
105
|
+
#
|
|
106
|
+
# @param arr [Array<Integer>, nil] the array to invert.
|
|
107
|
+
# @return [Array<Integer, nil>, nil] the inverted array, or nil if the input array is nil
|
|
108
|
+
# @raise TypeError if `arr` is not an array of integers
|
|
109
|
+
# @raise ArgumentError if `arr` contains duplicate values
|
|
110
|
+
def invert(arr)
  return unless arr

  inverted = Array.new(arr.size)
  arr.each_with_index do |value, index|
    # A slot that is already filled means `value` occurred earlier in `arr`
    previous = inverted[value]
    raise ArgumentError, "Duplicate value #{value} at index #{index} already found at #{previous}" if previous

    inverted[value] = index
  end
  inverted
end
|
|
122
|
+
|
|
123
|
+
# Merges two arrays in an order-preserving manner.
|
|
124
|
+
# @param a1 [Array] the first array
|
|
125
|
+
# @param a2 [Array] the second array
|
|
126
|
+
# @return [Array] a merged array that is an ordered superset of both `a1` and `a2`
|
|
127
|
+
# @see Arrays#ordered_superset?
|
|
128
|
+
def merge(a1, a2)
  # With one side empty, the other side already is the merged result
  return a1 if a2.empty?
  return a2 if a1.empty?

  # The helper takes the shorter array first; on a tie, a1 counts as the shorter
  a1.size > a2.size ? do_merge(a2, a1) : do_merge(a1, a2)
end
|
|
135
|
+
|
|
136
|
+
private
|
|
137
|
+
|
|
138
|
+
def do_merge(shorter, longer)
  shorter.each_with_index do |value, short_ix|
    # Look for the first element of `shorter` that also occurs in `longer`
    long_ix = longer.find_index(value)
    next unless long_ix

    # Everything before the shared element on either side comes first,
    # then the shared element, then the merge of what follows it on both sides
    merged = sort_by_first_and_flatten(shorter[0...short_ix], longer[0...long_ix])
    merged << value
    return merged + merge(shorter[(short_ix + 1)..], longer[(long_ix + 1)..])
  end

  # No shared elements at all
  sort_by_first_and_flatten(longer, shorter)
end
|
|
150
|
+
|
|
151
|
+
def sort_by_first_and_flatten(a1, a2)
  # With one side empty there is nothing to order
  return a1 if a2.empty?
  return a2 if a1.empty?

  head = a1.first
  # a1 goes second only when its first element supports `>` and is strictly greater
  a1_goes_last = head.respond_to?(:>) && head > a2.first
  a1_goes_last ? a2 + a1 : a1 + a2
end
|
|
158
|
+
|
|
159
|
+
def find_all_indices(source, target)
  matched = []
  source.each do |wanted|
    # Resume just past the previous match so the indices stay strictly increasing
    offset = matched.empty? ? 0 : matched.last + 1
    found = find_index(wanted, in_array: target, start_index: offset)
    # One missing value means there is no complete answer
    return nil unless found

    matched << found
  end
  matched
end
|
|
167
|
+
|
|
168
|
+
def find_indices_matching(source, target)
  matched = []
  source.each do |wanted|
    # Resume just past the previous match so the indices stay strictly increasing
    offset = matched.empty? ? 0 : matched.last + 1
    # The caller's block decides whether a candidate matches the wanted value
    found = find_index(in_array: target, start_index: offset) { |candidate| yield wanted, candidate }
    return nil unless found

    matched << found
  end
  matched
end
|
|
176
|
+
end
|
|
177
|
+
end
|
|
178
|
+
end
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
require 'berkeley_library/logging'
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
require 'fileutils'
|
|
2
|
+
require 'zip'
|
|
3
|
+
require 'berkeley_library/util/logging'
|
|
4
|
+
require 'berkeley_library/util/ods/xml/content_doc'
|
|
5
|
+
require 'berkeley_library/util/ods/xml/styles_doc'
|
|
6
|
+
require 'berkeley_library/util/ods/xml/manifest_doc'
|
|
7
|
+
|
|
8
|
+
module BerkeleyLibrary
|
|
9
|
+
module Util
|
|
10
|
+
module ODS
|
|
11
|
+
class Spreadsheet
|
|
12
|
+
include BerkeleyLibrary::Logging
|
|
13
|
+
|
|
14
|
+
# ------------------------------------------------------------
|
|
15
|
+
# Utility methods
|
|
16
|
+
|
|
17
|
+
# Adds a table ('worksheet') to the spreadsheet.
|
|
18
|
+
#
|
|
19
|
+
# @param name [String] the table name
|
|
20
|
+
# @param protected [Boolean] whether to protect the table
|
|
21
|
+
# @return [BerkeleyLibrary::Util::ODS::XML::Table::Table] a new table with the specified name
|
|
22
|
+
def add_table(name, protected: true)
|
|
23
|
+
content.document_content.add_table(name, protected: protected)
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
# ------------------------------------------------------------
|
|
27
|
+
# Accessors
|
|
28
|
+
|
|
29
|
+
# Returns the content document
|
|
30
|
+
# @return [XML::ContentDoc] the container root-level content document
|
|
31
|
+
def content
|
|
32
|
+
@content ||= XML::ContentDoc.new
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# Returns the container styles
|
|
36
|
+
# @return [XML::StylesDoc] the container root-level style document
|
|
37
|
+
def styles
|
|
38
|
+
@styles ||= XML::StylesDoc.new
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
# Returns the container manifest
|
|
42
|
+
# @return [XML::ManifestDoc] the container manifest document
|
|
43
|
+
def manifest
|
|
44
|
+
@manifest ||= XML::ManifestDoc.new.tap do |mf_doc|
|
|
45
|
+
manifest = mf_doc.manifest
|
|
46
|
+
manifest_docs.each { |doc| manifest.add_entry_for(doc) }
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# Gets the document styles
|
|
51
|
+
#
|
|
52
|
+
# @return [BerkeleyLibrary::Util::ODS::XML::Office::AutomaticStyles] the styles
|
|
53
|
+
def auto_styles
|
|
54
|
+
content.document_content.automatic_styles
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# ------------------------------------------------------------
|
|
58
|
+
# Output
|
|
59
|
+
|
|
60
|
+
# @overload write_to
|
|
61
|
+
# Writes to a new string.
|
|
62
|
+
# @return [String] a binary string containing the spreadsheet data.
|
|
63
|
+
# @overload write_to(out)
|
|
64
|
+
# Writes to the specified output stream.
|
|
65
|
+
# @param out [IO] the output stream
|
|
66
|
+
# @return [void]
|
|
67
|
+
# @overload write_to(path)
|
|
68
|
+
# Writes to the specified file. If `path` denotes a directory, the
|
|
69
|
+
# spreadsheet will be written as exploded, pretty-printed XML.
|
|
70
|
+
# @param path [String, Pathname] the path to the output file
|
|
71
|
+
# @return [void]
|
|
72
|
+
# @see BerkeleyLibrary::Util::ODS::Spreadsheet#write_exploded_to
|
|
73
|
+
# noinspection RubyYardReturnMatch
|
|
74
|
+
def write_to(out = nil)
|
|
75
|
+
return write_to_string unless out
|
|
76
|
+
return write_to_stream(out) if io_like?(out)
|
|
77
|
+
return write_exploded_to(out) if File.directory?(out)
|
|
78
|
+
|
|
79
|
+
write_to_file(out)
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
# Writes to a new string.
|
|
83
|
+
def write_to_string
|
|
84
|
+
# noinspection RubyYardParamTypeMatch
|
|
85
|
+
StringIO.new.tap { |out| write_to_stream(out) }.string
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
# Writes to the specified output stream.
|
|
89
|
+
# @param out [IO]
|
|
90
|
+
def write_to_stream(out)
|
|
91
|
+
zip64_orig = Zip.write_zip64_support
|
|
92
|
+
begin
|
|
93
|
+
Zip.write_zip64_support = true
|
|
94
|
+
write_zipfile(out)
|
|
95
|
+
ensure
|
|
96
|
+
Zip.write_zip64_support = zip64_orig
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
# Writes to the specified file.
|
|
101
|
+
# @param path [String, Pathname]
|
|
102
|
+
def write_to_file(path)
|
|
103
|
+
File.open(path, 'wb') { |f| write_to_stream(f) }
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
# Writes this spreadsheet as an exploded set of pretty-printed XML files.
|
|
107
|
+
# NOTE: OpenOffice itself and many other tools get confused by the extra text
|
|
108
|
+
# nodes in the pretty-printed files and won't read them properly; this method
|
|
109
|
+
# is mostly for debugging.
|
|
110
|
+
#
|
|
111
|
+
# @return [Array<String>] a list of files written.
|
|
112
|
+
def write_exploded_to(dir)
|
|
113
|
+
raise ArgumentError, "Not a directory: #{dir.inspect}" unless File.directory?(dir)
|
|
114
|
+
|
|
115
|
+
[].tap do |files_written|
|
|
116
|
+
each_document do |doc|
|
|
117
|
+
output_path = write_exploded(doc, dir)
|
|
118
|
+
files_written << File.absolute_path(output_path)
|
|
119
|
+
logger.debug("Wrote #{files_written.last}")
|
|
120
|
+
end
|
|
121
|
+
end
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
# ------------------------------------------------------------
|
|
125
|
+
# Private methods
|
|
126
|
+
|
|
127
|
+
private
|
|
128
|
+
|
|
129
|
+
def each_document(&block)
|
|
130
|
+
yield manifest
|
|
131
|
+
|
|
132
|
+
manifest_docs.each(&block)
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
def manifest_docs
|
|
136
|
+
[styles, content]
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
# Returns true if `out` is IO-like enough for {Zip::OutputStream}, false otherwise
|
|
140
|
+
# @return [Boolean] whether `out` can be passed to {Zip::OutputStream#write_buffer}
|
|
141
|
+
def io_like?(out)
  # All three methods must be present; a String, for instance, has `<<`
  # but neither `reopen` nor `rewind`
  out.respond_to?(:reopen) && out.respond_to?(:rewind) && out.respond_to?(:<<)
end
|
|
144
|
+
|
|
145
|
+
def write_zipfile(out)
|
|
146
|
+
io = Zip::OutputStream.write_buffer(out) do |zip|
|
|
147
|
+
each_document { |doc| write_zip_entry(doc, zip) }
|
|
148
|
+
end
|
|
149
|
+
# NOTE: Zip::OutputStream plays games with the stream and
|
|
150
|
+
# doesn't necessarily write everything unless flushed, see:
|
|
151
|
+
# https://github.com/rubyzip/rubyzip/issues/265
|
|
152
|
+
io.flush
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
def write_zip_entry(doc, zip)
|
|
156
|
+
zip.put_next_entry(doc.path)
|
|
157
|
+
doc.to_xml(zip)
|
|
158
|
+
end
|
|
159
|
+
|
|
160
|
+
def write_exploded(doc, dir)
|
|
161
|
+
output_path = File.join(dir, doc.path)
|
|
162
|
+
FileUtils.mkdir_p(File.dirname(output_path))
|
|
163
|
+
doc.to_xml(output_path, compact: false)
|
|
164
|
+
output_path
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
end
|
|
168
|
+
end
|
|
169
|
+
end
|
|
170
|
+
end
|