berkeley_library-tind 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/build.yml +18 -0
  3. data/.gitignore +388 -0
  4. data/.idea/inspectionProfiles/Project_Default.xml +20 -0
  5. data/.idea/misc.xml +4 -0
  6. data/.idea/modules.xml +8 -0
  7. data/.idea/tind.iml +138 -0
  8. data/.idea/vcs.xml +6 -0
  9. data/.rubocop.yml +334 -0
  10. data/.ruby-version +1 -0
  11. data/.simplecov +8 -0
  12. data/.yardopts +1 -0
  13. data/CHANGES.md +58 -0
  14. data/Dockerfile +57 -0
  15. data/Gemfile +3 -0
  16. data/Jenkinsfile +18 -0
  17. data/LICENSE.md +21 -0
  18. data/README.md +73 -0
  19. data/Rakefile +20 -0
  20. data/berkeley_library-tind.gemspec +50 -0
  21. data/bin/tind-export +14 -0
  22. data/docker-compose.yml +15 -0
  23. data/lib/berkeley_library/tind.rb +3 -0
  24. data/lib/berkeley_library/tind/api.rb +1 -0
  25. data/lib/berkeley_library/tind/api/api.rb +132 -0
  26. data/lib/berkeley_library/tind/api/api_exception.rb +131 -0
  27. data/lib/berkeley_library/tind/api/collection.rb +82 -0
  28. data/lib/berkeley_library/tind/api/date_range.rb +67 -0
  29. data/lib/berkeley_library/tind/api/format.rb +32 -0
  30. data/lib/berkeley_library/tind/api/search.rb +100 -0
  31. data/lib/berkeley_library/tind/config.rb +103 -0
  32. data/lib/berkeley_library/tind/export.rb +1 -0
  33. data/lib/berkeley_library/tind/export/column.rb +54 -0
  34. data/lib/berkeley_library/tind/export/column_group.rb +144 -0
  35. data/lib/berkeley_library/tind/export/column_group_list.rb +131 -0
  36. data/lib/berkeley_library/tind/export/column_width_calculator.rb +76 -0
  37. data/lib/berkeley_library/tind/export/config.rb +154 -0
  38. data/lib/berkeley_library/tind/export/csv_exporter.rb +29 -0
  39. data/lib/berkeley_library/tind/export/export.rb +47 -0
  40. data/lib/berkeley_library/tind/export/export_command.rb +168 -0
  41. data/lib/berkeley_library/tind/export/export_exception.rb +8 -0
  42. data/lib/berkeley_library/tind/export/export_format.rb +67 -0
  43. data/lib/berkeley_library/tind/export/exporter.rb +105 -0
  44. data/lib/berkeley_library/tind/export/filter.rb +52 -0
  45. data/lib/berkeley_library/tind/export/no_results_error.rb +7 -0
  46. data/lib/berkeley_library/tind/export/ods_exporter.rb +138 -0
  47. data/lib/berkeley_library/tind/export/row.rb +24 -0
  48. data/lib/berkeley_library/tind/export/row_metrics.rb +18 -0
  49. data/lib/berkeley_library/tind/export/table.rb +175 -0
  50. data/lib/berkeley_library/tind/export/table_metrics.rb +116 -0
  51. data/lib/berkeley_library/tind/marc.rb +1 -0
  52. data/lib/berkeley_library/tind/marc/xml_reader.rb +144 -0
  53. data/lib/berkeley_library/tind/module_info.rb +14 -0
  54. data/lib/berkeley_library/util/arrays.rb +178 -0
  55. data/lib/berkeley_library/util/logging.rb +1 -0
  56. data/lib/berkeley_library/util/ods/spreadsheet.rb +170 -0
  57. data/lib/berkeley_library/util/ods/xml/content_doc.rb +26 -0
  58. data/lib/berkeley_library/util/ods/xml/document_node.rb +57 -0
  59. data/lib/berkeley_library/util/ods/xml/element_node.rb +106 -0
  60. data/lib/berkeley_library/util/ods/xml/loext/table_protection.rb +26 -0
  61. data/lib/berkeley_library/util/ods/xml/manifest/file_entry.rb +42 -0
  62. data/lib/berkeley_library/util/ods/xml/manifest/manifest.rb +73 -0
  63. data/lib/berkeley_library/util/ods/xml/manifest_doc.rb +26 -0
  64. data/lib/berkeley_library/util/ods/xml/namespace.rb +46 -0
  65. data/lib/berkeley_library/util/ods/xml/office/automatic_styles.rb +181 -0
  66. data/lib/berkeley_library/util/ods/xml/office/body.rb +17 -0
  67. data/lib/berkeley_library/util/ods/xml/office/document_content.rb +98 -0
  68. data/lib/berkeley_library/util/ods/xml/office/document_styles.rb +39 -0
  69. data/lib/berkeley_library/util/ods/xml/office/font_face_decls.rb +30 -0
  70. data/lib/berkeley_library/util/ods/xml/office/scripts.rb +17 -0
  71. data/lib/berkeley_library/util/ods/xml/office/spreadsheet.rb +37 -0
  72. data/lib/berkeley_library/util/ods/xml/office/styles.rb +39 -0
  73. data/lib/berkeley_library/util/ods/xml/style/cell_style.rb +58 -0
  74. data/lib/berkeley_library/util/ods/xml/style/column_style.rb +36 -0
  75. data/lib/berkeley_library/util/ods/xml/style/default_style.rb +31 -0
  76. data/lib/berkeley_library/util/ods/xml/style/family.rb +85 -0
  77. data/lib/berkeley_library/util/ods/xml/style/font_face.rb +46 -0
  78. data/lib/berkeley_library/util/ods/xml/style/paragraph_properties.rb +30 -0
  79. data/lib/berkeley_library/util/ods/xml/style/row_style.rb +37 -0
  80. data/lib/berkeley_library/util/ods/xml/style/style.rb +44 -0
  81. data/lib/berkeley_library/util/ods/xml/style/table_cell_properties.rb +40 -0
  82. data/lib/berkeley_library/util/ods/xml/style/table_column_properties.rb +30 -0
  83. data/lib/berkeley_library/util/ods/xml/style/table_properties.rb +25 -0
  84. data/lib/berkeley_library/util/ods/xml/style/table_row_properties.rb +28 -0
  85. data/lib/berkeley_library/util/ods/xml/style/table_style.rb +27 -0
  86. data/lib/berkeley_library/util/ods/xml/style/text_properties.rb +52 -0
  87. data/lib/berkeley_library/util/ods/xml/styles_doc.rb +26 -0
  88. data/lib/berkeley_library/util/ods/xml/table/named_expressions.rb +17 -0
  89. data/lib/berkeley_library/util/ods/xml/table/repeatable.rb +38 -0
  90. data/lib/berkeley_library/util/ods/xml/table/table.rb +193 -0
  91. data/lib/berkeley_library/util/ods/xml/table/table_cell.rb +46 -0
  92. data/lib/berkeley_library/util/ods/xml/table/table_column.rb +43 -0
  93. data/lib/berkeley_library/util/ods/xml/table/table_row.rb +136 -0
  94. data/lib/berkeley_library/util/ods/xml/text/p.rb +118 -0
  95. data/lib/berkeley_library/util/paths.rb +111 -0
  96. data/lib/berkeley_library/util/stringios.rb +30 -0
  97. data/lib/berkeley_library/util/strings.rb +42 -0
  98. data/lib/berkeley_library/util/sys_exits.rb +15 -0
  99. data/lib/berkeley_library/util/times.rb +22 -0
  100. data/lib/berkeley_library/util/uris.rb +44 -0
  101. data/lib/berkeley_library/util/uris/appender.rb +162 -0
  102. data/lib/berkeley_library/util/uris/requester.rb +62 -0
  103. data/lib/berkeley_library/util/uris/validator.rb +32 -0
  104. data/rakelib/bundle.rake +8 -0
  105. data/rakelib/coverage.rake +11 -0
  106. data/rakelib/gem.rake +54 -0
  107. data/rakelib/rubocop.rake +18 -0
  108. data/rakelib/spec.rake +2 -0
  109. data/spec/.rubocop.yml +40 -0
  110. data/spec/berkeley_library/tind/api/api_exception_spec.rb +91 -0
  111. data/spec/berkeley_library/tind/api/api_spec.rb +143 -0
  112. data/spec/berkeley_library/tind/api/collection_spec.rb +74 -0
  113. data/spec/berkeley_library/tind/api/date_range_spec.rb +110 -0
  114. data/spec/berkeley_library/tind/api/format_spec.rb +54 -0
  115. data/spec/berkeley_library/tind/api/search_spec.rb +364 -0
  116. data/spec/berkeley_library/tind/config_spec.rb +86 -0
  117. data/spec/berkeley_library/tind/export/column_group_spec.rb +29 -0
  118. data/spec/berkeley_library/tind/export/column_spec.rb +43 -0
  119. data/spec/berkeley_library/tind/export/config_spec.rb +206 -0
  120. data/spec/berkeley_library/tind/export/export_command_spec.rb +169 -0
  121. data/spec/berkeley_library/tind/export/export_format_spec.rb +59 -0
  122. data/spec/berkeley_library/tind/export/export_matcher.rb +112 -0
  123. data/spec/berkeley_library/tind/export/export_spec.rb +150 -0
  124. data/spec/berkeley_library/tind/export/exporter_spec.rb +125 -0
  125. data/spec/berkeley_library/tind/export/row_spec.rb +118 -0
  126. data/spec/berkeley_library/tind/export/table_spec.rb +322 -0
  127. data/spec/berkeley_library/tind/marc/xml_reader_spec.rb +93 -0
  128. data/spec/berkeley_library/util/arrays_spec.rb +340 -0
  129. data/spec/berkeley_library/util/ods/spreadsheet_spec.rb +124 -0
  130. data/spec/berkeley_library/util/ods/xml/content_doc_spec.rb +121 -0
  131. data/spec/berkeley_library/util/ods/xml/manifest/file_entry_spec.rb +27 -0
  132. data/spec/berkeley_library/util/ods/xml/manifest/manifest_spec.rb +33 -0
  133. data/spec/berkeley_library/util/ods/xml/office/document_content_spec.rb +60 -0
  134. data/spec/berkeley_library/util/ods/xml/style/automatic_styles_spec.rb +37 -0
  135. data/spec/berkeley_library/util/ods/xml/style/family_spec.rb +57 -0
  136. data/spec/berkeley_library/util/ods/xml/table/table_row_spec.rb +179 -0
  137. data/spec/berkeley_library/util/ods/xml/table/table_spec.rb +218 -0
  138. data/spec/berkeley_library/util/paths_spec.rb +90 -0
  139. data/spec/berkeley_library/util/stringios_spec.rb +34 -0
  140. data/spec/berkeley_library/util/strings_spec.rb +27 -0
  141. data/spec/berkeley_library/util/times_spec.rb +39 -0
  142. data/spec/berkeley_library/util/uris_spec.rb +118 -0
  143. data/spec/data/collection-names.txt +438 -0
  144. data/spec/data/collections.json +4827 -0
  145. data/spec/data/disjoint-records.xml +187 -0
  146. data/spec/data/record-184453.xml +58 -0
  147. data/spec/data/record-184458.xml +63 -0
  148. data/spec/data/record-187888.xml +78 -0
  149. data/spec/data/records-api-search-cjk-p1.xml +6381 -0
  150. data/spec/data/records-api-search-cjk-p2.xml +5 -0
  151. data/spec/data/records-api-search-p1.xml +4506 -0
  152. data/spec/data/records-api-search-p2.xml +4509 -0
  153. data/spec/data/records-api-search-p3.xml +4506 -0
  154. data/spec/data/records-api-search-p4.xml +4509 -0
  155. data/spec/data/records-api-search-p5.xml +4506 -0
  156. data/spec/data/records-api-search-p6.xml +2436 -0
  157. data/spec/data/records-api-search-p7.xml +5 -0
  158. data/spec/data/records-api-search.xml +234 -0
  159. data/spec/data/records-manual-search.xml +547 -0
  160. data/spec/spec_helper.rb +30 -0
  161. data/test/profile/table_from_records_profile.rb +46 -0
  162. metadata +585 -0
@@ -0,0 +1 @@
1
+ Dir.glob(File.expand_path('marc/*.rb', __dir__)).sort.each(&method(:require))
@@ -0,0 +1,144 @@
1
+ require 'nokogiri'
2
+ require 'marc/xml_parsers'
3
+ require 'marc_extensions'
4
+
5
+ module BerkeleyLibrary
6
+ module TIND
7
+ module MARC
8
+ # A customized XML reader for reading MARC records from TIND search results.
9
+ class XMLReader
10
+ include Enumerable
11
+ include ::MARC::NokogiriReader
12
+
13
+ # ############################################################
14
+ # Constant
15
+
16
+ COMMENT_TOTAL_RE = /Search-Engine-Total-Number-Of-Results: ([0-9]+)/.freeze
17
+
18
+ # ############################################################
19
+ # Attributes
20
+
21
+ attr_reader :search_id
22
+
23
+ # Returns the total number of records, based on the `<total/>` tag
24
+ # returned by the TIND Search API, or the special comment
25
+ # `Search-Engine-Total-Number-Of-Results` returned by TIND
26
+ # Regular Search in XML format.
27
+ #
28
+ # Note that the total is not guaranteed to be present, and if present,
29
+ # may not be present unless at least some records have been parsed.
30
+ #
31
+ # @return [Integer, nil] the total number of records, or `nil` if the total has not been read yet
32
+ def total
33
+ @total&.to_i
34
+ end
35
+
36
+ # Returns the number of records yielded.
37
+ #
38
+ # @return [Integer] the number of records yielded.
39
+ def records_yielded
40
+ @records_yielded ||= 0
41
+ end
42
+
43
+ # ############################################################
44
+ # Initializer
45
+
46
+ # Reads MARC records from an XML datasource given either as a file path,
47
+ # or as an IO object.
48
+ #
49
+ # @param source [String, Pathname, IO] the path to a file, or an IO to read from directly
50
+ # @param freeze [Boolean] whether to freeze each record after reading
51
+ def initialize(source, freeze: false)
52
+ @handle = ensure_io(source)
53
+ @freeze = freeze
54
+ init
55
+ end
56
+
57
+ class << self
58
+ include MARCExtensions::XMLReaderClassExtensions
59
+ end
60
+
61
+ # ############################################################
62
+ # MARC::GenericPullParser overrides
63
+
64
+ def yield_record
65
+ @record[:record].freeze if @freeze
66
+ super
67
+ ensure
68
+ increment_records_yielded!
69
+ end
70
+
71
+ # ############################################################
72
+ # Nokogiri::XML::SAX::Document overrides
73
+
74
+ # @see Nokogiri::XML::SAX::Document#start_element_namespace
75
+ # rubocop:disable Metrics/ParameterLists
76
+ def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
77
+ super
78
+
79
+ @current_element_name = name
80
+ end
81
+ # rubocop:enable Metrics/ParameterLists
82
+
83
+ # @see Nokogiri::XML::SAX::Document#end_element_namespace
84
+ def end_element_namespace(name, prefix = nil, uri = nil)
85
+ super
86
+
87
+ @current_element_name = nil
88
+ end
89
+
90
+ # @see Nokogiri::XML::SAX::Document#characters
91
# SAX callback for text content. Text reported while inside a `search_id`
# or `total` element is captured on the reader; any other text is handed
# to the parent implementation. Text reported while no element name is
# being tracked is ignored.
#
# NOTE(review): each call overwrites the previously captured value, so a
# text node delivered in several chunks would keep only the last chunk --
# confirm whether that can happen for these two elements.
#
# @param string [String] the text reported by the parser
def characters(string)
  element = @current_element_name
  return unless element

  if element == 'search_id'
    @search_id = string
  elsif element == 'total'
    @total = string.to_i
  else
    super
  end
end
103
+
104
+ # @see Nokogiri::XML::SAX::Document#comment
105
# SAX callback for XML comments. When the comment carries the
# `Search-Engine-Total-Number-Of-Results` marker, its number is stored as
# the total; any other comment is ignored.
#
# @param string [String] the comment text
def comment(string)
  match = COMMENT_TOTAL_RE.match(string)
  @total = match[1].to_i if match
end
110
+
111
+ # ############################################################
112
+ # Private
113
+
114
+ private
115
+
116
# Coerces `file` into something that can be read as XML.
#
# Checked in order: an IO-like object is returned unchanged; the path of
# an existing file is opened for reading; a string whose content looks
# like XML text is wrapped in a StringIO.
#
# @param file [IO, String, Pathname] an IO, a file path, or XML text
# @return [Object] the IO-like object to read from
# @raise [ArgumentError] if `file` is not an IO, an existing file path, or XML text
def ensure_io(file)
  return file if io_like?(file)
  return File.new(file) if file_exists?(file)
  # Only string-like input can be XML text. Checking #to_str first keeps
  # other objects (e.g. a Pathname for a missing file) away from a regexp
  # match they may not support, so they reach the ArgumentError below
  # instead of failing with NoMethodError.
  return StringIO.new(file) if file.respond_to?(:to_str) && file.to_str.match?(/^\s*</x)

  raise ArgumentError, "Don't know how to read XML from #{file.inspect}: not an IO, file path, or XML text"
end
123
+
124
+ # Returns true if `obj` is close enough to an IO object for Nokogiri
125
+ # to parse as one.
126
+ #
127
+ # @param obj [Object] the object that might be an IO
128
+ # @see https://github.com/sparklemotion/nokogiri/blob/v1.11.1/lib/nokogiri/xml/sax/parser.rb#L81 Nokogiri::XML::SAX::Parser#parse
129
def io_like?(obj)
  # Both methods must be present for the object to count as IO-like.
  %i[read close].all? { |method_name| obj.respond_to?(method_name) }
end
132
+
133
# Returns a truthy value if `path` denotes an existing file: either the
# object itself answers `exist?` affirmatively, or it is string-like and
# names a file that exists.
#
# @param path [Object] the candidate path
def file_exists?(path)
  exists = path.respond_to?(:exist?) && path.exist?
  return exists if exists

  path.respond_to?(:to_str) && File.exist?(path)
end
137
+
138
+ def increment_records_yielded!
139
+ @records_yielded = records_yielded + 1
140
+ end
141
+ end
142
+ end
143
+ end
144
+ end
@@ -0,0 +1,14 @@
1
+ module BerkeleyLibrary
2
+ module TIND
3
+ class ModuleInfo
4
+ NAME = 'berkeley_library-tind'.freeze
5
+ AUTHOR = 'David Moles'.freeze
6
+ AUTHOR_EMAIL = 'dmoles@berkeley.edu'.freeze
7
+ SUMMARY = 'TIND DA utilities for the UC Berkeley Library'.freeze
8
+ DESCRIPTION = 'UC Berkeley Library utility gem for working with the TIND DA digital archive.'.freeze
9
+ LICENSE = 'MIT'.freeze
10
+ VERSION = '0.4.0'.freeze
11
+ HOMEPAGE = 'https://github.com/BerkeleyLibrary/tind'.freeze
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,178 @@
1
+ module BerkeleyLibrary
2
+ module Util
3
+ module Arrays
4
+ class << self
5
+ # Clients can choose to call class methods directly, or include the module
6
+ include Arrays
7
+ end
8
+
9
+ # Recursively checks whether the specified list contains, in the
10
+ # same order, all values in the other specified list (additional values
11
+ # in between are fine)
12
+ #
13
+ # @param subset [Array] the values to look for
14
+ # @param superset [Array] the list of values to look in
15
+ # @return [Boolean] true if all values were found, false otherwise
16
def ordered_superset?(superset:, subset:)
  # find_indices returns nil when some value of `subset` cannot be located
  indices = find_indices(in_array: superset, for_array: subset)
  !indices.nil?
end
19
+
20
+ # Counts how many contiguous elements from the start of a
21
+ # sequence of values satisfy the given block.
22
+ #
23
+ # @overload count_while(values:)
24
+ # Returns an enumerator.
25
+ # @param values [Enumerable] the values
26
+ # @return [Enumerator] the enumerator.
27
+ # @overload count_while(values:, &block)
28
+ # Passes elements to the block until the block returns nil or false,
29
+ # then stops iterating and returns the count of matching elements.
30
+ # @param values [Enumerable] the values
31
+ # @return [Integer] the count
32
def count_while(values:)
  return to_enum(:count_while, values: values) unless block_given?

  matched = 0
  values.each do |value|
    # Stop at the first element the block rejects
    break unless yield value

    matched += 1
  end
  matched
end
42
+
43
+ # Given two lists, one of which is a superset of the other, with elements
44
+ # in the same order (but possibly with additional elements in the superset),
45
+ # returns an array the length of the subset, containing for each element in
46
+ # the subset the index of the corresponding element in the superset.
47
+ #
48
+ # @overload find_indices(for_array:, in_array:)
49
+ # For each value in `for_array`, finds the index of the first equal value
50
+ # in `in_array` after the previously matched value.
51
+ # @param in_array [Array] the list of values to look in
52
+ # @param for_array [Array] the values to look for
53
+ # @return [Array<Integer>, nil] the indices in `in_array` of each value in `for_array`,
54
+ # or `nil` if not all values could be found
55
+ #
56
+ # @overload find_indices(for_array:, in_array:, &block)
57
+ # For each value in `for_array`, finds the index of the first value
58
+ # in `in_array` after the previously matched value that matches
59
+ # the specified match function.
60
+ # @param in_array [Array] the list of values to look in
61
+ # @param for_array [Array] the values to look for
62
+ # @yieldparam source [Object] the value to compare
63
+ # @yieldparam target [Object] the value to compare against
64
+ # @return [Array<Integer>, nil] the indices in `in_array` of each value in `for_array`,
65
+ # or `nil` if not all values could be found
66
def find_indices(for_array:, in_array:, &block)
  if block_given?
    # Compare elements using the caller-supplied match function
    find_indices_matching(for_array, in_array, &block)
  else
    # Compare elements by equality
    find_all_indices(for_array, in_array)
  end
end
71
+
72
+ # Given a block or a value, finds the index of the first matching value
73
+ # at or after the specified start index.
74
+ #
75
+ # @overload find_index(value, in_array:, start_index:)
76
+ # Finds the first index of the specified value.
77
+ # @param value [Object] the value to find
78
+ # @param in_array [Array] the array to search
79
+ # @param start_index [Integer] the index to start with
80
+ # @return [Integer, nil] the index, or `nil` if no value matches
81
+ # @overload find_index(&block)
82
+ # Finds the index of the first value matching
83
+ # the specified block.
84
+ # @param in_array [Array] the array to search
85
+ # @param start_index [Integer] the index to start with
86
+ # @yieldreturn [Boolean] whether the element matches
87
+ # @return [Integer, nil] the index, or `nil` if no value matches
88
+ # @overload find_index
89
+ # @param in_array [Array] the array to search
90
+ # @param start_index [Integer] the index to start with
91
+ # @return [Enumerator] a new enumerator
92
# @raise [ArgumentError] if more than one positional argument is given
def find_index(*args, in_array:, start_index: 0, &block)
  # At most one positional argument (the value to look for) is accepted.
  raise ArgumentError, "wrong number of arguments (given #{args.size}, expected 0..1)" if args.size > 1
  # With neither a value nor a block, hand back an enumerator.
  return Enumerator.new { |y| find_index(in_array: in_array, start_index: start_index, &y) } if args.empty? && !block_given?

  # Slicing from a start index past the end of the array yields nil;
  # nothing can match there, so report "not found".
  return unless (tail = in_array[start_index..])
  return unless (relative_index = tail.find_index(*args, &block))

  # Translate the position within the slice back to a position in `in_array`.
  relative_index + start_index
end
99
+
100
+ # Given an array of unique integers _a<sub>1</sub>_, returns a new array
101
+ # _a<sub>2</sub>_ in which the value at each index _i<sub>2</sub>_ is the
102
+ # index _i<sub>1</sub>_ at which that value was found in _a<sub>1</sub>_.
103
+ # E.g., given `[0, 2, 3]`, returns `[0, nil, 1, 2]`. The indices need
104
+ # not be in order but must be unique.
105
+ #
106
+ # @param arr [Array<Integer>, nil] the array to invert.
107
+ # @return [Array<Integer, nil>, nil] the inverted array, or nil if the input array is nil
108
+ # @raise TypeError if `arr` is not an array of integers
109
+ # @raise ArgumentError if `arr` contains duplicate values
110
def invert(arr)
  return unless arr

  inverted = Array.new(arr.size)
  arr.each_with_index do |value, index|
    # A slot that is already filled means `value` occurred earlier
    previous = inverted[value]
    raise ArgumentError, "Duplicate value #{value} at index #{index} already found at #{previous}" if previous

    inverted[value] = index
  end
  inverted
end
122
+
123
+ # Merges two arrays in an order-preserving manner.
124
+ # @param a1 [Array] the first array
125
+ # @param a2 [Array] the second array
126
+ # @return [Array] a merged array that is an ordered superset of both `a1` and `a2`
127
+ # @see Arrays#ordered_superset?
128
def merge(a1, a2)
  return a1 if a2.empty?
  return a2 if a1.empty?

  # do_merge expects the shorter array first; on a tie, a1 counts as shorter
  if a1.size > a2.size
    do_merge(a2, a1)
  else
    do_merge(a1, a2)
  end
end
135
+
136
+ private
137
+
138
# Merges `shorter` into `longer`, anchoring on the first element of
# `shorter` that also occurs in `longer`: everything before the anchor in
# either array is combined, the anchor follows, and the remainders are
# merged recursively. If the arrays share no element, they are simply
# combined via sort_by_first_and_flatten.
def do_merge(shorter, longer)
  shorter.each_with_index do |value, short_pos|
    long_pos = longer.find_index(value)
    next if long_pos.nil?

    prefix = sort_by_first_and_flatten(shorter[0...short_pos], longer[0...long_pos])
    prefix << value
    remainder = merge(shorter[(short_pos + 1)..], longer[(long_pos + 1)..])
    return prefix + remainder
  end

  sort_by_first_and_flatten(longer, shorter)
end
150
+
151
# Concatenates two arrays, putting `a2` first only when the first element
# of `a1` supports `>` and is greater than the first element of `a2`.
# If either array is empty, the other is returned as-is.
def sort_by_first_and_flatten(a1, a2)
  return a1 if a2.empty?
  return a2 if a1.empty?

  head = a1.first
  a2_goes_first = head.respond_to?(:>) && head > a2.first
  a2_goes_first ? a2 + a1 : a1 + a2
end
158
+
159
# For each value in `source`, looks up the first equal value in `target`
# after the previous match. Returns the list of matched indices, or nil
# as soon as one value cannot be found.
def find_all_indices(source, target)
  found = []
  source.each do |wanted|
    # Resume the search just past the previous match
    offset = found.empty? ? 0 : found.last + 1
    index = find_index(wanted, in_array: target, start_index: offset)
    return nil if index.nil?

    found << index
  end
  found
end
167
+
168
# For each value in `source`, looks up the first value in `target` after
# the previous match for which the given block, called with the source
# value and the candidate, returns a truthy value. Returns the list of
# matched indices, or nil as soon as one value cannot be matched.
def find_indices_matching(source, target)
  found = []
  source.each do |wanted|
    # Resume the search just past the previous match
    offset = found.empty? ? 0 : found.last + 1
    index = find_index(in_array: target, start_index: offset) { |candidate| yield wanted, candidate }
    return nil if index.nil?

    found << index
  end
  found
end
176
+ end
177
+ end
178
+ end
@@ -0,0 +1 @@
1
+ require 'berkeley_library/logging'
@@ -0,0 +1,170 @@
1
+ require 'fileutils'
2
+ require 'zip'
3
+ require 'berkeley_library/util/logging'
4
+ require 'berkeley_library/util/ods/xml/content_doc'
5
+ require 'berkeley_library/util/ods/xml/styles_doc'
6
+ require 'berkeley_library/util/ods/xml/manifest_doc'
7
+
8
+ module BerkeleyLibrary
9
+ module Util
10
+ module ODS
11
+ class Spreadsheet
12
+ include BerkeleyLibrary::Logging
13
+
14
+ # ------------------------------------------------------------
15
+ # Utility methods
16
+
17
+ # Adds a table ('worksheet') to the spreadsheet.
18
+ #
19
+ # @param name [String] the table name
20
+ # @param protected [Boolean] whether to protect the table
21
+ # @return [BerkeleyLibrary::Util::ODS::XML::Table::Table] a new table with the specified name
22
+ def add_table(name, protected: true)
23
+ content.document_content.add_table(name, protected: protected)
24
+ end
25
+
26
+ # ------------------------------------------------------------
27
+ # Accessors
28
+
29
+ # Returns the content document
30
+ # @return [XML::ContentDoc] the container root-level content document
31
+ def content
32
+ @content ||= XML::ContentDoc.new
33
+ end
34
+
35
+ # Returns the container styles
36
+ # @return [XML::StylesDoc] the container root-level style document
37
+ def styles
38
+ @styles ||= XML::StylesDoc.new
39
+ end
40
+
41
+ # Returns the container manifest
42
+ # @return [XML::ManifestDoc] the container manifest document
43
+ def manifest
44
+ @manifest ||= XML::ManifestDoc.new.tap do |mf_doc|
45
+ manifest = mf_doc.manifest
46
+ manifest_docs.each { |doc| manifest.add_entry_for(doc) }
47
+ end
48
+ end
49
+
50
+ # Gets the document styles
51
+ #
52
+ # @return [BerkeleyLibrary::Util::ODS::XML::Office::AutomaticStyles] the styles
53
+ def auto_styles
54
+ content.document_content.automatic_styles
55
+ end
56
+
57
+ # ------------------------------------------------------------
58
+ # Output
59
+
60
+ # @overload write_to
61
+ # Writes to a new string.
62
+ # @return [String] a binary string containing the spreadsheet data.
63
+ # @overload write_to(out)
64
+ # Writes to the specified output stream.
65
+ # @param out [IO] the output stream
66
+ # @return [void]
67
+ # @overload write_to(path)
68
+ # Writes to the specified file. If `path` denotes a directory, the
69
+ # spreadsheet will be written as exploded, pretty-printed XML.
70
+ # @param path [String, Pathname] the path to the output file
71
+ # @return [void]
72
+ # @see BerkeleyLibrary::Util::ODS::Spreadsheet#write_exploded_to
73
+ # noinspection RubyYardReturnMatch
74
def write_to(out = nil)
  # No destination given: build and return the spreadsheet as a string
  return write_to_string unless out

  if io_like?(out)
    write_to_stream(out)
  elsif File.directory?(out)
    # An existing directory gets the exploded form
    write_exploded_to(out)
  else
    write_to_file(out)
  end
end
81
+
82
+ # Writes to a new string.
83
def write_to_string
  # Write into an in-memory buffer, then return what was written
  buffer = StringIO.new
  write_to_stream(buffer)
  buffer.string
end
87
+
88
+ # Writes to the specified output stream.
89
+ # @param out [IO]
90
+ def write_to_stream(out)
91
+ zip64_orig = Zip.write_zip64_support
92
+ begin
93
+ Zip.write_zip64_support = true
94
+ write_zipfile(out)
95
+ ensure
96
+ Zip.write_zip64_support = zip64_orig
97
+ end
98
+ end
99
+
100
+ # Writes to the specified file.
101
+ # @param path [String, Pathname]
102
+ def write_to_file(path)
103
+ File.open(path, 'wb') { |f| write_to_stream(f) }
104
+ end
105
+
106
+ # Writes this spreadsheet as an exploded set of pretty-printed XML files.
107
+ # NOTE: OpenOffice itself and many other tools get confused by the extra text
108
+ # nodes in the pretty-printed files and won't read them properly; this method
109
+ # is mostly for debugging.
110
+ #
111
+ # @return [Array<String>] a list of files written.
112
def write_exploded_to(dir)
  raise ArgumentError, "Not a directory: #{dir.inspect}" unless File.directory?(dir)

  written = []
  each_document do |doc|
    # Record each output file by absolute path, and log it
    absolute_path = File.absolute_path(write_exploded(doc, dir))
    written << absolute_path
    logger.debug("Wrote #{absolute_path}")
  end
  written
end
123
+
124
+ # ------------------------------------------------------------
125
+ # Private methods
126
+
127
+ private
128
+
129
+ def each_document(&block)
130
+ yield manifest
131
+
132
+ manifest_docs.each(&block)
133
+ end
134
+
135
+ def manifest_docs
136
+ [styles, content]
137
+ end
138
+
139
+ # Returns true if `out` is IO-like enough for {Zip::OutputStream}, false otherwise
140
+ # @return [Boolean] whether `out` can be passed to {Zip::OutputStream#write_buffer}
141
def io_like?(out)
  # All three methods must be present
  out.respond_to?(:reopen) && out.respond_to?(:rewind) && out.respond_to?(:<<)
end
144
+
145
+ def write_zipfile(out)
146
+ io = Zip::OutputStream.write_buffer(out) do |zip|
147
+ each_document { |doc| write_zip_entry(doc, zip) }
148
+ end
149
+ # NOTE: Zip::OutputStream plays games with the stream and
150
+ # doesn't necessarily write everything unless flushed, see:
151
+ # https://github.com/rubyzip/rubyzip/issues/265
152
+ io.flush
153
+ end
154
+
155
+ def write_zip_entry(doc, zip)
156
+ zip.put_next_entry(doc.path)
157
+ doc.to_xml(zip)
158
+ end
159
+
160
# Writes one document, non-compact, to its own path beneath `dir`,
# creating any missing parent directories first.
#
# @param doc [Object] a document responding to `path` and `to_xml`
# @param dir [String] the directory to write under
# @return [String] the path that was written
def write_exploded(doc, dir)
  File.join(dir, doc.path).tap do |output_path|
    FileUtils.mkdir_p(File.dirname(output_path))
    doc.to_xml(output_path, compact: false)
  end
end
166
+
167
+ end
168
+ end
169
+ end
170
+ end