berkeley_library-tind 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (162) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/build.yml +18 -0
  3. data/.gitignore +388 -0
  4. data/.idea/inspectionProfiles/Project_Default.xml +20 -0
  5. data/.idea/misc.xml +4 -0
  6. data/.idea/modules.xml +8 -0
  7. data/.idea/tind.iml +138 -0
  8. data/.idea/vcs.xml +6 -0
  9. data/.rubocop.yml +334 -0
  10. data/.ruby-version +1 -0
  11. data/.simplecov +8 -0
  12. data/.yardopts +1 -0
  13. data/CHANGES.md +58 -0
  14. data/Dockerfile +57 -0
  15. data/Gemfile +3 -0
  16. data/Jenkinsfile +18 -0
  17. data/LICENSE.md +21 -0
  18. data/README.md +73 -0
  19. data/Rakefile +20 -0
  20. data/berkeley_library-tind.gemspec +50 -0
  21. data/bin/tind-export +14 -0
  22. data/docker-compose.yml +15 -0
  23. data/lib/berkeley_library/tind.rb +3 -0
  24. data/lib/berkeley_library/tind/api.rb +1 -0
  25. data/lib/berkeley_library/tind/api/api.rb +132 -0
  26. data/lib/berkeley_library/tind/api/api_exception.rb +131 -0
  27. data/lib/berkeley_library/tind/api/collection.rb +82 -0
  28. data/lib/berkeley_library/tind/api/date_range.rb +67 -0
  29. data/lib/berkeley_library/tind/api/format.rb +32 -0
  30. data/lib/berkeley_library/tind/api/search.rb +100 -0
  31. data/lib/berkeley_library/tind/config.rb +103 -0
  32. data/lib/berkeley_library/tind/export.rb +1 -0
  33. data/lib/berkeley_library/tind/export/column.rb +54 -0
  34. data/lib/berkeley_library/tind/export/column_group.rb +144 -0
  35. data/lib/berkeley_library/tind/export/column_group_list.rb +131 -0
  36. data/lib/berkeley_library/tind/export/column_width_calculator.rb +76 -0
  37. data/lib/berkeley_library/tind/export/config.rb +154 -0
  38. data/lib/berkeley_library/tind/export/csv_exporter.rb +29 -0
  39. data/lib/berkeley_library/tind/export/export.rb +47 -0
  40. data/lib/berkeley_library/tind/export/export_command.rb +168 -0
  41. data/lib/berkeley_library/tind/export/export_exception.rb +8 -0
  42. data/lib/berkeley_library/tind/export/export_format.rb +67 -0
  43. data/lib/berkeley_library/tind/export/exporter.rb +105 -0
  44. data/lib/berkeley_library/tind/export/filter.rb +52 -0
  45. data/lib/berkeley_library/tind/export/no_results_error.rb +7 -0
  46. data/lib/berkeley_library/tind/export/ods_exporter.rb +138 -0
  47. data/lib/berkeley_library/tind/export/row.rb +24 -0
  48. data/lib/berkeley_library/tind/export/row_metrics.rb +18 -0
  49. data/lib/berkeley_library/tind/export/table.rb +175 -0
  50. data/lib/berkeley_library/tind/export/table_metrics.rb +116 -0
  51. data/lib/berkeley_library/tind/marc.rb +1 -0
  52. data/lib/berkeley_library/tind/marc/xml_reader.rb +144 -0
  53. data/lib/berkeley_library/tind/module_info.rb +14 -0
  54. data/lib/berkeley_library/util/arrays.rb +178 -0
  55. data/lib/berkeley_library/util/logging.rb +1 -0
  56. data/lib/berkeley_library/util/ods/spreadsheet.rb +170 -0
  57. data/lib/berkeley_library/util/ods/xml/content_doc.rb +26 -0
  58. data/lib/berkeley_library/util/ods/xml/document_node.rb +57 -0
  59. data/lib/berkeley_library/util/ods/xml/element_node.rb +106 -0
  60. data/lib/berkeley_library/util/ods/xml/loext/table_protection.rb +26 -0
  61. data/lib/berkeley_library/util/ods/xml/manifest/file_entry.rb +42 -0
  62. data/lib/berkeley_library/util/ods/xml/manifest/manifest.rb +73 -0
  63. data/lib/berkeley_library/util/ods/xml/manifest_doc.rb +26 -0
  64. data/lib/berkeley_library/util/ods/xml/namespace.rb +46 -0
  65. data/lib/berkeley_library/util/ods/xml/office/automatic_styles.rb +181 -0
  66. data/lib/berkeley_library/util/ods/xml/office/body.rb +17 -0
  67. data/lib/berkeley_library/util/ods/xml/office/document_content.rb +98 -0
  68. data/lib/berkeley_library/util/ods/xml/office/document_styles.rb +39 -0
  69. data/lib/berkeley_library/util/ods/xml/office/font_face_decls.rb +30 -0
  70. data/lib/berkeley_library/util/ods/xml/office/scripts.rb +17 -0
  71. data/lib/berkeley_library/util/ods/xml/office/spreadsheet.rb +37 -0
  72. data/lib/berkeley_library/util/ods/xml/office/styles.rb +39 -0
  73. data/lib/berkeley_library/util/ods/xml/style/cell_style.rb +58 -0
  74. data/lib/berkeley_library/util/ods/xml/style/column_style.rb +36 -0
  75. data/lib/berkeley_library/util/ods/xml/style/default_style.rb +31 -0
  76. data/lib/berkeley_library/util/ods/xml/style/family.rb +85 -0
  77. data/lib/berkeley_library/util/ods/xml/style/font_face.rb +46 -0
  78. data/lib/berkeley_library/util/ods/xml/style/paragraph_properties.rb +30 -0
  79. data/lib/berkeley_library/util/ods/xml/style/row_style.rb +37 -0
  80. data/lib/berkeley_library/util/ods/xml/style/style.rb +44 -0
  81. data/lib/berkeley_library/util/ods/xml/style/table_cell_properties.rb +40 -0
  82. data/lib/berkeley_library/util/ods/xml/style/table_column_properties.rb +30 -0
  83. data/lib/berkeley_library/util/ods/xml/style/table_properties.rb +25 -0
  84. data/lib/berkeley_library/util/ods/xml/style/table_row_properties.rb +28 -0
  85. data/lib/berkeley_library/util/ods/xml/style/table_style.rb +27 -0
  86. data/lib/berkeley_library/util/ods/xml/style/text_properties.rb +52 -0
  87. data/lib/berkeley_library/util/ods/xml/styles_doc.rb +26 -0
  88. data/lib/berkeley_library/util/ods/xml/table/named_expressions.rb +17 -0
  89. data/lib/berkeley_library/util/ods/xml/table/repeatable.rb +38 -0
  90. data/lib/berkeley_library/util/ods/xml/table/table.rb +193 -0
  91. data/lib/berkeley_library/util/ods/xml/table/table_cell.rb +46 -0
  92. data/lib/berkeley_library/util/ods/xml/table/table_column.rb +43 -0
  93. data/lib/berkeley_library/util/ods/xml/table/table_row.rb +136 -0
  94. data/lib/berkeley_library/util/ods/xml/text/p.rb +118 -0
  95. data/lib/berkeley_library/util/paths.rb +111 -0
  96. data/lib/berkeley_library/util/stringios.rb +30 -0
  97. data/lib/berkeley_library/util/strings.rb +42 -0
  98. data/lib/berkeley_library/util/sys_exits.rb +15 -0
  99. data/lib/berkeley_library/util/times.rb +22 -0
  100. data/lib/berkeley_library/util/uris.rb +44 -0
  101. data/lib/berkeley_library/util/uris/appender.rb +162 -0
  102. data/lib/berkeley_library/util/uris/requester.rb +62 -0
  103. data/lib/berkeley_library/util/uris/validator.rb +32 -0
  104. data/rakelib/bundle.rake +8 -0
  105. data/rakelib/coverage.rake +11 -0
  106. data/rakelib/gem.rake +54 -0
  107. data/rakelib/rubocop.rake +18 -0
  108. data/rakelib/spec.rake +2 -0
  109. data/spec/.rubocop.yml +40 -0
  110. data/spec/berkeley_library/tind/api/api_exception_spec.rb +91 -0
  111. data/spec/berkeley_library/tind/api/api_spec.rb +143 -0
  112. data/spec/berkeley_library/tind/api/collection_spec.rb +74 -0
  113. data/spec/berkeley_library/tind/api/date_range_spec.rb +110 -0
  114. data/spec/berkeley_library/tind/api/format_spec.rb +54 -0
  115. data/spec/berkeley_library/tind/api/search_spec.rb +364 -0
  116. data/spec/berkeley_library/tind/config_spec.rb +86 -0
  117. data/spec/berkeley_library/tind/export/column_group_spec.rb +29 -0
  118. data/spec/berkeley_library/tind/export/column_spec.rb +43 -0
  119. data/spec/berkeley_library/tind/export/config_spec.rb +206 -0
  120. data/spec/berkeley_library/tind/export/export_command_spec.rb +169 -0
  121. data/spec/berkeley_library/tind/export/export_format_spec.rb +59 -0
  122. data/spec/berkeley_library/tind/export/export_matcher.rb +112 -0
  123. data/spec/berkeley_library/tind/export/export_spec.rb +150 -0
  124. data/spec/berkeley_library/tind/export/exporter_spec.rb +125 -0
  125. data/spec/berkeley_library/tind/export/row_spec.rb +118 -0
  126. data/spec/berkeley_library/tind/export/table_spec.rb +322 -0
  127. data/spec/berkeley_library/tind/marc/xml_reader_spec.rb +93 -0
  128. data/spec/berkeley_library/util/arrays_spec.rb +340 -0
  129. data/spec/berkeley_library/util/ods/spreadsheet_spec.rb +124 -0
  130. data/spec/berkeley_library/util/ods/xml/content_doc_spec.rb +121 -0
  131. data/spec/berkeley_library/util/ods/xml/manifest/file_entry_spec.rb +27 -0
  132. data/spec/berkeley_library/util/ods/xml/manifest/manifest_spec.rb +33 -0
  133. data/spec/berkeley_library/util/ods/xml/office/document_content_spec.rb +60 -0
  134. data/spec/berkeley_library/util/ods/xml/style/automatic_styles_spec.rb +37 -0
  135. data/spec/berkeley_library/util/ods/xml/style/family_spec.rb +57 -0
  136. data/spec/berkeley_library/util/ods/xml/table/table_row_spec.rb +179 -0
  137. data/spec/berkeley_library/util/ods/xml/table/table_spec.rb +218 -0
  138. data/spec/berkeley_library/util/paths_spec.rb +90 -0
  139. data/spec/berkeley_library/util/stringios_spec.rb +34 -0
  140. data/spec/berkeley_library/util/strings_spec.rb +27 -0
  141. data/spec/berkeley_library/util/times_spec.rb +39 -0
  142. data/spec/berkeley_library/util/uris_spec.rb +118 -0
  143. data/spec/data/collection-names.txt +438 -0
  144. data/spec/data/collections.json +4827 -0
  145. data/spec/data/disjoint-records.xml +187 -0
  146. data/spec/data/record-184453.xml +58 -0
  147. data/spec/data/record-184458.xml +63 -0
  148. data/spec/data/record-187888.xml +78 -0
  149. data/spec/data/records-api-search-cjk-p1.xml +6381 -0
  150. data/spec/data/records-api-search-cjk-p2.xml +5 -0
  151. data/spec/data/records-api-search-p1.xml +4506 -0
  152. data/spec/data/records-api-search-p2.xml +4509 -0
  153. data/spec/data/records-api-search-p3.xml +4506 -0
  154. data/spec/data/records-api-search-p4.xml +4509 -0
  155. data/spec/data/records-api-search-p5.xml +4506 -0
  156. data/spec/data/records-api-search-p6.xml +2436 -0
  157. data/spec/data/records-api-search-p7.xml +5 -0
  158. data/spec/data/records-api-search.xml +234 -0
  159. data/spec/data/records-manual-search.xml +547 -0
  160. data/spec/spec_helper.rb +30 -0
  161. data/test/profile/table_from_records_profile.rb +46 -0
  162. metadata +585 -0
@@ -0,0 +1 @@
1
+ Dir.glob(File.expand_path('marc/*.rb', __dir__)).sort.each(&method(:require))
@@ -0,0 +1,144 @@
1
+ require 'nokogiri'
2
+ require 'marc/xml_parsers'
3
+ require 'marc_extensions'
4
+
5
module BerkeleyLibrary
  module TIND
    module MARC
      # A customized XML reader for reading MARC records from TIND search results.
      #
      # Besides delegating record parsing to `::MARC::NokogiriReader`, this
      # reader captures the text of `<search_id/>` and `<total/>` elements and
      # the `Search-Engine-Total-Number-Of-Results` comment, and keeps a count
      # of the records yielded so far.
      class XMLReader
        include Enumerable
        include ::MARC::NokogiriReader

        # ############################################################
        # Constant

        # Matches the result-count comment emitted by TIND Regular Search;
        # capture group 1 is the count.
        COMMENT_TOTAL_RE = /Search-Engine-Total-Number-Of-Results: ([0-9]+)/.freeze

        # ############################################################
        # Attributes

        # @return [String, nil] the text of the `<search_id/>` element, or `nil` if none has been read yet
        attr_reader :search_id

        # Returns the total number of records, based on the `<total/>` tag
        # returned by the TIND Search API, or the special comment
        # `Search-Engine-Total-Number-Of-Results` returned by TIND
        # Regular Search in XML format.
        #
        # Note that the total is not guaranteed to be present, and if present,
        # may not be present unless at least some records have been parsed.
        #
        # @return [Integer, nil] the total number of records, or `nil` if the total has not been read yet
        def total
          @total&.to_i
        end

        # Returns the number of records yielded.
        #
        # @return [Integer] the number of records yielded.
        def records_yielded
          @records_yielded ||= 0
        end

        # ############################################################
        # Initializer

        # Reads MARC records from an XML datasource given either as a file path,
        # as an IO object, or as a string of XML text.
        #
        # @param source [String, Pathname, IO] the path to a file, an IO to read from directly, or XML text
        # @param freeze [Boolean] whether to freeze each record after reading
        # @raise [ArgumentError] if `source` is not an IO, an existing file path, or XML text
        def initialize(source, freeze: false)
          @handle = ensure_io(source)
          @freeze = freeze
          init
        end

        class << self
          include MARCExtensions::XMLReaderClassExtensions
        end

        # ############################################################
        # MARC::GenericPullParser overrides

        # Optionally freezes the current record, then hands it to the
        # superclass implementation. The yielded-record counter is bumped
        # in an `ensure` clause, so it advances even if `super` raises.
        def yield_record
          @record[:record].freeze if @freeze
          super
        ensure
          increment_records_yielded!
        end

        # ############################################################
        # Nokogiri::XML::SAX::Document overrides

        # Remembers the name of the element being opened so that {#characters}
        # can route its text, and starts a fresh text buffer for it.
        #
        # @see Nokogiri::XML::Sax::Document#start_element_namespace
        # rubocop:disable Metrics/ParameterLists
        def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
          super

          @current_element_name = name
          @tind_element_text = nil
        end
        # rubocop:enable Metrics/ParameterLists

        # Clears the current element name and text buffer, so that text between
        # elements is ignored by {#characters}.
        #
        # @see Nokogiri::XML::Sax::Document#end_element_namespace
        def end_element_namespace(name, prefix = nil, uri = nil)
          super

          @current_element_name = nil
          @tind_element_text = nil
        end

        # Captures the text of `<search_id/>` and `<total/>`; all other text
        # is passed on to the superclass implementation.
        #
        # A SAX parser may deliver one contiguous run of text in several
        # calls, so chunks are accumulated rather than overwriting the
        # previous value.
        #
        # @see Nokogiri::XML::Sax::Document#characters
        def characters(string)
          return unless (name = @current_element_name)

          case name
          when 'search_id'
            @search_id = accumulate_text(string)
          when 'total'
            @total = accumulate_text(string).to_i
          else
            super
          end
        end

        # Picks up the total from a `Search-Engine-Total-Number-Of-Results`
        # comment; any other comment is ignored.
        #
        # @see Nokogiri::XML::Sax::Document#comment
        def comment(string)
          return unless (md = COMMENT_TOTAL_RE.match(string))

          @total = md[1].to_i
        end

        # ############################################################
        # Private

        private

        # Coerces `file` into something readable: an IO-like object is returned
        # unchanged, an existing path is opened, and XML text is wrapped in a
        # StringIO.
        #
        # @param file [Object] the candidate source
        # @return [IO, StringIO] a readable object
        # @raise [ArgumentError] if `file` is none of the above
        def ensure_io(file)
          return file if io_like?(file)
          return File.new(file) if file_exists?(file)
          return StringIO.new(file) if xml_text?(file)

          raise ArgumentError, "Don't know how to read XML from #{file.inspect}: not an IO, file path, or XML text"
        end

        # Returns true if `obj` is close enough to an IO object for Nokogiri
        # to parse as one.
        #
        # @param obj [Object] the object that might be an IO
        # @see https://github.com/sparklemotion/nokogiri/blob/v1.11.1/lib/nokogiri/xml/sax/parser.rb#L81 Nokogiri::XML::SAX::Parser#parse
        def io_like?(obj)
          obj.respond_to?(:read) && obj.respond_to?(:close)
        end

        # Returns true if `path` is a path-like object (responds to `exist?`)
        # or a string-like object (responds to `to_str`) naming an existing file.
        def file_exists?(path)
          (path.respond_to?(:exist?) && path.exist?) ||
            (path.respond_to?(:to_str) && File.exist?(path))
        end

        # Returns true if `obj` is string-like and has a line beginning
        # (after optional whitespace) with `<`.
        #
        # The `to_str` check matters: matching a regexp against an arbitrary
        # object (e.g. a Pathname for a missing file) raises NoMethodError on
        # Rubies without `Object#=~`, which would mask the ArgumentError
        # raised by {#ensure_io}.
        def xml_text?(obj)
          obj.respond_to?(:to_str) && /^\s*</x.match?(obj.to_str)
        end

        # Appends `chunk` to the text collected so far for the current
        # element and returns the combined text.
        #
        # NOTE(review): the instance variable is deliberately given a
        # distinctive name to avoid clashing with state kept by the included
        # MARC reader modules -- confirm against the ruby-marc version in use.
        def accumulate_text(chunk)
          @tind_element_text = "#{@tind_element_text}#{chunk}"
        end

        def increment_records_yielded!
          @records_yielded = records_yielded + 1
        end
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ module BerkeleyLibrary
2
+ module TIND
3
+ class ModuleInfo
4
+ NAME = 'berkeley_library-tind'.freeze
5
+ AUTHOR = 'David Moles'.freeze
6
+ AUTHOR_EMAIL = 'dmoles@berkeley.edu'.freeze
7
+ SUMMARY = 'TIND DA utilities for the UC Berkeley Library'.freeze
8
+ DESCRIPTION = 'UC Berkeley Library utility gem for working with the TIND DA digital archive.'.freeze
9
+ LICENSE = 'MIT'.freeze
10
+ VERSION = '0.4.0'.freeze
11
+ HOMEPAGE = 'https://github.com/BerkeleyLibrary/tind'.freeze
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,178 @@
1
module BerkeleyLibrary
  module Util
    # Order-aware array utilities: subsequence checks, index lookup,
    # index inversion, and order-preserving merging.
    module Arrays
      class << self
        # Clients can choose to call class methods directly, or include the module
        include Arrays
      end

      # Checks whether the specified list contains, in the same order, all
      # values in the other specified list (additional values in between
      # are fine).
      #
      # @param subset [Array] the values to look for
      # @param superset [Array] the list of values to look in
      # @return [Boolean] true if all values were found, false otherwise
      def ordered_superset?(superset:, subset:)
        !find_indices(in_array: superset, for_array: subset).nil?
      end

      # Counts how many contiguous elements from the start of a
      # sequence of values satisfy the given block.
      #
      # @overload count_while(values:)
      #   Returns an enumerator.
      #   @param values [Enumerable] the values
      #   @return [Enumerator] the enumerator.
      # @overload count_while(values:, &block)
      #   Passes elements to the block until the block returns nil or false,
      #   then stops iterating and returns the count of matching elements.
      #   @param values [Enumerable] the values
      #   @return [Integer] the count
      def count_while(values:)
        return to_enum(:count_while, values: values) unless block_given?

        values.inject(0) do |count, x|
          matched = yield x
          # stop at the first non-matching element; `break` makes `inject` return the count so far
          break count unless matched

          count + 1
        end
      end

      # Given two lists, one of which is a superset of the other, with elements
      # in the same order (but possibly with additional elements in the superset),
      # returns an array the length of the subset, containing for each element in
      # the subset the index of the corresponding element in the superset.
      #
      # @overload find_indices(for_array:, in_array:)
      #   For each value in `for_array`, finds the index of the first equal value
      #   in `in_array` after the previously matched value.
      #   @param in_array [Array] the list of values to look in
      #   @param for_array [Array] the values to look for
      #   @return [Array<Integer>, nil] the indices in `in_array` of each value in `for_array`,
      #     or `nil` if not all values could be found
      #
      # @overload find_indices(for_array:, in_array:, &block)
      #   For each value in `for_array`, finds the index of the first value
      #   in `in_array` after the previously matched value that matches
      #   the specified match function.
      #   @param in_array [Array] the list of values to look in
      #   @param for_array [Array] the values to look for
      #   @yieldparam source [Object] the value to compare
      #   @yieldparam target [Object] the value to compare against
      #   @return [Array<Integer>, nil] the indices in `in_array` of each value in `for_array`,
      #     or `nil` if not all values could be found
      def find_indices(for_array:, in_array:, &block)
        return find_indices_matching(for_array, in_array, &block) if block_given?

        find_all_indices(for_array, in_array)
      end

      # Given a block or a value, finds the index of the first matching value
      # at or after the specified start index.
      #
      # @overload find_index(value, in_array:, start_index:)
      #   Finds the first index of the specified value.
      #   @param value [Object] the value to find
      #   @param in_array [Array] the array to search
      #   @param start_index [Integer] the index to start with
      #   @return [Integer, nil] the index, or `nil` if no value matches
      # @overload find_index(in_array:, start_index:, &block)
      #   Finds the index of the first value matching
      #   the specified block.
      #   @param in_array [Array] the array to search
      #   @param start_index [Integer] the index to start with
      #   @yieldreturn [Boolean] whether the element matches
      #   @return [Integer, nil] the index, or `nil` if no value matches
      # @overload find_index(in_array:, start_index:)
      #   @param in_array [Array] the array to search
      #   @param start_index [Integer] the index to start with
      #   @return [Enumerator] a new enumerator
      # @raise [ArgumentError] if more than one positional argument is given
      def find_index(*args, in_array:, start_index: 0, &block)
        raise ArgumentError, "wrong number of arguments (given #{args.size}, expected 0..1)" if args.size > 1
        return Enumerator.new { |y| find_index(in_array: in_array, start_index: start_index, &y) } if args.empty? && !block_given?

        # slicing past the end of the array yields nil: nothing to search, so nothing matches
        return unless (tail = in_array[start_index..])
        return unless (relative_index = tail.find_index(*args, &block))

        relative_index + start_index
      end

      # Given an array of unique integers _a<sub>1</sub>_, returns a new array
      # _a<sub>2</sub>_ in which the value at each index _i<sub>2</sub>_ is the
      # index _i<sub>1</sub>_ at which that value was found in _a<sub>1</sub>_.
      # E.g., given `[0, 2, 3]`, returns `[0, nil, 1, 2]`. The indices need
      # not be in order but must be unique.
      #
      # @param arr [Array<Integer>, nil] the array to invert.
      # @return [Array<Integer, nil>, nil] the inverted array, or nil if the input array is nil
      # @raise TypeError if `arr` is not an array of integers
      # @raise ArgumentError if `arr` contains duplicate values
      def invert(arr)
        return unless arr

        # The result starts at the input's size and grows automatically
        # when a value exceeds it.
        # noinspection RubyNilAnalysis
        Array.new(arr.size).tap do |inv|
          arr.each_with_index do |v, i|
            prev_index = inv[v]
            raise ArgumentError, "Duplicate value #{v} at index #{i} already found at #{prev_index}" if prev_index

            inv[v] = i
          end
        end
      end

      # Merges two arrays in an order-preserving manner.
      # @param a1 [Array] the first array
      # @param a2 [Array] the second array
      # @return [Array] a merged array that is an ordered superset of both `a1` and `a2`
      # @see Arrays#ordered_superset?
      def merge(a1, a2)
        return a1 if a2.empty?
        return a2 if a1.empty?

        shorter, longer = a1.size > a2.size ? [a2, a1] : [a1, a2]
        do_merge(shorter, longer)
      end

      private

      # Finds the first element of `shorter` that also occurs in `longer`,
      # emits everything preceding it in both arrays, then the shared
      # element, then recursively merges the remainders. If the arrays share
      # no element, they are simply concatenated.
      def do_merge(shorter, longer)
        shorter.each_with_index do |v, ix_s|
          next unless (ix_l = longer.find_index(v))

          shorter_unmatched = shorter[0...ix_s]
          longer_unmatched = longer[0...ix_l]
          all_unmatched = sort_by_first_and_flatten(shorter_unmatched, longer_unmatched)
          return (all_unmatched << v) + merge(shorter[ix_s + 1..], longer[ix_l + 1..])
        end

        sort_by_first_and_flatten(longer, shorter)
      end

      # Concatenates two arrays, placing `a2` first only when the first
      # element of `a1` responds to `>` and compares greater than the first
      # element of `a2`. An empty array short-circuits to the other one.
      def sort_by_first_and_flatten(a1, a2)
        return a1 if a2.empty?
        return a2 if a1.empty?
        return a2 + a1 if a1.first.respond_to?(:>) && a1.first > a2.first

        a1 + a2
      end

      # Equality-based implementation of {#find_indices}: each search
      # resumes just after the previous match.
      def find_all_indices(source, target)
        source.each_with_object([]) do |src, target_indices|
          target_offset = target_indices.empty? ? 0 : target_indices.last + 1
          return nil unless (target_index = find_index(src, in_array: target, start_index: target_offset))

          target_indices << target_index
        end
      end

      # Block-based implementation of {#find_indices}: the block receives
      # the value sought and each candidate in turn.
      def find_indices_matching(source, target)
        source.each_with_object([]) do |src, target_indices|
          target_offset = target_indices.empty? ? 0 : target_indices.last + 1
          return nil unless (target_index = find_index(in_array: target, start_index: target_offset) { |tgt| yield src, tgt })

          target_indices << target_index
        end
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ require 'berkeley_library/logging'
@@ -0,0 +1,170 @@
1
+ require 'fileutils'
2
+ require 'zip'
3
+ require 'berkeley_library/util/logging'
4
+ require 'berkeley_library/util/ods/xml/content_doc'
5
+ require 'berkeley_library/util/ods/xml/styles_doc'
6
+ require 'berkeley_library/util/ods/xml/manifest_doc'
7
+
8
module BerkeleyLibrary
  module Util
    module ODS
      # An OpenDocument spreadsheet, assembled from a content document, a
      # styles document, and a manifest listing both, and written out either
      # as a zip container or as an exploded directory of XML files.
      class Spreadsheet
        include BerkeleyLibrary::Logging

        # ------------------------------------------------------------
        # Utility methods

        # Adds a table ('worksheet') to the spreadsheet.
        #
        # @param name [String] the table name
        # @param protected [Boolean] whether to protect the table
        # @return [BerkeleyLibrary::Util::ODS::XML::Table::Table] a new table with the specified name
        def add_table(name, protected: true)
          document_content = content.document_content
          document_content.add_table(name, protected: protected)
        end

        # ------------------------------------------------------------
        # Accessors

        # Returns the content document, creating it on first access.
        # @return [XML::ContentDoc] the container root-level content document
        def content
          @content ||= XML::ContentDoc.new
        end

        # Returns the container styles, creating them on first access.
        # @return [XML::StylesDoc] the container root-level style document
        def styles
          @styles ||= XML::StylesDoc.new
        end

        # Returns the container manifest. On first access, the manifest
        # document is created and given an entry for each of the styles and
        # content documents.
        # @return [XML::ManifestDoc] the container manifest document
        def manifest
          @manifest ||= begin
            mf_doc = XML::ManifestDoc.new
            entries = mf_doc.manifest
            manifest_docs.each { |doc| entries.add_entry_for(doc) }
            mf_doc
          end
        end

        # Gets the document styles
        #
        # @return [BerkeleyLibrary::Util::ODS::XML::Office::AutomaticStyles] the styles
        def auto_styles
          content.document_content.automatic_styles
        end

        # ------------------------------------------------------------
        # Output

        # @overload write_to
        #   Writes to a new string.
        #   @return [String] a binary string containing the spreadsheet data.
        # @overload write_to(out)
        #   Writes to the specified output stream.
        #   @param out [IO] the output stream
        #   @return [void]
        # @overload write_to(path)
        #   Writes to the specified file. If `path` denotes a directory, the
        #   spreadsheet will be written as exploded, pretty-printed XML.
        #   @param path [String, Pathname] the path to the output file
        #   @return [void]
        #   @see BerkeleyLibrary::Util::ODS::Spreadsheet#write_exploded_to
        # noinspection RubyYardReturnMatch
        def write_to(out = nil)
          if !out
            write_to_string
          elsif io_like?(out)
            write_to_stream(out)
          elsif File.directory?(out)
            write_exploded_to(out)
          else
            write_to_file(out)
          end
        end

        # Writes to a new string.
        # @return [String] the contents of the in-memory buffer written to
        def write_to_string
          buffer = StringIO.new
          # noinspection RubyYardParamTypeMatch
          write_to_stream(buffer)
          buffer.string
        end

        # Writes to the specified output stream. Zip64 support is switched on
        # for the duration of the write, and the previous setting is restored
        # afterwards even if writing fails.
        # @param out [IO]
        def write_to_stream(out)
          previous_zip64 = Zip.write_zip64_support
          begin
            Zip.write_zip64_support = true
            write_zipfile(out)
          ensure
            Zip.write_zip64_support = previous_zip64
          end
        end

        # Writes to the specified file, opened in binary mode.
        # @param path [String, Pathname]
        def write_to_file(path)
          File.open(path, 'wb') do |file|
            write_to_stream(file)
          end
        end

        # Writes this spreadsheet as an exploded set of pretty-printed XML files.
        # NOTE: OpenOffice itself and many other tools get confused by the extra text
        # nodes in the pretty-printed files and won't read them properly; this method
        # is mostly for debugging.
        #
        # @param dir [String, Pathname] an existing directory to write into
        # @return [Array<String>] a list of files written.
        # @raise [ArgumentError] if `dir` is not a directory
        def write_exploded_to(dir)
          raise ArgumentError, "Not a directory: #{dir.inspect}" unless File.directory?(dir)

          files_written = []
          each_document do |doc|
            output_path = write_exploded(doc, dir)
            files_written << File.absolute_path(output_path)
            logger.debug("Wrote #{files_written.last}")
          end
          files_written
        end

        # ------------------------------------------------------------
        # Private methods

        private

        # Yields the manifest first, then each document it lists.
        def each_document
          yield manifest

          manifest_docs.each { |doc| yield doc }
        end

        # The documents listed in the manifest: styles, then content.
        def manifest_docs
          [styles, content]
        end

        # Returns true if `out` is IO-like enough for {Zip::OutputStream}, false otherwise
        # @return [Boolean] whether `out` can be passed to {Zip::OutputStream#write_buffer}
        def io_like?(out)
          out.respond_to?(:reopen) && out.respond_to?(:rewind) && out.respond_to?(:<<)
        end

        # Writes every document as a zip entry on `out`, then flushes.
        def write_zipfile(out)
          zipped = Zip::OutputStream.write_buffer(out) do |zip|
            each_document { |doc| write_zip_entry(doc, zip) }
          end
          # NOTE: Zip::OutputStream plays games with the stream and
          #       doesn't necessarily write everything unless flushed, see:
          #       https://github.com/rubyzip/rubyzip/issues/265
          zipped.flush
        end

        # Starts a new zip entry at the document's path and serializes the
        # document into it.
        def write_zip_entry(doc, zip)
          zip.put_next_entry(doc.path)
          doc.to_xml(zip)
        end

        # Writes one document, non-compact, beneath `dir`, creating parent
        # directories as needed.
        # @return [String] the path written to
        def write_exploded(doc, dir)
          File.join(dir, doc.path).tap do |output_path|
            FileUtils.mkdir_p(File.dirname(output_path))
            doc.to_xml(output_path, compact: false)
          end
        end
      end
    end
  end
end