berkeley_library-holdings 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 3eb22100a823f3b72627e1298176ff71d2f03a1a73262018b2739d43fcceded1
4
+ data.tar.gz: 128191b7233193db6135bb5d9229f71ffe017c945261047203d6a1f9734c9cb5
5
+ SHA512:
6
+ metadata.gz: e7ee7b5b3178c507f6e7e937d56d9b0fe48cfbe6116ce12a29343e3ed4d9155803b14fdca813e5a0a967856033e45e7051687915c90f3ea371e5d36ca817719e
7
+ data.tar.gz: 2bfe0f25c2f0375f4b95f78a49440a56688473ac5cd68ddea3c3e492df72662955762749ffa87b0ee0e53c13dc1b214525645a4bf5bd18241681c0455b61f701
data/CHANGELOG.md ADDED
@@ -0,0 +1,5 @@
1
+ ## [Unreleased]
2
+
3
+ ## [0.1.0] - 2023-02-24
4
+
5
+ - Initial release
data/LICENSE.md ADDED
@@ -0,0 +1,21 @@
1
+ # The MIT License (MIT)
2
+
3
+ Copyright © 2023 The Regents of the University of California
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a
6
+ copy of this software and associated documentation files (the “Software”),
7
+ to deal in the Software without restriction, including without limitation
8
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ and/or sell copies of the Software, and to permit persons to whom the
10
+ Software is furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,6 @@
1
+ # BerkeleyLibrary::Holdings
2
+
3
+ [![Build Status](https://github.com/BerkeleyLibrary/holdings/actions/workflows/build.yml/badge.svg?branch=main)](https://github.com/BerkeleyLibrary/holdings/actions/workflows/build.yml)
4
+ [![Gem Version](https://img.shields.io/gem/v/berkeley_library-holdings.svg)](https://github.com/BerkeleyLibrary/holdings/releases)
5
+
6
+ Miscellaneous holdings-related utilities for the UC Berkeley Library.
@@ -0,0 +1,8 @@
1
module BerkeleyLibrary
  module Holdings
    # Constants mixed into the holdings spreadsheet classes.
    module Constants
      # Header text identifying the spreadsheet column that holds OCLC numbers.
      OCLC_COL_HEADER = 'OCLC Number'.freeze
    end
  end
end
@@ -0,0 +1,56 @@
1
+ require 'berkeley_library/util'
2
+
3
module BerkeleyLibrary
  module Holdings
    module HathiTrust
      # HathiTrust API settings. The module extends itself, so its public
      # methods can be called directly on it, e.g. `Config.base_uri`.
      module Config
        include BerkeleyLibrary::Util::URIs

        # Name of the environment variable consulted first for the base URL.
        ENV_HATHITRUST_BASE_URL = 'LIT_HATHITRUST_BASE_URL'.freeze

        # Base URL used when neither the environment variable nor the Rails
        # application configuration supplies one.
        DEFAULT_HATHITRUST_BASE_URL = 'https://catalog.hathitrust.org/api/'.freeze

        extend self

        # Returns the configured base URI, computing it from the defaults on
        # first access when none has been assigned.
        def base_uri
          @base_uri ||= default_hathitrust_base_uri
        end

        # Sets the base URI; the value is passed through `uri_or_nil`.
        def base_uri=(value)
          @base_uri = uri_or_nil(value)
        end

        private

        # Forgets any cached or assigned base URI so the next read recomputes it.
        def reset!
          return unless instance_variable_defined?(:@base_uri)

          remove_instance_variable(:@base_uri)
        end

        def default_hathitrust_base_uri
          uri_or_nil(default_hathitrust_base_url)
        end

        # Lookup order: environment variable, Rails config, built-in default.
        def default_hathitrust_base_url
          env_url = ENV[ENV_HATHITRUST_BASE_URL]
          return env_url if env_url

          rails_hathitrust_base_url || DEFAULT_HATHITRUST_BASE_URL
        end

        # Reads `hathitrust_base_url` from the Rails application config, or
        # returns nil when there is no config or it lacks that setting.
        def rails_hathitrust_base_url
          config = rails_config
          return unless config

          config.hathitrust_base_url if config.respond_to?(:hathitrust_base_url)
        end

        # Returns the Rails application config, or nil when Rails is not
        # loaded or no application has been set.
        def rails_config
          return unless defined?(Rails)

          application = Rails.application
          application.config if application
        end
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ require 'berkeley_library/holdings/hathi_trust/record_url_request_base'
2
+
3
module BerkeleyLibrary
  module Holdings
    module HathiTrust
      # Looks up HathiTrust record URLs for several OCLC numbers in a single
      # request.
      class RecordUrlBatchRequest
        include RecordUrlRequestBase

        # Per HathiTrust API docs: https://www.hathitrust.org/bib_api
        MAX_BATCH_SIZE = 20

        attr_reader :oclc_numbers

        # @param oclc_numbers the OCLC numbers to look up (as strings); at
        #   least one and at most MAX_BATCH_SIZE.
        # @raise [ArgumentError] if the list is nil, empty, too long, or
        #   contains an invalid OCLC number.
        def initialize(oclc_numbers)
          @oclc_numbers = ensure_valid_oclc_numbers!(oclc_numbers)
        end

        # Performs the request and parses the response.
        # @return [Hash] record URLs keyed by OCLC number; numbers for which
        #   no record URL was found are omitted.
        def execute
          response_body = URIs.get(uri, log: false)
          record_urls_from(response_body)
        end

        # The request URI: the volumes base URI, then `json`, then the
        # escaped list of `oclc:<number>` keys.
        def uri
          @uri ||= URIs.append(volumes_base_uri, 'json', URIs.path_escape(oclc_list))
        end

        private

        def ensure_valid_oclc_numbers!(oclc_numbers)
          # Treat nil like an empty list so callers get the same ArgumentError
          # as for the other validation failures rather than a NoMethodError.
          raise ArgumentError, 'No OCLC numbers provided' if oclc_numbers.nil? || oclc_numbers.empty?

          if oclc_numbers.size > MAX_BATCH_SIZE
            raise ArgumentError, "Too many OCLC numbers; expected <= #{MAX_BATCH_SIZE}, was #{oclc_numbers.size}"
          end

          OCLCNumber.ensure_oclc_numbers!(oclc_numbers)
        end

        # Pipe-separated list of request keys, built once and cached.
        def oclc_list
          @oclc_list ||= oclc_numbers.map { |oclc_number| key_for(oclc_number) }.join('|')
        end

        # The key under which an OCLC number appears in both request and response.
        def key_for(oclc_number)
          "oclc:#{oclc_number}"
        end

        def record_urls_from(json_str)
          json = JSON.parse(json_str)
          oclc_numbers.filter_map do |oclc_num|
            # Skip numbers the response has no entry for.
            next unless (entry = json[key_for(oclc_num)])

            record_url = find_record_url(entry, oclc_num)
            [oclc_num, record_url] if record_url
          end.to_h
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'berkeley_library/holdings/hathi_trust/record_url_request_base'
2
+
3
module BerkeleyLibrary
  module Holdings
    module HathiTrust
      # Looks up the HathiTrust record URL for a single OCLC number.
      class RecordUrlRequest
        include RecordUrlRequestBase

        attr_reader :oclc_number

        # @param oclc_number the OCLC number to look up; validated with
        #   OCLCNumber.ensure_oclc_number!.
        def initialize(oclc_number)
          @oclc_number = OCLCNumber.ensure_oclc_number!(oclc_number)
        end

        # Performs the request and returns the record URL found in the
        # response, or nil when there is none.
        def execute
          body = URIs.get(uri, log: false)
          record_url_from(body, oclc_number)
        end

        # The request URI: the volumes base URI, then `oclc`, then
        # `<oclc_number>.json`.
        def uri
          @uri ||= URIs.append(volumes_base_uri, 'oclc', "#{oclc_number}.json")
        end

        private

        # Parses the JSON response body and extracts the matching record URL.
        def record_url_from(json_str, oclc_number)
          find_record_url(JSON.parse(json_str), oclc_number)
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ require 'json'
2
+ require 'berkeley_library/util'
3
+ require 'berkeley_library/holdings/oclc_number'
4
+ require 'berkeley_library/holdings/hathi_trust/config'
5
+
6
module BerkeleyLibrary
  module Holdings
    module HathiTrust
      # Behavior shared by the HathiTrust record URL request classes.
      module RecordUrlRequestBase
        include BerkeleyLibrary::Util

        protected

        # The configured base URI with `volumes/brief` appended.
        def volumes_base_uri
          URIs.append(Config.base_uri, 'volumes', 'brief')
        end

        # Returns the `recordURL` of the record in `json_obj['records']` that
        # lists the given OCLC number, or nil when there is no such record.
        def find_record_url(json_obj, oclc_number)
          records = json_obj['records']
          return unless records

          record = find_record(records, oclc_number)
          record['recordURL'] if record
        end

        # Finds the first record whose `oclcs` list includes the OCLC number
        # and which has a `recordURL` key.
        def find_record(records, oclc_number)
          return if records.empty?

          records.values.find do |rec|
            oclc_nums = rec['oclcs']
            next false unless oclc_nums

            oclc_nums.include?(oclc_number) && rec.key?('recordURL')
          end
        end
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ Dir.glob(File.expand_path('hathi_trust/*.rb', __dir__)).each(&method(:require))
@@ -0,0 +1,29 @@
1
+ require 'berkeley_library/holdings/world_cat/symbols'
2
+
3
module BerkeleyLibrary
  module Holdings
    # The holdings information gathered for one OCLC number: the WorldCat
    # institution symbols, the HathiTrust record URL, and any error recorded
    # for either lookup.
    class HoldingsResult
      attr_reader :oclc_number, :wc_symbols, :ht_record_url, :wc_error, :ht_error

      # @param oclc_number the OCLC number this result is for
      # @param wc_symbols [Array] WorldCat institution symbols
      # @param wc_error the WorldCat error, if any
      # @param ht_record_url the HathiTrust record URL, if any
      # @param ht_error the HathiTrust error, if any
      def initialize(oclc_number, wc_symbols: [], wc_error: nil, ht_record_url: nil, ht_error: nil)
        @oclc_number = oclc_number
        @wc_symbols = wc_symbols
        @wc_error = wc_error
        @ht_record_url = ht_record_url
        @ht_error = ht_error
      end

      # @return [Boolean] whether any of the symbols is an NRLF symbol
      def nrlf?
        # `||=` would recompute every time the answer is false, so check
        # whether the variable has been assigned instead.
        return @has_nrlf if defined?(@has_nrlf)

        @has_nrlf = wc_symbols.intersection(WorldCat::Symbols::NRLF).any?
      end

      # @return [Boolean] whether any of the symbols is an SRLF symbol
      def srlf?
        return @has_srlf if defined?(@has_srlf)

        @has_srlf = wc_symbols.intersection(WorldCat::Symbols::SRLF).any?
      end

      # @return [Array] the symbols that appear in WorldCat::Symbols::UC
      def uc_symbols
        @uc_symbols ||= wc_symbols.intersection(WorldCat::Symbols::UC)
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
module BerkeleyLibrary
  module Holdings
    # Descriptive information about this gem: name, author, license, version
    # and homepage.
    module ModuleInfo
      NAME = 'berkeley_library-holdings'.freeze
      VERSION = '1.0.1'.freeze
      LICENSE = 'MIT'.freeze

      AUTHOR = 'David Moles'.freeze
      AUTHOR_EMAIL = 'dmoles@berkeley.edu'.freeze

      SUMMARY = 'Holdings-related utilities for the UC Berkeley Library'.freeze
      DESCRIPTION = 'A collection of holdings-related utilities for the UC Berkeley Library'.freeze
      HOMEPAGE = 'https://github.com/BerkeleyLibrary/holdings'.freeze
    end
  end
end
@@ -0,0 +1,22 @@
1
module BerkeleyLibrary
  module Holdings
    # Validation helpers for OCLC numbers, which are handled as strings.
    module OCLCNumber
      # Validates a single OCLC number.
      #
      # @param oclc_number the value to check
      # @return [String] the value itself, when it is a non-blank string
      # @raise [ArgumentError] if the value is nil, not a String, empty, or
      #   consists only of whitespace
      def self.ensure_oclc_number!(oclc_number)
        raise ArgumentError, 'OCLC number cannot be nil' if oclc_number.nil?
        raise ArgumentError, "OCLC number #{oclc_number.inspect} is not a string" unless oclc_number.is_a?(String)
        raise ArgumentError, 'OCLC number cannot be empty' if oclc_number.empty?
        raise ArgumentError, "OCLC number #{oclc_number.inspect} must not be blank" if oclc_number.strip.empty?

        oclc_number
      end

      # Validates every element of a collection of OCLC numbers.
      #
      # @param oclc_numbers the collection to check
      # @return the collection itself, when every element is valid
      # @raise [ArgumentError] for the first invalid element encountered
      def self.ensure_oclc_numbers!(oclc_numbers)
        oclc_numbers.each { |candidate| ensure_oclc_number!(candidate) }
        oclc_numbers
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
+ require 'berkeley_library/util/uris'
2
+
3
module BerkeleyLibrary
  module Holdings
    module WorldCat
      # WorldCat API settings. The module extends itself, so its public
      # methods can be called directly on it, e.g. `Config.api_key`.
      module Config
        include BerkeleyLibrary::Util::URIs

        # The environment variable from which to read the WorldCat API key.
        ENV_WORLDCAT_API_KEY = 'LIT_WORLDCAT_API_KEY'.freeze

        # The environment variable from which to read the WorldCat base URL.
        ENV_WORLDCAT_BASE_URL = 'LIT_WORLDCAT_BASE_URL'.freeze

        # The default WorldCat base URL, used when neither the environment
        # variable nor the Rails application configuration supplies one.
        DEFAULT_WORLDCAT_BASE_URL = 'https://www.worldcat.org/webservices/'.freeze

        extend self

        # Sets the WorldCat API key.
        # @param value [String] the API key.
        attr_writer :api_key

        # Gets the WorldCat API key, falling back to the environment variable
        # and then the Rails configuration when none has been assigned.
        # @return [String, nil] the WorldCat API key, or `nil` if not set.
        def api_key
          @api_key ||= default_worldcat_api_key
        end

        # Returns the configured base URI, computing it from the defaults on
        # first access when none has been assigned.
        def base_uri
          @base_uri ||= default_worldcat_base_uri
        end

        # Sets the base URI; the value is passed through `uri_or_nil`.
        def base_uri=(value)
          @base_uri = uri_or_nil(value)
        end

        private

        # Forgets any cached or assigned API key and base URI.
        def reset!
          %i[@api_key @base_uri].each do |ivar|
            next unless instance_variable_defined?(ivar)

            remove_instance_variable(ivar)
          end
        end

        def default_worldcat_api_key
          env_key = ENV[ENV_WORLDCAT_API_KEY]
          return env_key if env_key

          rails_worldcat_api_key
        end

        def default_worldcat_base_uri
          uri_or_nil(default_worldcat_base_url)
        end

        # Lookup order: environment variable, Rails config, built-in default.
        def default_worldcat_base_url
          env_url = ENV[ENV_WORLDCAT_BASE_URL]
          return env_url if env_url

          rails_worldcat_base_url || DEFAULT_WORLDCAT_BASE_URL
        end

        # Reads `worldcat_base_url` from the Rails application config, or
        # returns nil when there is no config or it lacks that setting.
        def rails_worldcat_base_url
          config = rails_config
          return unless config

          config.worldcat_base_url if config.respond_to?(:worldcat_base_url)
        end

        # Reads `worldcat_api_key` from the Rails application config, or
        # returns nil when there is no config or it lacks that setting.
        def rails_worldcat_api_key
          config = rails_config
          return unless config

          config.worldcat_api_key if config.respond_to?(:worldcat_api_key)
        end

        # Returns the Rails application config, or nil when Rails is not
        # loaded or no application has been set.
        def rails_config
          return unless defined?(Rails)

          application = Rails.application
          application.config if application
        end
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
+ require 'nokogiri'
2
+ require 'berkeley_library/util'
3
+ require 'berkeley_library/holdings/oclc_number'
4
+ require 'berkeley_library/holdings/world_cat/symbols'
5
+
6
module BerkeleyLibrary
  module Holdings
    module WorldCat
      # Asks WorldCat which of a set of institution symbols hold the item
      # with a given OCLC number.
      #
      # @see https://developer.api.oclc.org/wcv1#/Holdings
      class LibrariesRequest
        include BerkeleyLibrary::Util

        # XPath selecting the institution identifier values in the response.
        XPATH_INST_ID_VALS = '/holdings/holding/institutionIdentifier/value'.freeze

        attr_reader :oclc_number, :symbols

        # @param oclc_number the OCLC number to look up
        # @param symbols the institution symbols to ask about; defaults to
        #   Symbols::ALL
        # @raise [ArgumentError] if the OCLC number or the symbols are invalid
        def initialize(oclc_number, symbols: Symbols::ALL)
          @oclc_number = OCLCNumber.ensure_oclc_number!(oclc_number)
          @symbols = Symbols.ensure_valid!(symbols)
        end

        # The request URI: the holdings base URI with the OCLC number appended.
        def uri
          @uri ||= URIs.append(holdings_base_uri, oclc_number)
        end

        # The query parameters sent with the request.
        #
        # TODO: Check that this works w/more than 10 results
        # See https://developer.api.oclc.org/wcv1#/Holdings
        def params
          @params ||= {
            'oclcsymbol' => symbols.join(','),
            'servicelevel' => 'full',
            'frbrGrouping' => 'off',
            'wskey' => Config.api_key
          }
        end

        # Performs the request and returns the institution symbols found in
        # the response, limited to those that were asked about.
        def execute
          xml = URIs.get(uri, params: params, log: false)
          returned_symbols = holdings_from(xml)
          returned_symbols.select { |symbol| symbols.include?(symbol) } # just in case
        end

        private

        def holdings_base_uri
          URIs.append(Config.base_uri, 'catalog', 'content', 'libraries')
        end

        # Extracts the whitespace-stripped institution identifiers from the
        # XML response body.
        def holdings_from(xml)
          value_nodes = Nokogiri::XML(xml).xpath(XPATH_INST_ID_VALS)
          value_nodes.map { |node| node.text.strip }
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
module BerkeleyLibrary
  module Holdings
    module WorldCat
      # The WorldCat institution symbols this library knows about, plus
      # helpers for validating them. The module's methods are also available
      # on the module itself, e.g. `Symbols.valid?('ZAP')`.
      module Symbols
        NRLF = %w[ZAP ZAPSP].freeze
        SRLF = %w[HH0 ZAS ZASSP].freeze
        RLF = (NRLF + SRLF).freeze

        UC = %w[CLU CRU CUI CUN CUS CUT CUV CUX CUY CUZ MERUC].freeze
        ALL = (RLF + UC).freeze

        class << self
          include Symbols
        end

        # @return [Boolean] whether `sym` is one of the symbols in ALL
        def valid?(sym)
          ALL.include?(sym)
        end

        # Checks that `symbols` is a non-empty collection consisting only of
        # known institution symbols.
        #
        # @param symbols the collection to check
        # @return the collection itself, when it is valid
        # @raise [ArgumentError] if `symbols` is not collection-like, is
        #   empty, or contains anything that is not a known symbol
        def ensure_valid!(symbols)
          raise ArgumentError, "Not a list of institution symbols: #{symbols.inspect}" unless array_like?(symbols)
          raise ArgumentError, 'No institution symbols provided' if symbols.empty?

          invalid = symbols.reject { |s| Symbols.valid?(s) }
          # Must be `empty?`, not `any?`: `[nil].any?` is false, which would
          # let a list containing nil or false slip through as valid.
          return symbols if invalid.empty?

          raise ArgumentError, "Invalid institution symbol(s): #{invalid.map(&:inspect).join(', ')}"
        end

        private

        # True when `a` responds to the collection methods ensure_valid! uses.
        def array_like?(a)
          %i[reject empty?].all? { |m| a.respond_to?(m) }
        end
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ Dir.glob(File.expand_path('world_cat/*.rb', __dir__)).each(&method(:require))
@@ -0,0 +1,29 @@
1
+ require 'marcel'
2
+ require 'rubyXL'
3
+ require 'berkeley_library/holdings/constants'
4
+ require 'berkeley_library/util/xlsx/spreadsheet'
5
+
6
module BerkeleyLibrary
  module Holdings
    # Reads OCLC numbers from the OCLC number column of an .xlsx spreadsheet.
    class XLSXReader
      include Constants

      attr_reader :ss, :oclc_col_index

      # Opens the spreadsheet and locates the column headed OCLC_COL_HEADER.
      #
      # @param xlsx_path path to the spreadsheet file
      def initialize(xlsx_path)
        @ss = Util::XLSX::Spreadsheet.new(xlsx_path)
        @oclc_col_index = ss.find_column_index_by_header!(OCLC_COL_HEADER)
      end

      # Yields each non-blank value of the OCLC number column, converted to
      # a string, skipping the header row.
      #
      # @return an Enumerator when no block is given
      def each_oclc_number
        return to_enum(:each_oclc_number) unless block_given?

        ss.each_value(oclc_col_index, include_header: false) do |raw_value|
          text = raw_value.to_s
          yield text unless text.strip.empty?
        end
      end
    end
  end
end
@@ -0,0 +1,140 @@
1
+ require 'berkeley_library/logging'
2
+ require 'berkeley_library/holdings/constants'
3
+
4
module BerkeleyLibrary
  module Holdings
    # Writes holdings results into a spreadsheet, in the row whose OCLC
    # number column matches each result.
    class XLSXWriter
      include Constants
      include BerkeleyLibrary::Logging

      # Column headers
      COL_NRLF = 'NRLF'.freeze
      COL_SRLF = 'SRLF'.freeze
      COL_OTHER_UC = 'Other UC'.freeze
      COL_WC_ERROR = 'WorldCat Error'.freeze

      COL_HATHI_TRUST = 'Hathi Trust'.freeze
      COL_HATHI_TRUST_ERROR = "#{COL_HATHI_TRUST} Error".freeze

      # Cell values marking an NRLF / SRLF holding
      V_NRLF = 'nrlf'.freeze
      V_SRLF = 'srlf'.freeze

      attr_reader :ss, :rlf, :uc, :hathi_trust

      # @param ss the spreadsheet to write to
      # @param rlf whether to write the NRLF and SRLF columns
      # @param uc whether to write the "Other UC" column
      # @param hathi_trust whether to write the Hathi Trust column
      def initialize(ss, rlf: true, uc: true, hathi_trust: true)
        @ss = ss
        @rlf = rlf
        @uc = uc
        @hathi_trust = hathi_trust

        ensure_columns!
      end

      # Writes one result into its row.
      #
      # @param result the result to write; its `oclc_number` selects the row
      # @raise [ArgumentError] if no row has that OCLC number
      def <<(result)
        row = row_index_for(result.oclc_number)
        write_wc_cols(row, result) if rlf || uc
        write_ht_cols(row, result) if hathi_trust
      end

      private

      def write_wc_cols(r_index, result)
        write_wc_error(r_index, result)
        write_rlf(r_index, result) if rlf
        write_uc(r_index, result) if uc
      end

      def write_ht_cols(r_index, result)
        write_ht_error(r_index, result)
        write_hathi(r_index, result)
      end

      # Looks up each enabled result column once, so that the columns exist
      # before any result is written. The error columns are not included
      # here; they are only looked up when an error is written.
      def ensure_columns!
        if rlf
          nrlf_col_index
          srlf_col_index
        end
        uc_col_index if uc
        ht_col_index if hathi_trust
      end

      def row_index_for(oclc_number)
        row_index_by_oclc_number[oclc_number] || raise(ArgumentError, "Unknown OCLC number: #{oclc_number}")
      end

      def write_rlf(r_index, result)
        ss.set_value_at(r_index, nrlf_col_index, V_NRLF) if result.nrlf?
        ss.set_value_at(r_index, srlf_col_index, V_SRLF) if result.srlf?
      end

      def write_uc(r_index, result)
        held_by = result.uc_symbols
        ss.set_value_at(r_index, uc_col_index, held_by.join(',')) unless held_by.empty?
      end

      def write_hathi(r_index, result)
        url = result.ht_record_url
        ss.set_value_at(r_index, ht_col_index, url) if url
      end

      def write_wc_error(r_index, result)
        error = result.wc_error
        ss.set_value_at(r_index, wc_err_col_index, error) if error
      end

      def write_ht_error(r_index, result)
        error = result.ht_error
        ss.set_value_at(r_index, ht_err_col_index, error) if error
      end

      # Column index lookups, each cached after the first call. Only the
      # OCLC number column is looked up with find_column_index_by_header!;
      # the others go through ensure_column!.

      def oclc_col_index
        @oclc_col_index ||= ss.find_column_index_by_header!(OCLC_COL_HEADER)
      end

      def nrlf_col_index
        @nrlf_col_index ||= ss.ensure_column!(COL_NRLF)
      end

      def srlf_col_index
        @srlf_col_index ||= ss.ensure_column!(COL_SRLF)
      end

      def uc_col_index
        @uc_col_index ||= ss.ensure_column!(COL_OTHER_UC)
      end

      def wc_err_col_index
        @wc_err_col_index ||= ss.ensure_column!(COL_WC_ERROR)
      end

      def ht_col_index
        @ht_col_index ||= ss.ensure_column!(COL_HATHI_TRUST)
      end

      def ht_err_col_index
        @ht_err_col_index ||= ss.ensure_column!(COL_HATHI_TRUST_ERROR)
      end

      # Hash from OCLC number (as a string) to row index, built on first use.
      def row_index_by_oclc_number
        @row_index_by_oclc_number ||= index_rows_by_oclc_number
      end

      # Scans the OCLC number column and records the first row in which each
      # number appears; later duplicates are logged and skipped.
      def index_rows_by_oclc_number
        indices = {}
        # Start at 1 to skip header row
        (1...ss.row_count).each do |r_index|
          raw_value = ss.value_at(r_index, oclc_col_index)
          next unless raw_value

          oclc_number = raw_value.to_s
          if indices.key?(oclc_number)
            logger.warn("Skipping duplicate OCLC number #{oclc_number} in row #{r_index}")
          else
            indices[oclc_number] = r_index
          end
        end
        indices
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ Dir.glob(File.expand_path('holdings/*.rb', __dir__)).each(&method(:require))
@@ -0,0 +1,156 @@
1
+ require 'marcel'
2
+ require 'rubyXL'
3
+ require 'rubyXL/convenience_methods/cell'
4
+ require 'rubyXL/convenience_methods/worksheet'
5
+ require 'zip'
6
+
7
module BerkeleyLibrary
  module Util
    module XLSX
      # Convenience wrapper for RubyXL::Workbook. All row, column and cell
      # accessors operate on the workbook's first worksheet.
      class Spreadsheet

        # .xlsx format, a.k.a. "Office Open XML Workbook" spreadsheet
        MIME_TYPE_OOXML_WB = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'.freeze

        # path to Excel worksheet file in zipped OOXML archive; anchored to
        # the whole entry name rather than to a single line of it
        RE_EXCEL_WORKSHEET_ZIP_ENTRY = %r{\Axl/worksheets/[^/.]+\.xml\z}

        DEFAULT_WORKSHEET_NAME = 'Sheet1'.freeze

        attr_reader :workbook, :xlsx_path

        # @param xlsx_path path of an existing .xlsx file to open, or nil to
        #   start from a new, empty workbook
        # @raise [ArgumentError] if a path is given but the file is not
        #   recognized as an .xlsx workbook
        def initialize(xlsx_path = nil)
          @workbook = xlsx_path ? ensure_xlsx_workbook!(xlsx_path) : RubyXL::Workbook.new
          @xlsx_path = xlsx_path
        end

        # Forwards to the workbook's `stream` method. Written out explicitly
        # so the class does not rely on ActiveSupport's `Module#delegate`
        # having been loaded by some other file.
        def stream(...)
          workbook.stream(...)
        end

        # Writes the workbook to the given path and remembers that path.
        def save_as(new_xlsx_path)
          workbook.write(new_xlsx_path)
          @xlsx_path = new_xlsx_path
        end

        # The first worksheet of the workbook.
        def worksheet
          @worksheet ||= workbook.worksheets[0]
        end

        # The first row of the worksheet, added if the worksheet has none.
        def header_row
          @header_row ||= worksheet[0] || worksheet.add_row
        end

        # @return [Integer, nil] index of the header-row cell whose value
        #   equals `header`, or nil if there is none
        def find_column_index_by_header(header)
          find_column_index(header_row, header)
        end

        # Like #find_column_index_by_header, but raises when not found.
        # @raise [ArgumentError] if no header cell has that value
        def find_column_index_by_header!(header)
          c_index = find_column_index_by_header(header)
          return c_index if c_index

          raise ArgumentError, "#{header.inspect} column not found"
        end

        # Finds the index of the first matching cell in a row.
        #
        # With one extra argument, matches the cell whose value equals that
        # argument (missing cells never match). With no extra argument,
        # matches the first cell for which the given block returns true; the
        # block may receive nil for missing cells.
        #
        # @raise [ArgumentError] if more than one extra argument is given
        def find_column_index(row, *args)
          case args.size
          when 0
            (0...row.size).find { |c_index| yield row[c_index] }
          when 1
            find_column_index(row) { |cell| cell&.value == args[0] }
          else
            raise ArgumentError, "Wrong number of arguments (given #{args.size}, expected 0..1)"
          end
        end

        # Yields the value of each cell in the given column, row by row.
        #
        # @param c_index the column index
        # @param include_header whether to include the first row
        # @return an Enumerator when no block is given
        def each_value(c_index, include_header: true)
          return to_enum(:each_value, c_index, include_header:) unless block_given?

          start_index = include_header ? 0 : 1
          (start_index...row_count).each do |r_index|
            yield value_at(r_index, c_index)
          end
        end

        # @return the cell at the given position, or nil if the row does not exist
        def cell_at(r_index, c_index)
          return unless (row = worksheet[r_index])

          row[c_index]
        end

        # @return the value of the cell at the given position, or nil if
        #   there is no such cell
        def value_at(r_index, c_index)
          return unless (cell = cell_at(r_index, c_index))

          cell.value
        end

        # Sets the value at the given position, changing the existing cell
        # when there is one and adding a cell otherwise.
        def set_value_at(r_index, c_index, value)
          if (cell = cell_at(r_index, c_index))
            cell.change_contents(value)
          else
            worksheet.add_cell(r_index, c_index, value)
          end
        end

        def rows
          sheet_data.rows
        end

        def row_count
          sheet_data.size
        end

        # @param r_index a row index, or nil
        # @return [Integer] the size of the given row (0 if it does not
        #   exist), or, without a row index, the size of the widest row
        def column_count(r_index = nil)
          if r_index
            return (row = worksheet[r_index]) ? row.size : 0
          end

          rows.inject(0) do |cc_max, r|
            r ? [r.size, cc_max].max : cc_max
          end
        end

        # Returns the index of the column with the given header, adding the
        # header after the last column if no such column exists yet.
        def ensure_column!(header)
          c_index_existing = find_column_index_by_header(header)
          return c_index_existing if c_index_existing

          column_count.tap { |cc| worksheet.insert_cell(0, cc, header) }
        end

        private

        def sheet_data
          worksheet.sheet_data
        end

        def ensure_xlsx_workbook!(xlsx_path)
          # RubyXL will try to parse an Excel 95 or 97 file (which are still
          # zip-based) but then choke when it tries to read the worksheet, so
          # we explicitly check the MIME type here
          check_mime_type!(xlsx_path)

          RubyXL::Parser.parse(xlsx_path)
        end

        # @raise [ArgumentError] if the file is neither detected as an OOXML
        #   workbook nor a ZIP archive containing a worksheet entry
        def check_mime_type!(xlsx_path)
          xlsx_pathname = Pathname.new(xlsx_path)
          mime_type = Marcel::MimeType.for(xlsx_pathname)

          # TODO: test w/application/vnd.ms-excel.sheet.macroenabled.12
          return if Marcel::Magic.child?(mime_type, MIME_TYPE_OOXML_WB)

          # Marcel fails to recognize some OOXML files, probably due to unexpected entry order
          # and/or large entries pushing the signature it's looking for too deep into the file
          return ensure_xlsx!(xlsx_path) if Marcel::Magic.child?(mime_type, 'application/zip')

          raise ArgumentError, "Expected Excel Workbook (.xlsx), got #{mime_type}: #{xlsx_path}"
        end

        # @raise [ArgumentError] if the ZIP archive has no worksheet entry
        def ensure_xlsx!(zipfile_path)
          has_worksheet = Zip::File.open(zipfile_path) do |zf|
            zf.any? { |e| RE_EXCEL_WORKSHEET_ZIP_ENTRY.match?(e.name) }
          end
          return if has_worksheet

          raise ArgumentError, "No Excel worksheets found in ZIP archive #{zipfile_path}"
        end
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ Dir.glob(File.expand_path('xlsx/*.rb', __dir__)).each(&method(:require))
metadata ADDED
@@ -0,0 +1,311 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: berkeley_library-holdings
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.1
5
+ platform: ruby
6
+ authors:
7
+ - David Moles
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-04-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: berkeley_library-logging
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.2'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.2'
27
+ - !ruby/object:Gem::Dependency
28
+ name: berkeley_library-util
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.1'
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: 0.1.8
37
+ type: :runtime
38
+ prerelease: false
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - "~>"
42
+ - !ruby/object:Gem::Version
43
+ version: '0.1'
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 0.1.8
47
+ - !ruby/object:Gem::Dependency
48
+ name: marcel
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: 1.0.2
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: 1.0.2
61
+ - !ruby/object:Gem::Dependency
62
+ name: rest-client
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '2.1'
68
+ type: :runtime
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '2.1'
75
+ - !ruby/object:Gem::Dependency
76
+ name: rubyXL
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '3.4'
82
+ type: :runtime
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '3.4'
89
+ - !ruby/object:Gem::Dependency
90
+ name: bundle-audit
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - "~>"
94
+ - !ruby/object:Gem::Version
95
+ version: '0.1'
96
+ type: :development
97
+ prerelease: false
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - "~>"
101
+ - !ruby/object:Gem::Version
102
+ version: '0.1'
103
+ - !ruby/object:Gem::Dependency
104
+ name: ci_reporter_rspec
105
+ requirement: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: '1.0'
110
+ type: :development
111
+ prerelease: false
112
+ version_requirements: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - "~>"
115
+ - !ruby/object:Gem::Version
116
+ version: '1.0'
117
+ - !ruby/object:Gem::Dependency
118
+ name: colorize
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '0.8'
124
+ type: :development
125
+ prerelease: false
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - "~>"
129
+ - !ruby/object:Gem::Version
130
+ version: '0.8'
131
+ - !ruby/object:Gem::Dependency
132
+ name: dotenv
133
+ requirement: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - "~>"
136
+ - !ruby/object:Gem::Version
137
+ version: '2.7'
138
+ type: :development
139
+ prerelease: false
140
+ version_requirements: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - "~>"
143
+ - !ruby/object:Gem::Version
144
+ version: '2.7'
145
+ - !ruby/object:Gem::Dependency
146
+ name: rake
147
+ requirement: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - "~>"
150
+ - !ruby/object:Gem::Version
151
+ version: '13.0'
152
+ type: :development
153
+ prerelease: false
154
+ version_requirements: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - "~>"
157
+ - !ruby/object:Gem::Version
158
+ version: '13.0'
159
+ - !ruby/object:Gem::Dependency
160
+ name: rspec
161
+ requirement: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - "~>"
164
+ - !ruby/object:Gem::Version
165
+ version: '3.10'
166
+ type: :development
167
+ prerelease: false
168
+ version_requirements: !ruby/object:Gem::Requirement
169
+ requirements:
170
+ - - "~>"
171
+ - !ruby/object:Gem::Version
172
+ version: '3.10'
173
+ - !ruby/object:Gem::Dependency
174
+ name: rubocop
175
+ requirement: !ruby/object:Gem::Requirement
176
+ requirements:
177
+ - - '='
178
+ - !ruby/object:Gem::Version
179
+ version: '1.39'
180
+ type: :development
181
+ prerelease: false
182
+ version_requirements: !ruby/object:Gem::Requirement
183
+ requirements:
184
+ - - '='
185
+ - !ruby/object:Gem::Version
186
+ version: '1.39'
187
+ - !ruby/object:Gem::Dependency
188
+ name: rubocop-rake
189
+ requirement: !ruby/object:Gem::Requirement
190
+ requirements:
191
+ - - '='
192
+ - !ruby/object:Gem::Version
193
+ version: 0.6.0
194
+ type: :development
195
+ prerelease: false
196
+ version_requirements: !ruby/object:Gem::Requirement
197
+ requirements:
198
+ - - '='
199
+ - !ruby/object:Gem::Version
200
+ version: 0.6.0
201
+ - !ruby/object:Gem::Dependency
202
+ name: rubocop-rspec
203
+ requirement: !ruby/object:Gem::Requirement
204
+ requirements:
205
+ - - '='
206
+ - !ruby/object:Gem::Version
207
+ version: 2.4.0
208
+ type: :development
209
+ prerelease: false
210
+ version_requirements: !ruby/object:Gem::Requirement
211
+ requirements:
212
+ - - '='
213
+ - !ruby/object:Gem::Version
214
+ version: 2.4.0
215
+ - !ruby/object:Gem::Dependency
216
+ name: ruby-prof
217
+ requirement: !ruby/object:Gem::Requirement
218
+ requirements:
219
+ - - "~>"
220
+ - !ruby/object:Gem::Version
221
+ version: 0.17.0
222
+ type: :development
223
+ prerelease: false
224
+ version_requirements: !ruby/object:Gem::Requirement
225
+ requirements:
226
+ - - "~>"
227
+ - !ruby/object:Gem::Version
228
+ version: 0.17.0
229
+ - !ruby/object:Gem::Dependency
230
+ name: simplecov
231
+ requirement: !ruby/object:Gem::Requirement
232
+ requirements:
233
+ - - "~>"
234
+ - !ruby/object:Gem::Version
235
+ version: '0.21'
236
+ type: :development
237
+ prerelease: false
238
+ version_requirements: !ruby/object:Gem::Requirement
239
+ requirements:
240
+ - - "~>"
241
+ - !ruby/object:Gem::Version
242
+ version: '0.21'
243
+ - !ruby/object:Gem::Dependency
244
+ name: webmock
245
+ requirement: !ruby/object:Gem::Requirement
246
+ requirements:
247
+ - - "~>"
248
+ - !ruby/object:Gem::Version
249
+ version: '3.12'
250
+ type: :development
251
+ prerelease: false
252
+ version_requirements: !ruby/object:Gem::Requirement
253
+ requirements:
254
+ - - "~>"
255
+ - !ruby/object:Gem::Version
256
+ version: '3.12'
257
+ description: A collection of holdings-related utilities for the UC Berkeley Library
258
+ email: dmoles@berkeley.edu
259
+ executables: []
260
+ extensions: []
261
+ extra_rdoc_files: []
262
+ files:
263
+ - CHANGELOG.md
264
+ - LICENSE.md
265
+ - README.md
266
+ - lib/berkeley_library/holdings.rb
267
+ - lib/berkeley_library/holdings/constants.rb
268
+ - lib/berkeley_library/holdings/hathi_trust.rb
269
+ - lib/berkeley_library/holdings/hathi_trust/config.rb
270
+ - lib/berkeley_library/holdings/hathi_trust/record_url_batch_request.rb
271
+ - lib/berkeley_library/holdings/hathi_trust/record_url_request.rb
272
+ - lib/berkeley_library/holdings/hathi_trust/record_url_request_base.rb
273
+ - lib/berkeley_library/holdings/holdings_result.rb
274
+ - lib/berkeley_library/holdings/module_info.rb
275
+ - lib/berkeley_library/holdings/oclc_number.rb
276
+ - lib/berkeley_library/holdings/world_cat.rb
277
+ - lib/berkeley_library/holdings/world_cat/config.rb
278
+ - lib/berkeley_library/holdings/world_cat/libraries_request.rb
279
+ - lib/berkeley_library/holdings/world_cat/symbols.rb
280
+ - lib/berkeley_library/holdings/xlsx_reader.rb
281
+ - lib/berkeley_library/holdings/xlsx_writer.rb
282
+ - lib/berkeley_library/util/xlsx.rb
283
+ - lib/berkeley_library/util/xlsx/spreadsheet.rb
284
+ homepage: https://github.com/BerkeleyLibrary/holdings
285
+ licenses:
286
+ - MIT
287
+ metadata:
288
+ homepage_uri: https://github.com/BerkeleyLibrary/holdings
289
+ source_code_uri: https://github.com/BerkeleyLibrary/holdings
290
+ changelog_uri: https://github.com/BerkeleyLibrary/holdings/CHANGELOG.md
291
+ rubygems_mfa_required: 'true'
292
+ post_install_message:
293
+ rdoc_options: []
294
+ require_paths:
295
+ - lib
296
+ required_ruby_version: !ruby/object:Gem::Requirement
297
+ requirements:
298
+ - - ">="
299
+ - !ruby/object:Gem::Version
300
+ version: 3.1.0
301
+ required_rubygems_version: !ruby/object:Gem::Requirement
302
+ requirements:
303
+ - - ">="
304
+ - !ruby/object:Gem::Version
305
+ version: '0'
306
+ requirements: []
307
+ rubygems_version: 3.3.25
308
+ signing_key:
309
+ specification_version: 4
310
+ summary: Holdings-related utilities for the UC Berkeley Library
311
+ test_files: []