berkeley_library-location 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: af0453d4f60ad7276b64c48937b36cc76d250eb2a41ff23914f30d7c35edbf85
4
+ data.tar.gz: 2cb9fb72635d239f6b6726f4278b555913b9f209dddb43dbeb03072b495de3d1
5
+ SHA512:
6
+ metadata.gz: 5a9dc82778e93207bc4f23e328b1699f4cb58abed4b7deda70cb8ec713b4c47a95d36be19fdab962b591df78c15182af841d111ef941a6d8ab2436b6587109fe
7
+ data.tar.gz: f6f78720f85d04530e656940d5dcaa1056cfd086de7ca280253d4a3d6aeca22a98236ad6ba2dc6666c4b3b6e765bd1e0cf3842dc69104e7b30eaa50685a69e9e
data/CHANGES.md ADDED
@@ -0,0 +1,33 @@
1
+ # 2.0.0 (2023-06-06)
2
+
3
+ - Rename from "holdings" to "location"
4
+
5
+ # 1.0.5 (2023-06-01)
6
+
7
+ - Update to `berkeley_library-util` 0.1.9 to handle non-ASCII OCLC numbers
8
+ - Fix issue where locating blank columns could fail on spreadsheets with nil rows
9
+
10
+ # 1.0.4 (2023-04-28)
11
+
12
+ - Escape OCLC numbers before constructing query URIs
13
+ (not an issue for correct OCLC numbers, but can be an issue in the event of bad data)
14
+
15
+ # 1.0.3 (2023-04-27)
16
+
17
+ - Fix issue requiring RubyXL extensions to be explicitly required
18
+
19
+ # 1.0.2 (2023-04-27)
20
+
21
+ - Overwrite existing blank columns when writing results to spreadsheet
22
+
23
+ # 1.0.1 (2023-04-26)
24
+
25
+ - First working RubyGems release
26
+
27
+ # 1.0.0 (2023-04-25)
28
+
29
+ - Initial (broken) RubyGems release
30
+
31
+ # 0.1.0 (2023-02-24)
32
+
33
+ - Initial release
data/LICENSE.md ADDED
@@ -0,0 +1,21 @@
1
+ # The MIT License (MIT)
2
+
3
+ Copyright © 2023 The Regents of the University of California
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a
6
+ copy of this software and associated documentation files (the “Software”),
7
+ to deal in the Software without restriction, including without limitation
8
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ and/or sell copies of the Software, and to permit persons to whom the
10
+ Software is furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,6 @@
1
+ # BerkeleyLibrary::Location
2
+
3
+ [![Build Status](https://github.com/BerkeleyLibrary/location/actions/workflows/build.yml/badge.svg?branch=main)](https://github.com/BerkeleyLibrary/location/actions/workflows/build.yml)
4
+ [![Gem Version](https://img.shields.io/gem/v/berkeley_library-location.svg)](https://github.com/BerkeleyLibrary/location/releases)
5
+
6
+ Miscellaneous location-related utilities for the UC Berkeley Library.
@@ -0,0 +1,8 @@
1
module BerkeleyLibrary
  module Location
    # Constants shared by the spreadsheet reader and writer classes.
    module Constants
      # Header text identifying the spreadsheet column that holds OCLC numbers.
      OCLC_COL_HEADER = 'OCLC Number'.freeze
    end
  end
end
@@ -0,0 +1,56 @@
1
+ require 'berkeley_library/util'
2
+
3
module BerkeleyLibrary
  module Location
    module HathiTrust
      # Holds the base URI used for HathiTrust API requests. The value is
      # taken, in order of preference, from an explicit assignment, the
      # LIT_HATHITRUST_BASE_URL environment variable, a Rails application
      # config attribute named `hathitrust_base_url`, or a built-in default.
      module Config
        include BerkeleyLibrary::Util::URIs

        # Name of the environment variable consulted for the base URL.
        ENV_HATHITRUST_BASE_URL = 'LIT_HATHITRUST_BASE_URL'.freeze

        # Fallback base URL used when neither the environment nor Rails supplies one.
        DEFAULT_HATHITRUST_BASE_URL = 'https://catalog.hathitrust.org/api/'.freeze

        # Expose the methods below on the module itself (e.g. `Config.base_uri`).
        extend self

        # @return the configured base URI, computing and caching the default on first use
        def base_uri
          @base_uri ||= default_hathitrust_base_uri
        end

        # Overrides the base URI.
        # @param value the new base URL; converted with `uri_or_nil`
        #   (presumably from BerkeleyLibrary::Util::URIs -- confirm there)
        def base_uri=(value)
          @base_uri = uri_or_nil(value)
        end

        private

        # Forgets any cached or explicitly assigned base URI.
        def reset!
          return unless instance_variable_defined?(:@base_uri)

          remove_instance_variable(:@base_uri)
        end

        # The default base URL, converted with `uri_or_nil`.
        def default_hathitrust_base_uri
          uri_or_nil(default_hathitrust_base_url)
        end

        # First available of: environment variable, Rails config, hard-coded default.
        def default_hathitrust_base_url
          from_env = ENV[ENV_HATHITRUST_BASE_URL]
          from_env || rails_hathitrust_base_url || DEFAULT_HATHITRUST_BASE_URL
        end

        # The `hathitrust_base_url` attribute of the Rails config, if Rails is
        # loaded and its config responds to that name; otherwise nil.
        def rails_hathitrust_base_url
          config = rails_config
          return unless config
          return unless config.respond_to?(:hathitrust_base_url)

          config.hathitrust_base_url
        end

        # The Rails application config, or nil when Rails (or its application) is absent.
        def rails_config
          return unless defined?(Rails)

          app = Rails.application
          app ? app.config : nil
        end
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ require 'berkeley_library/location/hathi_trust/record_url_request_base'
2
+
3
module BerkeleyLibrary
  module Location
    module HathiTrust
      # Looks up HathiTrust record URLs for a batch of OCLC numbers with a
      # single request to the "volumes/brief/json" endpoint.
      class RecordUrlBatchRequest
        include RecordUrlRequestBase

        # Per HathiTrust API docs: https://www.hathitrust.org/bib_api
        MAX_BATCH_SIZE = 20

        # @return the validated OCLC numbers this request covers
        attr_reader :oclc_numbers

        # @param oclc_numbers a non-empty collection of at most MAX_BATCH_SIZE
        #   OCLC numbers, each a non-blank String
        # @raise [ArgumentError] if the collection is empty or too large, or if
        #   any element is rejected by OCLCNumber.ensure_oclc_numbers!
        def initialize(oclc_numbers)
          @oclc_numbers = ensure_valid_oclc_numbers!(oclc_numbers)
        end

        # Performs the request.
        # @return [Hash] OCLC number => record URL, containing only the numbers
        #   for which a record URL was found
        def execute
          response_body = URIs.get(uri, log: false)
          record_urls_from(response_body)
        end

        # @return the request URI (built once and cached)
        def uri
          @uri ||= URIs.append(volumes_base_uri, 'json', URIs.path_escape(oclc_list))
        end

        private

        def ensure_valid_oclc_numbers!(oclc_numbers)
          raise ArgumentError, 'No OCLC numbers provided' if oclc_numbers.empty?

          if oclc_numbers.size > MAX_BATCH_SIZE
            raise ArgumentError, "Too many OCLC numbers; expected <= #{MAX_BATCH_SIZE}, was #{oclc_numbers.size}"
          end

          OCLCNumber.ensure_oclc_numbers!(oclc_numbers)
        end

        # Pipe-separated list of "oclc:<number>" keys. Memoized with `||=`;
        # the original assigned with plain `=` and so rebuilt the string on
        # every call despite storing it in an instance variable.
        def oclc_list
          @oclc_list ||= oclc_numbers.map { |oclc_number| key_for(oclc_number) }.join('|')
        end

        # Key under which an OCLC number appears in both the request path and
        # the JSON response.
        def key_for(oclc_number)
          "oclc:#{oclc_number}"
        end

        # Extracts a record URL for each requested number from the response,
        # skipping numbers with no entry or no matching record.
        def record_urls_from(json_str)
          json = JSON.parse(json_str)
          oclc_numbers.filter_map do |oclc_num|
            next unless (entry = json[key_for(oclc_num)])

            record_url = find_record_url(entry, oclc_num)
            [oclc_num, record_url] if record_url
          end.to_h
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'berkeley_library/location/hathi_trust/record_url_request_base'
2
+
3
module BerkeleyLibrary
  module Location
    module HathiTrust
      # Looks up the HathiTrust record URL for a single OCLC number via the
      # "volumes/brief/oclc/<number>.json" endpoint.
      class RecordUrlRequest
        include RecordUrlRequestBase

        # @return [String] the validated OCLC number
        attr_reader :oclc_number

        # @param oclc_number [String] a non-blank OCLC number
        # @raise [ArgumentError] if OCLCNumber.ensure_oclc_number! rejects the value
        def initialize(oclc_number)
          @oclc_number = OCLCNumber.ensure_oclc_number!(oclc_number)
        end

        # Performs the request.
        # @return the record URL found in the response, or nil if there is none
        def execute
          body = URIs.get(uri, log: false)
          record_url_from(body, oclc_number)
        end

        # @return the request URI (built once and cached)
        def uri
          @uri ||= begin
            base = volumes_base_uri
            file_name = "#{oclc_number}.json"
            URIs.append(base, 'oclc', URIs.path_escape(file_name))
          end
        end

        private

        # Parses the JSON response body and delegates to the shared lookup.
        def record_url_from(json_str, oclc_number)
          find_record_url(JSON.parse(json_str), oclc_number)
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ require 'json'
2
+ require 'berkeley_library/util'
3
+ require 'berkeley_library/location/oclc_number'
4
+ require 'berkeley_library/location/hathi_trust/config'
5
+
6
module BerkeleyLibrary
  module Location
    module HathiTrust
      # Behaviour shared by the single and batch record-URL requests: the
      # common endpoint prefix and the logic for picking a record URL out of
      # a parsed response object.
      module RecordUrlRequestBase
        include BerkeleyLibrary::Util

        protected

        # @return the configured base URI with "volumes/brief" appended
        def volumes_base_uri
          URIs.append(Config.base_uri, 'volumes', 'brief')
        end

        # @param json_obj [Hash] parsed response (or one per-number entry of it)
        # @param oclc_number [String] the OCLC number being looked up
        # @return the `recordURL` of the first matching record, or nil
        def find_record_url(json_obj, oclc_number)
          records = json_obj['records']
          return unless records

          match = find_record(records, oclc_number)
          match['recordURL'] if match
        end

        # Finds the first record that lists the given OCLC number under
        # `oclcs` and also has a `recordURL` key.
        def find_record(records, oclc_number)
          # Checked before calling #values, exactly as in the original code
          return if records.empty?

          records.values.find do |candidate|
            listed = candidate['oclcs']
            listed && listed.include?(oclc_number) && candidate.key?('recordURL')
          end
        end
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ Dir.glob(File.expand_path('hathi_trust/*.rb', __dir__)).each(&method(:require))
@@ -0,0 +1,29 @@
1
+ require 'berkeley_library/location/world_cat/symbols'
2
+
3
module BerkeleyLibrary
  module Location
    # Value object collecting what was found for one OCLC number: the WorldCat
    # institution symbols holding it, the HathiTrust record URL, and any error
    # recorded for either lookup.
    class LocationResult
      attr_reader :oclc_number, :wc_symbols, :ht_record_url, :wc_error, :ht_error

      # @param oclc_number the OCLC number this result describes
      # @param wc_symbols [Array] WorldCat institution symbols (default: none)
      # @param wc_error error recorded for the WorldCat lookup, if any
      # @param ht_record_url HathiTrust record URL, if any
      # @param ht_error error recorded for the HathiTrust lookup, if any
      def initialize(oclc_number, wc_symbols: [], wc_error: nil, ht_record_url: nil, ht_error: nil)
        @oclc_number = oclc_number
        @wc_symbols = wc_symbols
        @wc_error = wc_error
        @ht_record_url = ht_record_url
        @ht_error = ht_error
      end

      # @return [Boolean] whether any of the symbols is in WorldCat::Symbols::NRLF
      def nrlf?
        # `||=` would recompute every time the answer is false, so check
        # whether the variable has been assigned instead.
        return @has_nrlf if defined?(@has_nrlf)

        @has_nrlf = wc_symbols.intersection(WorldCat::Symbols::NRLF).any?
      end

      # @return [Boolean] whether any of the symbols is in WorldCat::Symbols::SRLF
      def srlf?
        return @has_srlf if defined?(@has_srlf)

        @has_srlf = wc_symbols.intersection(WorldCat::Symbols::SRLF).any?
      end

      # @return [Array] the symbols that also appear in WorldCat::Symbols::UC
      def uc_symbols
        @uc_symbols ||= wc_symbols.intersection(WorldCat::Symbols::UC)
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
module BerkeleyLibrary
  module Location
    # Descriptive metadata about this gem (name, author, version, etc.).
    module ModuleInfo
      NAME = 'berkeley_library-location'.freeze
      AUTHOR = 'David Moles'.freeze
      AUTHOR_EMAIL = 'dmoles@berkeley.edu'.freeze
      # Typo fixed: was "Locaton-related"
      SUMMARY = 'Location-related utilities for the UC Berkeley Library'.freeze
      DESCRIPTION = 'A collection of location-related utilities for the UC Berkeley Library'.freeze
      LICENSE = 'MIT'.freeze
      VERSION = '2.0.0'.freeze
      HOMEPAGE = 'https://github.com/BerkeleyLibrary/location'.freeze
    end
  end
end
@@ -0,0 +1,22 @@
1
module BerkeleyLibrary
  module Location
    # Validation helpers for OCLC numbers, which this library handles as
    # non-blank Strings.
    module OCLCNumber
      class << self
        # Validates a single OCLC number.
        # @param oclc_number the value to check
        # @return [String] the same value, unchanged
        # @raise [ArgumentError] if the value is nil, not a String, empty, or
        #   made up only of whitespace
        def ensure_oclc_number!(oclc_number)
          case oclc_number
          when nil
            raise ArgumentError, 'OCLC number cannot be nil'
          when String
            raise ArgumentError, 'OCLC number cannot be empty' if oclc_number.empty?
            raise ArgumentError, "OCLC number #{oclc_number.inspect} must not be blank" if oclc_number.strip.empty?

            oclc_number
          else
            raise ArgumentError, "OCLC number #{oclc_number.inspect} is not a string"
          end
        end

        # Validates every element of a collection of OCLC numbers.
        # @param oclc_numbers a collection responding to #each
        # @return the same collection, unchanged
        # @raise [ArgumentError] on the first invalid element
        def ensure_oclc_numbers!(oclc_numbers)
          oclc_numbers.each { |candidate| ensure_oclc_number!(candidate) }
          oclc_numbers
        end
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
+ require 'berkeley_library/util/uris'
2
+
3
module BerkeleyLibrary
  module Location
    module WorldCat
      # Holds the API key and base URI used for WorldCat requests. Each value
      # comes, in order of preference, from an explicit assignment, an
      # environment variable, or a Rails application config attribute; the
      # base URL additionally falls back to a built-in default.
      module Config
        include BerkeleyLibrary::Util::URIs

        # The environment variable from which to read the WorldCat API key.
        ENV_WORLDCAT_API_KEY = 'LIT_WORLDCAT_API_KEY'.freeze

        # The environment variable from which to read the WorldCat base URL.
        ENV_WORLDCAT_BASE_URL = 'LIT_WORLDCAT_BASE_URL'.freeze

        # The default WorldCat base URL, if ENV_WORLDCAT_BASE_URL is not set.
        DEFAULT_WORLDCAT_BASE_URL = 'https://www.worldcat.org/webservices/'.freeze

        # Expose the methods below on the module itself (e.g. `Config.api_key`).
        extend self

        # Sets the WorldCat API key.
        # @param value [String] the API key.
        attr_writer :api_key

        # Gets the WorldCat API key.
        # @return [String, nil] the WorldCat API key, or `nil` if not set.
        def api_key
          @api_key ||= default_worldcat_api_key
        end

        # @return the configured base URI, computing and caching the default on first use
        def base_uri
          @base_uri ||= default_worldcat_base_uri
        end

        # Overrides the base URI.
        # @param value the new base URL; converted with `uri_or_nil`
        #   (presumably from BerkeleyLibrary::Util::URIs -- confirm there)
        def base_uri=(value)
          @base_uri = uri_or_nil(value)
        end

        private

        # Forgets any cached or explicitly assigned API key and base URI.
        def reset!
          %i[@api_key @base_uri].each do |ivar|
            next unless instance_variable_defined?(ivar)

            remove_instance_variable(ivar)
          end
        end

        # API key from the environment, falling back to the Rails config.
        def default_worldcat_api_key
          from_env = ENV[ENV_WORLDCAT_API_KEY]
          from_env || rails_worldcat_api_key
        end

        # The default base URL, converted with `uri_or_nil`.
        def default_worldcat_base_uri
          uri_or_nil(default_worldcat_base_url)
        end

        # First available of: environment variable, Rails config, hard-coded default.
        def default_worldcat_base_url
          from_env = ENV[ENV_WORLDCAT_BASE_URL]
          from_env || rails_worldcat_base_url || DEFAULT_WORLDCAT_BASE_URL
        end

        # The `worldcat_base_url` attribute of the Rails config, if available.
        def rails_worldcat_base_url
          config = rails_config
          return unless config
          return unless config.respond_to?(:worldcat_base_url)

          config.worldcat_base_url
        end

        # The `worldcat_api_key` attribute of the Rails config, if available.
        def rails_worldcat_api_key
          config = rails_config
          return unless config
          return unless config.respond_to?(:worldcat_api_key)

          config.worldcat_api_key
        end

        # The Rails application config, or nil when Rails (or its application) is absent.
        def rails_config
          return unless defined?(Rails)

          app = Rails.application
          app ? app.config : nil
        end
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
+ require 'nokogiri'
2
+ require 'berkeley_library/util'
3
+ require 'berkeley_library/location/oclc_number'
4
+ require 'berkeley_library/location/world_cat/symbols'
5
+
6
module BerkeleyLibrary
  module Location
    module WorldCat
      # Asks WorldCat which of a given set of institutions hold the item with
      # a given OCLC number.
      # @see https://developer.api.oclc.org/wcv1#/Holdings
      class LibrariesRequest
        include BerkeleyLibrary::Util

        # XPath selecting the institution identifier values in the response XML.
        XPATH_INST_ID_VALS = '/holdings/holding/institutionIdentifier/value'.freeze

        attr_reader :oclc_number, :symbols

        # @param oclc_number [String] a non-blank OCLC number
        # @param symbols institution symbols to ask about (default: Symbols::ALL)
        # @raise [ArgumentError] if either argument fails validation
        def initialize(oclc_number, symbols: Symbols::ALL)
          @oclc_number = OCLCNumber.ensure_oclc_number!(oclc_number)
          @symbols = Symbols.ensure_valid!(symbols)
        end

        # @return the request URI (built once and cached)
        def uri
          @uri ||= begin
            base = libraries_base_uri
            URIs.append(base, URIs.path_escape(oclc_number))
          end
        end

        # Query parameters sent with the request (built once and cached).
        # TODO: Check that this works w/more than 10 results
        # See https://developer.api.oclc.org/wcv1#/Holdings
        def params
          @params ||= {
            'oclcsymbol' => symbols.join(','),
            'servicelevel' => 'full',
            'frbrGrouping' => 'off',
            'wskey' => Config.api_key
          }
        end

        # Performs the request.
        # @return [Array<String>] the institution symbols found in the
        #   response, restricted to those that were requested
        def execute
          body = URIs.get(uri, params: params, log: false)
          found = inst_symbols_from(body)
          found.select { |symbol| symbols.include?(symbol) } # just in case
        end

        private

        # @return the configured base URI with "catalog/content/libraries" appended
        def libraries_base_uri
          URIs.append(Config.base_uri, 'catalog', 'content', 'libraries')
        end

        # Extracts the stripped text of every institution identifier value.
        def inst_symbols_from(xml)
          nodes = Nokogiri::XML(xml).xpath(XPATH_INST_ID_VALS)
          nodes.map { |node| node.text.strip }
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
module BerkeleyLibrary
  module Location
    module WorldCat
      # The WorldCat institution symbols this library knows about, plus
      # validation helpers. All methods are callable on the module itself.
      module Symbols
        NRLF = %w[ZAP ZAPSP].freeze
        SRLF = %w[HH0 ZAS ZASSP].freeze
        RLF = (NRLF + SRLF).freeze

        UC = %w[CLU CRU CUI CUN CUS CUT CUV CUX CUY CUZ MERUC].freeze
        ALL = (RLF + UC).freeze

        class << self
          include Symbols
        end

        # @param sym the candidate symbol
        # @return [Boolean] whether the symbol is one of ALL
        def valid?(sym)
          ALL.include?(sym)
        end

        # Validates a list of institution symbols.
        # @param symbols a collection responding to #reject and #empty?
        # @return the same collection, unchanged
        # @raise [ArgumentError] if the argument is not list-like, is empty,
        #   or contains any element that is not in ALL
        def ensure_valid!(symbols)
          raise ArgumentError, "Not a list of institution symbols: #{symbols.inspect}" unless array_like?(symbols)
          raise ArgumentError, 'No institution symbols provided' if symbols.empty?

          invalid = symbols.reject { |s| Symbols.valid?(s) }
          # Use empty? rather than any?: a block-less any? ignores nil/false
          # elements, which let lists such as [nil] slip through as "valid".
          return symbols if invalid.empty?

          raise ArgumentError, "Invalid institution symbol(s): #{invalid.map(&:inspect).join(', ')}"
        end

        private

        # Duck-type check for the two methods ensure_valid! relies on.
        def array_like?(a)
          %i[reject empty?].all? { |m| a.respond_to?(m) }
        end
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ Dir.glob(File.expand_path('world_cat/*.rb', __dir__)).each(&method(:require))
@@ -0,0 +1,29 @@
1
+ require 'marcel'
2
+ require 'rubyXL'
3
+ require 'berkeley_library/location/constants'
4
+ require 'berkeley_library/util/xlsx/spreadsheet'
5
+
6
module BerkeleyLibrary
  module Location
    # Reads OCLC numbers from the "OCLC Number" column of an .xlsx spreadsheet.
    class XLSXReader
      include Constants

      # @return the wrapped spreadsheet and the index of its OCLC number column
      attr_reader :ss, :oclc_col_index

      # @param xlsx_path path to the spreadsheet file
      # @raise [ArgumentError] if the spreadsheet has no OCLC number column
      #   (raised by Spreadsheet#find_column_index_by_header!)
      def initialize(xlsx_path)
        @ss = Util::XLSX::Spreadsheet.new(xlsx_path)
        @oclc_col_index = ss.find_column_index_by_header!(OCLC_COL_HEADER)
      end

      # Yields each non-blank value of the OCLC number column, converted to a
      # String with #to_s, skipping the header row.
      # @return an Enumerator when no block is given
      def each_oclc_number
        return enum_for(:each_oclc_number) unless block_given?

        ss.each_value(oclc_col_index, include_header: false) do |raw_value|
          text = raw_value.to_s
          yield text unless text.strip == ''
        end
      end
    end
  end
end
@@ -0,0 +1,140 @@
1
+ require 'berkeley_library/logging'
2
+ require 'berkeley_library/location/constants'
3
+
4
module BerkeleyLibrary
  module Location
    # Writes location results into a spreadsheet, matching each result to its
    # row by OCLC number and filling in the NRLF / SRLF / Other UC /
    # Hathi Trust columns (and the error columns when a result has an error).
    class XLSXWriter
      include Constants
      include BerkeleyLibrary::Logging

      COL_NRLF = 'NRLF'.freeze
      COL_SRLF = 'SRLF'.freeze
      COL_OTHER_UC = 'Other UC'.freeze
      COL_WC_ERROR = 'WorldCat Error'.freeze

      COL_HATHI_TRUST = 'Hathi Trust'.freeze
      COL_HATHI_TRUST_ERROR = "#{COL_HATHI_TRUST} Error".freeze

      # Cell values written to mark a holding
      V_NRLF = 'nrlf'.freeze
      V_SRLF = 'srlf'.freeze

      attr_reader :ss, :rlf, :uc, :hathi_trust

      # @param ss the spreadsheet wrapper to write to
      # @param rlf whether to write the NRLF and SRLF columns
      # @param uc whether to write the Other UC column
      # @param hathi_trust whether to write the Hathi Trust column
      def initialize(ss, rlf: true, uc: true, hathi_trust: true)
        @ss, @rlf, @uc, @hathi_trust = ss, rlf, uc, hathi_trust

        ensure_columns!
      end

      # Writes one result into the row holding its OCLC number.
      # @param result an object with the LocationResult readers
      # @raise [ArgumentError] if the OCLC number is not in the spreadsheet
      def <<(result)
        target_row = row_index_for(result.oclc_number)
        write_wc_cols(target_row, result) if rlf || uc
        write_ht_cols(target_row, result) if hathi_trust
      end

      private

      # WorldCat-derived columns: error first, then RLF and UC as enabled.
      def write_wc_cols(r_index, result)
        write_wc_error(r_index, result)
        write_rlf(r_index, result) if rlf
        write_uc(r_index, result) if uc
      end

      # HathiTrust-derived columns: error first, then the record URL.
      def write_ht_cols(r_index, result)
        write_ht_error(r_index, result)
        write_hathi(r_index, result)
      end

      # Looks up (and thereby creates, if missing) the enabled result columns
      # in a fixed order: NRLF, SRLF, Other UC, Hathi Trust.
      def ensure_columns!
        accessors = []
        accessors.push(:nrlf_col_index, :srlf_col_index) if rlf
        accessors << :uc_col_index if uc
        accessors << :ht_col_index if hathi_trust
        accessors.each { |accessor| send(accessor) }
      end

      def row_index_for(oclc_number)
        found = row_index_by_oclc_number[oclc_number]
        raise ArgumentError, "Unknown OCLC number: #{oclc_number}" unless found

        found
      end

      def write_rlf(r_index, result)
        ss.set_value_at(r_index, nrlf_col_index, V_NRLF) if result.nrlf?
        ss.set_value_at(r_index, srlf_col_index, V_SRLF) if result.srlf?
      end

      # Writes the UC symbols as a comma-separated list; nothing if there are none.
      def write_uc(r_index, result)
        held_by = result.uc_symbols
        return if held_by.empty?

        ss.set_value_at(r_index, uc_col_index, held_by.join(','))
      end

      def write_hathi(r_index, result)
        url = result.ht_record_url
        ss.set_value_at(r_index, ht_col_index, url) if url
      end

      # The WorldCat error column is only created once an error is written.
      def write_wc_error(r_index, result)
        error = result.wc_error
        ss.set_value_at(r_index, wc_err_col_index, error) if error
      end

      # The Hathi Trust error column is only created once an error is written.
      def write_ht_error(r_index, result)
        error = result.ht_error
        ss.set_value_at(r_index, ht_err_col_index, error) if error
      end

      def oclc_col_index
        @oclc_col_index ||= ss.find_column_index_by_header!(OCLC_COL_HEADER)
      end

      def nrlf_col_index
        @nrlf_col_index ||= ss.ensure_column!(COL_NRLF)
      end

      def srlf_col_index
        @srlf_col_index ||= ss.ensure_column!(COL_SRLF)
      end

      def uc_col_index
        @uc_col_index ||= ss.ensure_column!(COL_OTHER_UC)
      end

      def wc_err_col_index
        @wc_err_col_index ||= ss.ensure_column!(COL_WC_ERROR)
      end

      def ht_col_index
        @ht_col_index ||= ss.ensure_column!(COL_HATHI_TRUST)
      end

      def ht_err_col_index
        @ht_err_col_index ||= ss.ensure_column!(COL_HATHI_TRUST_ERROR)
      end

      # Map from OCLC number (as a String) to row index, built on first use.
      def row_index_by_oclc_number
        @row_index_by_oclc_number ||= build_row_index
      end

      # Scans the OCLC number column; the first row holding a given number
      # wins, and later duplicates are logged and skipped.
      def build_row_index
        index = {}
        # Start at 1 to skip header row
        (1...ss.row_count).each do |r_index|
          raw = ss.value_at(r_index, oclc_col_index)
          next unless raw

          number = raw.to_s
          if index.key?(number)
            logger.warn("Skipping duplicate OCLC number #{number} in row #{r_index}")
          else
            index[number] = r_index
          end
        end
        index
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ Dir.glob(File.expand_path('location/*.rb', __dir__)).each(&method(:require))
@@ -0,0 +1,31 @@
1
+ require 'rubyXL'
2
+
3
module BerkeleyLibrary
  module Util
    module XLSX
      # Additions to RubyXL::Cell, applied with `prepend` below.
      module RubyXLCellExtensions
        # Workaround for https://github.com/weshatheleopard/rubyXL/issues/441
        #
        # After the regular initializer has run, fills in the worksheet and
        # row from `params` if they are still unset and `params` supports #[].
        def initialize(params = nil)
          super

          if params.respond_to?(:[])
            @worksheet ||= params[:worksheet]
            self.row ||= params[:row] # NOTE: not an instance variable
          end
        end

        # @return [Boolean] true when the cell's value is nil, or responds to
        #   #strip and is empty once stripped
        def blank?
          content = value
          return true if content.nil?

          content.respond_to?(:strip) && content.strip.empty?
        end
      end
    end
  end
end

module RubyXL
  class Cell
    prepend BerkeleyLibrary::Util::XLSX::RubyXLCellExtensions
  end
end
@@ -0,0 +1,26 @@
1
+ require 'rubyXL'
2
+ require 'berkeley_library/util/xlsx/rubyxl_cell_extensions'
3
+
4
module BerkeleyLibrary
  module Util
    module XLSX
      # Additions to RubyXL::Worksheet, applied with `prepend` below.
      module RubyXLWorksheetExtensions
        # Finds the index of the first column that is blank in every row,
        # i.e. the largest "index just past the last non-blank cell" over all
        # rows. Nil rows are skipped.
        #
        # @return [Integer] the column index (0 for a sheet with no content)
        def first_blank_column_index
          sheet_data.rows.inject(0) do |first_blank_c_index, row|
            next first_blank_c_index unless row

            # A row's cell list can contain nil entries; count those as blank
            # explicitly instead of relying on nil responding to #blank?.
            trailing_blank_count = row.cells.reverse_each.take_while { |cell| cell.nil? || cell.blank? }.size
            row_first_blank_c_index = row.size - trailing_blank_count
            [first_blank_c_index, row_first_blank_c_index].max
          end
        end
      end
    end
  end
end

module RubyXL
  class Worksheet
    prepend BerkeleyLibrary::Util::XLSX::RubyXLWorksheetExtensions
  end
end
@@ -0,0 +1,158 @@
1
+ require 'marcel'
2
+ require 'rubyXL'
3
+ require 'rubyXL/convenience_methods/cell'
4
+ require 'rubyXL/convenience_methods/worksheet'
5
+ require 'zip'
6
+ require 'berkeley_library/util/xlsx'
7
+
8
module BerkeleyLibrary
  module Util
    module XLSX
      # Convenience wrapper around RubyXL::Workbook that works with the first
      # worksheet only and addresses cells by zero-based row/column index.
      class Spreadsheet

        # .xlsx format, a.k.a. "Office Open XML Workbook" spreadsheet
        MIME_TYPE_OOXML_WB = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'.freeze

        # path to Excel worksheet file in zipped OOXML archive
        # (anchored with \A and \z so the whole entry name must match)
        RE_EXCEL_WORKSHEET_ZIP_ENTRY = %r{\Axl/worksheets/[^/.]+\.xml\z}

        DEFAULT_WORKSHEET_NAME = 'Sheet1'.freeze

        attr_reader :workbook, :xlsx_path

        # @param xlsx_path path of an existing .xlsx file to open, or nil to
        #   start from a new, empty workbook
        # @raise [ArgumentError] if the file does not look like an .xlsx workbook
        def initialize(xlsx_path = nil)
          @workbook = xlsx_path ? ensure_xlsx_workbook!(xlsx_path) : RubyXL::Workbook.new
          @xlsx_path = xlsx_path
        end

        # Forwards to the workbook's #stream. Written as an explicit method so
        # this class does not need ActiveSupport's `delegate` macro, which
        # this file never requires.
        def stream(...)
          workbook.stream(...)
        end

        # Writes the workbook to the given path and remembers it as #xlsx_path.
        def save_as(new_xlsx_path)
          workbook.write(new_xlsx_path)
          @xlsx_path = new_xlsx_path
        end

        # @return the first worksheet of the workbook
        def worksheet
          @worksheet ||= workbook.worksheets[0]
        end

        # @return the first row of the worksheet, adding one if it has none
        def header_row
          @header_row ||= (worksheet[0] || worksheet.add_row)
        end

        # @return [Integer, nil] index of the header cell whose value equals
        #   `header`, or nil if there is none
        def find_column_index_by_header(header)
          find_column_index(header_row, header)
        end

        # Like #find_column_index_by_header, but raises when nothing is found.
        # @raise [ArgumentError] if no header cell has the given value
        def find_column_index_by_header!(header)
          c_index = find_column_index_by_header(header)
          return c_index if c_index

          raise ArgumentError, "#{header.inspect} column not found"
        end

        # Finds the index of the first matching cell in a row.
        #
        # With no extra argument, yields each cell (possibly nil) to the given
        # block and returns the index of the first for which it is truthy.
        # With one extra argument, matches cells whose value equals it.
        #
        # @raise [ArgumentError] if more than one extra argument is given
        def find_column_index(row, *args)
          case args.size
          when 0
            (0...row.size).find { |c_index| yield row[c_index] }
          when 1
            find_column_index(row) { |cell| cell&.value == args[0] }
          else
            raise ArgumentError, "Wrong number of arguments (given #{args.size}, expected 0..1)"
          end
        end

        # Yields the value of each cell in a column, top to bottom (nil for
        # missing rows or cells).
        # @param include_header whether to start at row 0 rather than row 1
        # @return an Enumerator when no block is given
        def each_value(c_index, include_header: true)
          return to_enum(:each_value, c_index, include_header:) unless block_given?

          start_index = include_header ? 0 : 1
          (start_index...row_count).each do |r_index|
            yield value_at(r_index, c_index)
          end
        end

        # @return the cell at the given position, or nil if the row or cell is missing
        def cell_at(r_index, c_index)
          return unless (row = worksheet[r_index])

          row[c_index]
        end

        # @return the value of the cell at the given position, or nil if there is no cell
        def value_at(r_index, c_index)
          cell_at(r_index, c_index)&.value
        end

        # Sets the value at the given position, changing the existing cell's
        # contents or adding a new cell as needed.
        def set_value_at(r_index, c_index, value)
          if (cell = cell_at(r_index, c_index))
            cell.change_contents(value)
          else
            worksheet.add_cell(r_index, c_index, value)
          end
        end

        # @return the worksheet's rows (entries may be nil)
        def rows
          sheet_data.rows
        end

        # @return [Integer] the number of rows in the worksheet's sheet data
        def row_count
          sheet_data.size
        end

        # @param r_index if given, count the cells of that row only (0 if the
        #   row is missing); otherwise return the size of the longest row
        def column_count(r_index = nil)
          if r_index
            return (row = worksheet[r_index]) ? row.size : 0
          end

          rows.inject(0) do |cc_max, r|
            r ? [r.size, cc_max].max : cc_max
          end
        end

        # Returns the index of the column with the given header, adding the
        # header in the first blank column if it does not exist yet.
        def ensure_column!(header)
          c_index_existing = find_column_index_by_header(header)
          return c_index_existing if c_index_existing

          c_index_next = worksheet.first_blank_column_index
          c_index_next.tap { |cc| worksheet.add_cell(0, cc, header) }
        end

        private

        def sheet_data
          worksheet.sheet_data
        end

        def ensure_xlsx_workbook!(xlsx_path)
          # RubyXL will try to parse an Excel 95 or 97 file (which are still
          # zip-based) but then choke when it tries to read the worksheet, so
          # we explicitly check the MIME type here
          check_mime_type!(xlsx_path)

          RubyXL::Parser.parse(xlsx_path)
        end

        # @raise [ArgumentError] unless the file is detected as an OOXML
        #   workbook, or as a ZIP archive containing at least one worksheet entry
        def check_mime_type!(xlsx_path)
          xlsx_pathname = Pathname.new(xlsx_path)
          mime_type = Marcel::MimeType.for(xlsx_pathname)

          # TODO: test w/application/vnd.ms-excel.sheet.macroenabled.12
          return if Marcel::Magic.child?(mime_type, MIME_TYPE_OOXML_WB)

          # Marcel fails to recognize some OOXML files, probably due to unexpected entry order
          # and/or large entries pushing the signature it's looking for too deep into the file
          return ensure_xlsx!(xlsx_path) if Marcel::Magic.child?(mime_type, 'application/zip')

          raise ArgumentError, "Expected Excel Workbook (.xlsx), got #{mime_type}: #{xlsx_path}"
        end

        # @raise [ArgumentError] unless the ZIP archive has an entry whose
        #   name matches RE_EXCEL_WORKSHEET_ZIP_ENTRY
        def ensure_xlsx!(zipfile_path)
          has_worksheet = Zip::File.open(zipfile_path) do |zf|
            zf.any? { |entry| RE_EXCEL_WORKSHEET_ZIP_ENTRY.match?(entry.name) }
          end
          return if has_worksheet

          raise ArgumentError, "No Excel worksheets found in ZIP archive #{zipfile_path}"
        end
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ Dir.glob(File.expand_path('xlsx/*.rb', __dir__)).each(&method(:require))
metadata ADDED
@@ -0,0 +1,313 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: berkeley_library-location
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.0.0
5
+ platform: ruby
6
+ authors:
7
+ - David Moles
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-06-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: berkeley_library-logging
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.2'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.2'
27
+ - !ruby/object:Gem::Dependency
28
+ name: berkeley_library-util
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.1'
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: 0.1.9
37
+ type: :runtime
38
+ prerelease: false
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - "~>"
42
+ - !ruby/object:Gem::Version
43
+ version: '0.1'
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 0.1.9
47
+ - !ruby/object:Gem::Dependency
48
+ name: marcel
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: 1.0.2
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: 1.0.2
61
+ - !ruby/object:Gem::Dependency
62
+ name: rest-client
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '2.1'
68
+ type: :runtime
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '2.1'
75
+ - !ruby/object:Gem::Dependency
76
+ name: rubyXL
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '3.4'
82
+ type: :runtime
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '3.4'
89
+ - !ruby/object:Gem::Dependency
90
+ name: bundle-audit
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - "~>"
94
+ - !ruby/object:Gem::Version
95
+ version: '0.1'
96
+ type: :development
97
+ prerelease: false
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - "~>"
101
+ - !ruby/object:Gem::Version
102
+ version: '0.1'
103
+ - !ruby/object:Gem::Dependency
104
+ name: ci_reporter_rspec
105
+ requirement: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: '1.0'
110
+ type: :development
111
+ prerelease: false
112
+ version_requirements: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - "~>"
115
+ - !ruby/object:Gem::Version
116
+ version: '1.0'
117
+ - !ruby/object:Gem::Dependency
118
+ name: colorize
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '0.8'
124
+ type: :development
125
+ prerelease: false
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - "~>"
129
+ - !ruby/object:Gem::Version
130
+ version: '0.8'
131
+ - !ruby/object:Gem::Dependency
132
+ name: dotenv
133
+ requirement: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - "~>"
136
+ - !ruby/object:Gem::Version
137
+ version: '2.7'
138
+ type: :development
139
+ prerelease: false
140
+ version_requirements: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - "~>"
143
+ - !ruby/object:Gem::Version
144
+ version: '2.7'
145
+ - !ruby/object:Gem::Dependency
146
+ name: rake
147
+ requirement: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - "~>"
150
+ - !ruby/object:Gem::Version
151
+ version: '13.0'
152
+ type: :development
153
+ prerelease: false
154
+ version_requirements: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - "~>"
157
+ - !ruby/object:Gem::Version
158
+ version: '13.0'
159
+ - !ruby/object:Gem::Dependency
160
+ name: rspec
161
+ requirement: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - "~>"
164
+ - !ruby/object:Gem::Version
165
+ version: '3.10'
166
+ type: :development
167
+ prerelease: false
168
+ version_requirements: !ruby/object:Gem::Requirement
169
+ requirements:
170
+ - - "~>"
171
+ - !ruby/object:Gem::Version
172
+ version: '3.10'
173
+ - !ruby/object:Gem::Dependency
174
+ name: rubocop
175
+ requirement: !ruby/object:Gem::Requirement
176
+ requirements:
177
+ - - '='
178
+ - !ruby/object:Gem::Version
179
+ version: '1.39'
180
+ type: :development
181
+ prerelease: false
182
+ version_requirements: !ruby/object:Gem::Requirement
183
+ requirements:
184
+ - - '='
185
+ - !ruby/object:Gem::Version
186
+ version: '1.39'
187
+ - !ruby/object:Gem::Dependency
188
+ name: rubocop-rake
189
+ requirement: !ruby/object:Gem::Requirement
190
+ requirements:
191
+ - - '='
192
+ - !ruby/object:Gem::Version
193
+ version: 0.6.0
194
+ type: :development
195
+ prerelease: false
196
+ version_requirements: !ruby/object:Gem::Requirement
197
+ requirements:
198
+ - - '='
199
+ - !ruby/object:Gem::Version
200
+ version: 0.6.0
201
+ - !ruby/object:Gem::Dependency
202
+ name: rubocop-rspec
203
+ requirement: !ruby/object:Gem::Requirement
204
+ requirements:
205
+ - - '='
206
+ - !ruby/object:Gem::Version
207
+ version: 2.4.0
208
+ type: :development
209
+ prerelease: false
210
+ version_requirements: !ruby/object:Gem::Requirement
211
+ requirements:
212
+ - - '='
213
+ - !ruby/object:Gem::Version
214
+ version: 2.4.0
215
+ - !ruby/object:Gem::Dependency
216
+ name: ruby-prof
217
+ requirement: !ruby/object:Gem::Requirement
218
+ requirements:
219
+ - - "~>"
220
+ - !ruby/object:Gem::Version
221
+ version: 0.17.0
222
+ type: :development
223
+ prerelease: false
224
+ version_requirements: !ruby/object:Gem::Requirement
225
+ requirements:
226
+ - - "~>"
227
+ - !ruby/object:Gem::Version
228
+ version: 0.17.0
229
+ - !ruby/object:Gem::Dependency
230
+ name: simplecov
231
+ requirement: !ruby/object:Gem::Requirement
232
+ requirements:
233
+ - - "~>"
234
+ - !ruby/object:Gem::Version
235
+ version: '0.21'
236
+ type: :development
237
+ prerelease: false
238
+ version_requirements: !ruby/object:Gem::Requirement
239
+ requirements:
240
+ - - "~>"
241
+ - !ruby/object:Gem::Version
242
+ version: '0.21'
243
+ - !ruby/object:Gem::Dependency
244
+ name: webmock
245
+ requirement: !ruby/object:Gem::Requirement
246
+ requirements:
247
+ - - "~>"
248
+ - !ruby/object:Gem::Version
249
+ version: '3.12'
250
+ type: :development
251
+ prerelease: false
252
+ version_requirements: !ruby/object:Gem::Requirement
253
+ requirements:
254
+ - - "~>"
255
+ - !ruby/object:Gem::Version
256
+ version: '3.12'
257
+ description: A collection of location-related utilities for the UC Berkeley Library
258
+ email: dmoles@berkeley.edu
259
+ executables: []
260
+ extensions: []
261
+ extra_rdoc_files: []
262
+ files:
263
+ - CHANGES.md
264
+ - LICENSE.md
265
+ - README.md
266
+ - lib/berkeley_library/location.rb
267
+ - lib/berkeley_library/location/constants.rb
268
+ - lib/berkeley_library/location/hathi_trust.rb
269
+ - lib/berkeley_library/location/hathi_trust/config.rb
270
+ - lib/berkeley_library/location/hathi_trust/record_url_batch_request.rb
271
+ - lib/berkeley_library/location/hathi_trust/record_url_request.rb
272
+ - lib/berkeley_library/location/hathi_trust/record_url_request_base.rb
273
+ - lib/berkeley_library/location/location_result.rb
274
+ - lib/berkeley_library/location/module_info.rb
275
+ - lib/berkeley_library/location/oclc_number.rb
276
+ - lib/berkeley_library/location/world_cat.rb
277
+ - lib/berkeley_library/location/world_cat/config.rb
278
+ - lib/berkeley_library/location/world_cat/libraries_request.rb
279
+ - lib/berkeley_library/location/world_cat/symbols.rb
280
+ - lib/berkeley_library/location/xlsx_reader.rb
281
+ - lib/berkeley_library/location/xlsx_writer.rb
282
+ - lib/berkeley_library/util/xlsx.rb
283
+ - lib/berkeley_library/util/xlsx/rubyxl_cell_extensions.rb
284
+ - lib/berkeley_library/util/xlsx/rubyxl_worksheet_extensions.rb
285
+ - lib/berkeley_library/util/xlsx/spreadsheet.rb
286
+ homepage: https://github.com/BerkeleyLibrary/location
287
+ licenses:
288
+ - MIT
289
+ metadata:
290
+ homepage_uri: https://github.com/BerkeleyLibrary/location
291
+ source_code_uri: https://github.com/BerkeleyLibrary/location
292
+ changelog_uri: https://github.com/BerkeleyLibrary/location/blob/main/CHANGES.md
293
+ rubygems_mfa_required: 'true'
294
+ post_install_message:
295
+ rdoc_options: []
296
+ require_paths:
297
+ - lib
298
+ required_ruby_version: !ruby/object:Gem::Requirement
299
+ requirements:
300
+ - - ">="
301
+ - !ruby/object:Gem::Version
302
+ version: 3.1.0
303
+ required_rubygems_version: !ruby/object:Gem::Requirement
304
+ requirements:
305
+ - - ">="
306
+ - !ruby/object:Gem::Version
307
+ version: '0'
308
+ requirements: []
309
+ rubygems_version: 3.3.25
310
+ signing_key:
311
+ specification_version: 4
312
+ summary: Location-related utilities for the UC Berkeley Library
313
+ test_files: []