berkeley_library-location 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: af0453d4f60ad7276b64c48937b36cc76d250eb2a41ff23914f30d7c35edbf85
4
+ data.tar.gz: 2cb9fb72635d239f6b6726f4278b555913b9f209dddb43dbeb03072b495de3d1
5
+ SHA512:
6
+ metadata.gz: 5a9dc82778e93207bc4f23e328b1699f4cb58abed4b7deda70cb8ec713b4c47a95d36be19fdab962b591df78c15182af841d111ef941a6d8ab2436b6587109fe
7
+ data.tar.gz: f6f78720f85d04530e656940d5dcaa1056cfd086de7ca280253d4a3d6aeca22a98236ad6ba2dc6666c4b3b6e765bd1e0cf3842dc69104e7b30eaa50685a69e9e
data/CHANGES.md ADDED
@@ -0,0 +1,33 @@
1
+ # 2.0.0 (2023-06-06)
2
+
3
+ - Rename from "holdings" to "location"
4
+
5
+ # 1.0.5 (2023-06-01)
6
+
7
+ - Update to `berkeley_library-util` 0.1.9 to handle non-ASCII OCLC numbers
8
+ - Fix issue where locating blank columns could fail on spreadsheets with nil rows
9
+
10
+ # 1.0.4 (2023-04-28)
11
+
12
+ - Escape OCLC numbers before constructing query URIs
13
+ (not an issue for correct OCLC numbers, but can be an issue in the event of bad data)
14
+
15
+ # 1.0.3 (2023-04-27)
16
+
17
+ - Fix issue requiring RubyXL extensions to be explicitly required
18
+
19
+ # 1.0.2 (2023-04-27)
20
+
21
+ - Overwrite existing blank columns when writing results to spreadsheet
22
+
23
+ # 1.0.1 (2023-04-26)
24
+
25
+ - First working RubyGems release
26
+
27
+ # 1.0.0 (2023-04-25)
28
+
29
+ - Initial (broken) RubyGems release
30
+
31
+ # 0.1.0 (2023-02-24)
32
+
33
+ - Initial release
data/LICENSE.md ADDED
@@ -0,0 +1,21 @@
1
+ # The MIT License (MIT)
2
+
3
+ Copyright © 2023 The Regents of the University of California
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a
6
+ copy of this software and associated documentation files (the “Software”),
7
+ to deal in the Software without restriction, including without limitation
8
+ the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
+ and/or sell copies of the Software, and to permit persons to whom the
10
+ Software is furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
+ DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,6 @@
1
+ # BerkeleyLibrary::Location
2
+
3
+ [![Build Status](https://github.com/BerkeleyLibrary/location/actions/workflows/build.yml/badge.svg?branch=main)](https://github.com/BerkeleyLibrary/location/actions/workflows/build.yml)
4
+ [![Gem Version](https://img.shields.io/gem/v/berkeley_library-location.svg)](https://github.com/BerkeleyLibrary/location/releases)
5
+
6
+ Miscellaneous location-related utilities for the UC Berkeley Library.
@@ -0,0 +1,8 @@
1
module BerkeleyLibrary
  module Location
    # Constants intended to be mixed into the spreadsheet-handling classes.
    module Constants
      # Header text identifying the spreadsheet column that holds OCLC numbers.
      OCLC_COL_HEADER = 'OCLC Number'.freeze
    end
  end
end
@@ -0,0 +1,56 @@
1
+ require 'berkeley_library/util'
2
+
3
module BerkeleyLibrary
  module Location
    module HathiTrust
      # Configuration for HathiTrust API access. The module includes itself
      # into its own singleton class, so the methods below can be called
      # directly on the module (e.g. `Config.base_uri`).
      module Config
        include BerkeleyLibrary::Util::URIs

        # Name of the environment variable consulted for the HathiTrust base URL.
        ENV_HATHITRUST_BASE_URL = 'LIT_HATHITRUST_BASE_URL'.freeze

        # Base URL used when neither the environment nor the Rails
        # configuration supplies one.
        DEFAULT_HATHITRUST_BASE_URL = 'https://catalog.hathitrust.org/api/'.freeze

        class << self
          include Config
        end

        # Returns the configured base URI, computing the default (environment,
        # then Rails config, then DEFAULT_HATHITRUST_BASE_URL) on first access.
        def base_uri
          @base_uri ||= default_hathitrust_base_uri
        end

        # Sets the base URI. The value is passed through `uri_or_nil`
        # (presumably a URI coercion helper from BerkeleyLibrary::Util::URIs).
        def base_uri=(value)
          @base_uri = uri_or_nil(value)
        end

        private

        # Forgets any cached or explicitly assigned base URI.
        def reset!
          return unless instance_variable_defined?(:@base_uri)

          remove_instance_variable(:@base_uri)
        end

        def default_hathitrust_base_uri
          uri_or_nil(default_hathitrust_base_url)
        end

        # Precedence: environment variable, Rails configuration, built-in default.
        def default_hathitrust_base_url
          from_env = ENV[ENV_HATHITRUST_BASE_URL]
          return from_env if from_env

          rails_hathitrust_base_url || DEFAULT_HATHITRUST_BASE_URL
        end

        # Returns the Rails-configured base URL, or nil when Rails is absent
        # or its config has no `hathitrust_base_url` setting.
        def rails_hathitrust_base_url
          config = rails_config
          config.hathitrust_base_url if config && config.respond_to?(:hathitrust_base_url)
        end

        # Returns the Rails application config, or nil when Rails is not
        # loaded or no application is present.
        def rails_config
          app = Rails.application if defined?(Rails)
          app.config if app
        end
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ require 'berkeley_library/location/hathi_trust/record_url_request_base'
2
+
3
module BerkeleyLibrary
  module Location
    module HathiTrust
      # Looks up HathiTrust record URLs for up to MAX_BATCH_SIZE OCLC numbers
      # with a single request.
      class RecordUrlBatchRequest
        include RecordUrlRequestBase

        # Per HathiTrust API docs: https://www.hathitrust.org/bib_api
        MAX_BATCH_SIZE = 20

        attr_reader :oclc_numbers

        # @param oclc_numbers [Array<String>] the OCLC numbers to look up
        # @raise [ArgumentError] if the list is nil, empty, longer than
        #   MAX_BATCH_SIZE, or contains an invalid OCLC number
        def initialize(oclc_numbers)
          @oclc_numbers = ensure_valid_oclc_numbers!(oclc_numbers)
        end

        # Performs the request.
        # @return [Hash] OCLC number => record URL, containing only those
        #   OCLC numbers for which a record URL was found
        def execute
          response_body = URIs.get(uri, log: false)
          record_urls_from(response_body)
        end

        # The request URI (memoized).
        def uri
          @uri ||= URIs.append(volumes_base_uri, 'json', URIs.path_escape(oclc_list))
        end

        private

        def ensure_valid_oclc_numbers!(oclc_numbers)
          # nil is reported as an ArgumentError rather than surfacing as a
          # NoMethodError from the `empty?` call
          raise ArgumentError, 'No OCLC numbers provided' if oclc_numbers.nil? || oclc_numbers.empty?

          if oclc_numbers.size > MAX_BATCH_SIZE
            raise ArgumentError, "Too many OCLC numbers; expected <= #{MAX_BATCH_SIZE}, was #{oclc_numbers.size}"
          end

          OCLCNumber.ensure_oclc_numbers!(oclc_numbers)
        end

        # Pipe-delimited list of `oclc:<number>` keys (memoized).
        def oclc_list
          @oclc_list ||= oclc_numbers.map { |oclc_number| key_for(oclc_number) }.join('|')
        end

        # The key under which the given OCLC number appears in the query and
        # in the JSON response.
        def key_for(oclc_number)
          "oclc:#{oclc_number}"
        end

        def record_urls_from(json_str)
          json = JSON.parse(json_str)
          oclc_numbers.filter_map do |oclc_num|
            next unless (entry = json[key_for(oclc_num)])

            record_url = find_record_url(entry, oclc_num)
            [oclc_num, record_url] if record_url
          end.to_h
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'berkeley_library/location/hathi_trust/record_url_request_base'
2
+
3
module BerkeleyLibrary
  module Location
    module HathiTrust
      # Looks up the HathiTrust record URL for a single OCLC number.
      class RecordUrlRequest
        include RecordUrlRequestBase

        attr_reader :oclc_number

        # @param oclc_number [String] the OCLC number to look up; validated
        #   with OCLCNumber.ensure_oclc_number!
        def initialize(oclc_number)
          @oclc_number = OCLCNumber.ensure_oclc_number!(oclc_number)
        end

        # Performs the request and returns the record URL found in the
        # response, or nil if there is none.
        def execute
          body = URIs.get(uri, log: false)
          record_url_from(body, oclc_number)
        end

        # The request URI (memoized): <volumes base>/oclc/<number>.json
        def uri
          @uri ||= begin
            base = volumes_base_uri
            URIs.append(base, 'oclc', URIs.path_escape("#{oclc_number}.json"))
          end
        end

        private

        # Parses the JSON response body and extracts the record URL.
        def record_url_from(json_str, oclc_number)
          find_record_url(JSON.parse(json_str), oclc_number)
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ require 'json'
2
+ require 'berkeley_library/util'
3
+ require 'berkeley_library/location/oclc_number'
4
+ require 'berkeley_library/location/hathi_trust/config'
5
+
6
module BerkeleyLibrary
  module Location
    module HathiTrust
      # Behavior shared by the single and batch HathiTrust record URL requests.
      module RecordUrlRequestBase
        include BerkeleyLibrary::Util

        protected

        # Base URI for "brief" volume lookups: <base>/volumes/brief
        def volumes_base_uri
          URIs.append(Config.base_uri, 'volumes', 'brief')
        end

        # Returns the `recordURL` of the first record in `json_obj['records']`
        # that lists the given OCLC number, or nil if there is none.
        def find_record_url(json_obj, oclc_number)
          records = json_obj['records']
          return unless records

          record = find_record(records, oclc_number)
          record['recordURL'] if record
        end

        # Finds the first record that lists the OCLC number among its `oclcs`
        # and has a `recordURL` key.
        def find_record(records, oclc_number)
          # The empty check runs before `values` is called, so an empty
          # collection of any kind is tolerated (presumably the API can
          # return an empty array here -- TODO confirm).
          return if records.empty?

          records.values.find do |rec|
            oclcs = rec['oclcs']
            next false unless oclcs

            oclcs.include?(oclc_number) && rec.key?('recordURL')
          end
        end
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ Dir.glob(File.expand_path('hathi_trust/*.rb', __dir__)).each(&method(:require))
@@ -0,0 +1,29 @@
1
+ require 'berkeley_library/location/world_cat/symbols'
2
+
3
module BerkeleyLibrary
  module Location
    # The outcome of looking up one OCLC number: the WorldCat institution
    # symbols and/or HathiTrust record URL found, plus any errors recorded.
    class LocationResult
      attr_reader :oclc_number, :wc_symbols, :ht_record_url, :wc_error, :ht_error

      # @param oclc_number the OCLC number this result describes
      # @param wc_symbols [Array] institution symbols found in WorldCat
      # @param wc_error an error from the WorldCat lookup, if any
      # @param ht_record_url the HathiTrust record URL, if any
      # @param ht_error an error from the HathiTrust lookup, if any
      def initialize(oclc_number, wc_symbols: [], wc_error: nil, ht_record_url: nil, ht_error: nil)
        @oclc_number = oclc_number
        @wc_symbols = wc_symbols
        @wc_error = wc_error
        @ht_record_url = ht_record_url
        @ht_error = ht_error
      end

      # @return [Boolean] whether any of the symbols is an NRLF symbol
      def nrlf?
        # `||=` would recompute every time the answer is false, so the
        # memoization is keyed on the variable being defined instead
        return @has_nrlf if defined?(@has_nrlf)

        @has_nrlf = wc_symbols.intersection(WorldCat::Symbols::NRLF).any?
      end

      # @return [Boolean] whether any of the symbols is an SRLF symbol
      def srlf?
        return @has_srlf if defined?(@has_srlf)

        @has_srlf = wc_symbols.intersection(WorldCat::Symbols::SRLF).any?
      end

      # @return [Array] the subset of the symbols listed in WorldCat::Symbols::UC
      def uc_symbols
        @uc_symbols ||= wc_symbols.intersection(WorldCat::Symbols::UC)
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
module BerkeleyLibrary
  module Location
    # Descriptive metadata (name, author, version, etc.) for this gem.
    module ModuleInfo
      NAME = 'berkeley_library-location'.freeze
      AUTHOR = 'David Moles'.freeze
      AUTHOR_EMAIL = 'dmoles@berkeley.edu'.freeze
      # Fixed typo: previously read "Locaton-related"
      SUMMARY = 'Location-related utilities for the UC Berkeley Library'.freeze
      DESCRIPTION = 'A collection of location-related utilities for the UC Berkeley Library'.freeze
      LICENSE = 'MIT'.freeze
      VERSION = '2.0.0'.freeze
      HOMEPAGE = 'https://github.com/BerkeleyLibrary/location'.freeze
    end
  end
end
@@ -0,0 +1,22 @@
1
module BerkeleyLibrary
  module Location
    # Validation helpers for OCLC numbers, which are handled as strings.
    module OCLCNumber
      class << self
        # Validates a single OCLC number.
        #
        # @param oclc_number [String] the value to check
        # @return [String] the argument itself, if valid
        # @raise [ArgumentError] if the value is nil, not a String, empty,
        #   or consists only of whitespace
        def ensure_oclc_number!(oclc_number)
          problem =
            if oclc_number.nil?
              'OCLC number cannot be nil'
            elsif !oclc_number.is_a?(String)
              "OCLC number #{oclc_number.inspect} is not a string"
            elsif oclc_number.empty?
              'OCLC number cannot be empty'
            elsif oclc_number.strip.empty?
              "OCLC number #{oclc_number.inspect} must not be blank"
            end
          raise ArgumentError, problem if problem

          oclc_number
        end

        # Validates every element of a list of OCLC numbers.
        #
        # @param oclc_numbers [Array<String>] the values to check
        # @return the argument itself, if every element is valid
        # @raise [ArgumentError] on the first invalid element
        def ensure_oclc_numbers!(oclc_numbers)
          oclc_numbers.each { |candidate| ensure_oclc_number!(candidate) }
          oclc_numbers
        end
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
+ require 'berkeley_library/util/uris'
2
+
3
module BerkeleyLibrary
  module Location
    module WorldCat
      # Configuration for WorldCat API access. The module includes itself
      # into its own singleton class, so the methods below can be called
      # directly on the module (e.g. `Config.api_key`).
      module Config
        include BerkeleyLibrary::Util::URIs

        # The environment variable from which to read the WorldCat API key.
        ENV_WORLDCAT_API_KEY = 'LIT_WORLDCAT_API_KEY'.freeze

        # The environment variable from which to read the WorldCat base URL.
        ENV_WORLDCAT_BASE_URL = 'LIT_WORLDCAT_BASE_URL'.freeze

        # The default WorldCat base URL, if ENV_WORLDCAT_BASE_URL is not set.
        DEFAULT_WORLDCAT_BASE_URL = 'https://www.worldcat.org/webservices/'.freeze

        class << self
          include Config
        end

        # Sets the WorldCat API key.
        # @param value [String] the API key.
        attr_writer :api_key

        # Gets the WorldCat API key, falling back to the environment and then
        # to the Rails configuration on first access.
        # @return [String, nil] the WorldCat API key, or `nil` if not set.
        def api_key
          @api_key ||= default_worldcat_api_key
        end

        # Returns the configured base URI, computing the default (environment,
        # then Rails config, then DEFAULT_WORLDCAT_BASE_URL) on first access.
        def base_uri
          @base_uri ||= default_worldcat_base_uri
        end

        # Sets the base URI. The value is passed through `uri_or_nil`
        # (presumably a URI coercion helper from BerkeleyLibrary::Util::URIs).
        def base_uri=(value)
          @base_uri = uri_or_nil(value)
        end

        private

        # Forgets any cached or explicitly assigned API key and base URI.
        def reset!
          [:@api_key, :@base_uri].each do |ivar|
            remove_instance_variable(ivar) if instance_variable_defined?(ivar)
          end
        end

        def default_worldcat_api_key
          from_env = ENV[ENV_WORLDCAT_API_KEY]
          return from_env if from_env

          rails_worldcat_api_key
        end

        def default_worldcat_base_uri
          uri_or_nil(default_worldcat_base_url)
        end

        # Precedence: environment variable, Rails configuration, built-in default.
        def default_worldcat_base_url
          from_env = ENV[ENV_WORLDCAT_BASE_URL]
          return from_env if from_env

          rails_worldcat_base_url || DEFAULT_WORLDCAT_BASE_URL
        end

        # Returns the Rails-configured base URL, or nil when unavailable.
        def rails_worldcat_base_url
          config = rails_config
          config.worldcat_base_url if config && config.respond_to?(:worldcat_base_url)
        end

        # Returns the Rails-configured API key, or nil when unavailable.
        def rails_worldcat_api_key
          config = rails_config
          config.worldcat_api_key if config && config.respond_to?(:worldcat_api_key)
        end

        # Returns the Rails application config, or nil when Rails is not
        # loaded or no application is present.
        def rails_config
          app = Rails.application if defined?(Rails)
          app.config if app
        end
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
+ require 'nokogiri'
2
+ require 'berkeley_library/util'
3
+ require 'berkeley_library/location/oclc_number'
4
+ require 'berkeley_library/location/world_cat/symbols'
5
+
6
module BerkeleyLibrary
  module Location
    module WorldCat
      # Queries WorldCat for which of a given set of institutions (identified
      # by OCLC symbol) hold the item with a given OCLC number.
      #
      # @see https://developer.api.oclc.org/wcv1#/Holdings
      class LibrariesRequest
        include BerkeleyLibrary::Util

        # XPath locating the institution identifier values in the response.
        XPATH_INST_ID_VALS = '/holdings/holding/institutionIdentifier/value'.freeze

        attr_reader :oclc_number, :symbols

        # @param oclc_number [String] the OCLC number to look up
        # @param symbols [Array<String>] the institution symbols to ask about
        # @raise [ArgumentError] if either argument fails validation
        def initialize(oclc_number, symbols: Symbols::ALL)
          @oclc_number = OCLCNumber.ensure_oclc_number!(oclc_number)
          @symbols = Symbols.ensure_valid!(symbols)
        end

        # The request URI (memoized): <libraries base>/<escaped OCLC number>
        def uri
          @uri ||= begin
            base = libraries_base_uri
            URIs.append(base, URIs.path_escape(oclc_number))
          end
        end

        # The query parameters (memoized).
        #
        # TODO: Check that this works w/more than 10 results
        # See https://developer.api.oclc.org/wcv1#/Holdings
        def params
          @params ||= {
            'oclcsymbol' => symbols.join(','),
            'servicelevel' => 'full',
            'frbrGrouping' => 'off',
            'wskey' => Config.api_key
          }
        end

        # Performs the request.
        # @return [Array<String>] the symbols found in the response,
        #   restricted to those that were asked about
        def execute
          body = URIs.get(uri, params: params, log: false)
          found = inst_symbols_from(body)
          found.select { |sym| symbols.include?(sym) } # just in case
        end

        private

        def libraries_base_uri
          URIs.append(Config.base_uri, 'catalog', 'content', 'libraries')
        end

        # Extracts the whitespace-stripped text of every institution
        # identifier value in the XML response.
        def inst_symbols_from(xml)
          Nokogiri::XML(xml).xpath(XPATH_INST_ID_VALS).map { |node| node.text.strip }
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
module BerkeleyLibrary
  module Location
    module WorldCat
      # The OCLC institution symbols this library knows about, with helpers
      # for validating them. The module includes itself into its singleton
      # class, so the methods can be called as `Symbols.valid?(...)` etc.
      module Symbols
        NRLF = %w[ZAP ZAPSP].freeze
        SRLF = %w[HH0 ZAS ZASSP].freeze
        RLF = (NRLF + SRLF).freeze

        UC = %w[CLU CRU CUI CUN CUS CUT CUV CUX CUY CUZ MERUC].freeze
        ALL = (RLF + UC).freeze

        class << self
          include Symbols
        end

        # @param sym the candidate institution symbol
        # @return [Boolean] whether the symbol is one of ALL
        def valid?(sym)
          ALL.include?(sym)
        end

        # Validates a list of institution symbols.
        #
        # @param symbols [Array<String>] the symbols to check
        # @return the argument itself, if every symbol is valid
        # @raise [ArgumentError] if the argument is not list-like, is empty,
        #   or contains anything that is not a known symbol
        def ensure_valid!(symbols)
          raise ArgumentError, "Not a list of institution symbols: #{symbols.inspect}" unless array_like?(symbols)
          raise ArgumentError, 'No institution symbols provided' if symbols.empty?

          invalid = symbols.reject { |s| Symbols.valid?(s) }
          # `empty?` rather than `any?`: `any?` ignores nil/false elements,
          # which would let a list such as [nil] pass validation
          return symbols if invalid.empty?

          raise ArgumentError, "Invalid institution symbol(s): #{invalid.map(&:inspect).join(', ')}"
        end

        private

        # Duck-type check for the methods ensure_valid! relies on.
        def array_like?(a)
          %i[reject empty?].all? { |m| a.respond_to?(m) }
        end
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ Dir.glob(File.expand_path('world_cat/*.rb', __dir__)).each(&method(:require))
@@ -0,0 +1,29 @@
1
+ require 'marcel'
2
+ require 'rubyXL'
3
+ require 'berkeley_library/location/constants'
4
+ require 'berkeley_library/util/xlsx/spreadsheet'
5
+
6
module BerkeleyLibrary
  module Location
    # Reads OCLC numbers from the column of an .xlsx spreadsheet whose header
    # is OCLC_COL_HEADER.
    class XLSXReader
      include Constants

      attr_reader :ss, :oclc_col_index

      # @param xlsx_path the path to the spreadsheet file
      # @raise [ArgumentError] if the OCLC number column cannot be found
      def initialize(xlsx_path)
        @ss = Util::XLSX::Spreadsheet.new(xlsx_path)
        @oclc_col_index = ss.find_column_index_by_header!(OCLC_COL_HEADER)
      end

      # Yields each non-blank value of the OCLC number column (header row
      # excluded), converted to a string. Without a block, returns an
      # enumerator over the same values.
      def each_oclc_number
        return to_enum(:each_oclc_number) unless block_given?

        ss.each_value(oclc_col_index, include_header: false) do |raw_value|
          text = raw_value.to_s
          yield text unless text.strip.empty?
        end
      end
    end
  end
end
@@ -0,0 +1,140 @@
1
+ require 'berkeley_library/logging'
2
+ require 'berkeley_library/location/constants'
3
+
4
module BerkeleyLibrary
  module Location
    # Writes location results into a spreadsheet, one row per OCLC number,
    # adding result columns to the spreadsheet as needed.
    class XLSXWriter
      include Constants
      include BerkeleyLibrary::Logging

      # Column headers for WorldCat results
      COL_NRLF = 'NRLF'.freeze
      COL_SRLF = 'SRLF'.freeze
      COL_OTHER_UC = 'Other UC'.freeze
      COL_WC_ERROR = 'WorldCat Error'.freeze

      # Column headers for HathiTrust results
      COL_HATHI_TRUST = 'Hathi Trust'.freeze
      COL_HATHI_TRUST_ERROR = "#{COL_HATHI_TRUST} Error".freeze

      # Cell values marking an item as held at NRLF / SRLF
      V_NRLF = 'nrlf'.freeze
      V_SRLF = 'srlf'.freeze

      attr_reader :ss, :rlf, :uc, :hathi_trust

      # @param ss the spreadsheet to write to
      # @param rlf whether to write the NRLF and SRLF columns
      # @param uc whether to write the "Other UC" column
      # @param hathi_trust whether to write the HathiTrust column
      def initialize(ss, rlf: true, uc: true, hathi_trust: true)
        @ss = ss
        @rlf = rlf
        @uc = uc
        @hathi_trust = hathi_trust

        ensure_columns!
      end

      # Writes the given result to the row holding its OCLC number.
      #
      # @param result the result to write (see LocationResult)
      # @raise [ArgumentError] if the OCLC number is not in the spreadsheet
      def <<(result)
        target_row = row_index_for(result.oclc_number)
        write_wc_cols(target_row, result) if rlf || uc
        write_ht_cols(target_row, result) if hathi_trust
      end

      private

      def write_wc_cols(r_index, result)
        write_wc_error(r_index, result)
        write_rlf(r_index, result) if rlf
        write_uc(r_index, result) if uc
      end

      def write_ht_cols(r_index, result)
        write_ht_error(r_index, result)
        write_hathi(r_index, result)
      end

      # Makes sure the enabled result columns exist. The error columns are
      # not created here; they are only looked up when an error is written.
      def ensure_columns!
        if rlf
          nrlf_col_index
          srlf_col_index
        end
        uc_col_index if uc
        ht_col_index if hathi_trust
      end

      def row_index_for(oclc_number)
        row_index_by_oclc_number.fetch(oclc_number) do
          raise ArgumentError, "Unknown OCLC number: #{oclc_number}"
        end
      end

      def write_rlf(r_index, result)
        ss.set_value_at(r_index, nrlf_col_index, V_NRLF) if result.nrlf?
        ss.set_value_at(r_index, srlf_col_index, V_SRLF) if result.srlf?
      end

      def write_uc(r_index, result)
        held_by = result.uc_symbols
        ss.set_value_at(r_index, uc_col_index, held_by.join(',')) unless held_by.empty?
      end

      def write_hathi(r_index, result)
        url = result.ht_record_url
        ss.set_value_at(r_index, ht_col_index, url) if url
      end

      def write_wc_error(r_index, result)
        error = result.wc_error
        ss.set_value_at(r_index, wc_err_col_index, error) if error
      end

      def write_ht_error(r_index, result)
        error = result.ht_error
        ss.set_value_at(r_index, ht_err_col_index, error) if error
      end

      def oclc_col_index
        @oclc_col_index ||= ss.find_column_index_by_header!(OCLC_COL_HEADER)
      end

      def nrlf_col_index
        @nrlf_col_index ||= ss.ensure_column!(COL_NRLF)
      end

      def srlf_col_index
        @srlf_col_index ||= ss.ensure_column!(COL_SRLF)
      end

      def uc_col_index
        @uc_col_index ||= ss.ensure_column!(COL_OTHER_UC)
      end

      def wc_err_col_index
        @wc_err_col_index ||= ss.ensure_column!(COL_WC_ERROR)
      end

      def ht_col_index
        @ht_col_index ||= ss.ensure_column!(COL_HATHI_TRUST)
      end

      def ht_err_col_index
        @ht_err_col_index ||= ss.ensure_column!(COL_HATHI_TRUST_ERROR)
      end

      # Maps each OCLC number (as a string) to the index of the first row
      # containing it; later duplicates are logged and skipped.
      def row_index_by_oclc_number
        @row_index_by_oclc_number ||= begin
          index = {}
          # Start at 1 to skip header row
          (1...ss.row_count).each do |r_index|
            raw = ss.value_at(r_index, oclc_col_index)
            next unless raw

            key = raw.to_s
            if index.key?(key)
              logger.warn("Skipping duplicate OCLC number #{key} in row #{r_index}")
            else
              index[key] = r_index
            end
          end
          index
        end
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ Dir.glob(File.expand_path('location/*.rb', __dir__)).each(&method(:require))
@@ -0,0 +1,31 @@
1
+ require 'rubyXL'
2
+
3
module BerkeleyLibrary
  module Util
    module XLSX
      # Extensions prepended to RubyXL::Cell.
      module RubyXLCellExtensions
        # Workaround for https://github.com/weshatheleopard/rubyXL/issues/441
        #
        # After the regular initialization, fills in the worksheet and row
        # from `params` if they are still unset.
        def initialize(params = nil)
          super

          if params.respond_to?(:[])
            @worksheet ||= params[:worksheet]
            self.row ||= params[:row] # NOTE: not an instance variable
          end
        end

        # @return [Boolean] true if the cell value is nil or is a string-like
        #   value containing only whitespace
        def blank?
          content = value
          content.nil? || (content.respond_to?(:strip) && content.strip.empty?)
        end
      end
    end
  end
end

# Apply the extensions to every RubyXL cell
module RubyXL
  class Cell
    prepend BerkeleyLibrary::Util::XLSX::RubyXLCellExtensions
  end
end
@@ -0,0 +1,26 @@
1
+ require 'rubyXL'
2
+ require 'berkeley_library/util/xlsx/rubyxl_cell_extensions'
3
+
4
module BerkeleyLibrary
  module Util
    module XLSX
      # Extensions prepended to RubyXL::Worksheet.
      module RubyXLWorksheetExtensions
        # Finds the first column index at or after which every row has only
        # blank (or missing) cells.
        #
        # @return [Integer] the index; 0 if the sheet has no non-blank cells
        def first_blank_column_index
          sheet_data.rows.inject(0) do |first_blank_c_index, row|
            # Row arrays can contain nil for rows that were never created
            next first_blank_c_index unless row

            # Missing (nil) cells count as blank, so this does not depend on
            # nil responding to `blank?`
            trailing_blank_count = row.cells.reverse_each.take_while { |cell| cell.nil? || cell.blank? }.size
            row_first_blank_c_index = row.size - trailing_blank_count
            [first_blank_c_index, row_first_blank_c_index].max
          end
        end
      end
    end
  end
end

# Apply the extensions to every RubyXL worksheet
module RubyXL
  class Worksheet
    prepend BerkeleyLibrary::Util::XLSX::RubyXLWorksheetExtensions
  end
end
@@ -0,0 +1,158 @@
1
+ require 'marcel'
2
+ require 'rubyXL'
3
+ require 'rubyXL/convenience_methods/cell'
4
+ require 'rubyXL/convenience_methods/worksheet'
5
+ require 'zip'
6
+ require 'berkeley_library/util/xlsx'
7
+
8
module BerkeleyLibrary
  module Util
    module XLSX
      # Convenience wrapper for RubyXL::Workbook, operating on the
      # workbook's first worksheet.
      class Spreadsheet

        # .xlsx format, a.k.a. "Office Open XML Workbook" spreadsheet
        MIME_TYPE_OOXML_WB = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'.freeze

        # path to Excel worksheet file in zipped OOXML archive
        # (anchored with \A and \z so the whole entry name must match)
        RE_EXCEL_WORKSHEET_ZIP_ENTRY = %r{\Axl/worksheets/[^/.]+\.xml\z}

        DEFAULT_WORKSHEET_NAME = 'Sheet1'.freeze

        attr_reader :workbook, :xlsx_path

        # @param xlsx_path the path of an existing .xlsx file to open, or nil
        #   to start from a new, empty workbook
        # @raise [ArgumentError] if the file is not an Excel workbook
        def initialize(xlsx_path = nil)
          @workbook = xlsx_path ? ensure_xlsx_workbook!(xlsx_path) : RubyXL::Workbook.new
          @xlsx_path = xlsx_path
        end

        # Forwards to the workbook's `stream` method. Written as a plain
        # method so that this class does not depend on a `delegate` macro
        # being available.
        def stream(...)
          workbook.stream(...)
        end

        # Writes the workbook to the given path, which becomes the new
        # value of #xlsx_path.
        def save_as(new_xlsx_path)
          workbook.write(new_xlsx_path)
          @xlsx_path = new_xlsx_path
        end

        # The first worksheet of the workbook.
        def worksheet
          @worksheet ||= workbook.worksheets[0]
        end

        # The first row of the worksheet, added if it does not yet exist.
        def header_row
          @header_row ||= (worksheet[0] || worksheet.add_row)
        end

        # @return [Integer, nil] the index of the column whose header cell
        #   has the given value, or nil if there is none
        def find_column_index_by_header(header)
          find_column_index(header_row, header)
        end

        # Like #find_column_index_by_header, but raises if not found.
        # @raise [ArgumentError] if no column has the given header
        def find_column_index_by_header!(header)
          c_index = find_column_index_by_header(header)
          return c_index if c_index

          raise ArgumentError, "#{header.inspect} column not found"
        end

        # Finds the index of the first cell in the row that either has the
        # given value (one extra argument) or satisfies the block (no extra
        # arguments). The block may be passed nil for missing cells.
        #
        # @raise [ArgumentError] if more than one extra argument is given
        def find_column_index(row, *args)
          case args.size
          when 0
            (0...row.size).find { |c_index| yield row[c_index] }
          when 1
            find_column_index(row) { |cell| cell&.value == args[0] }
          else
            raise ArgumentError, "Wrong number of arguments (given #{args.size}, expected 0..1)"
          end
        end

        # Yields the value of each cell in the given column, top to bottom
        # (nil for missing cells). Without a block, returns an enumerator.
        def each_value(c_index, include_header: true)
          return to_enum(:each_value, c_index, include_header:) unless block_given?

          start_index = include_header ? 0 : 1
          (start_index...row_count).each do |r_index|
            yield value_at(r_index, c_index)
          end
        end

        # @return the cell at the given position, or nil if the row or cell
        #   does not exist
        def cell_at(r_index, c_index)
          return unless (row = worksheet[r_index])

          row[c_index]
        end

        # @return the value of the cell at the given position, or nil if
        #   there is no such cell
        def value_at(r_index, c_index)
          return unless (cell = cell_at(r_index, c_index))

          cell.value
        end

        # Sets the value at the given position, changing the existing cell
        # if there is one and adding a cell otherwise.
        def set_value_at(r_index, c_index, value)
          if (cell = cell_at(r_index, c_index))
            cell.change_contents(value)
          else
            worksheet.add_cell(r_index, c_index, value)
          end
        end

        def rows
          sheet_data.rows
        end

        def row_count
          sheet_data.size
        end

        # @param r_index [Integer, nil] a row index, or nil for all rows
        # @return [Integer] the size of the given row (0 if it does not
        #   exist), or the size of the largest row if no index is given
        def column_count(r_index = nil)
          if r_index
            return (row = worksheet[r_index]) ? row.size : 0
          end

          rows.inject(0) do |cc_max, r|
            r ? [r.size, cc_max].max : cc_max
          end
        end

        # Returns the index of the column with the given header, adding the
        # header at the worksheet's first blank column index if no such
        # column exists yet.
        def ensure_column!(header)
          c_index_existing = find_column_index_by_header(header)
          return c_index_existing if c_index_existing

          c_index_next = worksheet.first_blank_column_index
          c_index_next.tap { |cc| worksheet.add_cell(0, cc, header) }
        end

        private

        def sheet_data
          worksheet.sheet_data
        end

        def ensure_xlsx_workbook!(xlsx_path)
          # RubyXL will try to parse an Excel 95 or 97 file (which are still
          # zip-based) but then choke when it tries to read the worksheet, so
          # we explicitly check the MIME type here
          check_mime_type!(xlsx_path)

          RubyXL::Parser.parse(xlsx_path)
        end

        def check_mime_type!(xlsx_path)
          xlsx_pathname = Pathname.new(xlsx_path)
          mime_type = Marcel::MimeType.for(xlsx_pathname)

          # TODO: test w/application/vnd.ms-excel.sheet.macroenabled.12
          return if Marcel::Magic.child?(mime_type, MIME_TYPE_OOXML_WB)

          # Marcel fails to recognize some OOXML files, probably due to unexpected entry order
          # and/or large entries pushing the signature it's looking for too deep into the file
          return ensure_xlsx!(xlsx_path) if Marcel::Magic.child?(mime_type, 'application/zip')

          raise ArgumentError, "Expected Excel Workbook (.xlsx), got #{mime_type}: #{xlsx_path}"
        end

        # Raises unless the ZIP archive contains at least one entry that
        # looks like an Excel worksheet.
        def ensure_xlsx!(zipfile_path)
          return if Zip::File.open(zipfile_path) { |zf| zf.any? { |e| e.name.match?(RE_EXCEL_WORKSHEET_ZIP_ENTRY) } }

          raise ArgumentError, "No Excel worksheets found in ZIP archive #{zipfile_path}"
        end
      end
    end
  end
end
@@ -0,0 +1 @@
1
+ Dir.glob(File.expand_path('xlsx/*.rb', __dir__)).each(&method(:require))
metadata ADDED
@@ -0,0 +1,313 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: berkeley_library-location
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.0.0
5
+ platform: ruby
6
+ authors:
7
+ - David Moles
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-06-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: berkeley_library-logging
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.2'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.2'
27
+ - !ruby/object:Gem::Dependency
28
+ name: berkeley_library-util
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.1'
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: 0.1.9
37
+ type: :runtime
38
+ prerelease: false
39
+ version_requirements: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - "~>"
42
+ - !ruby/object:Gem::Version
43
+ version: '0.1'
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 0.1.9
47
+ - !ruby/object:Gem::Dependency
48
+ name: marcel
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: 1.0.2
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - "~>"
59
+ - !ruby/object:Gem::Version
60
+ version: 1.0.2
61
+ - !ruby/object:Gem::Dependency
62
+ name: rest-client
63
+ requirement: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - "~>"
66
+ - !ruby/object:Gem::Version
67
+ version: '2.1'
68
+ type: :runtime
69
+ prerelease: false
70
+ version_requirements: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '2.1'
75
+ - !ruby/object:Gem::Dependency
76
+ name: rubyXL
77
+ requirement: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '3.4'
82
+ type: :runtime
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '3.4'
89
+ - !ruby/object:Gem::Dependency
90
+ name: bundle-audit
91
+ requirement: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - "~>"
94
+ - !ruby/object:Gem::Version
95
+ version: '0.1'
96
+ type: :development
97
+ prerelease: false
98
+ version_requirements: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - "~>"
101
+ - !ruby/object:Gem::Version
102
+ version: '0.1'
103
+ - !ruby/object:Gem::Dependency
104
+ name: ci_reporter_rspec
105
+ requirement: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: '1.0'
110
+ type: :development
111
+ prerelease: false
112
+ version_requirements: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - "~>"
115
+ - !ruby/object:Gem::Version
116
+ version: '1.0'
117
+ - !ruby/object:Gem::Dependency
118
+ name: colorize
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '0.8'
124
+ type: :development
125
+ prerelease: false
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - "~>"
129
+ - !ruby/object:Gem::Version
130
+ version: '0.8'
131
+ - !ruby/object:Gem::Dependency
132
+ name: dotenv
133
+ requirement: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - "~>"
136
+ - !ruby/object:Gem::Version
137
+ version: '2.7'
138
+ type: :development
139
+ prerelease: false
140
+ version_requirements: !ruby/object:Gem::Requirement
141
+ requirements:
142
+ - - "~>"
143
+ - !ruby/object:Gem::Version
144
+ version: '2.7'
145
+ - !ruby/object:Gem::Dependency
146
+ name: rake
147
+ requirement: !ruby/object:Gem::Requirement
148
+ requirements:
149
+ - - "~>"
150
+ - !ruby/object:Gem::Version
151
+ version: '13.0'
152
+ type: :development
153
+ prerelease: false
154
+ version_requirements: !ruby/object:Gem::Requirement
155
+ requirements:
156
+ - - "~>"
157
+ - !ruby/object:Gem::Version
158
+ version: '13.0'
159
+ - !ruby/object:Gem::Dependency
160
+ name: rspec
161
+ requirement: !ruby/object:Gem::Requirement
162
+ requirements:
163
+ - - "~>"
164
+ - !ruby/object:Gem::Version
165
+ version: '3.10'
166
+ type: :development
167
+ prerelease: false
168
+ version_requirements: !ruby/object:Gem::Requirement
169
+ requirements:
170
+ - - "~>"
171
+ - !ruby/object:Gem::Version
172
+ version: '3.10'
173
+ - !ruby/object:Gem::Dependency
174
+ name: rubocop
175
+ requirement: !ruby/object:Gem::Requirement
176
+ requirements:
177
+ - - '='
178
+ - !ruby/object:Gem::Version
179
+ version: '1.39'
180
+ type: :development
181
+ prerelease: false
182
+ version_requirements: !ruby/object:Gem::Requirement
183
+ requirements:
184
+ - - '='
185
+ - !ruby/object:Gem::Version
186
+ version: '1.39'
187
+ - !ruby/object:Gem::Dependency
188
+ name: rubocop-rake
189
+ requirement: !ruby/object:Gem::Requirement
190
+ requirements:
191
+ - - '='
192
+ - !ruby/object:Gem::Version
193
+ version: 0.6.0
194
+ type: :development
195
+ prerelease: false
196
+ version_requirements: !ruby/object:Gem::Requirement
197
+ requirements:
198
+ - - '='
199
+ - !ruby/object:Gem::Version
200
+ version: 0.6.0
201
+ - !ruby/object:Gem::Dependency
202
+ name: rubocop-rspec
203
+ requirement: !ruby/object:Gem::Requirement
204
+ requirements:
205
+ - - '='
206
+ - !ruby/object:Gem::Version
207
+ version: 2.4.0
208
+ type: :development
209
+ prerelease: false
210
+ version_requirements: !ruby/object:Gem::Requirement
211
+ requirements:
212
+ - - '='
213
+ - !ruby/object:Gem::Version
214
+ version: 2.4.0
215
+ - !ruby/object:Gem::Dependency
216
+ name: ruby-prof
217
+ requirement: !ruby/object:Gem::Requirement
218
+ requirements:
219
+ - - "~>"
220
+ - !ruby/object:Gem::Version
221
+ version: 0.17.0
222
+ type: :development
223
+ prerelease: false
224
+ version_requirements: !ruby/object:Gem::Requirement
225
+ requirements:
226
+ - - "~>"
227
+ - !ruby/object:Gem::Version
228
+ version: 0.17.0
229
+ - !ruby/object:Gem::Dependency
230
+ name: simplecov
231
+ requirement: !ruby/object:Gem::Requirement
232
+ requirements:
233
+ - - "~>"
234
+ - !ruby/object:Gem::Version
235
+ version: '0.21'
236
+ type: :development
237
+ prerelease: false
238
+ version_requirements: !ruby/object:Gem::Requirement
239
+ requirements:
240
+ - - "~>"
241
+ - !ruby/object:Gem::Version
242
+ version: '0.21'
243
+ - !ruby/object:Gem::Dependency
244
+ name: webmock
245
+ requirement: !ruby/object:Gem::Requirement
246
+ requirements:
247
+ - - "~>"
248
+ - !ruby/object:Gem::Version
249
+ version: '3.12'
250
+ type: :development
251
+ prerelease: false
252
+ version_requirements: !ruby/object:Gem::Requirement
253
+ requirements:
254
+ - - "~>"
255
+ - !ruby/object:Gem::Version
256
+ version: '3.12'
257
+ description: A collection of location-related utilities for the UC Berkeley Library
258
+ email: dmoles@berkeley.edu
259
+ executables: []
260
+ extensions: []
261
+ extra_rdoc_files: []
262
+ files:
263
+ - CHANGES.md
264
+ - LICENSE.md
265
+ - README.md
266
+ - lib/berkeley_library/location.rb
267
+ - lib/berkeley_library/location/constants.rb
268
+ - lib/berkeley_library/location/hathi_trust.rb
269
+ - lib/berkeley_library/location/hathi_trust/config.rb
270
+ - lib/berkeley_library/location/hathi_trust/record_url_batch_request.rb
271
+ - lib/berkeley_library/location/hathi_trust/record_url_request.rb
272
+ - lib/berkeley_library/location/hathi_trust/record_url_request_base.rb
273
+ - lib/berkeley_library/location/location_result.rb
274
+ - lib/berkeley_library/location/module_info.rb
275
+ - lib/berkeley_library/location/oclc_number.rb
276
+ - lib/berkeley_library/location/world_cat.rb
277
+ - lib/berkeley_library/location/world_cat/config.rb
278
+ - lib/berkeley_library/location/world_cat/libraries_request.rb
279
+ - lib/berkeley_library/location/world_cat/symbols.rb
280
+ - lib/berkeley_library/location/xlsx_reader.rb
281
+ - lib/berkeley_library/location/xlsx_writer.rb
282
+ - lib/berkeley_library/util/xlsx.rb
283
+ - lib/berkeley_library/util/xlsx/rubyxl_cell_extensions.rb
284
+ - lib/berkeley_library/util/xlsx/rubyxl_worksheet_extensions.rb
285
+ - lib/berkeley_library/util/xlsx/spreadsheet.rb
286
+ homepage: https://github.com/BerkeleyLibrary/location
287
+ licenses:
288
+ - MIT
289
+ metadata:
290
+ homepage_uri: https://github.com/BerkeleyLibrary/location
291
+ source_code_uri: https://github.com/BerkeleyLibrary/location
292
+ changelog_uri: https://github.com/BerkeleyLibrary/location/blob/main/CHANGES.md
293
+ rubygems_mfa_required: 'true'
294
+ post_install_message:
295
+ rdoc_options: []
296
+ require_paths:
297
+ - lib
298
+ required_ruby_version: !ruby/object:Gem::Requirement
299
+ requirements:
300
+ - - ">="
301
+ - !ruby/object:Gem::Version
302
+ version: 3.1.0
303
+ required_rubygems_version: !ruby/object:Gem::Requirement
304
+ requirements:
305
+ - - ">="
306
+ - !ruby/object:Gem::Version
307
+ version: '0'
308
+ requirements: []
309
+ rubygems_version: 3.3.25
310
+ signing_key:
311
+ specification_version: 4
312
+ summary: Location-related utilities for the UC Berkeley Library
313
+ test_files: []