high_level_browse 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/ruby.yml +38 -0
- data/.rspec +3 -0
- data/CHANGELOG.md +4 -0
- data/Gemfile +1 -1
- data/Rakefile +3 -6
- data/high_level_browse.gemspec +16 -15
- data/lib/high_level_browse/call_number_range.rb +20 -19
- data/lib/high_level_browse/db.rb +63 -57
- data/lib/high_level_browse/range_tree.rb +13 -13
- data/lib/high_level_browse/version.rb +3 -3
- data/lib/high_level_browse.rb +9 -10
- data/spec/data/hlb.json.gz +0 -0
- data/spec/high_level_browse_spec.rb +23 -0
- data/spec/spec_helper.rb +14 -0
- metadata +39 -7
- data/test/minitest_helper.rb +0 -13
- data/test/test_high_level_browse.rb +0 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a9724032a385683374e08526620f1588020ca677e8e55e828c95fb9ba8518dd4
|
4
|
+
data.tar.gz: 95a7ce1130b8eec58ab9b87532c1da8c388d64bc407fb51fa4679907a172e7ab
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b17ebd104a06a41e8ccef38cd6afff6eca424ed32ffba94b678e1dd21f3ce1c0b13946d69851e23af6b3be4d710d456199d8478e29e9f2699a2f1e46d2ffcf0a
|
7
|
+
data.tar.gz: 8b2c9b47c394f530db121898f9c4199eb6f3dfd9d74806aedc80264d40bb5f009235b851b7827bbd2629766acaa587258175fee93534112eccc6c158cdfcce41
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# This workflow uses actions that are not certified by GitHub.
|
2
|
+
# They are provided by a third-party and are governed by
|
3
|
+
# separate terms of service, privacy policy, and support
|
4
|
+
# documentation.
|
5
|
+
# This workflow will download a prebuilt Ruby version, install dependencies and run tests with Rake
|
6
|
+
# For more information see: https://github.com/marketplace/actions/setup-ruby-jruby-and-truffleruby
|
7
|
+
|
8
|
+
name: Ruby
|
9
|
+
|
10
|
+
on:
|
11
|
+
push:
|
12
|
+
branches: [ "main" ]
|
13
|
+
pull_request:
|
14
|
+
branches: [ "main" ]
|
15
|
+
|
16
|
+
permissions:
|
17
|
+
contents: read
|
18
|
+
|
19
|
+
jobs:
|
20
|
+
test:
|
21
|
+
|
22
|
+
runs-on: ubuntu-latest
|
23
|
+
strategy:
|
24
|
+
matrix:
|
25
|
+
ruby-version: ['2.7', '3.0', '3.1', jruby]
|
26
|
+
|
27
|
+
steps:
|
28
|
+
- uses: actions/checkout@v3
|
29
|
+
- name: Set up Ruby
|
30
|
+
# To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
|
31
|
+
# change this to (see https://github.com/ruby/setup-ruby#versioning):
|
32
|
+
# uses: ruby/setup-ruby@v1
|
33
|
+
uses: ruby/setup-ruby@2b019609e2b0f1ea1a2bc8ca11cb82ab46ada124
|
34
|
+
with:
|
35
|
+
ruby-version: ${{ matrix.ruby-version }}
|
36
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
37
|
+
- name: Run tests
|
38
|
+
run: bundle exec rake spec
|
data/.rspec
ADDED
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
data/Rakefile
CHANGED
data/high_level_browse.gemspec
CHANGED
@@ -1,25 +1,26 @@
|
|
1
|
-
|
2
|
-
lib = File.expand_path('../lib', __FILE__)
|
1
|
+
lib = File.expand_path("../lib", __FILE__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require
|
3
|
+
require "high_level_browse/version"
|
5
4
|
|
6
5
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name
|
8
|
-
spec.version
|
9
|
-
spec.authors
|
10
|
-
spec.email
|
11
|
-
spec.summary
|
6
|
+
spec.name = "high_level_browse"
|
7
|
+
spec.version = HighLevelBrowse::VERSION
|
8
|
+
spec.authors = ["Bill Dueber"]
|
9
|
+
spec.email = ["bill@dueber.com"]
|
10
|
+
spec.summary = "Map LC call numbers to academic categories."
|
12
11
|
spec.homepage = ""
|
13
|
-
spec.license
|
12
|
+
spec.license = "MIT"
|
14
13
|
|
15
|
-
spec.files
|
16
|
-
spec.executables
|
17
|
-
spec.test_files
|
14
|
+
spec.files = `git ls-files -z`.split("\x0")
|
15
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
16
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
18
17
|
spec.require_paths = ["lib"]
|
19
18
|
|
20
|
-
spec.add_dependency
|
19
|
+
spec.add_dependency "nokogiri", "~>1.0"
|
21
20
|
|
22
|
-
spec.add_development_dependency "bundler",
|
21
|
+
spec.add_development_dependency "bundler", "~>2.0"
|
23
22
|
spec.add_development_dependency "rake"
|
24
|
-
spec.add_development_dependency "
|
23
|
+
spec.add_development_dependency "rspec", "~>3.0"
|
24
|
+
spec.add_development_dependency "standard"
|
25
|
+
spec.add_development_dependency "pry"
|
25
26
|
end
|
@@ -1,9 +1,10 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "high_level_browse/range_tree"
|
2
4
|
|
3
5
|
# An efficient set of CallNumberRanges from which to get topics
|
4
6
|
class HighLevelBrowse::CallNumberRangeSet < HighLevelBrowse::RangeTree
|
5
|
-
|
6
|
-
ANY_DIGIT = /\d/.freeze
|
7
|
+
ANY_DIGIT = /\d/
|
7
8
|
|
8
9
|
def has_digits(str)
|
9
10
|
ANY_DIGIT.match?(str)
|
@@ -14,9 +15,9 @@ class HighLevelBrowse::CallNumberRangeSet < HighLevelBrowse::RangeTree
|
|
14
15
|
# @return [Array<Array<String>>] Arrays of topic labels
|
15
16
|
def topics_for(raw_lc)
|
16
17
|
normalized = ::HighLevelBrowse::CallNumberRange.callnumber_normalize(raw_lc)
|
17
|
-
|
18
|
+
search(normalized).map(&:topic_array).uniq
|
18
19
|
rescue => e
|
19
|
-
|
20
|
+
raise "Error getting topics for '#{raw_lc}': #{e}"
|
20
21
|
end
|
21
22
|
end
|
22
23
|
|
@@ -57,7 +58,7 @@ class HighLevelBrowse::CallNumberRange
|
|
57
58
|
|
58
59
|
# Normalize the callnumber in a slightly more sane way
|
59
60
|
# @param [String] cn The raw callnumber to normalize
|
60
|
-
CN = /\A\s*(?<letters>\p{L}{1,3})\s*(?<digits>\d{1,5}(?!\d))(?:\.(?<decimals>\d+))?(?<rest>.*)\Z
|
61
|
+
CN = /\A\s*(?<letters>\p{L}{1,3})\s*(?<digits>\d{1,5}(?!\d))(?:\.(?<decimals>\d+))?(?<rest>.*)\Z/
|
61
62
|
|
62
63
|
def self.callnumber_normalize(cs_str)
|
63
64
|
return nil if cs_str.nil?
|
@@ -72,7 +73,7 @@ class HighLevelBrowse::CallNumberRange
|
|
72
73
|
decimals = m[:decimals] ? "." + m[:decimals] : ""
|
73
74
|
rest = cleanup_freetext(m[:rest])
|
74
75
|
clean = m[:letters] + digits + decimals + " " + rest
|
75
|
-
clean.strip.gsub(/\s+/,
|
76
|
+
clean.strip.gsub(/\s+/, " ")
|
76
77
|
end
|
77
78
|
|
78
79
|
# @param [String] str String to clean up
|
@@ -85,11 +86,11 @@ class HighLevelBrowse::CallNumberRange
|
|
85
86
|
s = replace_dot_before_letter_with_space(s)
|
86
87
|
s = remove_dots_between_letters(s)
|
87
88
|
s = force_space_between_digit_and_letter(s)
|
88
|
-
s.strip.gsub(/\s+/,
|
89
|
+
s.strip.gsub(/\s+/, " ")
|
89
90
|
end
|
90
91
|
|
91
92
|
def self.replace_dot_before_letter_with_space(s)
|
92
|
-
s.gsub
|
93
|
+
s.gsub(/\.(\p{L})/, '\\1')
|
93
94
|
end
|
94
95
|
|
95
96
|
# @param [String] str
|
@@ -112,12 +113,12 @@ class HighLevelBrowse::CallNumberRange
|
|
112
113
|
|
113
114
|
# Compare based on @min, then end
|
114
115
|
# @param [CallNumberRange] o the range to compare to
|
115
|
-
def <=>(
|
116
|
-
[
|
116
|
+
def <=>(other)
|
117
|
+
[min, max] <=> [other.min, other.max]
|
117
118
|
end
|
118
119
|
|
119
120
|
def to_s
|
120
|
-
"[#{
|
121
|
+
"[#{min_raw} - #{max_raw}]"
|
121
122
|
end
|
122
123
|
|
123
124
|
def reconstitute(min, max, min_raw, max_raw, firstletter, topic_array)
|
@@ -139,14 +140,13 @@ class HighLevelBrowse::CallNumberRange
|
|
139
140
|
# @nodoc
|
140
141
|
# JSON roundtrip
|
141
142
|
def to_json(*a)
|
142
|
-
{
|
143
|
-
}.to_json(*a)
|
143
|
+
{"json_class" => self.class.name, "data" => [@min, @max, @min_raw, @max_raw, @firstletter, @topic_array]}.to_json(*a)
|
144
144
|
end
|
145
145
|
|
146
146
|
# @nodoc
|
147
147
|
def self.json_create(h)
|
148
|
-
cnr =
|
149
|
-
cnr.reconstitute(*(h[
|
148
|
+
cnr = allocate
|
149
|
+
cnr.reconstitute(*(h["data"]))
|
150
150
|
cnr
|
151
151
|
end
|
152
152
|
|
@@ -158,7 +158,8 @@ class HighLevelBrowse::CallNumberRange
|
|
158
158
|
if possible_min.nil? # didn't normalize
|
159
159
|
@illegal = true
|
160
160
|
nil
|
161
|
-
else
|
161
|
+
else
|
162
|
+
@min = possible_min
|
162
163
|
end
|
163
164
|
end
|
164
165
|
|
@@ -169,7 +170,8 @@ class HighLevelBrowse::CallNumberRange
|
|
169
170
|
if possible_max.nil? # didn't normalize
|
170
171
|
@illegal = true
|
171
172
|
nil
|
172
|
-
else
|
173
|
+
else
|
174
|
+
@max = possible_max + "~" # add a tilde to make it a true endpoint
|
173
175
|
end
|
174
176
|
end
|
175
177
|
|
@@ -187,5 +189,4 @@ class HighLevelBrowse::CallNumberRange
|
|
187
189
|
|
188
190
|
alias_method :cover?, :contains
|
189
191
|
alias_method :member?, :contains
|
190
|
-
|
191
192
|
end
|
data/lib/high_level_browse/db.rb
CHANGED
@@ -1,20 +1,21 @@
|
|
1
|
-
|
2
|
-
require 'high_level_browse/call_number_range'
|
3
|
-
require 'zlib'
|
4
|
-
require 'json'
|
1
|
+
# frozen_string_literal: true
|
5
2
|
|
6
|
-
|
3
|
+
require "nokogiri"
|
4
|
+
require "high_level_browse/call_number_range"
|
5
|
+
require "zlib"
|
6
|
+
require "json"
|
7
7
|
|
8
|
+
class HighLevelBrowse::DB
|
8
9
|
# Hard-code filename. If you need more than one, put them
|
9
10
|
# in different directories
|
10
|
-
FILENAME =
|
11
|
+
FILENAME = "hlb.json.gz"
|
11
12
|
|
12
13
|
# Given a bunch of CallNumberRange objects, create a new
|
13
14
|
# database with an efficient structure for querying
|
14
15
|
# @param [Array<HighLevelBrowse::CallNumberRange>] array_of_ranges
|
15
16
|
def initialize(array_of_ranges)
|
16
17
|
@all = array_of_ranges
|
17
|
-
@ranges =
|
18
|
+
@ranges = create_letter_indexed_ranges(@all)
|
18
19
|
end
|
19
20
|
|
20
21
|
# Given an array of ranges, create efficient
|
@@ -22,7 +23,7 @@ class HighLevelBrowse::DB
|
|
22
23
|
# @private
|
23
24
|
def create_letter_indexed_ranges(all)
|
24
25
|
bins = {}
|
25
|
-
(
|
26
|
+
("a".."z").each do |letter|
|
26
27
|
cnrs = all.find_all { |x| x.firstletter == letter }
|
27
28
|
bins[letter] = HighLevelBrowse::CallNumberRangeSet.new(cnrs)
|
28
29
|
end
|
@@ -41,12 +42,14 @@ class HighLevelBrowse::DB
|
|
41
42
|
def topics(*raw_callnumber_strings)
|
42
43
|
raw_callnumber_strings.reduce([]) do |acc, raw_callnumber_string|
|
43
44
|
firstletter = if raw_callnumber_string.nil?
|
44
|
-
|
45
|
-
|
46
|
-
|
45
|
+
nil
|
46
|
+
else
|
47
|
+
raw_callnumber_string.to_s.strip.downcase[0]
|
48
|
+
end
|
47
49
|
if @ranges.has_key? firstletter
|
48
50
|
acc + @ranges[firstletter].topics_for(raw_callnumber_string)
|
49
|
-
else
|
51
|
+
else
|
52
|
+
acc
|
50
53
|
end
|
51
54
|
end.uniq
|
52
55
|
end
|
@@ -61,7 +64,7 @@ class HighLevelBrowse::DB
|
|
61
64
|
def self.new_from_xml(xml)
|
62
65
|
noko_doc_root = Nokogiri::XML(xml)
|
63
66
|
simple_array_of_cnrs = cnrs_within_noko_node(node: noko_doc_root)
|
64
|
-
|
67
|
+
new(simple_array_of_cnrs).freeze
|
65
68
|
end
|
66
69
|
|
67
70
|
# Save to disk
|
@@ -78,9 +81,9 @@ class HighLevelBrowse::DB
|
|
78
81
|
# @return [DB] The loaded database
|
79
82
|
def self.load(dir:)
|
80
83
|
simple_array_of_cnrs = Zlib::GzipReader.open(File.join(dir, FILENAME)) do |infile|
|
81
|
-
JSON.
|
84
|
+
JSON.parse(infile.read, create_additions: true).to_a
|
82
85
|
end
|
83
|
-
db =
|
86
|
+
db = new(simple_array_of_cnrs)
|
84
87
|
db.freeze
|
85
88
|
db
|
86
89
|
end
|
@@ -93,51 +96,54 @@ class HighLevelBrowse::DB
|
|
93
96
|
self
|
94
97
|
end
|
95
98
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
99
|
+
class << self
|
100
|
+
private
|
101
|
+
|
102
|
+
# Recurse through the parsed XML document, at each stage keeping track of
|
103
|
+
# * where we are (what are the xpath children?)
|
104
|
+
# * what the current topics are ([level1, level2])
|
105
|
+
# Get all the call numbers associated with the topic represented by the given node,
|
106
|
+
# as well as all the children of the given node, and send it back as a big ol' array
|
107
|
+
# @param [Nokogiri::XML::Node] node A node of the parsed HLB XML file
|
108
|
+
# @param [Array<String>] decendent_xpaths A list of xpaths to the descendants of this node
|
109
|
+
# @param [Array<String>] topic_array An array with all levels of the topics associated with this node
|
110
|
+
# @return [Array<HighLevelBrowse::CallNumberRange>]
|
111
|
+
def cnrs_within_noko_node(node:, decendent_xpaths: ["/hlb/subject", "topic"], topic_array: [])
|
112
|
+
if decendent_xpaths.empty?
|
113
|
+
[] # base case -- we're as low as we're going to go
|
114
|
+
else
|
115
|
+
current_xpath_component = decendent_xpaths[0]
|
116
|
+
new_xpath = decendent_xpaths[1..]
|
117
|
+
new_topic = topic_array.dup
|
118
|
+
new_topic.push node[:name] unless node == node.document # skip the root
|
119
|
+
cnrs = []
|
120
|
+
# For each sub-component, get both the call-number-ranges (cnrs) associated
|
121
|
+
# with this level, as well as recursively getting from all the children
|
122
|
+
node.xpath(current_xpath_component).each do |c|
|
123
|
+
cnrs += call_numbers_list_from_leaves(node: c, topic_array: new_topic)
|
124
|
+
cnrs += cnrs_within_noko_node(node: c, decendent_xpaths: new_xpath, topic_array: new_topic)
|
125
|
+
end
|
126
|
+
cnrs
|
127
|
+
end
|
122
128
|
end
|
123
|
-
end
|
124
|
-
|
125
|
-
# Given a second-to-lowest-level node, get its topic and
|
126
|
-
# extract call number ranges from its children
|
127
|
-
def self.call_numbers_list_from_leaves(node:, topic_array:)
|
128
|
-
cnrs = []
|
129
|
-
new_topic = topic_array.dup.push node[:name]
|
130
|
-
node.xpath('call-numbers').each do |cn_node|
|
131
|
-
min = cn_node[:start]
|
132
|
-
max = cn_node[:end]
|
133
129
|
|
134
|
-
|
135
|
-
|
136
|
-
|
130
|
+
# Given a second-to-lowest-level node, get its topic and
|
131
|
+
# extract call number ranges from its children
|
132
|
+
def call_numbers_list_from_leaves(node:, topic_array:)
|
133
|
+
cnrs = []
|
134
|
+
new_topic = topic_array.dup.push node[:name]
|
135
|
+
node.xpath("call-numbers").each do |cn_node|
|
136
|
+
min = cn_node[:start]
|
137
|
+
max = cn_node[:end]
|
138
|
+
|
139
|
+
new_cnr = HighLevelBrowse::CallNumberRange.new(min: min, max: max, topic_array: new_topic)
|
140
|
+
if new_cnr.illegal?
|
141
|
+
# do some sort of logging else cnrs.push new_cnr
|
142
|
+
else
|
143
|
+
cnrs << new_cnr
|
144
|
+
end
|
137
145
|
end
|
146
|
+
cnrs
|
138
147
|
end
|
139
|
-
cnrs
|
140
|
-
|
141
148
|
end
|
142
|
-
|
143
149
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
# Never released as a gem, as near as I can tell.
|
2
4
|
# Taken from https://github.com/clearhaus/range-tree,
|
3
5
|
# which was released under the MIT license
|
@@ -10,11 +12,11 @@ module HighLevelBrowse
|
|
10
12
|
class RangeTree
|
11
13
|
class Node
|
12
14
|
def initialize(left, range, right, min, max)
|
13
|
-
@left
|
15
|
+
@left = left
|
14
16
|
@range = range
|
15
17
|
@right = right
|
16
|
-
@min
|
17
|
-
@max
|
18
|
+
@min = min || range.min
|
19
|
+
@max = max || range.max
|
18
20
|
end
|
19
21
|
|
20
22
|
attr_reader :left, :range, :right, :min, :max
|
@@ -26,7 +28,7 @@ module HighLevelBrowse
|
|
26
28
|
# same left endpoint, then it's more efficient if also secondarily sorted by
|
27
29
|
# the right endpoint (or equivalently by the length).
|
28
30
|
|
29
|
-
@root = RangeTree.split(ranges.sort{|a,b| (a.min <=> b.min) || (a.max <=> b.max)})
|
31
|
+
@root = RangeTree.split(ranges.sort { |a, b| (a.min <=> b.min) || (a.max <=> b.max) })
|
30
32
|
end
|
31
33
|
|
32
34
|
attr_reader :root
|
@@ -34,17 +36,17 @@ module HighLevelBrowse
|
|
34
36
|
def self.split(ranges)
|
35
37
|
return nil if ranges.empty?
|
36
38
|
|
37
|
-
middle = ranges.length/2
|
39
|
+
middle = ranges.length / 2
|
38
40
|
|
39
|
-
left
|
41
|
+
left = split(ranges.slice(0, middle)) # Handle middle == 0 correctly.
|
40
42
|
range = ranges[middle] # Current range.
|
41
|
-
right = split(ranges[(middle+1)..-1]) # Handle middle == ranges.length correctly.
|
43
|
+
right = split(ranges[(middle + 1)..-1]) # Handle middle == ranges.length correctly.
|
42
44
|
|
43
45
|
ary = [left, range, right].compact
|
44
46
|
|
45
47
|
Node.new(left, range, right,
|
46
|
-
|
47
|
-
|
48
|
+
ary.map(&:min).min, # Subtree's min.
|
49
|
+
ary.map(&:max).max) # Subtree's max.
|
48
50
|
end
|
49
51
|
|
50
52
|
def search(range, limit: Float::INFINITY)
|
@@ -59,8 +61,7 @@ module HighLevelBrowse
|
|
59
61
|
def self.search_helper(q, root, result, limit)
|
60
62
|
return if root.nil?
|
61
63
|
# Visit left child?
|
62
|
-
if (l = root.left)
|
63
|
-
not l.max < q.min # The interesting part.
|
64
|
+
if (l = root.left) && l.max && q.min && (l.max >= q.min) # The interesting part.
|
64
65
|
search_helper(q, root.left, result, limit)
|
65
66
|
end
|
66
67
|
|
@@ -78,8 +79,7 @@ module HighLevelBrowse
|
|
78
79
|
result << root.range if RangeTree.ranges_intersect?(q, root.range)
|
79
80
|
|
80
81
|
# Visit right child?
|
81
|
-
if (r = root.right)
|
82
|
-
not q.max < r.min # The interesting part.
|
82
|
+
if (r = root.right) && q.max && r.min && (q.max >= r.min) # The interesting part.
|
83
83
|
search_helper(q, root.right, result, limit)
|
84
84
|
end
|
85
85
|
end
|
@@ -1,3 +1,3 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
1
|
+
module HighLevelBrowse
|
2
|
+
VERSION = "1.1.0"
|
3
|
+
end
|
data/lib/high_level_browse.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require "high_level_browse/version"
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
4
|
+
require "high_level_browse/db"
|
5
|
+
require "uri"
|
6
|
+
require "open-uri"
|
5
7
|
|
6
8
|
module HighLevelBrowse
|
7
|
-
|
8
|
-
SOURCE_URL = ENV['HLB_XML_ENDPOINT'] || 'https://www.lib.umich.edu/browse/categories/xml.php'
|
9
|
+
SOURCE_URL = ENV["HLB_XML_ENDPOINT"] || "https://www.lib.umich.edu/browse/categories/xml.php"
|
9
10
|
|
10
11
|
# Fetch a new version of the raw file and turn it into a db
|
11
12
|
# @return [DB] The loaded database
|
@@ -15,27 +16,25 @@ module HighLevelBrowse
|
|
15
16
|
uri.extend OpenURI::OpenRead
|
16
17
|
|
17
18
|
xml = uri.read
|
18
|
-
|
19
|
+
|
20
|
+
DB.new_from_xml(xml)
|
19
21
|
rescue => e
|
20
22
|
raise "Could not fetch xml from '#{SOURCE_URL}': #{e}"
|
21
23
|
end
|
22
24
|
|
23
|
-
|
24
25
|
# Fetch and save to the specified directory
|
25
26
|
# @param [String] dir The directory where the hlb.json.gz file will end up
|
26
27
|
# @return [DB] The fetched and saved database
|
27
28
|
def self.fetch_and_save(dir:)
|
28
|
-
db =
|
29
|
+
db = fetch
|
29
30
|
db.save(dir: dir)
|
30
31
|
db
|
31
32
|
end
|
32
33
|
|
33
|
-
|
34
34
|
# Load from disk
|
35
35
|
# @param [String] dir The directory where the hlb.json.gz file is located
|
36
36
|
# @return [DB] The loaded database
|
37
37
|
def self.load(dir:)
|
38
38
|
DB.load(dir: dir)
|
39
39
|
end
|
40
|
-
|
41
40
|
end
|
Binary file
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
RSpec.describe HighLevelBrowse do
|
4
|
+
|
5
|
+
let(:hlb) { HighLevelBrowse.fetch }
|
6
|
+
|
7
|
+
it "has a version number" do
|
8
|
+
expect(HighLevelBrowse::VERSION).not_to be nil
|
9
|
+
end
|
10
|
+
|
11
|
+
it "runs basic sanity checks" do
|
12
|
+
expect(hlb['QA1']).to eq([["Science", "Mathematics"]])
|
13
|
+
expect(hlb['P11']).to eq [["Humanities", "Linguistics"]]
|
14
|
+
expect(hlb['AAA11']).to eq []
|
15
|
+
end
|
16
|
+
|
17
|
+
it "can do a basic save/load" do
|
18
|
+
dir = Dir.tmpdir
|
19
|
+
HighLevelBrowse.fetch_and_save(dir: dir)
|
20
|
+
loaded = HighLevelBrowse.load(dir: dir)
|
21
|
+
expect(loaded['QA1']).to eq [["Science", "Mathematics"]]
|
22
|
+
end
|
23
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
require "bundler/setup"
|
2
|
+
require "high_level_browse"
|
3
|
+
|
4
|
+
RSpec.configure do |config|
|
5
|
+
# Enable flags like --only-failures and --next-failure
|
6
|
+
config.example_status_persistence_file_path = ".rspec_status"
|
7
|
+
|
8
|
+
# Disable RSpec exposing methods globally on `Module` and `main`
|
9
|
+
config.disable_monkey_patching!
|
10
|
+
|
11
|
+
config.expect_with :rspec do |c|
|
12
|
+
c.syntax = :expect
|
13
|
+
end
|
14
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: high_level_browse
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Bill Dueber
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-06-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -53,7 +53,35 @@ dependencies:
|
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
55
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
56
|
+
name: rspec
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '3.0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '3.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: standard
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: pry
|
57
85
|
requirement: !ruby/object:Gem::Requirement
|
58
86
|
requirements:
|
59
87
|
- - ">="
|
@@ -76,7 +104,9 @@ executables:
|
|
76
104
|
extensions: []
|
77
105
|
extra_rdoc_files: []
|
78
106
|
files:
|
107
|
+
- ".github/workflows/ruby.yml"
|
79
108
|
- ".gitignore"
|
109
|
+
- ".rspec"
|
80
110
|
- ".travis.yml"
|
81
111
|
- CHANGELOG.md
|
82
112
|
- Gemfile
|
@@ -92,8 +122,9 @@ files:
|
|
92
122
|
- lib/high_level_browse/db.rb
|
93
123
|
- lib/high_level_browse/range_tree.rb
|
94
124
|
- lib/high_level_browse/version.rb
|
95
|
-
-
|
96
|
-
-
|
125
|
+
- spec/data/hlb.json.gz
|
126
|
+
- spec/high_level_browse_spec.rb
|
127
|
+
- spec/spec_helper.rb
|
97
128
|
homepage: ''
|
98
129
|
licenses:
|
99
130
|
- MIT
|
@@ -118,5 +149,6 @@ signing_key:
|
|
118
149
|
specification_version: 4
|
119
150
|
summary: Map LC call numbers to academic categories.
|
120
151
|
test_files:
|
121
|
-
-
|
122
|
-
-
|
152
|
+
- spec/data/hlb.json.gz
|
153
|
+
- spec/high_level_browse_spec.rb
|
154
|
+
- spec/spec_helper.rb
|
data/test/minitest_helper.rb
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
$LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
|
2
|
-
|
3
|
-
# Both oga and minitest have stupid warnings that I don't want to
|
4
|
-
# hear about
|
5
|
-
|
6
|
-
verbose = $VERBOSE
|
7
|
-
$VERBOSE = nil
|
8
|
-
require 'minitest'
|
9
|
-
require 'minitest/spec'
|
10
|
-
require 'minitest/autorun'
|
11
|
-
$VERBOSE = verbose
|
12
|
-
|
13
|
-
require 'high_level_browse'
|