high_level_browse 0.1.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 286d0ce64d0d9e8dffa58b716f111d086310654d
4
- data.tar.gz: 2a13aad07ee29e47b0bcc00f4ba16740491e9bfd
2
+ SHA256:
3
+ metadata.gz: 5eae24ef8906dfb25f1f949b4b29b69a33834dc63a7b0d96cd356324b14a79df
4
+ data.tar.gz: 919434516a5098b1c4f9766434c68b7b7e6960128a0eb59abc7e442da520de5e
5
5
  SHA512:
6
- metadata.gz: 9960852abc0686da303da11c8ead326df0ea7e7df89432962f7d1353e62350afbc7a3ad556d1beecfe6cce816c1bf654ce4bdee78bb195caefdb08caeb67b7cf
7
- data.tar.gz: 3d29b51feb0bd70c37eea28248eff4f3dccd8a38cdb23617be8998fdfa821e392743d0f87c25e9f233ee326b4097072ca63d3d04bcdf5c21216ec43a96ecae04
6
+ metadata.gz: 5aa8cb1cb8472c788c1def8efff185a8fc16957ef97450178687941f6dc5a03444f6d85b1c25f601aa34eef02ed51e8ab7475c9d067e2db35aa94118d319a7dc
7
+ data.tar.gz: 390752555d91aec5060be2a34097b9cc6ca664b22c44ae6b29a989e9cb766bc45b83599310d2fec2ee79fe3155678c41fc52c24f6a3658692ab7b2ac32bb513d
data/CHANGELOG.md ADDED
@@ -0,0 +1,13 @@
1
+ # High Level Browse
2
+
3
+ ## 1.0.0
4
+
5
+ * New normalization algorithm.
6
+ * Because the normalization algorithm has changed, the on-disk file
7
+ (`hlb.json.gz`) needs to be re-generated: the old normalization
8
+ form and the new one won't match each other.
9
+
10
+ ## 0.2.0
11
+
12
+ * First real release
13
+
data/bin/hlb CHANGED
@@ -28,7 +28,7 @@ require 'tmpdir'
28
28
  filename = HighLevelBrowse::DB::FILENAME
29
29
  dir = Dir.tmpdir()
30
30
  fullpath = File.join(dir, filename)
31
-
31
+ puts fullpath
32
32
  hlb = if File.exist?(fullpath)
33
33
  HighLevelBrowse.load(dir: dir)
34
34
  else
@@ -7,7 +7,6 @@ end
7
7
 
8
8
  require 'marc'
9
9
  require 'high_level_browse'
10
- require 'lcsort'
11
10
  require 'tmpdir'
12
11
 
13
12
 
@@ -38,7 +37,7 @@ Counter = Struct.new(:count, :invalid, :found, :notfound, :hlb) do
38
37
 
39
38
 
40
39
  def check_cn(cn)
41
- normalized = Lcsort.normalize(cn)
40
+ normalized = HighLevelBrowse::CallNumberRange.callnumber_normalize(cn)
42
41
  return :invalid if normalized.nil?
43
42
  cats = hlb[cn]
44
43
  if cats.empty?
@@ -17,10 +17,9 @@ Gem::Specification.new do |spec|
17
17
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
18
18
  spec.require_paths = ["lib"]
19
19
 
20
- spec.add_dependency 'oga', '~> 2.1'
21
- spec.add_dependency 'lcsort'
20
+ spec.add_dependency 'nokogiri', '~>1.0'
22
21
 
23
- spec.add_development_dependency "bundler", "~> 1.6"
22
+ spec.add_development_dependency "bundler", '~>2.0'
24
23
  spec.add_development_dependency "rake"
25
24
  spec.add_development_dependency "minitest"
26
25
  end
@@ -1,21 +1,25 @@
1
- require 'lcsort'
2
1
  require 'high_level_browse/range_tree'
3
2
 
4
-
5
3
  # An efficient set of CallNumberRanges from which to get topics
6
4
  class HighLevelBrowse::CallNumberRangeSet < HighLevelBrowse::RangeTree
7
5
 
6
+ ANY_DIGIT = /\d/.freeze
7
+
8
+ def has_digits(str)
9
+ ANY_DIGIT.match?(str)
10
+ end
8
11
 
9
12
  # Returns the array of topic arrays for the given LC string
10
13
  # @param [String] raw_lc A raw LC string (eg., 'qa 112.3 .A4 1990')
11
14
  # @return [Array<Array<String>>] Arrays of topic labels
12
15
  def topics_for(raw_lc)
13
- normalized = Lcsort.normalize(HighLevelBrowse::CallNumberRange.preprocess(raw_lc))
16
+ normalized = ::HighLevelBrowse::CallNumberRange.callnumber_normalize(raw_lc)
14
17
  self.search(normalized).map(&:topic_array).uniq
18
+ rescue => e
19
+ require 'pry'; binding.pry
15
20
  end
16
21
  end
17
22
 
18
-
19
23
  # A callnumber-range keeps track of the original begin/end
20
24
  # strings as well as the normalized versions, and can be
21
25
  # serialized to JSON
@@ -25,7 +29,6 @@ class HighLevelBrowse::CallNumberRange
25
29
 
26
30
  attr_reader :min, :max, :min_raw, :max_raw, :firstletter
27
31
 
28
-
29
32
  attr_accessor :topic_array, :redundant
30
33
 
31
34
  SPACE_OR_PUNCT = /\A[\s\p{Punct}]*(.*?)[\s\p{Punct}]*\Z/
@@ -42,27 +45,71 @@ class HighLevelBrowse::CallNumberRange
42
45
  def self.force_break_between_digit_and_letter(str)
43
46
  str.gsub(DIGIT_TO_LETTER, '\1 \2')
44
47
  end
48
+
45
49
  # @nodoc
46
50
  # Preprocess the string, removing spaces/punctuation off the end
47
51
  # and forcing a space where there's a digit->letter transition
48
- def self.preprocess(str)
49
- str ||= ''
50
- force_break_between_digit_and_letter(
51
- strip_spaces_and_punct(str)
52
- )
52
+ # def self.preprocess(str)
53
+ # str ||= ''
54
+ # force_break_between_digit_and_letter(strip_spaces_and_punct(str)
55
+ # )
56
+ # end
57
+
58
+ # Normalize the callnumber in a slightly more sane way
59
+ # @param [String] cs_str The raw callnumber to normalize
60
+ CN = /\A\s*(?<letters>\p{L}{1,3})\s*(?<digits>\d{1,5}(?!\d))(?:\.(?<decimals>\d+))?(?<rest>.*)\Z/.freeze
61
+
62
+ def self.callnumber_normalize(cs_str)
63
+ return nil if cs_str.nil?
64
+
65
+ cs_str = cs_str.downcase
66
+ return cs_str if /\A\s*\p{L}{1,3}+\s*\Z/.match? cs_str # just letters
67
+
68
+ m = CN.match(cs_str)
69
+ return nil unless m
70
+
71
+ digits = m[:digits].size.to_s + m[:digits]
72
+ decimals = m[:decimals] ? "." + m[:decimals] : ""
73
+ rest = cleanup_freetext(m[:rest])
74
+ clean = m[:letters] + digits + decimals + " " + rest
75
+ clean.strip.gsub(/\s+/, ' ')
53
76
  end
54
77
 
78
+ # @param [String] str String to clean up
79
+ def self.cleanup_freetext(str)
80
+ return "" if str.nil?
81
+
82
+ s = str.strip
83
+ return s if s == ""
84
+
85
+ s = replace_dot_before_letter_with_space(s)
86
+ s = remove_dots_between_letters(s)
87
+ s = force_space_between_digit_and_letter(s)
88
+ s.strip.gsub(/\s+/, ' ')
89
+ end
90
+
91
+ def self.replace_dot_before_letter_with_space(s)
92
+ s.gsub /\.(\p{L})/, '\\1'
93
+ end
94
+
95
+ # @param [String] str
96
+ def self.remove_dots_between_letters(str)
97
+ str.gsub(/(\p{L})\.(\p{L})/, '\\1\\2')
98
+ end
99
+
100
+ def self.force_space_between_digit_and_letter(s)
101
+ s.gsub(/(\d)(\p{L})/, '\\1 \\2')
102
+ end
55
103
 
56
104
  def initialize(min:, max:, topic_array:)
57
- @illegal = false
58
- @redundant = false
59
- self.min = self.class.preprocess(min)
60
- self.max = self.class.preprocess(max)
105
+ @illegal = false
106
+ @redundant = false
107
+ self.min = min
108
+ self.max = max
61
109
  @topic_array = topic_array
62
110
  @firstletter = self.min[0] unless @illegal
63
111
  end
64
112
 
65
-
66
113
  # Compare based on @min, then end
67
114
  # @param [CallNumberRange] o the range to compare to
68
115
  def <=>(o)
@@ -74,31 +121,25 @@ class HighLevelBrowse::CallNumberRange
74
121
  end
75
122
 
76
123
  def reconstitute(min, max, min_raw, max_raw, firstletter, topic_array)
77
- @min = min
78
- @max = max
79
- @min_raw = min_raw
80
- @max_raw = max_raw
124
+ @min = min
125
+ @max = max
126
+ @min_raw = min_raw
127
+ @max_raw = max_raw
81
128
  @firstletter = firstletter
82
129
  @topic_array = topic_array
83
130
  end
84
131
 
85
-
86
132
  # Two ranges are equal if their @min, @max, and topic array
87
133
  # are all the same
88
134
  # @param [CallNumberRange] other the range to compare to
89
135
  def ==(other)
90
- @min == other.min and
91
- @max == other.max and
92
- @topic_array == other.topic_array
136
+ @min == other.min and @max == other.max and @topic_array == other.topic_array
93
137
  end
94
138
 
95
-
96
139
  # @nodoc
97
140
  # JSON roundtrip
98
141
  def to_json(*a)
99
- {
100
- 'json_class' => self.class.name,
101
- 'data' => [@min, @max, @min_raw, @max_raw, @firstletter, @topic_array]
142
+ {'json_class' => self.class.name, 'data' => [@min, @max, @min_raw, @max_raw, @firstletter, @topic_array]
102
143
  }.to_json(*a)
103
144
  end
104
145
 
@@ -109,29 +150,26 @@ class HighLevelBrowse::CallNumberRange
109
150
  cnr
110
151
  end
111
152
 
112
-
113
153
  # In both min= and max=, a value that fails to normalize is not raised
114
154
  # as an error; we simply set the @illegal flag so we can use it later on.
115
155
  def min=(x)
116
- @min_raw = x
117
- possible_min = Lcsort.normalize(x)
156
+ @min_raw = x
157
+ possible_min = self.class.callnumber_normalize(x)
118
158
  if possible_min.nil? # didn't normalize
119
159
  @illegal = true
120
160
  nil
121
- else
122
- @min = possible_min
161
+ else @min = possible_min
123
162
  end
124
163
  end
125
164
 
126
165
  # Same as min=. Set the illegal flag if the value doesn't normalize
127
166
  def max=(x)
128
- @max_raw = x
129
- possible_max = Lcsort.normalize(x)
167
+ @max_raw = x
168
+ possible_max = self.class.callnumber_normalize(x)
130
169
  if possible_max.nil? # didn't normalize
131
170
  @illegal = true
132
171
  nil
133
- else
134
- @max = possible_max + '~' # add a tilde to make it a true endpoint
172
+ else @max = possible_max + '~' # add a tilde to make it a true endpoint
135
173
  end
136
174
  end
137
175
 
@@ -139,7 +177,6 @@ class HighLevelBrowse::CallNumberRange
139
177
  @illegal
140
178
  end
141
179
 
142
-
143
180
  def surrounds(other)
144
181
  @min <= other.min and @max >= other.max
145
182
  end
@@ -1,4 +1,4 @@
1
- require 'oga'
1
+ require 'nokogiri'
2
2
  require 'high_level_browse/call_number_range'
3
3
  require 'zlib'
4
4
  require 'json'
@@ -13,7 +13,7 @@ class HighLevelBrowse::DB
13
13
  # database with an efficient structure for querying
14
14
  # @param [Array<HighLevelBrowse::CallNumberRange>] array_of_ranges
15
15
  def initialize(array_of_ranges)
16
- @all = array_of_ranges
16
+ @all = array_of_ranges
17
17
  @ranges = self.create_letter_indexed_ranges(@all)
18
18
  end
19
19
 
@@ -22,8 +22,8 @@ class HighLevelBrowse::DB
22
22
  # @private
23
23
  def create_letter_indexed_ranges(all)
24
24
  bins = {}
25
- ('A'..'Z').each do |letter|
26
- cnrs = all.find_all {|x| x.firstletter == letter}
25
+ ('a'..'z').each do |letter|
26
+ cnrs = all.find_all { |x| x.firstletter == letter }
27
27
  bins[letter] = HighLevelBrowse::CallNumberRangeSet.new(cnrs)
28
28
  end
29
29
  bins
@@ -40,16 +40,17 @@ class HighLevelBrowse::DB
40
40
  # @return [Array<Array>] A (possibly empty) array of arrays of topics
41
41
  def topics(*raw_callnumber_strings)
42
42
  raw_callnumber_strings.reduce([]) do |acc, raw_callnumber_string|
43
- firstletter = raw_callnumber_string.strip.upcase[0]
43
+ firstletter = if raw_callnumber_string.nil?
44
+ nil
45
+ else raw_callnumber_string.to_s.strip.downcase[0]
46
+ end
44
47
  if @ranges.has_key? firstletter
45
48
  acc + @ranges[firstletter].topics_for(raw_callnumber_string)
46
- else
47
- acc
49
+ else acc
48
50
  end
49
51
  end.uniq
50
52
  end
51
53
 
52
-
53
54
  alias_method :[], :topics
54
55
 
55
56
  # Create a new object from a string with the XML
@@ -58,12 +59,11 @@ class HighLevelBrowse::DB
58
59
  # (e.g., from 'https://www.lib.umich.edu/browse/categories/xml.php')
59
60
  # @return [DB]
60
61
  def self.new_from_xml(xml)
61
- oga_doc_root = Oga.parse_xml(xml)
62
- simple_array_of_cnrs = cnrs_within_oga_node(node: oga_doc_root)
62
+ noko_doc_root = Nokogiri::XML(xml)
63
+ simple_array_of_cnrs = cnrs_within_noko_node(node: noko_doc_root)
63
64
  self.new(simple_array_of_cnrs).freeze
64
65
  end
65
66
 
66
-
67
67
  # Save to disk
68
68
  # @param [String] dir The directory where the hlb.json.gz file will be saved
69
69
  # @return [DB] The loaded database
@@ -73,7 +73,6 @@ class HighLevelBrowse::DB
73
73
  end
74
74
  end
75
75
 
76
-
77
76
  # Load from disk
78
77
  # @param [String] dir The directory where the hlb.json.gz file is located
79
78
  # @return [DB] The loaded database
@@ -81,12 +80,11 @@ class HighLevelBrowse::DB
81
80
  simple_array_of_cnrs = Zlib::GzipReader.open(File.join(dir, FILENAME)) do |infile|
82
81
  JSON.load(infile.read).to_a
83
82
  end
84
- db = self.new(simple_array_of_cnrs)
83
+ db = self.new(simple_array_of_cnrs)
85
84
  db.freeze
86
85
  db
87
86
  end
88
87
 
89
-
90
88
  # Freeze everything
91
89
  # @return [DB] the frozen db
92
90
  def freeze
@@ -102,49 +100,44 @@ class HighLevelBrowse::DB
102
100
  # * what the current topics are ([level1, level2])
103
101
  # Get all the call numbers associated with the topic represented by the given node,
104
102
  # as well as all the children of the given node, and send it back as a big ol' array
105
- # @param [Oga::Node] node A node of the parsed HLB XML file
103
+ # @param [Nokogiri::XML::Node] node A node of the parsed HLB XML file
106
104
  # @param [Array<String>] decendent_xpaths A list of xpaths to the descendants of this node
107
105
  # @param [Array<String>] topic_array An array with all levels of the topics associated with this node
108
106
  # @return [Array<HighLevelBrowse::CallNumberRange>]
109
- def self.cnrs_within_oga_node(node:, decendent_xpaths: ['/hlb/subject', 'topic', 'sub-topic'], topic_array: [])
107
+ def self.cnrs_within_noko_node(node:, decendent_xpaths: ['/hlb/subject', 'topic', 'sub-topic'], topic_array: [])
110
108
  if decendent_xpaths.empty?
111
109
  [] # base case -- we're as low as we're going to go
112
- else
113
- current_xpath_component = decendent_xpaths[0]
114
- new_xpath = decendent_xpaths[1..-1]
115
- new_topic = topic_array.dup
116
- new_topic.push node.get(:name) unless node == node.root_node # skip the root
117
- cnrs = []
118
- # For each sub-component, get both the call-number-ranges (cnrs) assocaited
119
- # with this level, as well as recusively getting from all the children
120
- node.xpath(current_xpath_component).each do |c|
121
- cnrs += call_numbers_list_from_leaves(node: c, topic_array: new_topic)
122
- cnrs += cnrs_within_oga_node(node: c, decendent_xpaths: new_xpath, topic_array: new_topic)
123
- end
124
- cnrs
110
+ else current_xpath_component = decendent_xpaths[0]
111
+ new_xpath = decendent_xpaths[1..-1]
112
+ new_topic = topic_array.dup
113
+ new_topic.push node[:name] unless node == node.document # skip the root
114
+ cnrs = []
115
+ # For each sub-component, get both the call-number-ranges (cnrs) associated
116
+ # with this level, as well as recursively getting from all the children
117
+ node.xpath(current_xpath_component).each do |c|
118
+ cnrs += call_numbers_list_from_leaves(node: c, topic_array: new_topic)
119
+ cnrs += cnrs_within_noko_node(node: c, decendent_xpaths: new_xpath, topic_array: new_topic)
120
+ end
121
+ cnrs
125
122
  end
126
123
  end
127
124
 
128
-
129
125
  # Given a second-to-lowest-level node, get its topic and
130
126
  # extract call number ranges from its children
131
127
  def self.call_numbers_list_from_leaves(node:, topic_array:)
132
- cnrs = []
133
- new_topic = topic_array.dup.push node.get(:name)
128
+ cnrs = []
129
+ new_topic = topic_array.dup.push node[:name]
134
130
  node.xpath('call-numbers').each do |cn_node|
135
- min = cn_node.get(:start)
136
- max = cn_node.get(:end)
131
+ min = cn_node[:start]
132
+ max = cn_node[:end]
137
133
 
138
134
  new_cnr = HighLevelBrowse::CallNumberRange.new(min: min, max: max, topic_array: new_topic)
139
135
  if new_cnr.illegal?
140
- # do some sort of logging
141
- else
142
- cnrs.push new_cnr
136
+ # do some sort of logging else cnrs.push new_cnr
143
137
  end
144
138
  end
145
139
  cnrs
146
140
 
147
141
  end
148
142
 
149
-
150
143
  end
@@ -48,8 +48,8 @@ module HighLevelBrowse
48
48
  end
49
49
 
50
50
  def search(range, limit: Float::INFINITY)
51
+ return [] if range.nil?
51
52
  range = range.is_a?(Range) ? range : (range..range)
52
-
53
53
  result = []
54
54
  RangeTree.search_helper(range, @root, result, limit)
55
55
 
@@ -58,7 +58,6 @@ module HighLevelBrowse
58
58
 
59
59
  def self.search_helper(q, root, result, limit)
60
60
  return if root.nil?
61
-
62
61
  # Visit left child?
63
62
  if (l = root.left) and l.max and q.min and \
64
63
  not l.max < q.min # The interesting part.
@@ -70,6 +69,10 @@ module HighLevelBrowse
70
69
  # point of checking, there wasn't added too many, but after left child has
71
70
  # been checked, we might hit the limit and then, "this" will add one as
72
71
  # well.
72
+ #
73
+ # (I'm leaving the above paragraph intact as a reminder to myself to
74
+ # read things over and make sure they're, you know, actual English. At this point
75
+ # in 2022 I have no idea what the heck I was saying.)
73
76
 
74
77
  # Add root?
75
78
  result << root.range if RangeTree.ranges_intersect?(q, root.range)
@@ -1,3 +1,3 @@
1
1
  module HighLevelBrowse
2
- VERSION = "0.1.0"
2
+ VERSION = "1.0.0"
3
3
  end
@@ -5,7 +5,6 @@ $LOAD_PATH.unshift File.expand_path('../../lib', __FILE__)
5
5
 
6
6
  verbose = $VERBOSE
7
7
  $VERBOSE = nil
8
- require 'oga'
9
8
  require 'minitest'
10
9
  require 'minitest/spec'
11
10
  require 'minitest/autorun'
@@ -13,15 +13,3 @@ describe "loads" do
13
13
  end
14
14
  end
15
15
 
16
- describe "Works the same as before" do
17
- it "gets the same output for 30k randomly chosen call numbers" do
18
- h = HighLevelBrowse.fetch_and_save(dir: TESTDIR)
19
- JSON.load(File.open(File.join(TESTDIR, '30k_random_old_mappings.json'))).each do |rec|
20
- cn = rec['cn'].strip
21
- newcats = h[cn]
22
- next if rec['jar'].empty?
23
- assert_equal [cn, rec['jar'].sort], [rec['cn'], newcats.sort]
24
- end
25
-
26
- end
27
- end
metadata CHANGED
@@ -1,57 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: high_level_browse
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Bill Dueber
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-06-02 00:00:00.000000000 Z
11
+ date: 2022-05-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: oga
14
+ name: nokogiri
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '2.1'
19
+ version: '1.0'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '2.1'
27
- - !ruby/object:Gem::Dependency
28
- name: lcsort
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - ">="
32
- - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :runtime
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - ">="
39
- - !ruby/object:Gem::Version
40
- version: '0'
26
+ version: '1.0'
41
27
  - !ruby/object:Gem::Dependency
42
28
  name: bundler
43
29
  requirement: !ruby/object:Gem::Requirement
44
30
  requirements:
45
31
  - - "~>"
46
32
  - !ruby/object:Gem::Version
47
- version: '1.6'
33
+ version: '2.0'
48
34
  type: :development
49
35
  prerelease: false
50
36
  version_requirements: !ruby/object:Gem::Requirement
51
37
  requirements:
52
38
  - - "~>"
53
39
  - !ruby/object:Gem::Version
54
- version: '1.6'
40
+ version: '2.0'
55
41
  - !ruby/object:Gem::Dependency
56
42
  name: rake
57
43
  requirement: !ruby/object:Gem::Requirement
@@ -80,7 +66,7 @@ dependencies:
80
66
  - - ">="
81
67
  - !ruby/object:Gem::Version
82
68
  version: '0'
83
- description:
69
+ description:
84
70
  email:
85
71
  - bill@dueber.com
86
72
  executables:
@@ -92,12 +78,11 @@ extra_rdoc_files: []
92
78
  files:
93
79
  - ".gitignore"
94
80
  - ".travis.yml"
81
+ - CHANGELOG.md
95
82
  - Gemfile
96
83
  - LICENSE.txt
97
84
  - README.md
98
85
  - Rakefile
99
- - bench/bench.rb
100
- - bench/hlb.json.gz
101
86
  - bin/fetch_new_hlb
102
87
  - bin/hlb
103
88
  - bin/test_marc_file_for_hlb
@@ -113,7 +98,7 @@ homepage: ''
113
98
  licenses:
114
99
  - MIT
115
100
  metadata: {}
116
- post_install_message:
101
+ post_install_message:
117
102
  rdoc_options: []
118
103
  require_paths:
119
104
  - lib
@@ -128,9 +113,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
128
113
  - !ruby/object:Gem::Version
129
114
  version: '0'
130
115
  requirements: []
131
- rubyforge_project:
132
- rubygems_version: 2.6.8
133
- signing_key:
116
+ rubygems_version: 3.3.3
117
+ signing_key:
134
118
  specification_version: 4
135
119
  summary: Map LC call numbers to academic categories.
136
120
  test_files:
data/bench/bench.rb DELETED
@@ -1,57 +0,0 @@
1
- require 'benchmark/ips'
2
- $:.unshift '../lib'
3
- $:.unshift '.'
4
-
5
-
6
- # On my laptop under normal load (e.g., not very scientific at all)
7
- # I get the following running in a single thread
8
- # ruby 2.3 ~8500 lookups/second
9
- # ruby 2.4 ~9100 lookups/second
10
- # jruby 9 ~20k lookups/second
11
- # jruby 9, old HLB.jar ~6500 lookups/second
12
- # jruby 1.7 error, can't do named arguments
13
- # jruby 1.7, old HLB.jar ~6700 lookups/second
14
- #
15
- # The old HLB.jar has a different (worse) algorithm, but is of
16
- # interest because it's what I'm writing this to replace.
17
-
18
- # umich_traject holds .jar files with the old java implementation; see
19
- # https://github.com/hathitrust/ht_traject/tree/9e8d414fd9bb2c79e243d289c4d39c05d2de27e5/lib/umich_traject
20
- #
21
-
22
- TEST_OLD_STUFF = defined? JRUBY_VERSION and Dir.exist?('./umich_traject')
23
- if TEST_OLD_STUFF
24
- puts "Loading old HLB3.jar stuff"
25
- require 'umich_traject/jackson-core-asl-1.4.3.jar'
26
- require 'umich_traject/jackson-mapper-asl-1.4.3.jar'
27
- require 'umich_traject/apache-solr-umichnormalizers.jar'
28
- require 'umich_traject/HLB3.jar'
29
- java_import Java::edu.umich.lib.hlb::HLB
30
- puts "Initializing HLB"
31
- HLB.initialize()
32
- end
33
-
34
- require 'high_level_browse'
35
-
36
- h = HighLevelBrowse.load(dir: '.')
37
-
38
- cns = File.read('call_numbers.txt').split(/\n/).cycle
39
-
40
- puts RUBY_DESCRIPTION
41
-
42
- total = 0
43
- Benchmark.ips do |x|
44
- x.config(:time => 25, :warmup => 25)
45
-
46
- x.report("HLB lookups") do
47
- total += h[cns.next].count
48
- end
49
-
50
- if TEST_OLD_STUFF
51
- total = 0
52
- x.report("Old java lookups") do
53
- total += HLB.categories(cns.next).to_a.count
54
- end
55
- x.compare!
56
- end
57
- end
data/bench/hlb.json.gz DELETED
Binary file