borrow_direct 1.0.3 → 1.0.4

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- YTdkNTczZjRiY2FlYWQ3OWJiYzkyMGNlYzg5YTg1NGMzMjhlMTBhOA==
4
+ OTY4MWZjYWFkNWZlM2UyNzQxMWQ3M2E0OTcwZGY2MjBlMTlkZDI5ZQ==
5
5
  data.tar.gz: !binary |-
6
- ZWFjNzMyMjdkODIyNjY5Y2U4NDZmY2Y2MjFiODY4MDJlMGQ0N2ZkOA==
6
+ YjQ5OTI1N2NjNGQzZjQ3Njc4NGQ0ZmE0MTlmNzY1NjA0ODlmNmIzYw==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- MTNiZWNlMWVkYTA5OTIzNDA4Mzc3NDlkZDQ4MjhkYzMzZDdhNjVlZDQzM2Yz
10
- N2YzNGYzMjFlMmZkZmM1MDI2YjE2NWE5OTllYjAwZjY2NjcwYjg1MzI0NTY0
11
- YzcxMTQ1MDZkN2FjMWQ3YjFhODUyY2JjNGRkZGJmY2YxZGVlNjc=
9
+ YzVhYmY2OTg2ZDg1ZmI5ZjMwOTIwYWNiYmQ1MzUwZDBiZWZhZjhmNjAxOWQ5
10
+ OWNkYWFjZWY3ZDNlN2ZhMmYyNDI0OWFkMzQwMmE4MWQ0NTZhY2IxNjg2ZTBk
11
+ OGE4M2RhYmM2MGRjMmNlNzQ2ZjA4ZTEyNjhjNDllN2UxOWRlMjQ=
12
12
  data.tar.gz: !binary |-
13
- MDM4Yjg1Y2UzNDUwNjIwMWRlOGJkMGRjM2IzNjIxODJkOTdkYTU1MGI1NWIx
14
- OTY3NjM5YTFhYWI1NmJjMGUzODU4MDZjYjYzMTdmYjQ3YzZhMTUxM2RjYjcw
15
- MWU4NTU1ZDEyYjRmMzMwMmE4MWMyNTQ1ZTU5NjEwMDQwODJhMjg=
13
+ OGU0YTdiMDY3ZGE5ZDI3MjQxNjJiNTFlZjgxMzcwZjc3OWExN2UyYzAwNTVl
14
+ NzM0NmM5ZWI0Y2EyZjVjMTliMDg3MjAxZDkxY2FhMDFkMzc0NTNhNjhkMTlh
15
+ MzcyNzVlMjdiZDU3ZmE4NDRhMjY5ZTZjZjAyZjBkYmNiOTg3OGQ=
@@ -4,6 +4,8 @@ module BorrowDirect
4
4
  # Generate a "deep link" to query results in BD's native
5
5
  # HTML interface.
6
6
  class GenerateQuery
7
+ PUNCT_STRIP_REGEX = /[[:space:]\)\(\]\[\;\:\.\,\\\/\"\<\>\!]/
8
+
7
9
  attr_accessor :url_base
8
10
 
9
11
  # Hash from our own API argument to BD field code
@@ -100,18 +102,10 @@ module BorrowDirect
100
102
  title.sub!(/[\:\;](.*)$/, '')
101
103
 
102
104
 
103
- # We want to remove anything that isn't a letter, number, or apostrophe.
104
- # Other punctuation and weird chars don't help our query.
105
- # We want to do it in a way that's unicode-aware.
106
- #
107
- # This is harder than expected, as ruby regex unicode-aware character
108
- # classes don't seem to handle combining diacritics well.
109
- #
110
- # This crazy way does it, replace anything that matches unicode
111
- # space (may include more than just ascii ' ') or punct class, unless
112
- # it's an apostrophe or an ampersand, which are allowed --
113
- # and replaces them with plain ascii space.
114
- title.gsub!(/[[:space:][:punct:]&&[^\'\&]]/, ' ')
105
+ # We want to remove some punctuation that is better
106
+ # turned into a space in the query. Along with
107
+ # any kind of unicode space, why not.
108
+ title.gsub!(PUNCT_STRIP_REGEX, ' ')
115
109
 
116
110
  # compress any remaining whitespace
117
111
  title.strip!
@@ -130,15 +124,32 @@ module BorrowDirect
130
124
 
131
125
  # Lowercase, and try to get just the last name out of something
132
126
  # that looks like a cataloging authorized heading.
127
+ #
128
+ # Try to remove leading 'by' stuff when we're getting a 245c
133
129
  def normalized_author(author)
130
+
134
131
  return "" if author.nil? || author.empty?
135
132
 
136
133
  author = author.downcase
137
- # Just take everything before the comma if we have one
138
- if author =~ /\A(.*),/
134
+ # Just take everything before the comma if we have one --
135
+ # or before an "and", for stripping individuals out of 245c
136
+ # multiples.
137
+ if author =~ /\A(.*)(,|\sand\s)/
139
138
  author = $1
140
139
  end
141
140
 
141
+
142
+ author.gsub!(/\A.*by\s*/, '')
143
+
144
+ # We want to remove some punctuation that is better
145
+ # turned into a space in the query. Along with
146
+ # any kind of unicode space, why not.
147
+ author.gsub!(PUNCT_STRIP_REGEX, ' ')
148
+
149
+ # compress any remaining whitespace
150
+ author.strip!
151
+ author.gsub!(/\s+/, ' ')
152
+
142
153
  return author
143
154
  end
144
155
 
@@ -1,3 +1,3 @@
1
1
  module BorrowDirect
2
- VERSION = "1.0.3"
2
+ VERSION = "1.0.4"
3
3
  end
@@ -141,11 +141,29 @@ describe "GenerateQuery" do
141
141
  assert_equal "vel#{a_acute_combined}squez's stuff", normalized_title
142
142
  end
143
143
 
144
+ it "preserves apostrophes" do
145
+ assert_equal "c l r james's caribbean", @generator.normalized_title("C L R James's Caribbean")
146
+ end
147
+
144
148
  it "allows ampersands" do
145
149
  assert_equal "x & y", @generator.normalized_title("x & y")
146
150
  end
147
151
 
148
152
 
153
+ it "gets author reasonably out of some 245c type things" do
154
+ assert_equal "edward foster", @generator.normalized_author("edited by Edward Foster")
155
+ assert_equal "edward foster", @generator.normalized_author("by Edward Foster")
156
+
157
+ assert_equal "leonard diepeveen", @generator.normalized_author("edited by Leonard Diepeveen.")
158
+
159
+ assert_equal "amalia avramidou", @generator.normalized_author("edited by Amalia Avramidou and Denise Demetriou.")
160
+ assert_equal "james elkins", @generator.normalized_author("edited by James Elkins and Robert Williams.")
161
+
162
+ # Hmm, should we really be stripping those periods? Not sure, but seems
163
+ # to do okay in searching.
164
+ assert_equal "h a shapiro", @generator.normalized_author("edited by H.A. Shapiro.")
165
+ end
166
+
149
167
 
150
168
  end
151
169
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: borrow_direct
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Rochkind
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-18 00:00:00.000000000 Z
11
+ date: 2015-03-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httpclient