borrow_direct 1.0.3 → 1.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/lib/borrow_direct/generate_query.rb +25 -14
- data/lib/borrow_direct/version.rb +1 -1
- data/test/generate_query_test.rb +18 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
OTY4MWZjYWFkNWZlM2UyNzQxMWQ3M2E0OTcwZGY2MjBlMTlkZDI5ZQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
YjQ5OTI1N2NjNGQzZjQ3Njc4NGQ0ZmE0MTlmNzY1NjA0ODlmNmIzYw==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
YzVhYmY2OTg2ZDg1ZmI5ZjMwOTIwYWNiYmQ1MzUwZDBiZWZhZjhmNjAxOWQ5
|
10
|
+
OWNkYWFjZWY3ZDNlN2ZhMmYyNDI0OWFkMzQwMmE4MWQ0NTZhY2IxNjg2ZTBk
|
11
|
+
OGE4M2RhYmM2MGRjMmNlNzQ2ZjA4ZTEyNjhjNDllN2UxOWRlMjQ=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
OGU0YTdiMDY3ZGE5ZDI3MjQxNjJiNTFlZjgxMzcwZjc3OWExN2UyYzAwNTVl
|
14
|
+
NzM0NmM5ZWI0Y2EyZjVjMTliMDg3MjAxZDkxY2FhMDFkMzc0NTNhNjhkMTlh
|
15
|
+
MzcyNzVlMjdiZDU3ZmE4NDRhMjY5ZTZjZjAyZjBkYmNiOTg3OGQ=
|
@@ -4,6 +4,8 @@ module BorrowDirect
|
|
4
4
|
# Generate a "deep link" to query results in BD's native
|
5
5
|
# HTML interface.
|
6
6
|
class GenerateQuery
|
7
|
+
PUNCT_STRIP_REGEX = /[[:space:]\)\(\]\[\;\:\.\,\\\/\"\<\>\!]/
|
8
|
+
|
7
9
|
attr_accessor :url_base
|
8
10
|
|
9
11
|
# Hash from our own API argument to BD field code
|
@@ -100,18 +102,10 @@ module BorrowDirect
|
|
100
102
|
title.sub!(/[\:\;](.*)$/, '')
|
101
103
|
|
102
104
|
|
103
|
-
# We want to remove
|
104
|
-
#
|
105
|
-
#
|
106
|
-
|
107
|
-
# This is harder than expected, as ruby regex unicode-aware character
|
108
|
-
# classes don't seem to handle combining diacritics well.
|
109
|
-
#
|
110
|
-
# This crazy way does it, replace anything that matches unicode
|
111
|
-
# space (may include more than just ascii ' ') or punct class, unless
|
112
|
-
# it's an apostrophe or an ampersand, which are allowed --
|
113
|
-
# and replaces them with plain ascii space.
|
114
|
-
title.gsub!(/[[:space:][:punct:]&&[^\'\&]]/, ' ')
|
105
|
+
# We want to remove some punctuation that is better
|
106
|
+
# turned into a space in the query. Along with
|
107
|
+
# any kind of unicode space, why not.
|
108
|
+
title.gsub!(PUNCT_STRIP_REGEX, ' ')
|
115
109
|
|
116
110
|
# compress any remaining whitespace
|
117
111
|
title.strip!
|
@@ -130,15 +124,32 @@ module BorrowDirect
|
|
130
124
|
|
131
125
|
# Lowercase, and try to get just the last name out of something
|
132
126
|
# that looks like a cataloging authorized heading.
|
127
|
+
#
|
128
|
+
# Try to remove leading 'by' stuff when we're getting a 245c
|
133
129
|
def normalized_author(author)
|
130
|
+
|
134
131
|
return "" if author.nil? || author.empty?
|
135
132
|
|
136
133
|
author = author.downcase
|
137
|
-
# Just take everything before the comma if we have one
|
138
|
-
|
134
|
+
# Just take everything before the comma if we have one --
|
135
|
+
# or before an "and", for stripping individuals out of 245c
|
136
|
+
# multiples.
|
137
|
+
if author =~ /\A(.*)(,|\sand\s)/
|
139
138
|
author = $1
|
140
139
|
end
|
141
140
|
|
141
|
+
|
142
|
+
author.gsub!(/\A.*by\s*/, '')
|
143
|
+
|
144
|
+
# We want to remove some punctuation that is better
|
145
|
+
# turned into a space in the query. Along with
|
146
|
+
# any kind of unicode space, why not.
|
147
|
+
author.gsub!(PUNCT_STRIP_REGEX, ' ')
|
148
|
+
|
149
|
+
# compress any remaining whitespace
|
150
|
+
author.strip!
|
151
|
+
author.gsub!(/\s+/, ' ')
|
152
|
+
|
142
153
|
return author
|
143
154
|
end
|
144
155
|
|
data/test/generate_query_test.rb
CHANGED
@@ -141,11 +141,29 @@ describe "GenerateQuery" do
|
|
141
141
|
assert_equal "vel#{a_acute_combined}squez's stuff", normalized_title
|
142
142
|
end
|
143
143
|
|
144
|
+
it "preserves apostrophes" do
|
145
|
+
assert_equal "c l r james's caribbean", @generator.normalized_title("C L R James's Caribbean")
|
146
|
+
end
|
147
|
+
|
144
148
|
it "allows ampersands" do
|
145
149
|
assert_equal "x & y", @generator.normalized_title("x & y")
|
146
150
|
end
|
147
151
|
|
148
152
|
|
153
|
+
it "gets author reasonably out of some 245c type things" do
|
154
|
+
assert_equal "edward foster", @generator.normalized_author("edited by Edward Foster")
|
155
|
+
assert_equal "edward foster", @generator.normalized_author("by Edward Foster")
|
156
|
+
|
157
|
+
assert_equal "leonard diepeveen", @generator.normalized_author("edited by Leonard Diepeveen.")
|
158
|
+
|
159
|
+
assert_equal "amalia avramidou", @generator.normalized_author("edited by Amalia Avramidou and Denise Demetriou.")
|
160
|
+
assert_equal "james elkins", @generator.normalized_author("edited by James Elkins and Robert Williams.")
|
161
|
+
|
162
|
+
# Hmm, should we really be stripping those periods? Not sure, but seems
|
163
|
+
# to do okay in searching.
|
164
|
+
assert_equal "h a shapiro", @generator.normalized_author("edited by H.A. Shapiro.")
|
165
|
+
end
|
166
|
+
|
149
167
|
|
150
168
|
end
|
151
169
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: borrow_direct
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.3
|
4
|
+
version: 1.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Rochkind
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-03-
|
11
|
+
date: 2015-03-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: httpclient
|