borrow_direct 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZWJhNjQyYWUzNTZkNGQzMDc5MmIwZmFmM2Q2MWY5ODhmODk3NTFmOA==
4
+ ZWQyOTk1Y2ViNGM5ZTUyYTljZWRjMTEwNTlkMzcxZGEzM2ViNDJkOA==
5
5
  data.tar.gz: !binary |-
6
- ZmIzMmNjM2NlYzk3OWNlZDE3YjkwZjhmOWMxYjVkMTM2M2IyYWEyYg==
6
+ MGMwOWQ2ODJlODI5MTk3NDNhYWZlMDY1ZDZjZTAzYjg3YWJhNjg4Ng==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- NmRlYWRmZWM3YmViNGMzZjAzMDhmN2IyMWI0MTI1OTViNGI4MWUxYjVkYTk2
10
- ODFjNDE3OGI5MTU0MDg5YTI0Yjc5MWQ2ZmZlYjM2NzRhZjdiZGNmMmI2MzY3
11
- M2Q5M2IzYzBlZjg2MjgyZTg5MDJkNTQyY2I0ZDMxOGMyMzA4NWE=
9
+ ZWU4N2IzYjM5M2EwMGJmZTU1ZTkxNzRmMTBiZDBmOTYyYmVlMTk0MDIwMDJm
10
+ ODBiMTBiNzA1OWNlOGFmZjNhNmQzMzM4YzI5ZjQzYzEwZTc3NzU3NzNhNTE2
11
+ YTdhMTRmODcyZjZkMzZlNDhjNmJkNTVkMzYzNjgyNzc4MTVhOGU=
12
12
  data.tar.gz: !binary |-
13
- NjcwMzJhZTkzYjNmMTQ2YzhiMTg3ZTk2ZjQ0NTljNjk2NDhlNDJjNDdmYWFm
14
- Y2VjYjM1MDFlZDc5ODZkYTQwYjUzYmU1OGY3M2U5NjBmZjMxNDQyM2NmNjQ4
15
- YzQzOTFhNjI1MjcwZjIxOWYyNWM3MTM3MGI1ODZhZjExOTMzNDY=
13
+ ODg1YjdjM2M2ZTkxMzIzMDljZDZkMGEyNzUzMzBmNjA3MmNkYWU5OWFmNDNl
14
+ YjRhYzYwZmFhNjQ1M2YzMzMxMWFlNjhlZDQ0YWYzMjIyYjFlMmM4OWZjYWE1
15
+ MTY1ODE0Y2YyYTM2OTlhZjhiMzQyMWVjMWZkYzI4ZjMwODAwZTE=
@@ -99,9 +99,19 @@ module BorrowDirect
99
99
  # before the first colon OR semi-colon
100
100
  title.sub!(/[\:\;](.*)$/, '')
101
101
 
102
- # remove any remaining non-alphanumeric, excepting apostrophe, replacing
103
- # with space. The punctuation doesn't help our queries.
104
- title.gsub!(/[^[:alnum:][:space:]\']/, ' ')
102
+
103
+ # We want to remove anything that isn't a letter, number, or apostrophe.
104
+ # Other punctuation and weird chars don't help our query.
105
+ # We want to do it in a way that's unicode-aware.
106
+ #
107
+ # This is harder than expected, as ruby regex unicode-aware character
108
+ # classes don't seem to handle combining diacritics well.
109
+ #
110
+ # This approach works: replace anything that matches the unicode
111
+ # space class (may include more than just ascii ' ') or punct class, unless it's an apostrophe,
112
+ # with a plain ascii space. (Apostrophes are allowed to make it through,
113
+ # for possessive use.)
114
+ title.gsub!(/[[:space:][:punct:]&&[^\']]/, ' ')
105
115
 
106
116
  # compress any remaining whitespace
107
117
  title.strip!
@@ -1,3 +1,3 @@
1
1
  module BorrowDirect
2
- VERSION = "1.0.1"
2
+ VERSION = "1.0.2"
3
3
  end
@@ -130,6 +130,17 @@ describe "GenerateQuery" do
130
130
  query = assert_bd_query_url(url)
131
131
  end
132
132
 
133
+ it "handles combining diacritics" do
134
+ # Some of the code we started with had a problem with combining diacritics.
135
+ a_acute_combined = [97, 204, 129].pack("c*").force_encoding("UTF-8")
136
+
137
+ orig_title = "Vel#{a_acute_combined}squez's stuff...."
138
+
139
+ normalized_title = @generator.normalized_title(orig_title)
140
+
141
+ assert_equal "vel#{a_acute_combined}squez's stuff", normalized_title
142
+ end
143
+
133
144
 
134
145
 
135
146
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: borrow_direct
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Rochkind
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-03-17 00:00:00.000000000 Z
11
+ date: 2015-03-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: httpclient