naconormalizer 0.9.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +17 -0
- data/.travis.yml +3 -0
- data/.yardopts +6 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +13 -0
- data/README.md +57 -0
- data/Rakefile +8 -0
- data/lib/naconormalizer.rb +49 -0
- data/lib/naconormalizer/version.rb +3 -0
- data/naconormalize.gemspec +26 -0
- data/test/minitest_helper.rb +5 -0
- data/test/test_naconormalize.rb +12 -0
- data/vendor/oclcnaconormalizer.jar +0 -0
- data/vendor/oclcnaconormalizer_license.txt +15 -0
- metadata +103 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 971bb85c597f97f37b23fe795a5e3a214fd23951
|
4
|
+
data.tar.gz: 52cdbfb967f9de7ecda4098c7746bfedce8918f1
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0757f9bd19d81cd4ddcc9eb393463769a5d156c8afce4f08489c0cabcb4fe4a952c663bdfa7233960475bd26dfdcc6d3e19d4c37539570da55fa2eb45f856b36
|
7
|
+
data.tar.gz: ea7f7edc28c5fedada56c0ca82099f5b83a08aa4ee4aec5eb1baaccff044ee99c9dd6f2c2eb6d5f03765f23f67485a41663689713b90df10efbbdb3527713e25
|
data/.gitignore
ADDED
data/.travis.yml
ADDED
data/.yardopts
ADDED
data/Gemfile
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
Copyright (c) 2013 Bill Dueber
|
2
|
+
|
3
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
4
|
+
you may not use this file except in compliance with the License.
|
5
|
+
You may obtain a copy of the License at
|
6
|
+
|
7
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
8
|
+
|
9
|
+
Unless required by applicable law or agreed to in writing, software
|
10
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
11
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12
|
+
See the License for the specific language governing permissions and
|
13
|
+
limitations under the License.
|
data/README.md
ADDED
@@ -0,0 +1,57 @@
|
|
1
|
+
# NacoNormalizer
|
2
|
+
|
3
|
+
A tiny shim around OCLC's Java code that performs NACO normalization,
|
4
|
+
a string normalization process used by libraries (and others) to normalize
|
5
|
+
author and title strings for sorting purposes.
|
6
|
+
|
7
|
+
See http://www.loc.gov/aba/pcc/naco/normrule-2.html for more information about NACO normalization
|
8
|
+
|
9
|
+
The underlying Java code is adapted from https://code.google.com/p/oclcnaconormalizer/ and is copyright OCLC.
|
10
|
+
|
11
|
+
## Usage
|
12
|
+
|
13
|
+
~~~
|
14
|
+
|
15
|
+
# default normalizer strips html, keeps the first comma
|
16
|
+
# (for "Lastname, Firstname" support) and lowercases
|
17
|
+
# everything (where "lowercase" means "do the appropriate
|
18
|
+
# thing given the script being used")
|
19
|
+
|
20
|
+
author_normalizer = NacoNormalizer.new
|
21
|
+
sortable_author = author_normalizer.normalize(author_name)
|
22
|
+
|
23
|
+
# When normalizing titles, we don't want to keep the first
|
24
|
+
# comma -- throw it out with all the rest of the punctuation
|
25
|
+
title_normalizer = NacoNormalizer.new(:keep_first_comma => false)
|
26
|
+
sortable_title = title_normalizer.normalize(title)
|
27
|
+
|
28
|
+
~~~
|
29
|
+
|
30
|
+
## Installation
|
31
|
+
|
32
|
+
Add this line to your application's Gemfile:
|
33
|
+
|
34
|
+
gem 'naconormalizer'
|
35
|
+
|
36
|
+
And then execute:
|
37
|
+
|
38
|
+
$ bundle
|
39
|
+
|
40
|
+
Or install it yourself as:
|
41
|
+
|
42
|
+
$ gem install naconormalizer
|
43
|
+
|
44
|
+
|
45
|
+
# Changes
|
46
|
+
|
47
|
+
__v0.9.0 2013-12-06__
|
48
|
+
|
49
|
+
* First public release
|
50
|
+
|
51
|
+
## Contributing
|
52
|
+
|
53
|
+
1. Fork it
|
54
|
+
2. Create your feature branch (`git checkout -b my-new-feature`)
|
55
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
56
|
+
4. Push to the branch (`git push origin my-new-feature`)
|
57
|
+
5. Create new Pull Request
|
data/Rakefile
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'java'
|
2
|
+
require "naconormalizer/version"
|
3
|
+
require_relative "../vendor/oclcnaconormalizer.jar" # the packaged jar's file name is all lowercase; the path must match exactly on case-sensitive filesystems
|
4
|
+
|
5
|
+
|
6
|
+
# A tiny shim around OCLC's Java code that performs NACO normalization,
|
7
|
+
# used by libraries (and others) to normalize author and title strings for
|
8
|
+
# sorting purposes.
|
9
|
+
#
|
10
|
+
# See http://www.loc.gov/aba/pcc/naco/normrule-2.html
|
11
|
+
#
|
12
|
+
# Java code adapted from https://code.google.com/p/oclcnaconormalizer/ and copyright OCLC
|
13
|
+
#
|
14
|
+
# @author Bill Dueber
|
15
|
+
|
16
|
+
class NacoNormalizer
|
17
|
+
|
18
|
+
OCLCNormalizer = org.oclc.util::NacoNormalize
|
19
|
+
|
20
|
+
Defaults = { :keep_caps => false, :strip_html=>true, :keep_first_comma => true }
|
21
|
+
|
22
|
+
# Create a new normalizer that will use the passed options (if any)
|
23
|
+
# @param [Hash] opts The hash of options
|
24
|
+
# @option opts [Boolean] :keep_caps (false) Don't "lowercase" capital letters
|
25
|
+
# @option opts [Boolean] :keep_first_comma (true) Keep the first comma, useful for Lastname,Firstname data
|
26
|
+
# @option opts [Boolean] :strip_html (true) Strip any spurious HTML out of the passed string when normalizing
|
27
|
+
#
|
28
|
+
# @example
|
29
|
+
# author_normalizer = NacoNormalizer.new
|
30
|
+
# title_normalizer = NacoNormalizer.new(:keep_first_comma => false)
|
31
|
+
#
|
32
|
+
# sortable_author = author_normalizer.normalize(author_name)
|
33
|
+
# sortable_title = title_normalizer.normalize(title)
|
34
|
+
|
35
|
+
def initialize(opts={})
  # Start from Defaults and let the caller-supplied options override them.
  # (Merging the other way round, opts.merge(Defaults), would overwrite
  # every option the caller passed with its default value.)
  opts = Defaults.merge(opts)
  @keep_caps = opts[:keep_caps]
  @strip_html = opts[:strip_html]
  @keep_first_comma = opts[:keep_first_comma]
end
|
41
|
+
|
42
|
+
# Normalize a string using the options passed to the constructor
|
43
|
+
# @param [String] str The string to normalize
|
44
|
+
# @return [String] The normalized string
|
45
|
+
def normalize(str, keep_first_comma = @keep_first_comma, keep_caps = @keep_caps, strip_html = @strip_html )
|
46
|
+
OCLCNormalizer.nacoNormalize(str, keep_caps, strip_html, keep_first_comma)
|
47
|
+
end
|
48
|
+
|
49
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'naconormalizer/version'
|
5
|
+
|
6
|
+
Gem::Specification.new do |spec|
|
7
|
+
spec.platform = "java"
|
8
|
+
spec.name = "naconormalizer"
|
9
|
+
spec.version = NacoNormalizer::VERSION
|
10
|
+
spec.authors = ["Bill Dueber"]
|
11
|
+
spec.email = ["bill@dueber.com"]
|
12
|
+
spec.description = %q{Apply NACO normalization to a string. This is just a tiny wrapper around code taken from https://code.google.com/p/oclcnaconormalizer/}
|
13
|
+
spec.summary = %q{Apply NACO normalization to a string}
|
14
|
+
spec.homepage = ""
|
15
|
+
spec.license = "Apache-2.0" # matches the Apache License 2.0 text shipped in LICENSE.txt
|
16
|
+
|
17
|
+
spec.files = `git ls-files`.split($/)
|
18
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
19
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
20
|
+
spec.require_paths = ["lib"]
|
21
|
+
|
22
|
+
spec.add_development_dependency "bundler", "~> 1.3"
|
23
|
+
spec.add_development_dependency "rake"
|
24
|
+
spec.add_development_dependency "minitest"
|
25
|
+
|
26
|
+
end
|
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'minitest_helper'
|
2
|
+
|
3
|
+
class TestNacoNormalizer < MiniTest::Test
|
4
|
+
def test_that_it_has_a_version_number
|
5
|
+
refute_nil ::NacoNormalizer::VERSION
|
6
|
+
end
|
7
|
+
|
8
|
+
def test_basics
|
9
|
+
author_normalizer = NacoNormalizer.new
|
10
|
+
assert_equal 'bill dueber', author_normalizer.normalize('[ Bill Dueber -- ]')
|
11
|
+
end
|
12
|
+
end
|
Binary file
|
@@ -0,0 +1,15 @@
|
|
1
|
+
/*
|
2
|
+
Copyright 2006 OCLC Online Computer Library Center, Inc.
|
3
|
+
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
you may not use this file except in compliance with the License.
|
6
|
+
You may obtain a copy of the License at
|
7
|
+
|
8
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
See the License for the specific language governing permissions and
|
14
|
+
limitations under the License.
|
15
|
+
*/
|
metadata
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: naconormalizer
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.9.0
|
5
|
+
platform: java
|
6
|
+
authors:
|
7
|
+
- Bill Dueber
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2013-12-06 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
version_requirements: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ~>
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.3'
|
20
|
+
requirement: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - ~>
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: '1.3'
|
25
|
+
prerelease: false
|
26
|
+
type: :development
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
requirement: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - '>='
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
prerelease: false
|
40
|
+
type: :development
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: minitest
|
43
|
+
version_requirements: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - '>='
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: '0'
|
53
|
+
prerelease: false
|
54
|
+
type: :development
|
55
|
+
description: Apply NACO normalization to a string. This is just a tiny wrapper around code taken from https://code.google.com/p/oclcnaconormalizer/
|
56
|
+
email:
|
57
|
+
- bill@dueber.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- .gitignore
|
63
|
+
- .travis.yml
|
64
|
+
- .yardopts
|
65
|
+
- Gemfile
|
66
|
+
- LICENSE.txt
|
67
|
+
- README.md
|
68
|
+
- Rakefile
|
69
|
+
- lib/naconormalizer.rb
|
70
|
+
- lib/naconormalizer/version.rb
|
71
|
+
- naconormalize.gemspec
|
72
|
+
- test/minitest_helper.rb
|
73
|
+
- test/test_naconormalize.rb
|
74
|
+
- vendor/oclcnaconormalizer.jar
|
75
|
+
- vendor/oclcnaconormalizer_license.txt
|
76
|
+
homepage: ''
|
77
|
+
licenses:
|
78
|
+
- MIT
|
79
|
+
metadata: {}
|
80
|
+
post_install_message:
|
81
|
+
rdoc_options: []
|
82
|
+
require_paths:
|
83
|
+
- lib
|
84
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - '>='
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '0'
|
89
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
90
|
+
requirements:
|
91
|
+
- - '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
requirements: []
|
95
|
+
rubyforge_project:
|
96
|
+
rubygems_version: 2.1.9
|
97
|
+
signing_key:
|
98
|
+
specification_version: 4
|
99
|
+
summary: Apply NACO normalization to a string
|
100
|
+
test_files:
|
101
|
+
- test/minitest_helper.rb
|
102
|
+
- test/test_naconormalize.rb
|
103
|
+
has_rdoc:
|