revs-utils 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/.gitignore +10 -0
- data/.rvmrc.example +1 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +75 -0
- data/LICENSE +14 -0
- data/README.rdoc +54 -0
- data/Rakefile +11 -0
- data/bin/console +16 -0
- data/bin/revs_lc_automobile_terms.rb +26 -0
- data/bin/run_all_tests +3 -0
- data/config/boot.rb +6 -0
- data/config/manifest_headers.yml +34 -0
- data/files/revs-lc-marque-terms.obj +0 -0
- data/lib/revs-utils/version.rb +5 -0
- data/lib/revs-utils.rb +299 -0
- data/revs-utils.gemspec +28 -0
- data/spec/revs-utils_spec.rb +253 -0
- data/spec/sample-csv-files/bad-source_id.csv +3 -0
- data/spec/sample-csv-files/blank-label.csv +3 -0
- data/spec/sample-csv-files/blank-sourceid.csv +216 -0
- data/spec/sample-csv-files/clean-sheet.csv +216 -0
- data/spec/sample-csv-files/date-and-year.csv +2 -0
- data/spec/sample-csv-files/date-instead-of-year.csv +216 -0
- data/spec/sample-csv-files/location-and-other-fields.csv +2 -0
- data/spec/sample-csv-files/malformed-sourceid.csv +216 -0
- data/spec/sample-csv-files/no-blank-label.csv +3 -0
- data/spec/sample-csv-files/no-label-column.csv +3 -0
- data/spec/sample-csv-files/no-sourceid.csv +216 -0
- data/spec/spec_helper.rb +8 -0
- metadata +175 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
---
|
|
2
|
+
!binary "U0hBMQ==":
|
|
3
|
+
metadata.gz: !binary |-
|
|
4
|
+
NGY1MmUzYTdjMjU1OTI2MGU5NzA0YTIzZDVkOGY0Nzg1OWYwMzZjOA==
|
|
5
|
+
data.tar.gz: !binary |-
|
|
6
|
+
ZjQ0OWRiOTlmZGQ2YzY1YjU4Zjc1NWU3MWIyODU4NjRiMTg4Nzg5MQ==
|
|
7
|
+
SHA512:
|
|
8
|
+
metadata.gz: !binary |-
|
|
9
|
+
Y2RkOTIyYTVhNmRlODRmYWViNzMwMzkyNGM3MjVmYTc1MzNjYWMyMDgyZDk0
|
|
10
|
+
OTNiYjE2NjQ2YzJiMGY2NjA0OGNjY2UzZDdhZTc0ZWRhMjhkNWRlYmY5ZTdi
|
|
11
|
+
M2I5NzJhMjY2OTMyYTg3NDYyZTFiYWQzYTQ3MjhiZDg5NjMxYjA=
|
|
12
|
+
data.tar.gz: !binary |-
|
|
13
|
+
MGNhMDEwMTM5Y2Q3YmNiODcxMTU5NzI3MjU5MTZkYjY4YWI1YmZmNWZhMzA0
|
|
14
|
+
MzhjMWY0ZjExZjg3MTdlNTBiMjJjMGVjYjhhZjk5MGIwOGYxMDBhOWU1MzEz
|
|
15
|
+
ZDUwNWNmOGU4YzEyNmI0NjY0MzM4OTEwYzgzNjJiOTVhMTQzMGI=
|
data/.gitignore
ADDED
data/.rvmrc.example
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
rvm use 1.9.3@assembly-image --create
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
PATH
|
|
2
|
+
remote: .
|
|
3
|
+
specs:
|
|
4
|
+
revs-utils (1.0.8)
|
|
5
|
+
actionpack (~> 3)
|
|
6
|
+
countries (= 0.9.2)
|
|
7
|
+
rdf
|
|
8
|
+
|
|
9
|
+
GEM
|
|
10
|
+
remote: http://rubygems.org/
|
|
11
|
+
remote: http://sul-gems.stanford.edu/
|
|
12
|
+
specs:
|
|
13
|
+
actionpack (3.2.19)
|
|
14
|
+
activemodel (= 3.2.19)
|
|
15
|
+
activesupport (= 3.2.19)
|
|
16
|
+
builder (~> 3.0.0)
|
|
17
|
+
erubis (~> 2.7.0)
|
|
18
|
+
journey (~> 1.0.4)
|
|
19
|
+
rack (~> 1.4.5)
|
|
20
|
+
rack-cache (~> 1.2)
|
|
21
|
+
rack-test (~> 0.6.1)
|
|
22
|
+
sprockets (~> 2.2.1)
|
|
23
|
+
activemodel (3.2.19)
|
|
24
|
+
activesupport (= 3.2.19)
|
|
25
|
+
builder (~> 3.0.0)
|
|
26
|
+
activesupport (3.2.19)
|
|
27
|
+
i18n (~> 0.6, >= 0.6.4)
|
|
28
|
+
multi_json (~> 1.0)
|
|
29
|
+
builder (3.0.4)
|
|
30
|
+
countries (0.9.2)
|
|
31
|
+
currencies (>= 0.4.0)
|
|
32
|
+
currencies (0.4.2)
|
|
33
|
+
diff-lcs (1.2.4)
|
|
34
|
+
erubis (2.7.0)
|
|
35
|
+
hike (1.2.3)
|
|
36
|
+
i18n (0.6.11)
|
|
37
|
+
journey (1.0.4)
|
|
38
|
+
lyberteam-gems-devel (1.0.1)
|
|
39
|
+
rake (>= 0.8.7)
|
|
40
|
+
rest-client
|
|
41
|
+
mime-types (2.0)
|
|
42
|
+
multi_json (1.10.1)
|
|
43
|
+
rack (1.4.5)
|
|
44
|
+
rack-cache (1.2)
|
|
45
|
+
rack (>= 0.4)
|
|
46
|
+
rack-test (0.6.2)
|
|
47
|
+
rack (>= 1.0)
|
|
48
|
+
rake (10.1.0)
|
|
49
|
+
rdf (1.1.4.1)
|
|
50
|
+
rest-client (1.6.7)
|
|
51
|
+
mime-types (>= 1.16)
|
|
52
|
+
rspec (2.14.1)
|
|
53
|
+
rspec-core (~> 2.14.0)
|
|
54
|
+
rspec-expectations (~> 2.14.0)
|
|
55
|
+
rspec-mocks (~> 2.14.0)
|
|
56
|
+
rspec-core (2.14.7)
|
|
57
|
+
rspec-expectations (2.14.3)
|
|
58
|
+
diff-lcs (>= 1.1.3, < 2.0)
|
|
59
|
+
rspec-mocks (2.14.4)
|
|
60
|
+
sprockets (2.2.2)
|
|
61
|
+
hike (~> 1.2)
|
|
62
|
+
multi_json (~> 1.0)
|
|
63
|
+
rack (~> 1.0)
|
|
64
|
+
tilt (~> 1.1, != 1.3.0)
|
|
65
|
+
tilt (1.4.1)
|
|
66
|
+
yard (0.8.7.3)
|
|
67
|
+
|
|
68
|
+
PLATFORMS
|
|
69
|
+
ruby
|
|
70
|
+
|
|
71
|
+
DEPENDENCIES
|
|
72
|
+
lyberteam-gems-devel (> 1.0.0)
|
|
73
|
+
revs-utils!
|
|
74
|
+
rspec (~> 2.6)
|
|
75
|
+
yard
|
data/LICENSE
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#############################################################################################################
|
|
2
|
+
# Copyright (c) 2013-2014 by The Board of Trustees of the Leland Stanford Junior University. All rights reserved.
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
data/README.rdoc
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
= Revs::Utils
|
|
2
|
+
|
|
3
|
+
Shared methods and functions used by revs-indexer, pre-assembly and bulk metadata loading code.
|
|
4
|
+
|
|
5
|
+
== Releases
|
|
6
|
+
- <b>0.0.1</b> Initial release
|
|
7
|
+
- <b>0.0.3</b> Add collection name cleaning method
|
|
8
|
+
- <b>0.0.5</b> Lock down the version of countries gem to avoid problems
|
|
9
|
+
- <b>0.0.5</b> Lock down the version of countries gem to avoid problems
|
|
10
|
+
- <b>0.0.7</b> Don't use this version, it uses the wrong method of Dir.pwd to find the root directory for loading assets.
|
|
11
|
+
- <b>0.0.8</b> Add in helper functions to check the .csv files for registration and metadata updates
|
|
12
|
+
- <b>0.0.9</b> Add in a clean marque function
|
|
13
|
+
- <b>1.0.0</b> Make format fixing case insensitive; add a method for loading CSV files with UTF-8 compliance
|
|
14
|
+
- <b>1.0.1</b> Remove a couple methods from Revs code and update the methods to make them consistent in gem
|
|
15
|
+
- <b>1.0.2 and 1.0.3</b> Make invalid two digit years in dates return as false
|
|
16
|
+
- <b>1.0.4</b> Revs-Utils now assumes .csv files are in UTF-8 format instead of Latin-1
|
|
17
|
+
- <b>1.0.5</b> Parse location using commas as well as pipes
|
|
18
|
+
- <b>1.0.6</b> Add some more conditions to CSV header checks
|
|
19
|
+
- <b>1.0.7</b> Label column needs to be there but does not need to have a value to register
|
|
20
|
+
- <b>1.0.8</b> Update clean_collection_name method to deal with other possible names
|
|
21
|
+
|
|
22
|
+
== Running tests
|
|
23
|
+
|
|
24
|
+
rake spec
|
|
25
|
+
|
|
26
|
+
== Release the gem to the gemserver
|
|
27
|
+
|
|
28
|
+
1. Bump the version number in lib/revs-utils/version.rb
|
|
29
|
+
2. Add to the release notes in this readme.
|
|
30
|
+
3. Ensure the tests pass.
|
|
31
|
+
4. Git commit and push
|
|
32
|
+
5. rake dlss_release
|
|
33
|
+
|
|
34
|
+
== Installation
|
|
35
|
+
|
|
36
|
+
Add this line to your application's Gemfile:
|
|
37
|
+
|
|
38
|
+
gem 'revs-utils'
|
|
39
|
+
|
|
40
|
+
And then execute:
|
|
41
|
+
|
|
42
|
+
$ bundle
|
|
43
|
+
|
|
44
|
+
Or install it yourself as:
|
|
45
|
+
|
|
46
|
+
$ gem install revs-utils
|
|
47
|
+
|
|
48
|
+
== Updating LC Automobile Terms in Gem
|
|
49
|
+
|
|
50
|
+
1. Check out Gem code.
|
|
51
|
+
2. CD into Gem directory
|
|
52
|
+
3. ruby bin/revs_lc_automobile_terms.rb
|
|
53
|
+
4. Update Gem in git, bump version number and rake dlss_release
|
|
54
|
+
|
data/Rakefile
ADDED
data/bin/console
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# This script generates a hash of the more specific LC terms found under the top-level term defined below (currently "Automobiles").
|
|
2
|
+
# It then dumps this hash to a file, so it can be loaded with each pre-assembly run and used when generating Revs Descriptive Metadata
|
|
3
|
+
# This script can be run periodically to refresh the list of terms. It writes a new file to the path held in REVS_LC_TERMS_FILENAME,
|
|
4
|
+
# which can be updated in git.
|
|
5
|
+
|
|
6
|
+
# Peter Mangiafico
|
|
7
|
+
# May 16, 2013
|
|
8
|
+
|
|
9
|
+
require File.expand_path(File.dirname(__FILE__) + '/../config/boot')
|
|
10
|
+
require 'rdf'
|
|
11
|
+
require 'rdf/ntriples'
|
|
12
|
+
|
|
13
|
+
automobile_term='http://id.loc.gov/authorities/subjects/sh85010201' # the top-level LC term to get RDF for, "Automobiles"
|
|
14
|
+
term_predicate='http://www.w3.org/2004/02/skos/core#prefLabel' # the predicate which tells us when we have a term defined
|
|
15
|
+
|
|
16
|
+
results={} # the hash we will write with the terms and their LC URLs
|
|
17
|
+
|
|
18
|
+
RDF::Reader.open("#{automobile_term}.nt") do |reader|
|
|
19
|
+
reader.each_statement do |statement|
|
|
20
|
+
if statement.predicate.to_s.strip == term_predicate
|
|
21
|
+
results.merge!({statement.object.to_s=>statement.subject.to_s})
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
File.open(REVS_LC_TERMS_FILENAME, "wb") {|f| Marshal.dump(results, f)}
|
data/bin/run_all_tests
ADDED
data/config/boot.rb
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
register:
|
|
2
|
+
label: label
|
|
3
|
+
sourceid: sourceid
|
|
4
|
+
filename: filename
|
|
5
|
+
metadata:
|
|
6
|
+
marque: marque
|
|
7
|
+
model: model
|
|
8
|
+
people: people
|
|
9
|
+
entrant: entrant
|
|
10
|
+
photographer: photographer
|
|
11
|
+
current_owner: current_owner
|
|
12
|
+
venue: venue
|
|
13
|
+
track: track
|
|
14
|
+
event: event
|
|
15
|
+
location: location
|
|
16
|
+
state: state
|
|
17
|
+
country: country
|
|
18
|
+
city: city
|
|
19
|
+
date: date
|
|
20
|
+
year: year
|
|
21
|
+
description: description
|
|
22
|
+
model_year: model_year
|
|
23
|
+
group_or_class: group_or_class
|
|
24
|
+
race_data: race_data
|
|
25
|
+
metadata_sources: metadata_sources
|
|
26
|
+
vehicle_markings: vehicle_markings
|
|
27
|
+
inst_notes: inst_notes
|
|
28
|
+
prod_notes: prod_notes
|
|
29
|
+
has_more_metadata: has_more_metadata
|
|
30
|
+
hide: hide
|
|
31
|
+
format: format
|
|
32
|
+
collection_name: collection_name
|
|
33
|
+
|
|
34
|
+
|
|
Binary file
|
data/lib/revs-utils.rb
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
1
|
+
# encoding: UTF-8
|
|
2
|
+
|
|
3
|
+
require "revs-utils/version"
|
|
4
|
+
require "countries"
|
|
5
|
+
require 'active_support/core_ext/string'
|
|
6
|
+
require 'active_support/core_ext/hash'
|
|
7
|
+
require 'csv'
|
|
8
|
+
|
|
9
|
+
PROJECT_ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
REVS_LC_TERMS_FILENAME=File.join(PROJECT_ROOT,'files','revs-lc-marque-terms.obj')
|
|
13
|
+
REVS_MANIFEST_HEADERS_FILEPATH = File.join(PROJECT_ROOT,'config',"manifest_headers.yml")
|
|
14
|
+
REGISTER = "register"
|
|
15
|
+
METADATA = "metadata"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
module Revs
|
|
19
|
+
module Utils
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
# a hash of LC Subject Heading terms and their IDs for linking for "Automobiles" http://id.loc.gov/authorities/subjects/sh85010201.html
|
|
23
|
+
# this is cached and loaded from disk and deserialized back into a hash for performance reasons, then stored as a module
|
|
24
|
+
# level constant so it can be reused throughout the pre-assembly run as a constant
|
|
25
|
+
# This cached set of terms can be re-generated with "ruby bin/revs_lc_automobile_terms.rb"
|
|
26
|
+
AUTOMOBILE_LC_TERMS= File.open(REVS_LC_TERMS_FILENAME,'rb'){|io| Marshal.load(io)} if File.exists?(REVS_LC_TERMS_FILENAME)
|
|
27
|
+
REVS_MANIFEST_HEADERS_FILE = File.open(REVS_MANIFEST_HEADERS_FILEPATH)
|
|
28
|
+
REVS_MANIFEST_HEADERS = YAML.load( REVS_MANIFEST_HEADERS_FILE)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def get_manifest_section(section)
|
|
32
|
+
return REVS_MANIFEST_HEADERS[section]
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
def manifest_headers_file()
|
|
36
|
+
return REVS_MANIFEST_HEADERS_FILE
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def manifest_headers_path()
|
|
40
|
+
return MAINFEST_HEADERS_FILEPATH
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def manifest_register_section_name()
|
|
44
|
+
return REGISTER
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
def manifest_metadata_section_name()
|
|
48
|
+
return METADATA
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def read_csv_with_headers(file)
|
|
52
|
+
# load CSV into an array of hashes, allowing UTF-8 to pass through, deleting blank columns
|
|
53
|
+
#file_contents = IO.read(file).force_encoding("ISO-8859-1").encode("utf-8", replace: nil)
|
|
54
|
+
file_contents = IO.read(file)
|
|
55
|
+
csv = CSV.parse(file_contents, :headers => true)
|
|
56
|
+
return csv.map { |row| row.to_hash.with_indifferent_access }
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
#Pass this function a list of all CSVs containing metadata for files you are about to register and it will ensure each sourceid is unique
|
|
60
|
+
def unique_source_ids(file_paths)
|
|
61
|
+
files = Array.new
|
|
62
|
+
file_paths.each do |fp|
|
|
63
|
+
files << read_csv_with_headers(fp)
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
sources = Array.new
|
|
67
|
+
files.each do |file|
|
|
68
|
+
file.each do |row|
|
|
69
|
+
#Make sure the sourcid and filename are the same
|
|
70
|
+
fname = row[get_manifest_section(REGISTER)['filename']].chomp(File.extname(row[get_manifest_section(REGISTER)['filename']]))
|
|
71
|
+
return false if row[get_manifest_section(REGISTER)['sourceid']] != fname
|
|
72
|
+
sources << row[get_manifest_section(REGISTER)['sourceid']]
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
end
|
|
78
|
+
return sources.uniq.size == sources.size
|
|
79
|
+
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
#Pass this function a CSV file and it will return true if the proper headers are there and each entry has the required fields filled in
|
|
84
|
+
def valid_to_register(file_path)
|
|
85
|
+
|
|
86
|
+
file = read_csv_with_headers(file_path)
|
|
87
|
+
#Make sure all the required headers are there
|
|
88
|
+
return false if not get_manifest_section(REGISTER).values-file[0].keys == []
|
|
89
|
+
|
|
90
|
+
#Make sure all files have entries for those required headers
|
|
91
|
+
file.each do |row|
|
|
92
|
+
get_manifest_section(REGISTER).keys.each do |header| # label should be there as a column but does not always need a value
|
|
93
|
+
return false if header.downcase !='label' && row[header].blank? #Alternatively consider row[header].class != String or row[header].size <= 0
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
return true
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
#Pass this function a CSV file and it will return true if the proper headers are there and each entry has the required fields filled in.
|
|
100
|
+
def valid_for_metadata(file_path)
|
|
101
|
+
file = read_csv_with_headers(file_path)
|
|
102
|
+
file_headers=file[0].keys
|
|
103
|
+
#The file doesn't need to have all the metadata values, it just can't have headers that aren't used for metadata or registration
|
|
104
|
+
if file_headers.include?('date') && file_headers.include?('year') # can't have both date and year
|
|
105
|
+
return false
|
|
106
|
+
elsif file_headers.include?('location') && file_headers.include?('state') && file_headers.include?('city') && file_headers.include?('country') # can't have both location and the specific fields
|
|
107
|
+
return false
|
|
108
|
+
else
|
|
109
|
+
return file_headers-get_manifest_section(METADATA).values-get_manifest_section(REGISTER).values == []
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
def clean_collection_name(name)
|
|
114
|
+
return "" if name.blank? || name.nil?
|
|
115
|
+
name=name.to_s
|
|
116
|
+
name.gsub!(/\A(the )/i,'')
|
|
117
|
+
name.gsub!(/( of the revs institute)\z/i,'')
|
|
118
|
+
name.gsub!(/( of the revs institute for automotive research)\z/i,'')
|
|
119
|
+
name.gsub!(/( of the revs institute for automotive research, inc)\z/i,'')
|
|
120
|
+
name.gsub!(/( of the revs institute for automotive research, inc.)\z/i,'')
|
|
121
|
+
return name.strip
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
def clean_marque_name(name)
|
|
125
|
+
return "" if name.blank? || name.nil?
|
|
126
|
+
name=name.to_s
|
|
127
|
+
name.gsub!(/(automobiles)\z/i,'')
|
|
128
|
+
name.gsub!(/(automobile)\z/i,'')
|
|
129
|
+
return name.strip
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
def parse_location(row, location)
|
|
133
|
+
row[location].split(/[,|]/).reverse.each do |local|
|
|
134
|
+
country = revs_get_country(local)
|
|
135
|
+
city_state = revs_get_city_state(local)
|
|
136
|
+
row['country'] = country.strip if country
|
|
137
|
+
if city_state
|
|
138
|
+
row['state'] = revs_get_state_name(city_state[1].strip)
|
|
139
|
+
row['city'] = city_state[0].strip
|
|
140
|
+
end
|
|
141
|
+
if not city_state and not country
|
|
142
|
+
row['city_section'] = local
|
|
143
|
+
end
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
return row
|
|
147
|
+
end
|
|
148
|
+
|
|
149
|
+
def revs_check_format(format)
|
|
150
|
+
return revs_check_formats([format]).first
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
# check the incoming format and fix some common issues
|
|
154
|
+
def revs_check_formats(format)
|
|
155
|
+
known_fixes = {"black-and-white negative"=>"black-and-white negatives",
|
|
156
|
+
"color negative"=>"color negatives",
|
|
157
|
+
"slides/color transparency"=>"color transparencies",
|
|
158
|
+
"color negatives/slides"=>"color negatives",
|
|
159
|
+
"black-and-white negative strips"=>"black-and-white negatives",
|
|
160
|
+
"color transparency"=>"color transparencies",
|
|
161
|
+
"slide"=>"slides"
|
|
162
|
+
}
|
|
163
|
+
count = 0
|
|
164
|
+
format.each do |f|
|
|
165
|
+
format[count] = known_fixes[f.downcase] || f.downcase
|
|
166
|
+
count += 1
|
|
167
|
+
end
|
|
168
|
+
return format
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
# lookup the marque sent to see if it matches any known LC terms, trying a few varieties; returns a hash of the term and its ID if match is found, else returns false
|
|
172
|
+
def revs_lookup_marque(marque)
|
|
173
|
+
result=false
|
|
174
|
+
variants1=[marque,marque.capitalize,marque.singularize,marque.pluralize,marque.capitalize.singularize,marque.capitalize.pluralize]
|
|
175
|
+
variants2=[]
|
|
176
|
+
variants1.each do |name|
|
|
177
|
+
variants2 << "#{name} automobile"
|
|
178
|
+
variants2 << "#{name} automobiles"
|
|
179
|
+
end
|
|
180
|
+
(variants1+variants2).each do |variant|
|
|
181
|
+
lookup_term=AUTOMOBILE_LC_TERMS[variant]
|
|
182
|
+
if lookup_term
|
|
183
|
+
result={'url'=>lookup_term,'value'=>variant}
|
|
184
|
+
break
|
|
185
|
+
end
|
|
186
|
+
end
|
|
187
|
+
return result
|
|
188
|
+
end # revs_lookup_marque
|
|
189
|
+
|
|
190
|
+
# check if the string passed is a country name or code -- if so, return the country name, if not a recognized country, return false
|
|
191
|
+
def revs_get_country(name)
|
|
192
|
+
name='US' if name=='USA' # special case; USA is not recognized by the country gem, but US is
|
|
193
|
+
country=Country.find_country_by_name(name.strip) # find it by name
|
|
194
|
+
code=Country.new(name.strip) # find it by code
|
|
195
|
+
if country.nil? && code.data.nil?
|
|
196
|
+
return false
|
|
197
|
+
else
|
|
198
|
+
return (code.data.nil? ? country.name : code.name)
|
|
199
|
+
end
|
|
200
|
+
end # revs_get_country
|
|
201
|
+
|
|
202
|
+
# parse a string like this: "San Mateo (Calif.)" to try and figure out if there is any state in there; if found, return the city and state as an array, if none found, return false
|
|
203
|
+
def revs_get_city_state(name)
|
|
204
|
+
state_match=name.match(/[(]\S+[)]/)
|
|
205
|
+
if state_match.nil?
|
|
206
|
+
return false
|
|
207
|
+
else
|
|
208
|
+
first_match=state_match[0]
|
|
209
|
+
state=first_match.gsub(/[()]/,'').strip # remove parens and strip
|
|
210
|
+
city=name.gsub(first_match,'').strip # remove state name from input string and strip
|
|
211
|
+
return [city,state]
|
|
212
|
+
end
|
|
213
|
+
end # revs_get_city_state
|
|
214
|
+
|
|
215
|
+
# given an abbreviated state name (e.g. "Calif." or "CA") return the full state name (e.g. "California")
|
|
216
|
+
def revs_get_state_name(name)
|
|
217
|
+
test_name=name.gsub('.','').strip.downcase
|
|
218
|
+
us=Country.new('US')
|
|
219
|
+
us.states.each do |key,value|
|
|
220
|
+
if value['name'].downcase.start_with?(test_name) || key.downcase == test_name
|
|
221
|
+
return value['name']
|
|
222
|
+
break
|
|
223
|
+
end
|
|
224
|
+
end
|
|
225
|
+
return name
|
|
226
|
+
end # revs_get_state_name
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
# tell us if the string passed is a valid year
|
|
230
|
+
def is_valid_year?(date_string,starting_year=1800)
|
|
231
|
+
date_string.to_s.strip.scan(/\D/).empty? and (starting_year..Date.today.year).include?(date_string.to_i)
|
|
232
|
+
end
|
|
233
|
+
|
|
234
|
+
# tell us if the string passed is in is a full date of the format M/D/YYYY, and returns the date object if it is valid
|
|
235
|
+
def get_full_date(date_string)
|
|
236
|
+
begin
|
|
237
|
+
date_obj=Date.strptime(date_string.gsub('-','/').delete(' '), '%m/%d/%Y')
|
|
238
|
+
return (is_valid_year?(date_obj.year.to_s) ? date_obj : false)
|
|
239
|
+
rescue
|
|
240
|
+
false
|
|
241
|
+
end
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
# given a string with dates separated by commas, split into an array
|
|
245
|
+
# also, parse dates like "195x" and "1961-62" into all dates in that range
|
|
246
|
+
def parse_years(date_string)
|
|
247
|
+
date_string.delete!(' ')
|
|
248
|
+
if date_string.include?('|')
|
|
249
|
+
result=date_string.split('|')
|
|
250
|
+
else
|
|
251
|
+
result=date_string.split(',')
|
|
252
|
+
end
|
|
253
|
+
years_to_add=[]
|
|
254
|
+
result.each do |year|
|
|
255
|
+
|
|
256
|
+
if year.scan(/[1-2][0-9][0-9][0-9][-][0-9][0-9]/).size > 0 # if we have a year that looks like "1961-62" or "1961-73", lets deal with it turning it into [1961,1962] or [1961,1962,1963,1964,1965,1966,1967...etc]
|
|
257
|
+
start_year=year[2..3]
|
|
258
|
+
end_year=year[5..6]
|
|
259
|
+
stem=year[0..1]
|
|
260
|
+
for n in start_year..end_year
|
|
261
|
+
years_to_add << "#{stem}#{n}"
|
|
262
|
+
end
|
|
263
|
+
elsif year.scan(/[1-2][0-9][0-9][0-9][-][1-9]/).size > 0 # if we have a year that lloks like "1961-2" or "1961-3", lets deal with it turning it into [1961,1962] or [1961,1962,1963]
|
|
264
|
+
start_year=year[3..3]
|
|
265
|
+
end_year=year[5..5]
|
|
266
|
+
stem=year[0..2]
|
|
267
|
+
for n in start_year..end_year
|
|
268
|
+
years_to_add << "#{stem}#{n}"
|
|
269
|
+
end
|
|
270
|
+
end
|
|
271
|
+
|
|
272
|
+
if year.scan(/[1-2][0-9][0-9][0](('s)|s)/).size > 0 || year.scan(/[1-2][0-9][0-9][x_]/).size > 0 # if we have a year that looks like "195x", let's deal with it by turning it into [1950,1951,1952..etc]
|
|
273
|
+
result.delete(year) # first delete the year itself from the list
|
|
274
|
+
stem=year[0..2] # next get the stem, and expand into the whole decade
|
|
275
|
+
%w{0 1 2 3 4 5 6 7 8 9}.each {|n| years_to_add << "#{stem}#{n}"} # add each year in that decade to the output array
|
|
276
|
+
end
|
|
277
|
+
|
|
278
|
+
if year.scan(/[1-2][0-9][0-9][0-9][-][1-2][0-9][0-9][0-9]/).size > 0 # if we have a year that lloks like "1961-1962" or "1930-1955", lets deal with it turning it into [1961,1962] or [1961,1962,1963]
|
|
279
|
+
start_year=year[0..3]
|
|
280
|
+
end_year=year[5..8]
|
|
281
|
+
if end_year.to_i - start_year.to_i < 10 # let's only do the expansion if we don't have some really large date range, like "1930-1985" .. only ranges less than 9 years will be split into separate years
|
|
282
|
+
for n in start_year..end_year
|
|
283
|
+
years_to_add << n
|
|
284
|
+
end
|
|
285
|
+
end
|
|
286
|
+
end
|
|
287
|
+
|
|
288
|
+
end
|
|
289
|
+
|
|
290
|
+
result = result.uniq
|
|
291
|
+
result.each do |year|
|
|
292
|
+
result.delete(year) if not year.scan(/\A[1-2][0-9][0-9][0-9]\z/).size == 1 #If it doesn't fit the format #### remove it
|
|
293
|
+
end
|
|
294
|
+
return result.concat(years_to_add).uniq.sort
|
|
295
|
+
|
|
296
|
+
end
|
|
297
|
+
|
|
298
|
+
end
|
|
299
|
+
end
|
data/revs-utils.gemspec
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
|
+
require 'revs-utils/version'
|
|
5
|
+
|
|
6
|
+
Gem::Specification.new do |gem|
|
|
7
|
+
gem.name = "revs-utils"
|
|
8
|
+
gem.version = Revs::Utils::VERSION
|
|
9
|
+
gem.authors = ["Peter Mangiafico"]
|
|
10
|
+
gem.email = ["pmangiafico@stanford.edu"]
|
|
11
|
+
gem.description = "Shared methods and functions used by revs-indexer, pre-assembly and bulk metadata loading code."
|
|
12
|
+
gem.summary = "Shared methods and functions used by revs-indexer, pre-assembly and bulk metadata loading code."
|
|
13
|
+
gem.homepage = ""
|
|
14
|
+
|
|
15
|
+
gem.files = `git ls-files`.split($/)
|
|
16
|
+
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
|
17
|
+
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
|
18
|
+
gem.require_paths = ["lib"]
|
|
19
|
+
|
|
20
|
+
gem.add_dependency "countries", "0.9.2"
|
|
21
|
+
gem.add_dependency "rdf"
|
|
22
|
+
gem.add_dependency "actionpack", '~> 3'
|
|
23
|
+
|
|
24
|
+
gem.add_development_dependency "rspec", "~> 2.6"
|
|
25
|
+
gem.add_development_dependency "lyberteam-gems-devel", "> 1.0.0"
|
|
26
|
+
gem.add_development_dependency "yard"
|
|
27
|
+
|
|
28
|
+
end
|