revs-utils 1.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/.gitignore +10 -0
- data/.rvmrc.example +1 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +75 -0
- data/LICENSE +14 -0
- data/README.rdoc +54 -0
- data/Rakefile +11 -0
- data/bin/console +16 -0
- data/bin/revs_lc_automobile_terms.rb +26 -0
- data/bin/run_all_tests +3 -0
- data/config/boot.rb +6 -0
- data/config/manifest_headers.yml +34 -0
- data/files/revs-lc-marque-terms.obj +0 -0
- data/lib/revs-utils/version.rb +5 -0
- data/lib/revs-utils.rb +299 -0
- data/revs-utils.gemspec +28 -0
- data/spec/revs-utils_spec.rb +253 -0
- data/spec/sample-csv-files/bad-source_id.csv +3 -0
- data/spec/sample-csv-files/blank-label.csv +3 -0
- data/spec/sample-csv-files/blank-sourceid.csv +216 -0
- data/spec/sample-csv-files/clean-sheet.csv +216 -0
- data/spec/sample-csv-files/date-and-year.csv +2 -0
- data/spec/sample-csv-files/date-instead-of-year.csv +216 -0
- data/spec/sample-csv-files/location-and-other-fields.csv +2 -0
- data/spec/sample-csv-files/malformed-sourceid.csv +216 -0
- data/spec/sample-csv-files/no-blank-label.csv +3 -0
- data/spec/sample-csv-files/no-label-column.csv +3 -0
- data/spec/sample-csv-files/no-sourceid.csv +216 -0
- data/spec/spec_helper.rb +8 -0
- metadata +175 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
NGY1MmUzYTdjMjU1OTI2MGU5NzA0YTIzZDVkOGY0Nzg1OWYwMzZjOA==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
ZjQ0OWRiOTlmZGQ2YzY1YjU4Zjc1NWU3MWIyODU4NjRiMTg4Nzg5MQ==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
Y2RkOTIyYTVhNmRlODRmYWViNzMwMzkyNGM3MjVmYTc1MzNjYWMyMDgyZDk0
|
10
|
+
OTNiYjE2NjQ2YzJiMGY2NjA0OGNjY2UzZDdhZTc0ZWRhMjhkNWRlYmY5ZTdi
|
11
|
+
M2I5NzJhMjY2OTMyYTg3NDYyZTFiYWQzYTQ3MjhiZDg5NjMxYjA=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
MGNhMDEwMTM5Y2Q3YmNiODcxMTU5NzI3MjU5MTZkYjY4YWI1YmZmNWZhMzA0
|
14
|
+
MzhjMWY0ZjExZjg3MTdlNTBiMjJjMGVjYjhhZjk5MGIwOGYxMDBhOWU1MzEz
|
15
|
+
ZDUwNWNmOGU4YzEyNmI0NjY0MzM4OTEwYzgzNjJiOTVhMTQzMGI=
|
data/.gitignore
ADDED
data/.rvmrc.example
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
rvm use 1.9.3@assembly-image --create
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,75 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
revs-utils (1.0.8)
|
5
|
+
actionpack (~> 3)
|
6
|
+
countries (= 0.9.2)
|
7
|
+
rdf
|
8
|
+
|
9
|
+
GEM
|
10
|
+
remote: http://rubygems.org/
|
11
|
+
remote: http://sul-gems.stanford.edu/
|
12
|
+
specs:
|
13
|
+
actionpack (3.2.19)
|
14
|
+
activemodel (= 3.2.19)
|
15
|
+
activesupport (= 3.2.19)
|
16
|
+
builder (~> 3.0.0)
|
17
|
+
erubis (~> 2.7.0)
|
18
|
+
journey (~> 1.0.4)
|
19
|
+
rack (~> 1.4.5)
|
20
|
+
rack-cache (~> 1.2)
|
21
|
+
rack-test (~> 0.6.1)
|
22
|
+
sprockets (~> 2.2.1)
|
23
|
+
activemodel (3.2.19)
|
24
|
+
activesupport (= 3.2.19)
|
25
|
+
builder (~> 3.0.0)
|
26
|
+
activesupport (3.2.19)
|
27
|
+
i18n (~> 0.6, >= 0.6.4)
|
28
|
+
multi_json (~> 1.0)
|
29
|
+
builder (3.0.4)
|
30
|
+
countries (0.9.2)
|
31
|
+
currencies (>= 0.4.0)
|
32
|
+
currencies (0.4.2)
|
33
|
+
diff-lcs (1.2.4)
|
34
|
+
erubis (2.7.0)
|
35
|
+
hike (1.2.3)
|
36
|
+
i18n (0.6.11)
|
37
|
+
journey (1.0.4)
|
38
|
+
lyberteam-gems-devel (1.0.1)
|
39
|
+
rake (>= 0.8.7)
|
40
|
+
rest-client
|
41
|
+
mime-types (2.0)
|
42
|
+
multi_json (1.10.1)
|
43
|
+
rack (1.4.5)
|
44
|
+
rack-cache (1.2)
|
45
|
+
rack (>= 0.4)
|
46
|
+
rack-test (0.6.2)
|
47
|
+
rack (>= 1.0)
|
48
|
+
rake (10.1.0)
|
49
|
+
rdf (1.1.4.1)
|
50
|
+
rest-client (1.6.7)
|
51
|
+
mime-types (>= 1.16)
|
52
|
+
rspec (2.14.1)
|
53
|
+
rspec-core (~> 2.14.0)
|
54
|
+
rspec-expectations (~> 2.14.0)
|
55
|
+
rspec-mocks (~> 2.14.0)
|
56
|
+
rspec-core (2.14.7)
|
57
|
+
rspec-expectations (2.14.3)
|
58
|
+
diff-lcs (>= 1.1.3, < 2.0)
|
59
|
+
rspec-mocks (2.14.4)
|
60
|
+
sprockets (2.2.2)
|
61
|
+
hike (~> 1.2)
|
62
|
+
multi_json (~> 1.0)
|
63
|
+
rack (~> 1.0)
|
64
|
+
tilt (~> 1.1, != 1.3.0)
|
65
|
+
tilt (1.4.1)
|
66
|
+
yard (0.8.7.3)
|
67
|
+
|
68
|
+
PLATFORMS
|
69
|
+
ruby
|
70
|
+
|
71
|
+
DEPENDENCIES
|
72
|
+
lyberteam-gems-devel (> 1.0.0)
|
73
|
+
revs-utils!
|
74
|
+
rspec (~> 2.6)
|
75
|
+
yard
|
data/LICENSE
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
#############################################################################################################
|
2
|
+
# Copyright (c) 2013-2014 by The Board of Trustees of the Leland Stanford Junior University. All rights reserved.
|
3
|
+
#
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
5
|
+
# you may not use this file except in compliance with the License.
|
6
|
+
# You may obtain a copy of the License at
|
7
|
+
#
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
9
|
+
#
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
13
|
+
# See the License for the specific language governing permissions and
|
14
|
+
# limitations under the License.
|
data/README.rdoc
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
= Revs::Utils
|
2
|
+
|
3
|
+
Shared methods and functions used by revs-indexer, pre-assembly and bulk metadata loading code.
|
4
|
+
|
5
|
+
== Releases
|
6
|
+
- <b>0.0.1</b> Initial release
|
7
|
+
- <b>0.0.3</b> Add collection name cleaning method
|
8
|
+
- <b>0.0.5</b> Lock down the version of countries gem to avoid problems
|
9
|
+
- <b>0.0.5</b> Lock down the version of countries gem to avoid problems
|
10
|
+
- <b>0.0.7</b> Don't use this version; it incorrectly uses Dir.pwd to find the root directory for loading assets.
|
11
|
+
- <b>0.0.8</b> Add in helper functions to check the .csv files for registration and metadata updates
|
12
|
+
- <b>0.0.9</b> Add in a clean marque function
|
13
|
+
- <b>1.0.0</b> Make format fixing case insensitive; add a method for loading CSV files with UTF-8 compliance
|
14
|
+
- <b>1.0.1</b> Remove a couple methods from Revs code and update the methods to make them consistent in gem
|
15
|
+
- <b>1.0.2 and 1.0.3</b> Make invalid two digit years in dates return as false
|
16
|
+
- <b>1.0.4</b> Revs-Utils now assumes .csv files are in UTF-8 format instead of Latin-1
|
17
|
+
- <b>1.0.5</b> Parse location using commas as well as pipes
|
18
|
+
- <b>1.0.6</b> Add some more conditions to CSV header checks
|
19
|
+
- <b>1.0.7</b> Label column needs to be there but does not need to have a value to register
|
20
|
+
- <b>1.0.8</b> Update clean_collection_name method to deal with other possible names
|
21
|
+
|
22
|
+
== Running tests
|
23
|
+
|
24
|
+
rake spec
|
25
|
+
|
26
|
+
== Release the gem to the gemserver
|
27
|
+
|
28
|
+
1. Bump the version number in lib/revs-utils/version.rb
|
29
|
+
2. Add to the release notes in this readme.
|
30
|
+
3. Ensure the tests pass.
|
31
|
+
4. Git commit and push
|
32
|
+
5. rake dlss_release
|
33
|
+
|
34
|
+
== Installation
|
35
|
+
|
36
|
+
Add this line to your application's Gemfile:
|
37
|
+
|
38
|
+
gem 'revs-utils'
|
39
|
+
|
40
|
+
And then execute:
|
41
|
+
|
42
|
+
$ bundle
|
43
|
+
|
44
|
+
Or install it yourself as:
|
45
|
+
|
46
|
+
$ gem install revs-utils
|
47
|
+
|
48
|
+
== Updating LC Automobile Terms in Gem
|
49
|
+
|
50
|
+
1. Check out Gem code.
|
51
|
+
2. CD into Gem directory
|
52
|
+
3. ruby bin/revs_lc_automobile_terms.rb
|
53
|
+
4. Update Gem in git, bump version number and rake dlss_release
|
54
|
+
|
data/Rakefile
ADDED
data/bin/console
ADDED
data/bin/revs_lc_automobile_terms.rb
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
# This script generates a hash of the more specific LC terms found under the top-level term defined below (currently "Automobiles").
# It then dumps this hash to a file, so it can be loaded with each pre-assembly run and used when generating Revs Descriptive Metadata.
# The script can be run periodically to refresh the list of terms. It writes to the path held in REVS_LC_TERMS_FILENAME,
# and the regenerated file can then be updated in git.

# Peter Mangiafico
# May 16, 2013

require File.expand_path(File.dirname(__FILE__) + '/../config/boot')
require 'rdf'
require 'rdf/ntriples'

automobile_term = 'http://id.loc.gov/authorities/subjects/sh85010201' # the top-level LC term to get RDF for, "Automobiles"
term_predicate = 'http://www.w3.org/2004/02/skos/core#prefLabel' # the predicate which tells us when we have a term defined

results = {} # the hash we will write with the terms and their LC URLs

# read the N-Triples for the top-level term and keep every statement that defines a preferred label
RDF::Reader.open("#{automobile_term}.nt") do |reader|
  reader.each_statement do |statement|
    next unless statement.predicate.to_s.strip == term_predicate
    results[statement.object.to_s] = statement.subject.to_s # label => URL
  end
end

# serialize the hash so it can be loaded back with Marshal.load
File.open(REVS_LC_TERMS_FILENAME, "wb") { |f| Marshal.dump(results, f) }
|
data/bin/run_all_tests
ADDED
data/config/boot.rb
ADDED
data/config/manifest_headers.yml
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
register:
|
2
|
+
label: label
|
3
|
+
sourceid: sourceid
|
4
|
+
filename: filename
|
5
|
+
metadata:
|
6
|
+
marque: marque
|
7
|
+
model: model
|
8
|
+
people: people
|
9
|
+
entrant: entrant
|
10
|
+
photographer: photographer
|
11
|
+
current_owner: current_owner
|
12
|
+
venue: venue
|
13
|
+
track: track
|
14
|
+
event: event
|
15
|
+
location: location
|
16
|
+
state: state
|
17
|
+
country: country
|
18
|
+
city: city
|
19
|
+
date: date
|
20
|
+
year: year
|
21
|
+
description: description
|
22
|
+
model_year: model_year
|
23
|
+
group_or_class: group_or_class
|
24
|
+
race_data: race_data
|
25
|
+
metadata_sources: metadata_sources
|
26
|
+
vehicle_markings: vehicle_markings
|
27
|
+
inst_notes: inst_notes
|
28
|
+
prod_notes: prod_notes
|
29
|
+
has_more_metadata: has_more_metadata
|
30
|
+
hide: hide
|
31
|
+
format: format
|
32
|
+
collection_name: collection_name
|
33
|
+
|
34
|
+
|
Binary file
|
data/lib/revs-utils.rb
ADDED
@@ -0,0 +1,299 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
require "revs-utils/version"
|
4
|
+
require "countries"
|
5
|
+
require 'active_support/core_ext/string'
|
6
|
+
require 'active_support/core_ext/hash'
|
7
|
+
require 'csv'
|
8
|
+
|
9
|
+
# Root directory of the gem (the parent of the directory holding this file); used to locate bundled files.
PROJECT_ROOT = File.expand_path('..', File.dirname(__FILE__)).freeze

# Path of the serialized hash of LC marque terms.
REVS_LC_TERMS_FILENAME = File.join(PROJECT_ROOT, 'files', 'revs-lc-marque-terms.obj').freeze
# Path of the YAML file that lists the manifest headers.
REVS_MANIFEST_HEADERS_FILEPATH = File.join(PROJECT_ROOT, 'config', 'manifest_headers.yml').freeze
# Names of the sections looked up in the manifest headers file.
REGISTER = 'register'.freeze
METADATA = 'metadata'.freeze
|
16
|
+
|
17
|
+
|
18
|
+
module Revs
|
19
|
+
module Utils
|
20
|
+
|
21
|
+
|
22
|
+
# A hash of LC Subject Heading terms and their IDs for linking for "Automobiles" http://id.loc.gov/authorities/subjects/sh85010201.html
# This is cached and loaded from disk and deserialized back into a hash for performance reasons, then stored as a module
# level constant so it can be reused throughout the pre-assembly run.
# This cached set of terms can be re-generated with "ruby bin/revs_lc_automobile_terms.rb"
# File.exist? is used because File.exists? was removed in Ruby 3.2. When the cache file is missing the constant is an
# empty hash, so lookups simply find nothing instead of raising NameError on an undefined constant.
AUTOMOBILE_LC_TERMS = File.exist?(REVS_LC_TERMS_FILENAME) ? File.open(REVS_LC_TERMS_FILENAME, 'rb') { |io| Marshal.load(io) } : {}
# Open handle on the manifest headers file; it stays open because manifest_headers_file hands it out.
REVS_MANIFEST_HEADERS_FILE = File.open(REVS_MANIFEST_HEADERS_FILEPATH)
# The parsed manifest headers, looked up by section name.
REVS_MANIFEST_HEADERS = YAML.load(REVS_MANIFEST_HEADERS_FILE)
|
29
|
+
|
30
|
+
|
31
|
+
# Returns the headers configured for the given section of the manifest headers file (e.g. REGISTER or METADATA).
def get_manifest_section(section)
  REVS_MANIFEST_HEADERS[section]
end

# Returns the File object opened on the manifest headers file.
def manifest_headers_file
  REVS_MANIFEST_HEADERS_FILE
end

# Returns the path of the manifest headers file.
# This used to reference the misspelled (and undefined) constant MAINFEST_HEADERS_FILEPATH, which raised NameError.
def manifest_headers_path
  REVS_MANIFEST_HEADERS_FILEPATH
end

# Returns the name of the registration section of the manifest headers file.
def manifest_register_section_name
  REGISTER
end

# Returns the name of the metadata section of the manifest headers file.
def manifest_metadata_section_name
  METADATA
end
|
50
|
+
|
51
|
+
# Loads a CSV file whose first row holds the headers.
# file - path of the CSV file.
# Returns an array with one hash (with indifferent access) per data row, keyed by header.
def read_csv_with_headers(file)
  # File.read instead of IO.read: IO.read treats a path beginning with "|" as a command to run
  file_contents = File.read(file)
  CSV.parse(file_contents, :headers => true).map { |row| row.to_hash.with_indifferent_access }
end
|
58
|
+
|
59
|
+
#Pass this function a list of all CSVs containing metadata for files you are about to register and it will ensure each sourceid is unique
|
60
|
+
# Checks the sourceids found in a list of CSV files.
# file_paths - paths of the CSV files to check.
# Returns false as soon as a row's sourceid differs from its filename without the extension (a row with no
# filename fails this check instead of raising); otherwise returns true only when no sourceid is repeated.
def unique_source_ids(file_paths)
  register_headers = get_manifest_section(REGISTER)
  filename_column = register_headers['filename']
  sourceid_column = register_headers['sourceid']

  files = file_paths.map { |file_path| read_csv_with_headers(file_path) }

  sources = []
  files.each do |file|
    file.each do |row|
      filename = row[filename_column].to_s
      # make sure the sourceid and the filename (minus its extension) are the same
      return false if row[sourceid_column] != filename.chomp(File.extname(filename))
      sources << row[sourceid_column]
    end
  end
  sources.uniq.size == sources.size
end
|
81
|
+
|
82
|
+
|
83
|
+
#Pass this function a CSV file and it will return true if the proper headers are there and each entry has the required fields filled in
|
84
|
+
# Checks whether a CSV file can be used for registration.
# file_path - path of the CSV file.
# Returns true when every header of the REGISTER manifest section is present and every row has a value for each
# of them (the label column must exist but may be blank). Returns false otherwise, including when the file has
# no data rows (there is then no row to read the headers from).
def valid_to_register(file_path)
  rows = read_csv_with_headers(file_path)
  return false if rows.empty?

  required = get_manifest_section(REGISTER)
  # make sure all the required headers are there
  return false unless (required.values - rows.first.keys).empty?

  # make sure all rows have entries for those required headers
  rows.each do |row|
    required.each do |name, column|
      next if name.downcase == 'label' # label should be there as a column but does not always need a value
      return false if row[column].blank?
    end
  end
  true
end
|
98
|
+
|
99
|
+
#Pass this function a CSV file and it will return true if the proper headers are there and each entry has the required fields filled in.
|
100
|
+
# Checks whether a CSV file can be used for a metadata update.
# file_path - path of the CSV file.
# The file doesn't need to have all the metadata columns; it just can't have headers that aren't used for metadata
# or registration. Returns false when the file has no data rows (there is then no row to read the headers from).
def valid_for_metadata(file_path)
  rows = read_csv_with_headers(file_path)
  return false if rows.empty?

  file_headers = rows.first.keys
  # can't have both date and year
  return false if file_headers.include?('date') && file_headers.include?('year')
  # can't have both location and the specific fields
  # NOTE(review): this only rejects a file that has location AND all of state, city and country -- confirm that a
  # file with location plus just one of the specific fields is really meant to pass
  return false if %w[location state city country].all? { |header| file_headers.include?(header) }

  (file_headers - get_manifest_section(METADATA).values - get_manifest_section(REGISTER).values).empty?
end
|
112
|
+
|
113
|
+
# Strips a leading "The " and a trailing " of the Revs Institute ..." (in any of its known spellings) from a
# collection name, ignoring case.
# name - the collection name; blank values give "".
# Returns a new, stripped string. The argument itself is never modified, so frozen strings are fine.
def clean_collection_name(name)
  return "" if name.blank?

  cleaned = name.to_s.sub(/\A(the )/i, '')
  [
    /( of the revs institute)\z/i,
    /( of the revs institute for automotive research)\z/i,
    /( of the revs institute for automotive research, inc)\z/i,
    /( of the revs institute for automotive research, inc\.)\z/i # the dot is escaped so only a literal "." matches
  ].each { |suffix| cleaned = cleaned.sub(suffix, '') }
  cleaned.strip
end
|
123
|
+
|
124
|
+
# Strips a trailing "automobiles" and then a trailing "automobile" from a marque name, ignoring case.
# name - the marque name; blank values give "".
# Returns a new, stripped string. The argument itself is never modified, so frozen strings are fine.
def clean_marque_name(name)
  return "" if name.blank?

  name.to_s.sub(/(automobiles)\z/i, '').sub(/(automobile)\z/i, '').strip
end
|
131
|
+
|
132
|
+
# Splits the location value of a row on commas and pipes and, going through the pieces from last to first, fills
# in the row's 'country', 'state', 'city' and 'city_section' entries.
# row      - hash-like row; it is updated in place.
# location - key under which the row holds the location string; a missing value leaves the row untouched.
# Returns the row.
def parse_location(row, location)
  row[location].to_s.split(/[,|]/).reverse_each do |local|
    country = revs_get_country(local)
    city_state = revs_get_city_state(local)
    row['country'] = country.strip if country
    if city_state
      row['state'] = revs_get_state_name(city_state[1].strip)
      row['city'] = city_state[0].strip
    elsif !country
      # neither a country nor a "city (state)" pair, so keep the piece as the city section
      row['city_section'] = local
    end
  end
  row
end
|
148
|
+
|
149
|
+
# Checks a single format value and fixes some common issues.
# Returns the corrected, downcased format.
def revs_check_format(format)
  revs_check_formats([format]).first
end

# Checks a list of incoming formats and fixes some common issues.
# format - array of format strings; it is updated in place.
# Returns the same array, with every entry downcased and known variants replaced by their corrected form.
def revs_check_formats(format)
  known_fixes = {
    "black-and-white negative" => "black-and-white negatives",
    "color negative" => "color negatives",
    "slides/color transparency" => "color transparencies",
    "color negatives/slides" => "color negatives",
    "black-and-white negative strips" => "black-and-white negatives",
    "color transparency" => "color transparencies",
    "slide" => "slides"
  }
  format.map! { |f| known_fixes[f.downcase] || f.downcase }
end
|
170
|
+
|
171
|
+
# lookup the marque sent to see if it matches any known LC terms, trying a few varieties; returns a hash of the term and its ID if match is found, else returns false
|
172
|
+
# Looks up a marque in AUTOMOBILE_LC_TERMS, trying a few varieties of the name (capitalized, singular, plural, and
# each of those followed by " automobile" / " automobiles").
# marque - the marque name.
# Returns {'url' => <term ID>, 'value' => <matching variant>} for the first variant found, else false. Also
# returns false when the marque is nil/empty or when the terms hash is not available.
def revs_lookup_marque(marque)
  return false unless defined?(AUTOMOBILE_LC_TERMS) && AUTOMOBILE_LC_TERMS

  name = marque.to_s
  return false if name.empty?

  plain_variants = [name, name.capitalize, name.singularize, name.pluralize,
                    name.capitalize.singularize, name.capitalize.pluralize]
  suffixed_variants = plain_variants.flat_map { |variant| ["#{variant} automobile", "#{variant} automobiles"] }

  # the plain names are tried before the suffixed ones; the first hit wins
  match = (plain_variants + suffixed_variants).find { |variant| AUTOMOBILE_LC_TERMS[variant] }
  match ? {'url' => AUTOMOBILE_LC_TERMS[match], 'value' => match} : false
end # revs_lookup_marque
|
189
|
+
|
190
|
+
# check if the string passed is a country name or code -- if so, return the country name, if not a recognized country, return false
|
191
|
+
# Checks whether the string passed is a country name or code.
# name - candidate country name or code; surrounding whitespace is ignored.
# Returns the country name if recognized, else false (nil and blank input give false).
def revs_get_country(name)
  lookup = name.to_s.strip
  return false if lookup.empty?

  # special case; USA is not recognized by the country gem, but US is. The comparison is done on the stripped
  # value so that " USA" (e.g. a piece of a split location) is recognized too.
  lookup = 'US' if lookup == 'USA'
  by_name = Country.find_country_by_name(lookup) # find it by name
  by_code = Country.new(lookup) # find it by code
  return false if by_name.nil? && by_code.data.nil?

  by_code.data.nil? ? by_name.name : by_code.name
end # revs_get_country
|
201
|
+
|
202
|
+
# parse a string like this: "San Mateo (Calif.)" to try and figure out if there is any state in there; if found, return the city and state as an array, if none found, return false
|
203
|
+
# Parses a string like "San Mateo (Calif.)" to find a state given in parentheses (without whitespace inside them).
# name - the string to parse.
# Returns [city, state] when a parenthesized part is found, else false.
def revs_get_city_state(name)
  state_match = name.match(/[(]\S+[)]/)
  return false if state_match.nil?

  parenthesized = state_match[0]
  state = parenthesized.gsub(/[()]/, '').strip # remove parens and strip
  city = name.gsub(parenthesized, '').strip # remove state name from input string and strip
  [city, state]
end # revs_get_city_state
|
214
|
+
|
215
|
+
# given an abbreviated state name (e.g. "Calif." or "CA") return the full state name (e.g. "California")
|
216
|
+
# Given an abbreviated US state name (e.g. "Calif." or "CA"), returns the full state name (e.g. "California").
# name - the abbreviation or code; dots, surrounding whitespace and case are ignored when matching.
# Returns the name of the first state whose name starts with the input or whose code equals it; returns the
# input unchanged when nothing matches.
def revs_get_state_name(name)
  test_name = name.gsub('.', '').strip.downcase
  # an empty test string is a prefix of every state name, so it would wrongly match the first state
  return name if test_name.empty?

  Country.new('US').states.each do |key, value|
    return value['name'] if value['name'].downcase.start_with?(test_name) || key.downcase == test_name
  end
  name
end # revs_get_state_name
|
227
|
+
|
228
|
+
|
229
|
+
# tell us if the string passed is a valid year
|
230
|
+
# Tells us if the value passed is a valid year.
# date_string   - the value to check (surrounding whitespace is ignored).
# starting_year - earliest year accepted (default 1800); the latest accepted year is the current one.
# Returns true when the value consists only of digits and lies within that range, else false.
def is_valid_year?(date_string, starting_year = 1800)
  text = date_string.to_s.strip
  return false unless text =~ /\A\d+\z/ # digits only; an empty string is never a year

  year = text.to_i
  year >= starting_year && year <= Date.today.year
end
|
233
|
+
|
234
|
+
# tell us if the string passed is in is a full date of the format M/D/YYYY, and returns the date object if it is valid
|
235
|
+
# Tells us if the string passed is a full date of the format M/D/YYYY (dashes are accepted in place of slashes
# and spaces are ignored).
# date_string - the string to parse.
# Returns the Date object if the string parses and its year passes is_valid_year?, else false.
def get_full_date(date_string)
  date_obj = Date.strptime(date_string.to_s.gsub('-', '/').delete(' '), '%m/%d/%Y')
  is_valid_year?(date_obj.year.to_s) ? date_obj : false
rescue ArgumentError
  # Date.strptime could not parse the string; other errors are no longer silently swallowed
  false
end
|
243
|
+
|
244
|
+
# given a string with dates separated by commas, split into an array
|
245
|
+
# also, parse dates like "195x" and "1961-62" into all dates in that range
|
246
|
+
# Given a string with years separated by pipes (or by commas when there is no pipe), returns them as an array.
# Years like "195x" and ranges like "1961-62" are expanded into all the years they cover.
# date_string - the string to parse; it is not modified (spaces are removed from a copy).
# Returns a sorted array of unique four-digit year strings; entries that are neither a plain year nor one of the
# recognized range/decade forms are dropped.
def parse_years(date_string)
  cleaned = date_string.to_s.delete(' ')
  entries = cleaned.include?('|') ? cleaned.split('|') : cleaned.split(',')

  years = []
  entries.each do |entry|
    # The full range is tested first: "2001-2005" also starts with something that looks like the short range
    # "2001-20", and treating it as such would expand it to 2001..2020.
    if entry =~ /[1-2][0-9][0-9][0-9][-][1-2][0-9][0-9][0-9]/ # "1961-1962" becomes [1961,1962]
      start_year = entry[0..3]
      end_year = entry[5..8]
      # only expand if we don't have some really large date range, like "1930-1985"
      years.concat((start_year..end_year).to_a) if end_year.to_i - start_year.to_i < 10
    elsif entry =~ /[1-2][0-9][0-9][0-9][-][0-9][0-9]/ # "1961-62" becomes [1961,1962]
      stem = entry[0..1]
      (entry[2..3]..entry[5..6]).each { |n| years << "#{stem}#{n}" }
    elsif entry =~ /[1-2][0-9][0-9][0-9][-][1-9]/ # "1961-3" becomes [1961,1962,1963]
      stem = entry[0..2]
      (entry[3..3]..entry[5..5]).each { |n| years << "#{stem}#{n}" }
    end

    # "195x", "195_", "1950s" or "1950's" becomes the whole decade
    if entry =~ /[1-2][0-9][0-9][0](('s)|s)/ || entry =~ /[1-2][0-9][0-9][x_]/
      stem = entry[0..2]
      ('0'..'9').each { |n| years << "#{stem}#{n}" }
    end

    # plain years are kept as they are; anything else that didn't fit a form above is dropped
    years << entry if entry =~ /\A[1-2][0-9][0-9][0-9]\z/
  end

  years.uniq.sort
end
|
297
|
+
|
298
|
+
end
|
299
|
+
end
|
data/revs-utils.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for revs-utils.
# lib/ is put on the load path so the version file can be required from there.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'revs-utils/version'

Gem::Specification.new do |gem|
  gem.name          = "revs-utils"
  gem.version       = Revs::Utils::VERSION
  gem.authors       = ["Peter Mangiafico"]
  gem.email         = ["pmangiafico@stanford.edu"]
  gem.description   = "Shared methods and functions used by revs-indexer, pre-assembly and bulk metadata loading code."
  gem.summary       = "Shared methods and functions used by revs-indexer, pre-assembly and bulk metadata loading code."
  gem.homepage      = ""

  # the packaged files are whatever git tracks
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # runtime dependencies (countries is locked to an exact version)
  gem.add_dependency "countries", "0.9.2"
  gem.add_dependency "rdf"
  gem.add_dependency "actionpack", '~> 3'

  # development dependencies
  gem.add_development_dependency "rspec", "~> 2.6"
  gem.add_development_dependency "lyberteam-gems-devel", "> 1.0.0"
  gem.add_development_dependency "yard"

end
|