revs-utils 1.0.21 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/Gemfile.lock +41 -50
- data/README.rdoc +3 -9
- data/config/manifest_headers.yml +2 -15
- data/files/revs-lc-marque-terms.obj +0 -0
- data/lib/revs-utils/version.rb +1 -1
- data/lib/revs-utils.rb +34 -110
- data/revs-utils.gemspec +2 -4
- data/spec/revs-utils_spec.rb +4 -58
- data/spec/sample-csv-files/clean-sheet.csv +1 -1
- metadata +6 -23
- data/spec/sample-csv-files/space-sourceid.csv +0 -3
checksums.yaml
CHANGED
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
---
|
|
2
2
|
!binary "U0hBMQ==":
|
|
3
3
|
metadata.gz: !binary |-
|
|
4
|
-
|
|
4
|
+
ZDhhZDFiZDllZWY5MjhhMjE1NTBlN2YxYzNjYTM1YjdlMjk1NTU4ZA==
|
|
5
5
|
data.tar.gz: !binary |-
|
|
6
|
-
|
|
6
|
+
NjJiY2E0MGRjZTllZGE1YTRlNGJiZjg1NDcyNmM5YzcwOWNmNDQwYg==
|
|
7
7
|
SHA512:
|
|
8
8
|
metadata.gz: !binary |-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
NjE0YmY4NWViN2U3ZDE5ZjRhMWM2ZWQxYWM3ZTkyNDQ5MTRkYjE4YjUyZDUx
|
|
10
|
+
OWFkZGNjOGFjN2JjNTZlZWQzMDVlZWMzMDM2YjEzN2Y4MmVkODgyMDUxM2E3
|
|
11
|
+
NzkyOTdhYWQ0OTRjNGNlMDA0YzYyNGUwOWY4NTcwNTI1ZGUyZmM=
|
|
12
12
|
data.tar.gz: !binary |-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
13
|
+
NDEwNGY0NDg5MjdiYWIxMDZhZDhmMGRlZGMzYmE4NzM5OGYzZjhmZjBiOWE0
|
|
14
|
+
NDY1Mzg2NjU0MWQ1M2JkY2Q3NjE4ZTIxNDZkOWU0YWE4N2Q1OTQ0ZjlkN2Qz
|
|
15
|
+
MTc1NjgzMTQ1NjJjNWFmMDUyMmQ3YzNmNzQ5MjY2MWFmMGY2ZjE=
|
data/Gemfile.lock
CHANGED
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
revs-utils (
|
|
5
|
-
actionpack (~>
|
|
6
|
-
chronic
|
|
4
|
+
revs-utils (2.0.0)
|
|
5
|
+
actionpack (~> 4.1.6)
|
|
7
6
|
countries (= 0.9.2)
|
|
8
7
|
rdf
|
|
9
8
|
|
|
@@ -11,63 +10,55 @@ GEM
|
|
|
11
10
|
remote: http://rubygems.org/
|
|
12
11
|
remote: http://sul-gems.stanford.edu/
|
|
13
12
|
specs:
|
|
14
|
-
actionpack (
|
|
15
|
-
|
|
16
|
-
activesupport (=
|
|
17
|
-
|
|
13
|
+
actionpack (4.1.6)
|
|
14
|
+
actionview (= 4.1.6)
|
|
15
|
+
activesupport (= 4.1.6)
|
|
16
|
+
rack (~> 1.5.2)
|
|
17
|
+
rack-test (~> 0.6.2)
|
|
18
|
+
actionview (4.1.6)
|
|
19
|
+
activesupport (= 4.1.6)
|
|
20
|
+
builder (~> 3.1)
|
|
18
21
|
erubis (~> 2.7.0)
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
builder (~> 3.0.0)
|
|
27
|
-
activesupport (3.2.22)
|
|
28
|
-
i18n (~> 0.6, >= 0.6.4)
|
|
29
|
-
multi_json (~> 1.0)
|
|
30
|
-
builder (3.0.4)
|
|
31
|
-
chronic (0.10.2)
|
|
22
|
+
activesupport (4.1.6)
|
|
23
|
+
i18n (~> 0.6, >= 0.6.9)
|
|
24
|
+
json (~> 1.7, >= 1.7.7)
|
|
25
|
+
minitest (~> 5.1)
|
|
26
|
+
thread_safe (~> 0.1)
|
|
27
|
+
tzinfo (~> 1.1)
|
|
28
|
+
builder (3.2.2)
|
|
32
29
|
countries (0.9.2)
|
|
33
30
|
currencies (>= 0.4.0)
|
|
34
31
|
currencies (0.4.2)
|
|
35
|
-
diff-lcs (1.2.
|
|
32
|
+
diff-lcs (1.2.5)
|
|
36
33
|
erubis (2.7.0)
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
journey (1.0.4)
|
|
40
|
-
link_header (0.0.8)
|
|
34
|
+
i18n (0.6.11)
|
|
35
|
+
json (1.8.1)
|
|
41
36
|
lyberteam-gems-devel (1.0.1)
|
|
42
37
|
rake (>= 0.8.7)
|
|
43
38
|
rest-client
|
|
44
|
-
mime-types (2.
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
rack
|
|
48
|
-
|
|
49
|
-
rack-test (0.6.3)
|
|
39
|
+
mime-types (2.3)
|
|
40
|
+
minitest (5.4.1)
|
|
41
|
+
netrc (0.7.7)
|
|
42
|
+
rack (1.5.2)
|
|
43
|
+
rack-test (0.6.2)
|
|
50
44
|
rack (>= 1.0)
|
|
51
|
-
rake (10.
|
|
52
|
-
rdf (1.1.
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
rspec (2.
|
|
57
|
-
rspec-core (~> 2.
|
|
58
|
-
rspec-expectations (~> 2.
|
|
59
|
-
rspec-mocks (~> 2.
|
|
60
|
-
rspec-core (2.
|
|
61
|
-
rspec-expectations (2.
|
|
45
|
+
rake (10.3.2)
|
|
46
|
+
rdf (1.1.6)
|
|
47
|
+
rest-client (1.7.2)
|
|
48
|
+
mime-types (>= 1.16, < 3.0)
|
|
49
|
+
netrc (~> 0.7)
|
|
50
|
+
rspec (2.99.0)
|
|
51
|
+
rspec-core (~> 2.99.0)
|
|
52
|
+
rspec-expectations (~> 2.99.0)
|
|
53
|
+
rspec-mocks (~> 2.99.0)
|
|
54
|
+
rspec-core (2.99.2)
|
|
55
|
+
rspec-expectations (2.99.2)
|
|
62
56
|
diff-lcs (>= 1.1.3, < 2.0)
|
|
63
|
-
rspec-mocks (2.
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
tilt (~> 1.1, != 1.3.0)
|
|
69
|
-
tilt (1.4.1)
|
|
70
|
-
yard (0.8.7.3)
|
|
57
|
+
rspec-mocks (2.99.2)
|
|
58
|
+
thread_safe (0.3.4)
|
|
59
|
+
tzinfo (1.2.2)
|
|
60
|
+
thread_safe (~> 0.1)
|
|
61
|
+
yard (0.8.7.4)
|
|
71
62
|
|
|
72
63
|
PLATFORMS
|
|
73
64
|
ruby
|
data/README.rdoc
CHANGED
|
@@ -18,17 +18,11 @@ Shared methods and functions used by revs-indexer, pre-assembly and bulk metadat
|
|
|
18
18
|
- <b>1.0.6</b> Add some more conditions to CSV header checks
|
|
19
19
|
- <b>1.0.7</b> Label column needs to be there but does not need to have a value to register
|
|
20
20
|
- <b>1.0.8</b> Update clean_collection_name method to deal with other possible names
|
|
21
|
-
- <b>
|
|
22
|
-
- <b>1.0.10</b> Update valid for metadata method so it is not sensitive to blank or uppercase columns
|
|
23
|
-
- <b>1.0.11</b> Fix issues with year parsing
|
|
24
|
-
- <b>1.0.12</b> Allow two digit years in year formatting
|
|
25
|
-
- <b>1.0.19</b> Display output showing reasons for validation failures when checking manifests
|
|
26
|
-
- <b>1.0.20</b> Allow for optional metadata columns in the manifest
|
|
27
|
-
- <b>1.0.21</b> Add more information when sourceIDs are not unique
|
|
21
|
+
- <b>2.0.0</b> Updating to use ActionPack 4 for Rails 4 applications. For Rails 3, continue to use 1.x.y releases.
|
|
28
22
|
|
|
29
23
|
== Running tests
|
|
30
24
|
|
|
31
|
-
|
|
25
|
+
./bin/run_all_tests
|
|
32
26
|
|
|
33
27
|
== Release the gem to the gemserver
|
|
34
28
|
|
|
@@ -42,7 +36,7 @@ gem build revs-utils.gemspec
|
|
|
42
36
|
|
|
43
37
|
6. Release the gem to RubyGems:
|
|
44
38
|
|
|
45
|
-
gem push revs-utils-1.0.
|
|
39
|
+
gem push revs-utils-1.0.8.gem # substitute actual version number
|
|
46
40
|
|
|
47
41
|
== Installation
|
|
48
42
|
|
data/config/manifest_headers.yml
CHANGED
|
@@ -30,18 +30,5 @@ metadata:
|
|
|
30
30
|
hide: hide
|
|
31
31
|
format: format
|
|
32
32
|
collection_name: collection_name
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
known_formats:
|
|
36
|
-
- black-and-white film
|
|
37
|
-
- color film
|
|
38
|
-
- slides
|
|
39
|
-
- foldouts
|
|
40
|
-
- pamphlets
|
|
41
|
-
- oversize items
|
|
42
|
-
- photographic prints
|
|
43
|
-
- black-and-white negatives
|
|
44
|
-
- color negatives
|
|
45
|
-
- black-and-white transparencies
|
|
46
|
-
- color transparencies
|
|
47
|
-
- Glass negatives
|
|
33
|
+
|
|
34
|
+
|
|
Binary file
|
data/lib/revs-utils/version.rb
CHANGED
data/lib/revs-utils.rb
CHANGED
|
@@ -5,7 +5,6 @@ require "countries"
|
|
|
5
5
|
require 'active_support/core_ext/string'
|
|
6
6
|
require 'active_support/core_ext/hash'
|
|
7
7
|
require 'csv'
|
|
8
|
-
require 'chronic'
|
|
9
8
|
|
|
10
9
|
PROJECT_ROOT = File.expand_path(File.dirname(__FILE__) + '/..')
|
|
11
10
|
|
|
@@ -14,8 +13,7 @@ REVS_LC_TERMS_FILENAME=File.join(PROJECT_ROOT,'files','revs-lc-marque-terms.obj'
|
|
|
14
13
|
REVS_MANIFEST_HEADERS_FILEPATH = File.join(PROJECT_ROOT,'config',"manifest_headers.yml")
|
|
15
14
|
REGISTER = "register"
|
|
16
15
|
METADATA = "metadata"
|
|
17
|
-
|
|
18
|
-
FORMATS = "known_formats"
|
|
16
|
+
|
|
19
17
|
|
|
20
18
|
module Revs
|
|
21
19
|
module Utils
|
|
@@ -24,14 +22,11 @@ module Revs
|
|
|
24
22
|
# a hash of LC Subject Heading terms and their IDs for linking for "Automobiles" http://id.loc.gov/authorities/subjects/sh85010201.html
|
|
25
23
|
# this is cached and loaded from disk and deserialized back into a hash for performance reasons, then stored as a module
|
|
26
24
|
# level constant so it can be reused throughout the pre-assembly run as a constant
|
|
27
|
-
# This cached set of terms can be re-generated with "ruby
|
|
25
|
+
# This cached set of terms can be re-generated with "ruby devel/revs_lc_automobile_terms.rb"
|
|
28
26
|
AUTOMOBILE_LC_TERMS= File.open(REVS_LC_TERMS_FILENAME,'rb'){|io| Marshal.load(io)} if File.exists?(REVS_LC_TERMS_FILENAME)
|
|
29
27
|
REVS_MANIFEST_HEADERS_FILE = File.open(REVS_MANIFEST_HEADERS_FILEPATH)
|
|
30
28
|
REVS_MANIFEST_HEADERS = YAML.load( REVS_MANIFEST_HEADERS_FILE)
|
|
31
29
|
|
|
32
|
-
def revs_known_formats
|
|
33
|
-
get_manifest_section(FORMATS)
|
|
34
|
-
end
|
|
35
30
|
|
|
36
31
|
def get_manifest_section(section)
|
|
37
32
|
return REVS_MANIFEST_HEADERS[section]
|
|
@@ -71,98 +66,50 @@ module Revs
|
|
|
71
66
|
sources = Array.new
|
|
72
67
|
files.each do |file|
|
|
73
68
|
file.each do |row|
|
|
74
|
-
#Make sure the
|
|
69
|
+
#Make sure the sourcid and filename are the same
|
|
75
70
|
fname = row[get_manifest_section(REGISTER)['filename']].chomp(File.extname(row[get_manifest_section(REGISTER)['filename']]))
|
|
76
|
-
return false if
|
|
71
|
+
return false if row[get_manifest_section(REGISTER)['sourceid']] != fname
|
|
77
72
|
sources << row[get_manifest_section(REGISTER)['sourceid']]
|
|
78
|
-
end
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
|
|
79
77
|
end
|
|
80
78
|
return sources.uniq.size == sources.size
|
|
81
79
|
|
|
82
80
|
end
|
|
83
|
-
|
|
81
|
+
|
|
82
|
+
|
|
84
83
|
#Pass this function a CSV file and it will return true if the proper headers are there and each entry has the required fields filled in
|
|
85
84
|
def valid_to_register(file_path)
|
|
85
|
+
|
|
86
86
|
file = read_csv_with_headers(file_path)
|
|
87
|
-
return check_valid_to_register(file)
|
|
88
|
-
end
|
|
89
|
-
|
|
90
|
-
#Pass this function a CSV file and it will return true if the proper headers are there and each entry has the required fields filled in.
|
|
91
|
-
def valid_for_metadata(file_path)
|
|
92
|
-
file = read_csv_with_headers(file_path)
|
|
93
|
-
return check_headers(file)
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
# pass in csv data and it will tell if you everything is safe to register based on having labels, unique sourceIDs and filenames matching sourceIDs
|
|
97
|
-
def check_valid_to_register(csv_data)
|
|
98
87
|
#Make sure all the required headers are there
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
puts "missing headers required for registration"
|
|
102
|
-
result1=false
|
|
103
|
-
end
|
|
104
|
-
sources=Array.new
|
|
88
|
+
return false if not get_manifest_section(REGISTER).values-file[0].keys == []
|
|
89
|
+
|
|
105
90
|
#Make sure all files have entries for those required headers
|
|
106
|
-
|
|
91
|
+
file.each do |row|
|
|
107
92
|
get_manifest_section(REGISTER).keys.each do |header| # label should be there as a column but does not always need a value
|
|
108
|
-
|
|
109
|
-
puts "#{row[get_manifest_section(REGISTER)['sourceid']]} does not have a value for a required registration field"
|
|
110
|
-
result2=false
|
|
111
|
-
end
|
|
112
|
-
end
|
|
113
|
-
fname = row[get_manifest_section(REGISTER)['filename']].chomp(File.extname(row[get_manifest_section(REGISTER)['filename']]))
|
|
114
|
-
if ((row[get_manifest_section(REGISTER)['sourceid']] != fname) || ((/\s/ =~ row[get_manifest_section(REGISTER)['sourceid']].strip) != nil))
|
|
115
|
-
puts "#{row[get_manifest_section(REGISTER)['sourceid']]} does not match the filename or has a space in it"
|
|
116
|
-
result3=false
|
|
93
|
+
return false if header.downcase !='label' && row[header].blank? #Alternatively consider row[header].class != String or row[header].size <= 0
|
|
117
94
|
end
|
|
118
|
-
sources << row[get_manifest_section(REGISTER)['sourceid']]
|
|
119
|
-
end
|
|
120
|
-
result4 = (sources.uniq.size == sources.size)
|
|
121
|
-
unless result4
|
|
122
|
-
puts "sourceIDs are not all unique"
|
|
123
|
-
puts sources.uniq.map { | e | [sources.count(e), e] }.select { | c, _ | c > 1 }.sort.reverse.map { | c, e | "#{e}: #{c}" } # show all non-unique sourceIDs and their frequency
|
|
124
95
|
end
|
|
125
|
-
|
|
126
|
-
|
|
96
|
+
return true
|
|
127
97
|
end
|
|
128
98
|
|
|
129
|
-
#
|
|
130
|
-
def
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
valid_date=revs_is_valid_datestring?(row[get_manifest_section(METADATA)['year']] || row[get_manifest_section(METADATA)['date']])
|
|
134
|
-
valid_format=revs_is_valid_format?(row[get_manifest_section(METADATA)['format']])
|
|
135
|
-
unless (valid_date && valid_format)
|
|
136
|
-
bad_rows+=1
|
|
137
|
-
puts "#{row[get_manifest_section(REGISTER)['sourceid']]} has a bad year/date or format"
|
|
138
|
-
end
|
|
139
|
-
end
|
|
140
|
-
return bad_rows
|
|
141
|
-
end
|
|
142
|
-
|
|
143
|
-
# pass in csv data from a file read in and it will tell you if the headers are valid
|
|
144
|
-
def check_headers(csv_data)
|
|
145
|
-
|
|
146
|
-
result1=result2=true
|
|
147
|
-
file_headers=csv_data[0].keys.reject(&:blank?).collect(&:downcase)
|
|
99
|
+
#Pass this function a CSV file and it will return true if the proper headers are there and each entry has the required fields filled in.
|
|
100
|
+
def valid_for_metadata(file_path)
|
|
101
|
+
file = read_csv_with_headers(file_path)
|
|
102
|
+
file_headers=file[0].keys
|
|
148
103
|
#The file doesn't need to have all the metadata values, it just can't have headers that aren't used for metadata or registration
|
|
149
104
|
if file_headers.include?('date') && file_headers.include?('year') # can't have both date and year
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
result2=false
|
|
105
|
+
return false
|
|
106
|
+
elsif file_headers.include?('location') && file_headers.include?('state') && file_headers.include?('city') && file_headers.include?('country') # can't have both location and the specific fields
|
|
107
|
+
return false
|
|
108
|
+
else
|
|
109
|
+
return file_headers-get_manifest_section(METADATA).values-get_manifest_section(REGISTER).values == []
|
|
156
110
|
end
|
|
157
|
-
extra_columns = file_headers-get_manifest_section(METADATA).values-get_manifest_section(REGISTER).values-get_manifest_section(OPTIONAL).values
|
|
158
|
-
has_extra_columns = (extra_columns == [])
|
|
159
|
-
puts "has unknown columns: #{extra_columns.join(', ')}" unless has_extra_columns
|
|
160
|
-
result3 = has_extra_columns
|
|
161
|
-
|
|
162
|
-
return (result1 && result2 && result3)
|
|
163
|
-
|
|
164
111
|
end
|
|
165
|
-
|
|
112
|
+
|
|
166
113
|
def clean_collection_name(name)
|
|
167
114
|
return "" if name.blank? || name.nil?
|
|
168
115
|
name=name.to_s
|
|
@@ -199,32 +146,19 @@ module Revs
|
|
|
199
146
|
return row
|
|
200
147
|
end
|
|
201
148
|
|
|
202
|
-
# checks to see if we have a valid format
|
|
203
|
-
def revs_is_valid_format?(format)
|
|
204
|
-
return true if format.nil? || format.blank?
|
|
205
|
-
formats=format.split("|").collect{|f| f.strip}
|
|
206
|
-
!formats.collect {|f| revs_known_formats.include?(f)}.uniq.include?(false)
|
|
207
|
-
end
|
|
208
|
-
|
|
209
|
-
# check a single format and fix some common issues
|
|
210
149
|
def revs_check_format(format)
|
|
211
150
|
return revs_check_formats([format]).first
|
|
212
151
|
end
|
|
213
152
|
|
|
214
|
-
# check the incoming
|
|
153
|
+
# check the incoming format and fix some common issues
|
|
215
154
|
def revs_check_formats(format)
|
|
216
155
|
known_fixes = {"black-and-white negative"=>"black-and-white negatives",
|
|
217
156
|
"color negative"=>"color negatives",
|
|
218
157
|
"slides/color transparency"=>"color transparencies",
|
|
219
158
|
"color negatives/slides"=>"color negatives",
|
|
220
159
|
"black-and-white negative strips"=>"black-and-white negatives",
|
|
221
|
-
"black and white"=>"black-and-white negatives",
|
|
222
|
-
"black-and-white"=>"black-and-white negatives",
|
|
223
|
-
"black and white negative"=>"black-and-white negatives",
|
|
224
|
-
"black and white negatives"=>"black-and-white negatives",
|
|
225
160
|
"color transparency"=>"color transparencies",
|
|
226
|
-
"slide"=>"slides"
|
|
227
|
-
"color transparancies"=>"color transparencies"
|
|
161
|
+
"slide"=>"slides"
|
|
228
162
|
}
|
|
229
163
|
count = 0
|
|
230
164
|
format.each do |f|
|
|
@@ -297,21 +231,11 @@ module Revs
|
|
|
297
231
|
date_string.to_s.strip.scan(/\D/).empty? and (starting_year..Date.today.year).include?(date_string.to_i)
|
|
298
232
|
end
|
|
299
233
|
|
|
300
|
-
# tell us if the
|
|
301
|
-
def revs_is_valid_datestring?(date_string)
|
|
302
|
-
return true if date_string.nil? || date_string.empty?
|
|
303
|
-
is_full_date=(get_full_date(date_string) != false)
|
|
304
|
-
is_year=!parse_years(date_string).empty?
|
|
305
|
-
return is_year || is_full_date
|
|
306
|
-
end
|
|
307
|
-
|
|
308
|
-
# tell us if the string passed is in is a full date of the format M/D/YYYY or m-d-yyyy or m-d-yy or M/D/YY, and returns the date object if it is valid
|
|
234
|
+
# tell us if the string passed is in is a full date of the format M/D/YYYY, and returns the date object if it is valid
|
|
309
235
|
def get_full_date(date_string)
|
|
310
236
|
begin
|
|
311
|
-
|
|
312
|
-
date_obj
|
|
313
|
-
date_obj=date_obj.prev_year(100) if date_obj > Date.today # if the parsing yields a date in the future, this is a problem, so adjust back a century (due to this issue: http://stackoverflow.com/questions/27058068/ruby-incorrectly-parses-2-digit-year)
|
|
314
|
-
is_valid_year?(date_obj.year.to_s) ? date_obj : false
|
|
237
|
+
date_obj=Date.strptime(date_string.gsub('-','/').delete(' '), '%m/%d/%Y')
|
|
238
|
+
return (is_valid_year?(date_obj.year.to_s) ? date_obj : false)
|
|
315
239
|
rescue
|
|
316
240
|
false
|
|
317
241
|
end
|
|
@@ -329,14 +253,14 @@ module Revs
|
|
|
329
253
|
years_to_add=[]
|
|
330
254
|
result.each do |year|
|
|
331
255
|
|
|
332
|
-
if year.scan(/[1-2][0-9][0-9][0-9][-][0-9][0-9]/).size > 0
|
|
256
|
+
if year.scan(/[1-2][0-9][0-9][0-9][-][0-9][0-9]/).size > 0 # if we have a year that looks like "1961-62" or "1961-73", lets deal with it turning it into [1961,1962] or [1961,1962,1963,1964,1965,1966,1967...etc]
|
|
333
257
|
start_year=year[2..3]
|
|
334
258
|
end_year=year[5..6]
|
|
335
259
|
stem=year[0..1]
|
|
336
260
|
for n in start_year..end_year
|
|
337
261
|
years_to_add << "#{stem}#{n}"
|
|
338
262
|
end
|
|
339
|
-
elsif year.scan(/[1-2][0-9][0-9][0-9][-][1-9]/).size > 0
|
|
263
|
+
elsif year.scan(/[1-2][0-9][0-9][0-9][-][1-9]/).size > 0 # if we have a year that lloks like "1961-2" or "1961-3", lets deal with it turning it into [1961,1962] or [1961,1962,1963]
|
|
340
264
|
start_year=year[3..3]
|
|
341
265
|
end_year=year[5..5]
|
|
342
266
|
stem=year[0..2]
|
|
@@ -351,7 +275,7 @@ module Revs
|
|
|
351
275
|
%w{0 1 2 3 4 5 6 7 8 9}.each {|n| years_to_add << "#{stem}#{n}"} # add each year in that decade to the output array
|
|
352
276
|
end
|
|
353
277
|
|
|
354
|
-
if year.scan(/[1-2][0-9][0-9][0-9][-][1-2][0-9][0-9][0-9]/).size > 0
|
|
278
|
+
if year.scan(/[1-2][0-9][0-9][0-9][-][1-2][0-9][0-9][0-9]/).size > 0 # if we have a year that lloks like "1961-1962" or "1930-1955", lets deal with it turning it into [1961,1962] or [1961,1962,1963]
|
|
355
279
|
start_year=year[0..3]
|
|
356
280
|
end_year=year[5..8]
|
|
357
281
|
if end_year.to_i - start_year.to_i < 10 # let's only do the expansion if we don't have some really large date range, like "1930-1985" .. only ranges less than 9 years will be split into separate years
|
data/revs-utils.gemspec
CHANGED
|
@@ -11,8 +11,7 @@ Gem::Specification.new do |gem|
|
|
|
11
11
|
gem.description = "Shared methods and functions used by revs-indexer, pre-assembly and bulk metadata loading code."
|
|
12
12
|
gem.summary = "Shared methods and functions used by revs-indexer, pre-assembly and bulk metadata loading code."
|
|
13
13
|
gem.homepage = ""
|
|
14
|
-
|
|
15
|
-
|
|
14
|
+
|
|
16
15
|
gem.files = `git ls-files`.split($/)
|
|
17
16
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
|
18
17
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
|
@@ -20,8 +19,7 @@ Gem::Specification.new do |gem|
|
|
|
20
19
|
|
|
21
20
|
gem.add_dependency "countries", "0.9.2"
|
|
22
21
|
gem.add_dependency "rdf"
|
|
23
|
-
gem.add_dependency "actionpack", '~>
|
|
24
|
-
gem.add_dependency "chronic"
|
|
22
|
+
gem.add_dependency "actionpack", '~> 4.1.6'
|
|
25
23
|
|
|
26
24
|
gem.add_development_dependency "rspec", "~> 2.6"
|
|
27
25
|
gem.add_development_dependency "lyberteam-gems-devel", "> 1.0.0"
|
data/spec/revs-utils_spec.rb
CHANGED
|
@@ -62,7 +62,6 @@ describe "Revs-Utils" do
|
|
|
62
62
|
|
|
63
63
|
it "should clean up some common format errors from an array" do
|
|
64
64
|
@revs.revs_check_formats(['black-and-white negative','color negative','leave alone']).should == ['black-and-white negatives','color negatives','leave alone']
|
|
65
|
-
@revs.revs_check_formats(['black and white','color negative','black-and-white negative']).should == ['black-and-white negatives','color negatives','black-and-white negatives']
|
|
66
65
|
end
|
|
67
66
|
|
|
68
67
|
it "should clean up some common format errors from a string" do
|
|
@@ -76,30 +75,10 @@ describe "Revs-Utils" do
|
|
|
76
75
|
end
|
|
77
76
|
|
|
78
77
|
it "should indicate if a date is valid" do
|
|
79
|
-
|
|
80
|
-
# formats that are ok
|
|
78
|
+
@revs.get_full_date('bogus').should be_false
|
|
81
79
|
@revs.get_full_date('5/1/1959').should == Date.strptime("5/1/1959", '%m/%d/%Y')
|
|
82
80
|
@revs.get_full_date('5-1-1959').should == Date.strptime("5/1/1959", '%m/%d/%Y')
|
|
83
|
-
@revs.get_full_date('5-1-
|
|
84
|
-
@revs.get_full_date('5-1-59').should == Date.strptime("5/1/1959", '%m/%d/%Y')
|
|
85
|
-
@revs.get_full_date('1/1/71').should == Date.strptime("1/1/1971", '%m/%d/%Y')
|
|
86
|
-
@revs.get_full_date('5-1-14').should == Date.strptime("5/1/2014", '%m/%d/%Y')
|
|
87
|
-
@revs.get_full_date('5-1-21').should == Date.strptime("5/1/1921", '%m/%d/%Y')
|
|
88
|
-
@revs.get_full_date('1966-02-27').should == Date.strptime("2/27/1966", '%m/%d/%Y')
|
|
89
|
-
@revs.get_full_date('1966-2-5').should == Date.strptime("2/5/1966", '%m/%d/%Y')
|
|
90
|
-
|
|
91
|
-
# bad full dates
|
|
92
|
-
@revs.get_full_date('1966-14-11').should be_false # bad month
|
|
93
|
-
@revs.get_full_date('1966\4\11').should be_false # slashes are the wrong way
|
|
94
|
-
@revs.get_full_date('bogus').should be_false # crap string
|
|
95
|
-
@revs.get_full_date('').should be_false # blank
|
|
96
|
-
@revs.get_full_date('1965').should be_false # only the year
|
|
97
|
-
@revs.get_full_date('1965-68').should be_false # range of years
|
|
98
|
-
@revs.get_full_date('1965,1968').should be_false # multiple years
|
|
99
|
-
@revs.get_full_date('1965|1968').should be_false # multiple years
|
|
100
|
-
@revs.get_full_date('1965-1968').should be_false # multiple years
|
|
101
|
-
@revs.get_full_date('1965-8').should be_false # multiple years
|
|
102
|
-
|
|
81
|
+
@revs.get_full_date('5-1-59').should be_false # two digit year is not allowed
|
|
103
82
|
end
|
|
104
83
|
|
|
105
84
|
it "should indicate if we have a valid year" do
|
|
@@ -108,29 +87,7 @@ describe "Revs-Utils" do
|
|
|
108
87
|
@revs.is_valid_year?('1700').should be_false # too old! no cars even existed yet
|
|
109
88
|
@revs.is_valid_year?('1700',1600).should be_true # unless we allow it to be ok
|
|
110
89
|
end
|
|
111
|
-
|
|
112
|
-
it "should indicate if we have unknown formats" do
|
|
113
|
-
@revs.revs_is_valid_format?(nil).should be_true
|
|
114
|
-
@revs.revs_is_valid_format?('').should be_true
|
|
115
|
-
@revs.revs_is_valid_format?('slides').should be_true
|
|
116
|
-
@revs.revs_is_valid_format?('slide').should be_false
|
|
117
|
-
@revs.revs_is_valid_format?('slides | slide').should be_false
|
|
118
|
-
@revs.revs_is_valid_format?('slides | black-and-white negatives').should be_true
|
|
119
|
-
@revs.revs_is_valid_format?('black-and-white-negatives').should be_false
|
|
120
|
-
@revs.revs_is_valid_format?('black-and-white negatives').should be_true
|
|
121
|
-
end
|
|
122
|
-
|
|
123
|
-
it "should indicate if we have a valid datestring" do
|
|
124
|
-
@revs.revs_is_valid_datestring?('1959').should be_true
|
|
125
|
-
@revs.revs_is_valid_datestring?('bogus').should be_false
|
|
126
|
-
@revs.revs_is_valid_datestring?('').should be_true
|
|
127
|
-
@revs.revs_is_valid_datestring?(nil).should be_true
|
|
128
|
-
@revs.revs_is_valid_datestring?([]).should be_true
|
|
129
|
-
@revs.revs_is_valid_datestring?('2/2/1950').should be_true
|
|
130
|
-
@revs.revs_is_valid_datestring?('2/31/1950').should be_false
|
|
131
|
-
@revs.revs_is_valid_datestring?('2/2/50').should be_true
|
|
132
|
-
@revs.revs_is_valid_datestring?('195x').should be_true
|
|
133
|
-
end
|
|
90
|
+
|
|
134
91
|
|
|
135
92
|
it "should lookup the country correctly" do
|
|
136
93
|
@revs.revs_get_country('USA').should == "United States"
|
|
@@ -159,7 +116,6 @@ describe "Revs-Utils" do
|
|
|
159
116
|
it "should parse 1950s correctly" do
|
|
160
117
|
|
|
161
118
|
@revs.parse_years('1950s').should == ['1950','1951','1952','1953','1954','1955','1956','1957','1958','1959']
|
|
162
|
-
@revs.parse_years("1950's").should == ['1950','1951','1952','1953','1954','1955','1956','1957','1958','1959']
|
|
163
119
|
|
|
164
120
|
end
|
|
165
121
|
|
|
@@ -181,12 +137,6 @@ describe "Revs-Utils" do
|
|
|
181
137
|
|
|
182
138
|
end
|
|
183
139
|
|
|
184
|
-
it "should parse 1800-1802" do
|
|
185
|
-
|
|
186
|
-
@revs.parse_years('1800-1802').should == ['1800','1801','1802']
|
|
187
|
-
|
|
188
|
-
end
|
|
189
|
-
|
|
190
140
|
it "should parse 1955-1957 | 1955 | 1955 and not produce duplicate years" do
|
|
191
141
|
|
|
192
142
|
@revs.parse_years('1955-1957 | 1955 | 1955').should == ['1955','1956','1957']
|
|
@@ -297,11 +247,7 @@ describe "Revs-Utils" do
|
|
|
297
247
|
sheets = [Dir.pwd + "/spec/sample-csv-files/malformed-sourceid.csv"]
|
|
298
248
|
@revs.unique_source_ids(sheets).should == false
|
|
299
249
|
end
|
|
300
|
-
|
|
301
|
-
it "should return false when a sourceid has a space in it" do
|
|
302
|
-
sheets = [Dir.pwd + "/spec/sample-csv-files/space-sourceid.csv"]
|
|
303
|
-
@revs.unique_source_ids(sheets).should == false
|
|
304
|
-
end
|
|
250
|
+
|
|
305
251
|
|
|
306
252
|
|
|
307
253
|
end
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
format,sourceid,collection_name,filename,year,photographer,label,marque,model,model_year,people,city,state,country,event,description,inst_notes,prod_notes,has_more_metadata
|
|
1
|
+
format,sourceid,collection_name,filename,year,photographer,label,marque,model,model_year,people,city,state,country,event,description,inst_notes,prod_notes,has_more_metadata
|
|
2
2
|
black-and-white negatives,2004-100CRAI-b1_1.0_0001,Bruce R Craig Photograph Collection,2004-100CRAI-b1_1.0_0001.tif,,Bruce R Craig,Rutherford Special,Rutherford Special,,,"Rutherford, Slim",,,"full car (rear), #62",,,
|
|
3
3
|
black-and-white negatives,2004-100CRAI-b1_1.0_0002,Bruce R Craig Photograph Collection,2004-100CRAI-b1_1.0_0002.tif,,Bruce R Craig,City of Roses,City of Roses,,,"Sezekendy, Charley",,,"full car, #2",,,
|
|
4
4
|
black-and-white negatives,2004-100CRAI-b1_1.0_0003,Bruce R Craig Photograph Collection,2004-100CRAI-b1_1.0_0003.tif,,Ed Hitzwe,Wilburn | Morgan,,,,Wilburn | Morgan,,,"install tire on wheel, posed photo",,,
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: revs-utils
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version:
|
|
4
|
+
version: 2.0.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Peter Mangiafico
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2014-10-14 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: countries
|
|
@@ -44,28 +44,14 @@ dependencies:
|
|
|
44
44
|
requirements:
|
|
45
45
|
- - ~>
|
|
46
46
|
- !ruby/object:Gem::Version
|
|
47
|
-
version:
|
|
47
|
+
version: 4.1.6
|
|
48
48
|
type: :runtime
|
|
49
49
|
prerelease: false
|
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
|
51
51
|
requirements:
|
|
52
52
|
- - ~>
|
|
53
53
|
- !ruby/object:Gem::Version
|
|
54
|
-
version:
|
|
55
|
-
- !ruby/object:Gem::Dependency
|
|
56
|
-
name: chronic
|
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
|
58
|
-
requirements:
|
|
59
|
-
- - ! '>='
|
|
60
|
-
- !ruby/object:Gem::Version
|
|
61
|
-
version: '0'
|
|
62
|
-
type: :runtime
|
|
63
|
-
prerelease: false
|
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
65
|
-
requirements:
|
|
66
|
-
- - ! '>='
|
|
67
|
-
- !ruby/object:Gem::Version
|
|
68
|
-
version: '0'
|
|
54
|
+
version: 4.1.6
|
|
69
55
|
- !ruby/object:Gem::Dependency
|
|
70
56
|
name: rspec
|
|
71
57
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -147,11 +133,9 @@ files:
|
|
|
147
133
|
- spec/sample-csv-files/no-blank-label.csv
|
|
148
134
|
- spec/sample-csv-files/no-label-column.csv
|
|
149
135
|
- spec/sample-csv-files/no-sourceid.csv
|
|
150
|
-
- spec/sample-csv-files/space-sourceid.csv
|
|
151
136
|
- spec/spec_helper.rb
|
|
152
137
|
homepage: ''
|
|
153
|
-
licenses:
|
|
154
|
-
- All rights reserved, Stanford University.
|
|
138
|
+
licenses: []
|
|
155
139
|
metadata: {}
|
|
156
140
|
post_install_message:
|
|
157
141
|
rdoc_options: []
|
|
@@ -169,7 +153,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
169
153
|
version: '0'
|
|
170
154
|
requirements: []
|
|
171
155
|
rubyforge_project:
|
|
172
|
-
rubygems_version: 2.
|
|
156
|
+
rubygems_version: 2.2.2
|
|
173
157
|
signing_key:
|
|
174
158
|
specification_version: 4
|
|
175
159
|
summary: Shared methods and functions used by revs-indexer, pre-assembly and bulk
|
|
@@ -187,6 +171,5 @@ test_files:
|
|
|
187
171
|
- spec/sample-csv-files/no-blank-label.csv
|
|
188
172
|
- spec/sample-csv-files/no-label-column.csv
|
|
189
173
|
- spec/sample-csv-files/no-sourceid.csv
|
|
190
|
-
- spec/sample-csv-files/space-sourceid.csv
|
|
191
174
|
- spec/spec_helper.rb
|
|
192
175
|
has_rdoc:
|
|
@@ -1,3 +0,0 @@
|
|
|
1
|
-
format,sourceid,collection_name,filename,year,photographer,label,marque,model,model_year,people,location,event,description,inst_notes,prod_notes,has_more_metadata
|
|
2
|
-
black-and-white negatives,2004-100CRAI-b1_1.0 _0001,Bruce R Craig Photograph Collection,2004-100CRAI-b1_1.0 _0001.tif,,Bruce R Craig,Rutherford Special,Rutherford Special,,,"Rutherford, Slim",,,"full car (rear), #62",,,
|
|
3
|
-
black-and-white negatives,2004-100CRAI-b1_1.0_0002,Bruce R Craig Photograph Collection,2004-100CRAI-b1_1.0_0002.tif,,Bruce R Craig,City of Roses,City of Roses,,,"Sezekendy, Charley",,,"full car, #2",,,
|