revs-utils 2.0.7 → 2.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/Gemfile.lock +23 -15
- data/README.rdoc +2 -8
- data/config/manifest_headers.yml +12 -2
- data/lib/revs-utils/version.rb +1 -1
- data/lib/revs-utils.rb +59 -20
- data/revs-utils.gemspec +2 -1
- data/spec/revs-utils_spec.rb +33 -2
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MTk1MjU3MmRiNzgwMjNlNmRmMjRjMGQ2NmEwOTIxZTFiNTA3YjEyNQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
NzMzOWNmODZlYjU2NDE2ZmQyZWUwNjgxMjcwMjk5ZTNmYmU5OWQ2Nw==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
OTkzOTQ5ZmEyNjYxMWJlZTNmOTZkOWZhMjBjOWNmYmI1OGQ4MzM5Y2IzZmE4
|
10
|
+
ZWNhNDI5M2Y2YTUxZmIwMDExM2Q3YWVkYmFjNDg5OTM1ZDVkMGVjOWRjODI3
|
11
|
+
YjM3ZjU3MTg4NWViMGZkODUwYjgyMWU1OWRkYTkzZTJiYmE1MGM=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YTBmMjY5YzIyMzVhOWQ1NjA3NTkxYWNjNDUzZDA3MWIyODRhZmU5NDZhYmY0
|
14
|
+
MTUxZDcyNjlkNzFmNWZmNGJhNzU2OWYzYzRhM2QxNzVhZWUwNGVjM2NiODNk
|
15
|
+
ZDY1NzcxOGNjMWQ1ZDZlODM1YTJjYzgzM2FiZTg2NDE4NmQ4ZTg=
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
revs-utils (2.0.7)
|
4
|
+
revs-utils (2.0.8)
|
5
5
|
actionpack (~> 4.1.6)
|
6
6
|
chronic
|
7
7
|
countries (= 0.9.2)
|
@@ -11,16 +11,16 @@ GEM
|
|
11
11
|
remote: http://rubygems.org/
|
12
12
|
remote: http://sul-gems.stanford.edu/
|
13
13
|
specs:
|
14
|
-
actionpack (4.1.
|
15
|
-
actionview (= 4.1.
|
16
|
-
activesupport (= 4.1.
|
14
|
+
actionpack (4.1.10)
|
15
|
+
actionview (= 4.1.10)
|
16
|
+
activesupport (= 4.1.10)
|
17
17
|
rack (~> 1.5.2)
|
18
18
|
rack-test (~> 0.6.2)
|
19
|
-
actionview (4.1.
|
20
|
-
activesupport (= 4.1.
|
19
|
+
actionview (4.1.10)
|
20
|
+
activesupport (= 4.1.10)
|
21
21
|
builder (~> 3.1)
|
22
22
|
erubis (~> 2.7.0)
|
23
|
-
activesupport (4.1.
|
23
|
+
activesupport (4.1.10)
|
24
24
|
i18n (~> 0.6, >= 0.6.9)
|
25
25
|
json (~> 1.7, >= 1.7.7)
|
26
26
|
minitest (~> 5.1)
|
@@ -32,23 +32,28 @@ GEM
|
|
32
32
|
currencies (>= 0.4.0)
|
33
33
|
currencies (0.4.2)
|
34
34
|
diff-lcs (1.2.5)
|
35
|
+
domain_name (0.5.23)
|
36
|
+
unf (>= 0.0.5, < 1.0.0)
|
35
37
|
erubis (2.7.0)
|
38
|
+
http-cookie (1.0.2)
|
39
|
+
domain_name (~> 0.5)
|
36
40
|
i18n (0.7.0)
|
37
41
|
json (1.8.2)
|
38
42
|
link_header (0.0.8)
|
39
43
|
lyberteam-gems-devel (1.0.1)
|
40
44
|
rake (>= 0.8.7)
|
41
45
|
rest-client
|
42
|
-
mime-types (2.3)
|
46
|
+
mime-types (2.4.3)
|
43
47
|
minitest (5.5.1)
|
44
|
-
netrc (0.
|
48
|
+
netrc (0.10.3)
|
45
49
|
rack (1.5.2)
|
46
50
|
rack-test (0.6.3)
|
47
51
|
rack (>= 1.0)
|
48
|
-
rake (10.
|
49
|
-
rdf (1.1.
|
52
|
+
rake (10.4.2)
|
53
|
+
rdf (1.1.11)
|
50
54
|
link_header (~> 0.0, >= 0.0.8)
|
51
|
-
rest-client (1.
|
55
|
+
rest-client (1.8.0)
|
56
|
+
http-cookie (>= 1.0.2, < 2.0)
|
52
57
|
mime-types (>= 1.16, < 3.0)
|
53
58
|
netrc (~> 0.7)
|
54
59
|
rspec (2.99.0)
|
@@ -58,11 +63,14 @@ GEM
|
|
58
63
|
rspec-core (2.99.2)
|
59
64
|
rspec-expectations (2.99.2)
|
60
65
|
diff-lcs (>= 1.1.3, < 2.0)
|
61
|
-
rspec-mocks (2.99.
|
62
|
-
thread_safe (0.3.
|
66
|
+
rspec-mocks (2.99.3)
|
67
|
+
thread_safe (0.3.5)
|
63
68
|
tzinfo (1.2.2)
|
64
69
|
thread_safe (~> 0.1)
|
65
|
-
|
70
|
+
unf (0.1.4)
|
71
|
+
unf_ext
|
72
|
+
unf_ext (0.0.6)
|
73
|
+
yard (0.8.7.6)
|
66
74
|
|
67
75
|
PLATFORMS
|
68
76
|
ruby
|
data/README.rdoc
CHANGED
@@ -18,21 +18,14 @@ Shared methods and functions used by revs-indexer, pre-assembly and bulk metadat
|
|
18
18
|
- <b>1.0.6</b> Add some more conditions to CSV header checks
|
19
19
|
- <b>1.0.7</b> Label column needs to be there but does not need to have a value to register
|
20
20
|
- <b>1.0.8</b> Update clean_collection_name method to deal with other possible names
|
21
|
-
<<<<<<< HEAD
|
22
21
|
- <b>2.0.0</b> Updating to use ActionPack 4 for Rails 4 applications. For Rails 3, continue to use 1.x.y releases.
|
23
22
|
- <b>2.0.1</b> Add more common format corrections
|
24
23
|
- <b>2.0.2 and 2.0.3</b> Update valid for metadata method so it is not sensitive to blank or uppercase columns
|
25
24
|
- <b>2.0.4</b> Fix issues with year parsing
|
26
|
-
=======
|
27
|
-
- <b>1.0.9</b> Add more common format corrections
|
28
|
-
- <b>1.0.10</b> Update valid for metadata method so it is not sensitive to blank or uppercase columns
|
29
|
-
- <b>1.0.11</b> Fix issues with year parsing
|
30
|
-
- <b>1.0.12</b> Allow two digit years in year formatting
|
31
|
-
>>>>>>> 9db8640... Allow two digit years in year formatting
|
32
25
|
|
33
26
|
== Running tests
|
34
27
|
|
35
|
-
bundle exec
|
28
|
+
bundle exec rspec spec
|
36
29
|
|
37
30
|
== Release the gem to the gemserver
|
38
31
|
|
@@ -48,6 +41,7 @@ gem build revs-utils.gemspec
|
|
48
41
|
|
49
42
|
gem push revs-utils-2.0.1.gem # substitute actual version number
|
50
43
|
|
44
|
+
|
51
45
|
== Installation
|
52
46
|
|
53
47
|
Add this line to your application's Gemfile:
|
data/config/manifest_headers.yml
CHANGED
@@ -30,5 +30,15 @@ metadata:
|
|
30
30
|
hide: hide
|
31
31
|
format: format
|
32
32
|
collection_name: collection_name
|
33
|
-
|
34
|
-
|
33
|
+
known_formats:
|
34
|
+
- black-and-white film
|
35
|
+
- color film
|
36
|
+
- slides
|
37
|
+
- foldouts
|
38
|
+
- pamphlets
|
39
|
+
- oversize items
|
40
|
+
- photographic prints
|
41
|
+
- black-and-white negatives
|
42
|
+
- color negatives
|
43
|
+
- black-and-white transparencies
|
44
|
+
- color transparencies
|
data/lib/revs-utils/version.rb
CHANGED
data/lib/revs-utils.rb
CHANGED
@@ -14,7 +14,7 @@ REVS_LC_TERMS_FILENAME=File.join(PROJECT_ROOT,'files','revs-lc-marque-terms.obj'
|
|
14
14
|
REVS_MANIFEST_HEADERS_FILEPATH = File.join(PROJECT_ROOT,'config',"manifest_headers.yml")
|
15
15
|
REGISTER = "register"
|
16
16
|
METADATA = "metadata"
|
17
|
-
|
17
|
+
FORMATS = "known_formats"
|
18
18
|
|
19
19
|
module Revs
|
20
20
|
module Utils
|
@@ -28,6 +28,9 @@ module Revs
|
|
28
28
|
REVS_MANIFEST_HEADERS_FILE = File.open(REVS_MANIFEST_HEADERS_FILEPATH)
|
29
29
|
REVS_MANIFEST_HEADERS = YAML.load( REVS_MANIFEST_HEADERS_FILE)
|
30
30
|
|
31
|
+
def revs_known_formats
|
32
|
+
get_manifest_section(FORMATS)
|
33
|
+
end
|
31
34
|
|
32
35
|
def get_manifest_section(section)
|
33
36
|
return REVS_MANIFEST_HEADERS[section]
|
@@ -67,40 +70,59 @@ module Revs
|
|
67
70
|
sources = Array.new
|
68
71
|
files.each do |file|
|
69
72
|
file.each do |row|
|
70
|
-
#Make sure the
|
73
|
+
#Make sure the sourceid and filename are the same
|
71
74
|
fname = row[get_manifest_section(REGISTER)['filename']].chomp(File.extname(row[get_manifest_section(REGISTER)['filename']]))
|
72
75
|
return false if row[get_manifest_section(REGISTER)['sourceid']] != fname
|
73
76
|
sources << row[get_manifest_section(REGISTER)['sourceid']]
|
74
|
-
end
|
75
|
-
|
76
|
-
|
77
|
-
|
77
|
+
end
|
78
78
|
end
|
79
79
|
return sources.uniq.size == sources.size
|
80
80
|
|
81
81
|
end
|
82
|
-
|
83
|
-
|
82
|
+
|
84
83
|
#Pass this function a CSV file and it will return true if the proper headers are there and each entry has the required fields filled in
|
85
84
|
def valid_to_register(file_path)
|
86
|
-
|
87
85
|
file = read_csv_with_headers(file_path)
|
86
|
+
return check_valid_to_register(file)
|
87
|
+
end
|
88
|
+
|
89
|
+
#Pass this function a CSV file and it will return true if the proper headers are there and each entry has the required fields filled in.
|
90
|
+
def valid_for_metadata(file_path)
|
91
|
+
file = read_csv_with_headers(file_path)
|
92
|
+
return check_headers(file)
|
93
|
+
end
|
94
|
+
|
95
|
+
# pass in csv data and it will tell you if everything is safe to register based on having labels, unique sourceIDs and filenames matching sourceIDs
|
96
|
+
def check_valid_to_register(csv_data)
|
88
97
|
#Make sure all the required headers are there
|
89
|
-
return false if not get_manifest_section(REGISTER).values-
|
90
|
-
|
98
|
+
return false if not get_manifest_section(REGISTER).values-csv_data[0].keys == []
|
99
|
+
sources=Array.new
|
91
100
|
#Make sure all files have entries for those required headers
|
92
|
-
|
101
|
+
csv_data.each do |row|
|
93
102
|
get_manifest_section(REGISTER).keys.each do |header| # label should be there as a column but does not always need a value
|
94
103
|
return false if header.downcase !='label' && row[header].blank? #Alternatively consider row[header].class != String or row[header].size <= 0
|
95
104
|
end
|
105
|
+
fname = row[get_manifest_section(REGISTER)['filename']].chomp(File.extname(row[get_manifest_section(REGISTER)['filename']]))
|
106
|
+
return false if row[get_manifest_section(REGISTER)['sourceid']] != fname
|
107
|
+
sources << row[get_manifest_section(REGISTER)['sourceid']]
|
96
108
|
end
|
97
|
-
|
109
|
+
return sources.uniq.size == sources.size
|
98
110
|
end
|
99
111
|
|
100
|
-
#
|
101
|
-
def
|
102
|
-
|
103
|
-
|
112
|
+
# looks at certain metadata fields in manifest to confirm validity (such as dates and formats)
|
113
|
+
def check_metadata(csv_data)
|
114
|
+
bad_rows=0
|
115
|
+
csv_data.each do |row|
|
116
|
+
valid_date=revs_is_valid_datestring?(row[get_manifest_section(METADATA)['year']] || row[get_manifest_section(METADATA)['date']])
|
117
|
+
valid_format=revs_is_valid_format?(row[get_manifest_section(METADATA)['format']])
|
118
|
+
bad_rows+=1 unless (valid_date && valid_format)
|
119
|
+
end
|
120
|
+
return bad_rows
|
121
|
+
end
|
122
|
+
|
123
|
+
# pass in csv data from a file read in and it will tell you if the headers are valid
|
124
|
+
def check_headers(csv_data)
|
125
|
+
file_headers=csv_data[0].keys.reject(&:blank?).collect(&:downcase)
|
104
126
|
#The file doesn't need to have all the metadata values, it just can't have headers that aren't used for metadata or registration
|
105
127
|
if file_headers.include?('date') && file_headers.include?('year') # can't have both date and year
|
106
128
|
return false
|
@@ -110,7 +132,7 @@ module Revs
|
|
110
132
|
return file_headers-get_manifest_section(METADATA).values-get_manifest_section(REGISTER).values == []
|
111
133
|
end
|
112
134
|
end
|
113
|
-
|
135
|
+
|
114
136
|
def clean_collection_name(name)
|
115
137
|
return "" if name.blank? || name.nil?
|
116
138
|
name=name.to_s
|
@@ -147,11 +169,19 @@ module Revs
|
|
147
169
|
return row
|
148
170
|
end
|
149
171
|
|
172
|
+
# checks to see if we have a valid format
|
173
|
+
def revs_is_valid_format?(format)
|
174
|
+
return true if format.nil? || format.blank?
|
175
|
+
formats=format.split("|").collect{|f| f.strip}
|
176
|
+
!formats.collect {|f| revs_known_formats.include?(f)}.uniq.include?(false)
|
177
|
+
end
|
178
|
+
|
179
|
+
# check a single format and fix some common issues
|
150
180
|
def revs_check_format(format)
|
151
181
|
return revs_check_formats([format]).first
|
152
182
|
end
|
153
183
|
|
154
|
-
# check the incoming
|
184
|
+
# check the incoming array of formats and fix some common issues
|
155
185
|
def revs_check_formats(format)
|
156
186
|
known_fixes = {"black-and-white negative"=>"black-and-white negatives",
|
157
187
|
"color negative"=>"color negatives",
|
@@ -163,7 +193,8 @@ module Revs
|
|
163
193
|
"black and white negative"=>"black-and-white negatives",
|
164
194
|
"black and white negatives"=>"black-and-white negatives",
|
165
195
|
"color transparency"=>"color transparencies",
|
166
|
-
"slide"=>"slides"
|
196
|
+
"slide"=>"slides",
|
197
|
+
"color transparancies"=>"color transparencies"
|
167
198
|
}
|
168
199
|
count = 0
|
169
200
|
format.each do |f|
|
@@ -236,6 +267,14 @@ module Revs
|
|
236
267
|
date_string.to_s.strip.scan(/\D/).empty? and (starting_year..Date.today.year).include?(date_string.to_i)
|
237
268
|
end
|
238
269
|
|
270
|
+
# tell us if the incoming datestring supplied in the manifest column is a valid date, year or list of years
|
271
|
+
def revs_is_valid_datestring?(date_string)
|
272
|
+
return true if date_string.nil? || date_string.empty?
|
273
|
+
is_full_date=(get_full_date(date_string) != false)
|
274
|
+
is_year=!parse_years(date_string).empty?
|
275
|
+
return is_year || is_full_date
|
276
|
+
end
|
277
|
+
|
239
278
|
# tell us if the string passed in is a full date of the format M/D/YYYY or m-d-yyyy or m-d-yy or M/D/YY, and returns the date object if it is valid
|
240
279
|
def get_full_date(date_string)
|
241
280
|
begin
|
data/revs-utils.gemspec
CHANGED
@@ -11,7 +11,8 @@ Gem::Specification.new do |gem|
|
|
11
11
|
gem.description = "Shared methods and functions used by revs-indexer, pre-assembly and bulk metadata loading code."
|
12
12
|
gem.summary = "Shared methods and functions used by revs-indexer, pre-assembly and bulk metadata loading code."
|
13
13
|
gem.homepage = ""
|
14
|
-
|
14
|
+
gem.license = "All rights reserved, Stanford University."
|
15
|
+
|
15
16
|
gem.files = `git ls-files`.split($/)
|
16
17
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
17
18
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
data/spec/revs-utils_spec.rb
CHANGED
@@ -61,6 +61,7 @@ describe "Revs-Utils" do
|
|
61
61
|
end
|
62
62
|
|
63
63
|
it "should clean up some common format errors from an array" do
|
64
|
+
@revs.revs_check_formats(['black-and-white negative','color negative','leave alone']).should == ['black-and-white negatives','color negatives','leave alone']
|
64
65
|
@revs.revs_check_formats(['black and white','color negative','black-and-white negative']).should == ['black-and-white negatives','color negatives','black-and-white negatives']
|
65
66
|
end
|
66
67
|
|
@@ -98,6 +99,7 @@ describe "Revs-Utils" do
|
|
98
99
|
@revs.get_full_date('1965|1968').should be_falsey# multiple years
|
99
100
|
@revs.get_full_date('1965-1968').should be_falsey# multiple years
|
100
101
|
@revs.get_full_date('1965-8').should be_falsey# multiple years
|
102
|
+
|
101
103
|
end
|
102
104
|
|
103
105
|
it "should indicate if we have a valid year" do
|
@@ -106,7 +108,29 @@ describe "Revs-Utils" do
|
|
106
108
|
@revs.is_valid_year?('1700').should be_falsey # too old! no cars even existed yet
|
107
109
|
@revs.is_valid_year?('1700',1600).should be_truthy # unless we allow it to be ok
|
108
110
|
end
|
109
|
-
|
111
|
+
|
112
|
+
it "should indicate if we have unknown formats" do
|
113
|
+
@revs.revs_is_valid_format?(nil).should be_truthy
|
114
|
+
@revs.revs_is_valid_format?('').should be_truthy
|
115
|
+
@revs.revs_is_valid_format?('slides').should be_truthy
|
116
|
+
@revs.revs_is_valid_format?('slide').should be_falsey
|
117
|
+
@revs.revs_is_valid_format?('slides | slide').should be_falsey
|
118
|
+
@revs.revs_is_valid_format?('slides | black-and-white negatives').should be_truthy
|
119
|
+
@revs.revs_is_valid_format?('black-and-white-negatives').should be_falsey
|
120
|
+
@revs.revs_is_valid_format?('black-and-white negatives').should be_truthy
|
121
|
+
end
|
122
|
+
|
123
|
+
it "should indicate if we have a valid datestring" do
|
124
|
+
@revs.revs_is_valid_datestring?('1959').should be_truthy
|
125
|
+
@revs.revs_is_valid_datestring?('bogus').should be_falsey
|
126
|
+
@revs.revs_is_valid_datestring?('').should be_truthy
|
127
|
+
@revs.revs_is_valid_datestring?(nil).should be_truthy
|
128
|
+
@revs.revs_is_valid_datestring?([]).should be_truthy
|
129
|
+
@revs.revs_is_valid_datestring?('2/2/1950').should be_truthy
|
130
|
+
@revs.revs_is_valid_datestring?('2/31/1950').should be_falsey
|
131
|
+
@revs.revs_is_valid_datestring?('2/2/50').should be_truthy
|
132
|
+
@revs.revs_is_valid_datestring?('195x').should be_truthy
|
133
|
+
end
|
110
134
|
|
111
135
|
it "should lookup the country correctly" do
|
112
136
|
@revs.revs_get_country('USA').should == "United States"
|
@@ -132,9 +156,10 @@ describe "Revs-Utils" do
|
|
132
156
|
@revs.revs_get_state_name('IN').should == "Indiana"
|
133
157
|
end
|
134
158
|
|
135
|
-
it "should parse 1950s correctly" do
|
159
|
+
it "should parse 1950s and 1950's correctly" do
|
136
160
|
|
137
161
|
@revs.parse_years('1950s').should == ['1950','1951','1952','1953','1954','1955','1956','1957','1958','1959']
|
162
|
+
@revs.parse_years("1950's").should == ['1950','1951','1952','1953','1954','1955','1956','1957','1958','1959']
|
138
163
|
|
139
164
|
end
|
140
165
|
|
@@ -156,6 +181,12 @@ describe "Revs-Utils" do
|
|
156
181
|
|
157
182
|
end
|
158
183
|
|
184
|
+
it "should parse 1800-1802" do
|
185
|
+
|
186
|
+
@revs.parse_years('1800-1802').should == ['1800','1801','1802']
|
187
|
+
|
188
|
+
end
|
189
|
+
|
159
190
|
it "should parse 1955-1957 | 1955 | 1955 and not produce duplicate years" do
|
160
191
|
|
161
192
|
@revs.parse_years('1955-1957 | 1955 | 1955').should == ['1955','1956','1957']
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: revs-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.7
|
4
|
+
version: 2.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Mangiafico
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-04-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: countries
|
@@ -149,7 +149,8 @@ files:
|
|
149
149
|
- spec/sample-csv-files/no-sourceid.csv
|
150
150
|
- spec/spec_helper.rb
|
151
151
|
homepage: ''
|
152
|
-
licenses:
|
152
|
+
licenses:
|
153
|
+
- All rights reserved, Stanford University.
|
153
154
|
metadata: {}
|
154
155
|
post_install_message:
|
155
156
|
rdoc_options: []
|