revs-utils 2.0.7 → 2.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/Gemfile.lock +23 -15
- data/README.rdoc +2 -8
- data/config/manifest_headers.yml +12 -2
- data/lib/revs-utils/version.rb +1 -1
- data/lib/revs-utils.rb +59 -20
- data/revs-utils.gemspec +2 -1
- data/spec/revs-utils_spec.rb +33 -2
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MTk1MjU3MmRiNzgwMjNlNmRmMjRjMGQ2NmEwOTIxZTFiNTA3YjEyNQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
NzMzOWNmODZlYjU2NDE2ZmQyZWUwNjgxMjcwMjk5ZTNmYmU5OWQ2Nw==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
OTkzOTQ5ZmEyNjYxMWJlZTNmOTZkOWZhMjBjOWNmYmI1OGQ4MzM5Y2IzZmE4
|
10
|
+
ZWNhNDI5M2Y2YTUxZmIwMDExM2Q3YWVkYmFjNDg5OTM1ZDVkMGVjOWRjODI3
|
11
|
+
YjM3ZjU3MTg4NWViMGZkODUwYjgyMWU1OWRkYTkzZTJiYmE1MGM=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YTBmMjY5YzIyMzVhOWQ1NjA3NTkxYWNjNDUzZDA3MWIyODRhZmU5NDZhYmY0
|
14
|
+
MTUxZDcyNjlkNzFmNWZmNGJhNzU2OWYzYzRhM2QxNzVhZWUwNGVjM2NiODNk
|
15
|
+
ZDY1NzcxOGNjMWQ1ZDZlODM1YTJjYzgzM2FiZTg2NDE4NmQ4ZTg=
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
revs-utils (2.0.7)
|
4
|
+
revs-utils (2.0.8)
|
5
5
|
actionpack (~> 4.1.6)
|
6
6
|
chronic
|
7
7
|
countries (= 0.9.2)
|
@@ -11,16 +11,16 @@ GEM
|
|
11
11
|
remote: http://rubygems.org/
|
12
12
|
remote: http://sul-gems.stanford.edu/
|
13
13
|
specs:
|
14
|
-
actionpack (4.1.
|
15
|
-
actionview (= 4.1.
|
16
|
-
activesupport (= 4.1.
|
14
|
+
actionpack (4.1.10)
|
15
|
+
actionview (= 4.1.10)
|
16
|
+
activesupport (= 4.1.10)
|
17
17
|
rack (~> 1.5.2)
|
18
18
|
rack-test (~> 0.6.2)
|
19
|
-
actionview (4.1.
|
20
|
-
activesupport (= 4.1.
|
19
|
+
actionview (4.1.10)
|
20
|
+
activesupport (= 4.1.10)
|
21
21
|
builder (~> 3.1)
|
22
22
|
erubis (~> 2.7.0)
|
23
|
-
activesupport (4.1.
|
23
|
+
activesupport (4.1.10)
|
24
24
|
i18n (~> 0.6, >= 0.6.9)
|
25
25
|
json (~> 1.7, >= 1.7.7)
|
26
26
|
minitest (~> 5.1)
|
@@ -32,23 +32,28 @@ GEM
|
|
32
32
|
currencies (>= 0.4.0)
|
33
33
|
currencies (0.4.2)
|
34
34
|
diff-lcs (1.2.5)
|
35
|
+
domain_name (0.5.23)
|
36
|
+
unf (>= 0.0.5, < 1.0.0)
|
35
37
|
erubis (2.7.0)
|
38
|
+
http-cookie (1.0.2)
|
39
|
+
domain_name (~> 0.5)
|
36
40
|
i18n (0.7.0)
|
37
41
|
json (1.8.2)
|
38
42
|
link_header (0.0.8)
|
39
43
|
lyberteam-gems-devel (1.0.1)
|
40
44
|
rake (>= 0.8.7)
|
41
45
|
rest-client
|
42
|
-
mime-types (2.3)
|
46
|
+
mime-types (2.4.3)
|
43
47
|
minitest (5.5.1)
|
44
|
-
netrc (0.
|
48
|
+
netrc (0.10.3)
|
45
49
|
rack (1.5.2)
|
46
50
|
rack-test (0.6.3)
|
47
51
|
rack (>= 1.0)
|
48
|
-
rake (10.
|
49
|
-
rdf (1.1.
|
52
|
+
rake (10.4.2)
|
53
|
+
rdf (1.1.11)
|
50
54
|
link_header (~> 0.0, >= 0.0.8)
|
51
|
-
rest-client (1.
|
55
|
+
rest-client (1.8.0)
|
56
|
+
http-cookie (>= 1.0.2, < 2.0)
|
52
57
|
mime-types (>= 1.16, < 3.0)
|
53
58
|
netrc (~> 0.7)
|
54
59
|
rspec (2.99.0)
|
@@ -58,11 +63,14 @@ GEM
|
|
58
63
|
rspec-core (2.99.2)
|
59
64
|
rspec-expectations (2.99.2)
|
60
65
|
diff-lcs (>= 1.1.3, < 2.0)
|
61
|
-
rspec-mocks (2.99.
|
62
|
-
thread_safe (0.3.
|
66
|
+
rspec-mocks (2.99.3)
|
67
|
+
thread_safe (0.3.5)
|
63
68
|
tzinfo (1.2.2)
|
64
69
|
thread_safe (~> 0.1)
|
65
|
-
|
70
|
+
unf (0.1.4)
|
71
|
+
unf_ext
|
72
|
+
unf_ext (0.0.6)
|
73
|
+
yard (0.8.7.6)
|
66
74
|
|
67
75
|
PLATFORMS
|
68
76
|
ruby
|
data/README.rdoc
CHANGED
@@ -18,21 +18,14 @@ Shared methods and functions used by revs-indexer, pre-assembly and bulk metadat
|
|
18
18
|
- <b>1.0.6</b> Add some more conditions to CSV header checks
|
19
19
|
- <b>1.0.7</b> Label column needs to be there but does not need to have a value to register
|
20
20
|
- <b>1.0.8</b> Update clean_collection_name method to deal with other possible names
|
21
|
-
<<<<<<< HEAD
|
22
21
|
- <b>2.0.0</b> Updating to use ActionPack 4 for Rails 4 applications. For Rails 3, continue to use 1.x.y releases.
|
23
22
|
- <b>2.0.1</b> Add more common format corrections
|
24
23
|
- <b>2.0.2 and 2.0.3</b> Update valid for metadata method so it is not sensitive to blank or uppercase columns
|
25
24
|
- <b>2.0.4</b> Fix issues with year parsing
|
26
|
-
=======
|
27
|
-
- <b>1.0.9</b> Add more common format corrections
|
28
|
-
- <b>1.0.10</b> Update valid for metadata method so it is not sensitive to blank or uppercase columns
|
29
|
-
- <b>1.0.11</b> Fix issues with year parsing
|
30
|
-
- <b>1.0.12</b> Allow two digit years in year formatting
|
31
|
-
>>>>>>> 9db8640... Allow two digit years in year formatting
|
32
25
|
|
33
26
|
== Running tests
|
34
27
|
|
35
|
-
bundle exec
|
28
|
+
bundle exec rspec spec
|
36
29
|
|
37
30
|
== Release the gem to the gemserver
|
38
31
|
|
@@ -48,6 +41,7 @@ gem build revs-utils.gemspec
|
|
48
41
|
|
49
42
|
gem push revs-utils-2.0.1.gem # substitute actual version number
|
50
43
|
|
44
|
+
|
51
45
|
== Installation
|
52
46
|
|
53
47
|
Add this line to your application's Gemfile:
|
data/config/manifest_headers.yml
CHANGED
@@ -30,5 +30,15 @@ metadata:
|
|
30
30
|
hide: hide
|
31
31
|
format: format
|
32
32
|
collection_name: collection_name
|
33
|
-
|
34
|
-
|
33
|
+
known_formats:
|
34
|
+
- black-and-white film
|
35
|
+
- color film
|
36
|
+
- slides
|
37
|
+
- foldouts
|
38
|
+
- pamphlets
|
39
|
+
- oversize items
|
40
|
+
- photographic prints
|
41
|
+
- black-and-white negatives
|
42
|
+
- color negatives
|
43
|
+
- black-and-white transparencies
|
44
|
+
- color transparencies
|
data/lib/revs-utils/version.rb
CHANGED
data/lib/revs-utils.rb
CHANGED
@@ -14,7 +14,7 @@ REVS_LC_TERMS_FILENAME=File.join(PROJECT_ROOT,'files','revs-lc-marque-terms.obj'
|
|
14
14
|
REVS_MANIFEST_HEADERS_FILEPATH = File.join(PROJECT_ROOT,'config',"manifest_headers.yml")
|
15
15
|
REGISTER = "register"
|
16
16
|
METADATA = "metadata"
|
17
|
-
|
17
|
+
FORMATS = "known_formats"
|
18
18
|
|
19
19
|
module Revs
|
20
20
|
module Utils
|
@@ -28,6 +28,9 @@ module Revs
|
|
28
28
|
REVS_MANIFEST_HEADERS_FILE = File.open(REVS_MANIFEST_HEADERS_FILEPATH)
|
29
29
|
REVS_MANIFEST_HEADERS = YAML.load( REVS_MANIFEST_HEADERS_FILE)
|
30
30
|
|
31
|
+
def revs_known_formats
|
32
|
+
get_manifest_section(FORMATS)
|
33
|
+
end
|
31
34
|
|
32
35
|
def get_manifest_section(section)
|
33
36
|
return REVS_MANIFEST_HEADERS[section]
|
@@ -67,40 +70,59 @@ module Revs
|
|
67
70
|
sources = Array.new
|
68
71
|
files.each do |file|
|
69
72
|
file.each do |row|
|
70
|
-
#Make sure the
|
73
|
+
#Make sure the sourceid and filename are the same
|
71
74
|
fname = row[get_manifest_section(REGISTER)['filename']].chomp(File.extname(row[get_manifest_section(REGISTER)['filename']]))
|
72
75
|
return false if row[get_manifest_section(REGISTER)['sourceid']] != fname
|
73
76
|
sources << row[get_manifest_section(REGISTER)['sourceid']]
|
74
|
-
end
|
75
|
-
|
76
|
-
|
77
|
-
|
77
|
+
end
|
78
78
|
end
|
79
79
|
return sources.uniq.size == sources.size
|
80
80
|
|
81
81
|
end
|
82
|
-
|
83
|
-
|
82
|
+
|
84
83
|
#Pass this function a CSV file and it will return true if the proper headers are there and each entry has the required fields filled in
|
85
84
|
def valid_to_register(file_path)
|
86
|
-
|
87
85
|
file = read_csv_with_headers(file_path)
|
86
|
+
return check_valid_to_register(file)
|
87
|
+
end
|
88
|
+
|
89
|
+
#Pass this function a CSV file and it will return true if the proper headers are there and each entry has the required fields filled in.
|
90
|
+
def valid_for_metadata(file_path)
|
91
|
+
file = read_csv_with_headers(file_path)
|
92
|
+
return check_headers(file)
|
93
|
+
end
|
94
|
+
|
95
|
+
# pass in csv data and it will tell if you everything is safe to register based on having labels, unique sourceIDs and filenames matching sourceIDs
|
96
|
+
def check_valid_to_register(csv_data)
|
88
97
|
#Make sure all the required headers are there
|
89
|
-
return false if not get_manifest_section(REGISTER).values-
|
90
|
-
|
98
|
+
return false if not get_manifest_section(REGISTER).values-csv_data[0].keys == []
|
99
|
+
sources=Array.new
|
91
100
|
#Make sure all files have entries for those required headers
|
92
|
-
|
101
|
+
csv_data.each do |row|
|
93
102
|
get_manifest_section(REGISTER).keys.each do |header| # label should be there as a column but does not always need a value
|
94
103
|
return false if header.downcase !='label' && row[header].blank? #Alternatively consider row[header].class != String or row[header].size <= 0
|
95
104
|
end
|
105
|
+
fname = row[get_manifest_section(REGISTER)['filename']].chomp(File.extname(row[get_manifest_section(REGISTER)['filename']]))
|
106
|
+
return false if row[get_manifest_section(REGISTER)['sourceid']] != fname
|
107
|
+
sources << row[get_manifest_section(REGISTER)['sourceid']]
|
96
108
|
end
|
97
|
-
|
109
|
+
return sources.uniq.size == sources.size
|
98
110
|
end
|
99
111
|
|
100
|
-
#
|
101
|
-
def
|
102
|
-
|
103
|
-
|
112
|
+
# looks at certain metadata fields in manifest to confirm validity (such as dates and formats)
|
113
|
+
def check_metadata(csv_data)
|
114
|
+
bad_rows=0
|
115
|
+
csv_data.each do |row|
|
116
|
+
valid_date=revs_is_valid_datestring?(row[get_manifest_section(METADATA)['year']] || row[get_manifest_section(METADATA)['date']])
|
117
|
+
valid_format=revs_is_valid_format?(row[get_manifest_section(METADATA)['format']])
|
118
|
+
bad_rows+=1 unless (valid_date && valid_format)
|
119
|
+
end
|
120
|
+
return bad_rows
|
121
|
+
end
|
122
|
+
|
123
|
+
# pass in csv data from a file read in and it will tell you if the headers are valid
|
124
|
+
def check_headers(csv_data)
|
125
|
+
file_headers=csv_data[0].keys.reject(&:blank?).collect(&:downcase)
|
104
126
|
#The file doesn't need to have all the metadata values, it just can't have headers that aren't used for metadata or registration
|
105
127
|
if file_headers.include?('date') && file_headers.include?('year') # can't have both date and year
|
106
128
|
return false
|
@@ -110,7 +132,7 @@ module Revs
|
|
110
132
|
return file_headers-get_manifest_section(METADATA).values-get_manifest_section(REGISTER).values == []
|
111
133
|
end
|
112
134
|
end
|
113
|
-
|
135
|
+
|
114
136
|
def clean_collection_name(name)
|
115
137
|
return "" if name.blank? || name.nil?
|
116
138
|
name=name.to_s
|
@@ -147,11 +169,19 @@ module Revs
|
|
147
169
|
return row
|
148
170
|
end
|
149
171
|
|
172
|
+
# checks to see if we have a valid format
|
173
|
+
def revs_is_valid_format?(format)
|
174
|
+
return true if format.nil? || format.blank?
|
175
|
+
formats=format.split("|").collect{|f| f.strip}
|
176
|
+
!formats.collect {|f| revs_known_formats.include?(f)}.uniq.include?(false)
|
177
|
+
end
|
178
|
+
|
179
|
+
# check a single format and fix some common issues
|
150
180
|
def revs_check_format(format)
|
151
181
|
return revs_check_formats([format]).first
|
152
182
|
end
|
153
183
|
|
154
|
-
# check the incoming
|
184
|
+
# check the incoming array of formats and fix some common issues
|
155
185
|
def revs_check_formats(format)
|
156
186
|
known_fixes = {"black-and-white negative"=>"black-and-white negatives",
|
157
187
|
"color negative"=>"color negatives",
|
@@ -163,7 +193,8 @@ module Revs
|
|
163
193
|
"black and white negative"=>"black-and-white negatives",
|
164
194
|
"black and white negatives"=>"black-and-white negatives",
|
165
195
|
"color transparency"=>"color transparencies",
|
166
|
-
"slide"=>"slides"
|
196
|
+
"slide"=>"slides",
|
197
|
+
"color transparancies"=>"color transparencies"
|
167
198
|
}
|
168
199
|
count = 0
|
169
200
|
format.each do |f|
|
@@ -236,6 +267,14 @@ module Revs
|
|
236
267
|
date_string.to_s.strip.scan(/\D/).empty? and (starting_year..Date.today.year).include?(date_string.to_i)
|
237
268
|
end
|
238
269
|
|
270
|
+
# tell us if the incoming datestring supplied in the manifest column is a valid date, year or list of years
|
271
|
+
def revs_is_valid_datestring?(date_string)
|
272
|
+
return true if date_string.nil? || date_string.empty?
|
273
|
+
is_full_date=(get_full_date(date_string) != false)
|
274
|
+
is_year=!parse_years(date_string).empty?
|
275
|
+
return is_year || is_full_date
|
276
|
+
end
|
277
|
+
|
239
278
|
# tell us if the string passed is in is a full date of the format M/D/YYYY or m-d-yyyy or m-d-yy or M/D/YY, and returns the date object if it is valid
|
240
279
|
def get_full_date(date_string)
|
241
280
|
begin
|
data/revs-utils.gemspec
CHANGED
@@ -11,7 +11,8 @@ Gem::Specification.new do |gem|
|
|
11
11
|
gem.description = "Shared methods and functions used by revs-indexer, pre-assembly and bulk metadata loading code."
|
12
12
|
gem.summary = "Shared methods and functions used by revs-indexer, pre-assembly and bulk metadata loading code."
|
13
13
|
gem.homepage = ""
|
14
|
-
|
14
|
+
gem.license = "All rights reserved, Stanford University."
|
15
|
+
|
15
16
|
gem.files = `git ls-files`.split($/)
|
16
17
|
gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
|
17
18
|
gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
|
data/spec/revs-utils_spec.rb
CHANGED
@@ -61,6 +61,7 @@ describe "Revs-Utils" do
|
|
61
61
|
end
|
62
62
|
|
63
63
|
it "should clean up some common format errors from an array" do
|
64
|
+
@revs.revs_check_formats(['black-and-white negative','color negative','leave alone']).should == ['black-and-white negatives','color negatives','leave alone']
|
64
65
|
@revs.revs_check_formats(['black and white','color negative','black-and-white negative']).should == ['black-and-white negatives','color negatives','black-and-white negatives']
|
65
66
|
end
|
66
67
|
|
@@ -98,6 +99,7 @@ describe "Revs-Utils" do
|
|
98
99
|
@revs.get_full_date('1965|1968').should be_falsey# multiple years
|
99
100
|
@revs.get_full_date('1965-1968').should be_falsey# multiple years
|
100
101
|
@revs.get_full_date('1965-8').should be_falsey# multiple years
|
102
|
+
|
101
103
|
end
|
102
104
|
|
103
105
|
it "should indicate if we have a valid year" do
|
@@ -106,7 +108,29 @@ describe "Revs-Utils" do
|
|
106
108
|
@revs.is_valid_year?('1700').should be_falsey # too old! no cars even existed yet
|
107
109
|
@revs.is_valid_year?('1700',1600).should be_truthy # unless we allow it to be ok
|
108
110
|
end
|
109
|
-
|
111
|
+
|
112
|
+
it "should indicate if we have unknown formats" do
|
113
|
+
@revs.revs_is_valid_format?(nil).should be_truthy
|
114
|
+
@revs.revs_is_valid_format?('').should be_truthy
|
115
|
+
@revs.revs_is_valid_format?('slides').should be_truthy
|
116
|
+
@revs.revs_is_valid_format?('slide').should be_falsey
|
117
|
+
@revs.revs_is_valid_format?('slides | slide').should be_falsey
|
118
|
+
@revs.revs_is_valid_format?('slides | black-and-white negatives').should be_truthy
|
119
|
+
@revs.revs_is_valid_format?('black-and-white-negatives').should be_falsey
|
120
|
+
@revs.revs_is_valid_format?('black-and-white negatives').should be_truthy
|
121
|
+
end
|
122
|
+
|
123
|
+
it "should indicate if we have a valid datestring" do
|
124
|
+
@revs.revs_is_valid_datestring?('1959').should be_truthy
|
125
|
+
@revs.revs_is_valid_datestring?('bogus').should be_falsey
|
126
|
+
@revs.revs_is_valid_datestring?('').should be_truthy
|
127
|
+
@revs.revs_is_valid_datestring?(nil).should be_truthy
|
128
|
+
@revs.revs_is_valid_datestring?([]).should be_truthy
|
129
|
+
@revs.revs_is_valid_datestring?('2/2/1950').should be_truthy
|
130
|
+
@revs.revs_is_valid_datestring?('2/31/1950').should be_falsey
|
131
|
+
@revs.revs_is_valid_datestring?('2/2/50').should be_truthy
|
132
|
+
@revs.revs_is_valid_datestring?('195x').should be_truthy
|
133
|
+
end
|
110
134
|
|
111
135
|
it "should lookup the country correctly" do
|
112
136
|
@revs.revs_get_country('USA').should == "United States"
|
@@ -132,9 +156,10 @@ describe "Revs-Utils" do
|
|
132
156
|
@revs.revs_get_state_name('IN').should == "Indiana"
|
133
157
|
end
|
134
158
|
|
135
|
-
it "should parse 1950s correctly" do
|
159
|
+
it "should parse 1950s and 1950's correctly" do
|
136
160
|
|
137
161
|
@revs.parse_years('1950s').should == ['1950','1951','1952','1953','1954','1955','1956','1957','1958','1959']
|
162
|
+
@revs.parse_years("1950's").should == ['1950','1951','1952','1953','1954','1955','1956','1957','1958','1959']
|
138
163
|
|
139
164
|
end
|
140
165
|
|
@@ -156,6 +181,12 @@ describe "Revs-Utils" do
|
|
156
181
|
|
157
182
|
end
|
158
183
|
|
184
|
+
it "should parse 1800-1802" do
|
185
|
+
|
186
|
+
@revs.parse_years('1800-1802').should == ['1800','1801','1802']
|
187
|
+
|
188
|
+
end
|
189
|
+
|
159
190
|
it "should parse 1955-1957 | 1955 | 1955 and not produce duplicate years" do
|
160
191
|
|
161
192
|
@revs.parse_years('1955-1957 | 1955 | 1955').should == ['1955','1956','1957']
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: revs-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.7
|
4
|
+
version: 2.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Mangiafico
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-04-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: countries
|
@@ -149,7 +149,8 @@ files:
|
|
149
149
|
- spec/sample-csv-files/no-sourceid.csv
|
150
150
|
- spec/spec_helper.rb
|
151
151
|
homepage: ''
|
152
|
-
licenses:
|
152
|
+
licenses:
|
153
|
+
- All rights reserved, Stanford University.
|
153
154
|
metadata: {}
|
154
155
|
post_install_message:
|
155
156
|
rdoc_options: []
|