revs-utils 2.0.10 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/Gemfile.lock +2 -2
- data/README.rdoc +1 -0
- data/lib/revs-utils.rb +72 -3
- data/lib/revs-utils/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
YTcyMmQyOWY5ZWVmYzM1ZmVmOWM0NTM1ZjQ5N2MwMzI3Yzc4OTg3Ng==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
YmNjNzM1ZDFjMjUxYTllMWUwYWE3YThmMDQ4OWEwYmI4MTQzYjk4YQ==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MzZjY2YyOTFlZTVhNGI3ZTg2ODZhNGVkODUyNzFlYmI4N2RmMmM3YjcyNWY3
|
10
|
+
ZmI0N2ZmNjYwNjA2ZjQxOTkzZDM1MzEwODkwMDMzMTkxN2U2MjMyYmU5OGFj
|
11
|
+
YzIzNDBkOWE2ZTY3MjhiZTUwMmY0MjdhMDMzZmZjMmY3MWEwZTQ=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
MDQ2NDZjYTU4YTg5ZDk4OGNjNjRiNGI3NzkxZDExOGYwNDE0MWQ4YTk1MGNj
|
14
|
+
OWYwMjc3MDExZTE3YmJkMTJiNjAyNjVkNTE4MDdkODFjNjdlOGFhYWNiNGM0
|
15
|
+
YjZkM2RhMjBhYTcyYzM0Y2ZkNGQ2OWU0MTBmNjk4NDNmMTU4OWQ=
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
revs-utils (2.0.10)
|
4
|
+
revs-utils (2.1.0)
|
5
5
|
actionpack (>= 4.1.6)
|
6
6
|
chronic
|
7
7
|
countries (= 0.9.2)
|
@@ -67,7 +67,7 @@ GEM
|
|
67
67
|
rails-html-sanitizer (1.0.2)
|
68
68
|
loofah (~> 2.0)
|
69
69
|
rake (10.4.2)
|
70
|
-
rdf (1.1.
|
70
|
+
rdf (1.1.13)
|
71
71
|
link_header (~> 0.0, >= 0.0.8)
|
72
72
|
rest-client (1.8.0)
|
73
73
|
http-cookie (>= 1.0.2, < 2.0)
|
data/README.rdoc
CHANGED
@@ -23,6 +23,7 @@ Shared methods and functions used by revs-indexer, pre-assembly and bulk metadat
|
|
23
23
|
- <b>2.0.2 and 2.0.3</b> Update valid for metadata method so it is not sensitive to blank or uppercase columns
|
24
24
|
- <b>2.0.4</b> Fix issues with year parsing
|
25
25
|
- <b>2.0.10</b> Update to latest version of actionpack
|
26
|
+
- <b>2.1.0</b> Add field mapping and other common methods used in both revs digital library and revs-indexing
|
26
27
|
|
27
28
|
== Running tests
|
28
29
|
|
data/lib/revs-utils.rb
CHANGED
@@ -18,8 +18,7 @@ FORMATS = "known_formats"
|
|
18
18
|
|
19
19
|
module Revs
|
20
20
|
module Utils
|
21
|
-
|
22
|
-
|
21
|
+
|
23
22
|
# a hash of LC Subject Heading terms and their IDs for linking for "Automobiles" http://id.loc.gov/authorities/subjects/sh85010201.html
|
24
23
|
# this is cached and loaded from disk and deserialized back into a hash for performance reasons, then stored as a module
|
25
24
|
# level constant so it can be reused throughout the pre-assembly run as a constant
|
@@ -27,7 +26,77 @@ module Revs
|
|
27
26
|
AUTOMOBILE_LC_TERMS= File.open(REVS_LC_TERMS_FILENAME,'rb'){|io| Marshal.load(io)} if File.exists?(REVS_LC_TERMS_FILENAME)
|
28
27
|
REVS_MANIFEST_HEADERS_FILE = File.open(REVS_MANIFEST_HEADERS_FILEPATH)
|
29
28
|
REVS_MANIFEST_HEADERS = YAML.load( REVS_MANIFEST_HEADERS_FILE)
|
30
|
-
|
29
|
+
|
30
|
+
# these are used in the revs solr document in the main revs digital library rails app, as well as the revs-indexing-service app
|
31
|
+
# Returns the table that maps each logical Revs metadata field (a symbol) to
# its configuration hash. Every entry has a :field (the solr field name) and
# may also carry:
#   :default      - a default value for the field
#   :multi_valued - true when the solr field holds multiple values
#   :weight       - the field's weight, read by revs_compute_score
#   :editstore    - false on some entries (meaning not visible in this file)
#
# A new hash is built on every call, so callers may modify the result freely.
def revs_field_mappings
  {
    title:               { field: 'title_tsi', default: 'Untitled' },
    description:         { field: 'description_tsim', multi_valued: true, weight: 3 },
    photographer:        { field: 'photographer_ssi', weight: 1 },
    years:               { field: 'pub_year_isim', multi_valued: true, weight: 5 },
    single_year:         { field: 'pub_year_single_isi' },
    full_date:           { field: 'pub_date_ssi' },
    people:              { field: 'people_ssim', multi_valued: true, weight: 4 },
    subjects:            { field: 'subjects_ssim', multi_valued: true, weight: 1 },
    city_section:        { field: 'city_sections_ssi' },
    city:                { field: 'cities_ssi' },
    state:               { field: 'states_ssi' },
    country:             { field: 'countries_ssi' },
    formats:             { field: 'format_ssim', multi_valued: true },
    identifier:          { field: 'source_id_ssi' },
    production_notes:    { field: 'prod_notes_tsi' },
    institutional_notes: { field: 'inst_notes_tsi' },
    metadata_sources:    { field: 'metadata_sources_tsi' },
    has_more_metadata:   { field: 'has_more_metadata_ssi' },
    vehicle_markings:    { field: 'vehicle_markings_tsi', weight: 1 },
    marque:              { field: 'marque_ssim', multi_valued: true, weight: 4 },
    vehicle_model:       { field: 'model_ssim', multi_valued: true, weight: 2 },
    model_year:          { field: 'model_year_ssim', multi_valued: true, weight: 1 },
    current_owner:       { field: 'current_owner_tsi', weight: 1 },
    entrant:             { field: 'entrant_ssim', multi_valued: true, weight: 1 },
    venue:               { field: 'venue_ssi' },
    track:               { field: 'track_ssi', weight: 1 },
    event:               { field: 'event_ssi' },
    group_class:         { field: 'group_class_tsi', weight: 1 },
    race_data:           { field: 'race_data_tsi', weight: 1 },
    priority:            { field: 'priority_isi', default: 0, editstore: false },
    collections:         { field: 'is_member_of_ssim', multi_valued: true },
    collection_names:    { field: 'collection_ssim', multi_valued: true, editstore: false },
    archive_name:        { field: 'archive_ssi', editstore: false },
    highlighted:         { field: 'highlighted_ssi', editstore: false },
    visibility_value:    { field: 'visibility_isi', editstore: false },
    score:               { field: 'score_isi', editstore: false },
    timestamp:           { field: 'timestamp', editstore: false }
  }
end
|
72
|
+
|
73
|
+
# these are used in the revs solr document in the main revs digital library rails app, as well as the revs-indexing-service app
|
74
|
+
# Builds a single display string for a document's location by joining its
# non-blank city section, city, state and country values with ', '.
#
# The solr field names come from revs_field_mappings so this method cannot
# drift from the mapping table (the city section field is 'city_sections_ssi').
# Each field is looked up by its string name first and then by the equivalent
# symbol, so both string-keyed and symbol-keyed hashes work. The older
# :city_section_ssi key is still honoured as a last resort for existing callers.
#
# doc_hash - a hash-like solr document
#
# Returns a String, empty when no location part has a value.
def revs_location(doc_hash)
  mappings = revs_field_mappings

  parts = [:city_section, :city, :state, :country].map do |name|
    solr_field = mappings[name][:field]
    candidate_keys = [solr_field, solr_field.to_sym]
    candidate_keys << :city_section_ssi if name == :city_section # legacy key
    candidate_keys.map { |key| doc_hash[key] }.reject(&:blank?).first
  end

  parts.reject(&:blank?).join(', ')
end
|
77
|
+
|
78
|
+
# these are used in the revs solr document in the main revs digital library rails app, as well as the revs-indexing-service app
|
79
|
+
# Computes a metadata completeness score for a document as a whole-number
# percentage.
#
# Every entry in revs_field_mappings that has a :weight adds that weight to
# the maximum score, and adds it to the achieved score when the document's
# value for the mapped solr field is not blank (per blank_value?). Location
# counts once with a weight of 3: it is earned when any of the location
# string, the venue field or the event field has a value.
#
# doc_hash - a hash-like solr document, read here by solr field name
#
# Returns an Integer: achieved weight / total weight * 100, rounded up.
def revs_compute_score(doc_hash)
  mappings = revs_field_mappings
  total_score = 0.0
  total_weights = 0.0

  mappings.each_value do |field_config|
    next if field_config[:weight].blank? # unweighted fields do not affect the score

    weight = field_config[:weight].to_f
    total_weights += weight
    total_score += weight unless blank_value?(doc_hash[field_config[:field]])
  end

  # Venue and event are read through their mapped solr field names; indexing
  # the document by :venue / :event would hit the wrong key.
  location_weight = 3
  location_values = [
    revs_location(doc_hash),
    doc_hash[mappings[:venue][:field]],
    doc_hash[mappings[:event][:field]]
  ]
  total_weights += location_weight
  total_score += location_weight unless location_values.all?(&:blank?)

  # Multiply before dividing: (21.0 / 30.0) * 100 is 70.00000000000001 in
  # floating point, which ceil would round up to 71.
  ((total_score * 100) / total_weights).ceil
end
|
99
|
+
|
31
100
|
# Returns the manifest section named by the FORMATS constant, as produced by
# get_manifest_section.
def revs_known_formats
  section_name = FORMATS
  get_manifest_section(section_name)
end
|
data/lib/revs-utils/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: revs-utils
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.10
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Mangiafico
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-05-
|
11
|
+
date: 2015-05-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: countries
|