digital_scriptorium 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -29
- data/.rubocop_todo.yml +12 -0
- data/lib/digital_scriptorium/ds_item.rb +9 -5
- data/lib/digital_scriptorium/ds_meta.rb +19 -2
- data/lib/digital_scriptorium/item_id.rb +4 -3
- data/lib/digital_scriptorium/manuscript.rb +3 -3
- data/lib/digital_scriptorium/record.rb +1 -1
- data/lib/digital_scriptorium/transformers/acknowledgements_claim_transformer.rb +14 -0
- data/lib/digital_scriptorium/transformers/base_claim_transformer.rb +40 -0
- data/lib/digital_scriptorium/transformers/date_claim_transformer.rb +51 -0
- data/lib/digital_scriptorium/transformers/dated_claim_transformer.rb +17 -0
- data/lib/digital_scriptorium/transformers/iiif_manifest_claim_transformer.rb +10 -0
- data/lib/digital_scriptorium/transformers/link_claim_transformer.rb +14 -0
- data/lib/digital_scriptorium/transformers/name_claim_transformer.rb +23 -0
- data/lib/digital_scriptorium/transformers/note_claim_transformer.rb +18 -0
- data/lib/digital_scriptorium/transformers/physical_description_claim_transformer.rb +18 -0
- data/lib/digital_scriptorium/transformers/qualified_claim_transformer.rb +79 -0
- data/lib/digital_scriptorium/transformers/qualified_claim_transformer_with_facet_fallback.rb +11 -0
- data/lib/digital_scriptorium/transformers/shelfmark_claim_transformer.rb +18 -0
- data/lib/digital_scriptorium/transformers/uniform_title_claim_transformer.rb +14 -0
- data/lib/digital_scriptorium/transformers.rb +100 -0
- data/lib/digital_scriptorium/version.rb +1 -1
- data/lib/digital_scriptorium.rb +1 -3
- data/wikibase_to_solr_new.rb +32 -24
- metadata +73 -6
- data/lib/digital_scriptorium/claim_transformer.rb +0 -82
- data/lib/digital_scriptorium/date_claim_transformer.rb +0 -25
- data/lib/digital_scriptorium/name_claim_transformer.rb +0 -61
- data/property_config.yml +0 -106
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 90d97605a47a87aec5fb8dc41385f9ce55e415d8d96bd51bc91f0436b8d5ec07
|
4
|
+
data.tar.gz: 873ab9fbb3d1fb275419ec753c36929f1a806fe40a9023aaa3279b2684871218
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0142d571dd96cd21270a782c0327bf0798af63e00d904eb71263d7a43fb8eb69e4fbe2854ad75dcf4b4bed7b56148e81a4a0be1862bff0e76168572126b94532
|
7
|
+
data.tar.gz: e51efd7f188fcc8bc29e846d50b6fceeb6a61ec00ccb1217807ed124303060290a6f6890980b25ab54adae11283896319af3ba0af63b6ca1f4d890a7bf2243e4
|
data/.rubocop.yml
CHANGED
@@ -1,30 +1,4 @@
|
|
1
|
-
|
2
|
-
- rubocop-rake
|
3
|
-
- rubocop-rspec
|
1
|
+
inherit_from: .rubocop_todo.yml
|
4
2
|
|
5
|
-
|
6
|
-
|
7
|
-
NewCops: enable
|
8
|
-
Layout/LineLength:
|
9
|
-
Exclude:
|
10
|
-
- spec/**/*
|
11
|
-
Metrics/AbcSize:
|
12
|
-
Enabled: false
|
13
|
-
Metrics/BlockLength:
|
14
|
-
Enabled: false
|
15
|
-
Metrics/CyclomaticComplexity:
|
16
|
-
Enabled: false
|
17
|
-
Metrics/MethodLength:
|
18
|
-
Enabled: false
|
19
|
-
Metrics/ModuleLength:
|
20
|
-
Enabled: false
|
21
|
-
Metrics/PerceivedComplexity:
|
22
|
-
Enabled: false
|
23
|
-
RSpec/ExampleLength:
|
24
|
-
Enabled: false
|
25
|
-
RSpec/MultipleExpectations:
|
26
|
-
Enabled: false
|
27
|
-
RSpec/MultipleMemoizedHelpers:
|
28
|
-
Enabled: false
|
29
|
-
Style/SafeNavigationChainLength:
|
30
|
-
Enabled: false
|
3
|
+
inherit_gem:
|
4
|
+
upennlib-rubocop: upennlib_rubocop_defaults.yml
|
data/.rubocop_todo.yml
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
# This configuration was generated by
|
2
|
+
# `rubocop --auto-gen-config`
|
3
|
+
# on 2025-01-17 01:39:58 UTC using RuboCop version 1.70.0.
|
4
|
+
# The point is for the user to remove these configuration records
|
5
|
+
# one by one as the offenses are removed from the code base.
|
6
|
+
# Note that changes in the inspected code, or installation of new
|
7
|
+
# versions of RuboCop, may require this file to be generated again.
|
8
|
+
|
9
|
+
# Offense count: 2
|
10
|
+
# Configuration parameters: CountComments, CountAsOne.
|
11
|
+
Metrics/ModuleLength:
|
12
|
+
Max: 137
|
@@ -10,19 +10,23 @@ module DigitalScriptorium
|
|
10
10
|
end
|
11
11
|
|
12
12
|
def ds_id
|
13
|
-
|
13
|
+
claims_by_property_id(PropertyId::DS_ID)&.first&.data_value # P1
|
14
14
|
end
|
15
15
|
|
16
|
-
def
|
17
|
-
|
16
|
+
def holding_ids
|
17
|
+
claims_by_property_id(PropertyId::MANUSCRIPT_HOLDING)&.map(&:entity_id_value) # P2
|
18
18
|
end
|
19
19
|
|
20
20
|
def described_manuscript_id
|
21
|
-
|
21
|
+
claims_by_property_id(PropertyId::DESCRIBED_MANUSCRIPT)&.first&.entity_id_value # P3
|
22
|
+
end
|
23
|
+
|
24
|
+
def holding_status
|
25
|
+
claims_by_property_id(PropertyId::HOLDING_STATUS)&.first&.entity_id_value # P6
|
22
26
|
end
|
23
27
|
|
24
28
|
def iiif_manifest
|
25
|
-
|
29
|
+
claims_by_property_id(PropertyId::IIIF_MANIFEST)&.first&.entity_id_value # P41
|
26
30
|
end
|
27
31
|
|
28
32
|
def core_model_item?
|
@@ -3,15 +3,32 @@
|
|
3
3
|
module DigitalScriptorium
|
4
4
|
# Represents a meta record consisting of a manuscript, its holding information, and metadata record.
|
5
5
|
class DsMeta
|
6
|
+
include ItemId
|
7
|
+
include PropertyId
|
8
|
+
|
6
9
|
attr_reader :holding, :manuscript, :record
|
7
10
|
|
8
11
|
def initialize(record, export_hash)
|
9
12
|
manuscript = export_hash[record.described_manuscript_id]
|
10
|
-
|
13
|
+
current_holdings = current_holdings(manuscript, export_hash)
|
14
|
+
|
15
|
+
if current_holdings.size != 1
|
16
|
+
raise "Manuscripts must have exactly 1 current holding, found #{current_holdings.size}"
|
17
|
+
end
|
11
18
|
|
12
|
-
@holding =
|
19
|
+
@holding = current_holdings.first
|
13
20
|
@manuscript = manuscript
|
14
21
|
@record = record
|
15
22
|
end
|
23
|
+
|
24
|
+
def current?(holding)
|
25
|
+
holding.holding_status == HOLDING_STATUS_CURRENT
|
26
|
+
end
|
27
|
+
|
28
|
+
def current_holdings(manuscript, export_hash)
|
29
|
+
manuscript.holding_ids
|
30
|
+
.map { |id| export_hash[id] }
|
31
|
+
.filter { |holding| current?(holding) }
|
32
|
+
end
|
16
33
|
end
|
17
34
|
end
|
@@ -5,9 +5,10 @@ require 'set'
|
|
5
5
|
module DigitalScriptorium
|
6
6
|
# Constants for core model item IDs.
|
7
7
|
module ItemId
|
8
|
-
MANUSCRIPT
|
9
|
-
HOLDING
|
10
|
-
RECORD
|
8
|
+
MANUSCRIPT = 'Q1'
|
9
|
+
HOLDING = 'Q2'
|
10
|
+
RECORD = 'Q3'
|
11
|
+
HOLDING_STATUS_CURRENT = 'Q4'
|
11
12
|
|
12
13
|
CORE_MODEL_ITEMS = Set[MANUSCRIPT, HOLDING, RECORD]
|
13
14
|
end
|
@@ -6,11 +6,11 @@ module DigitalScriptorium
|
|
6
6
|
include PropertyId
|
7
7
|
|
8
8
|
def ds_id
|
9
|
-
|
9
|
+
claims_by_property_id(DS_ID)&.first&.data_value # P1
|
10
10
|
end
|
11
11
|
|
12
|
-
def
|
13
|
-
|
12
|
+
def holding_ids
|
13
|
+
claims_by_property_id(MANUSCRIPT_HOLDING)&.map(&:entity_id_value) # P2
|
14
14
|
end
|
15
15
|
end
|
16
16
|
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DigitalScriptorium
|
4
|
+
# Transformer for acknowledgements (P33) claims.
|
5
|
+
class AcknowledgementsClaimTransformer < BaseClaimTransformer
|
6
|
+
def initialize(claim, _, **kwargs)
|
7
|
+
super(claim, **kwargs)
|
8
|
+
end
|
9
|
+
|
10
|
+
def display_values
|
11
|
+
[display_value(claim.data_value)]
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DigitalScriptorium
|
4
|
+
# Base transformer class providing a common interface for all transformers.
|
5
|
+
class BaseClaimTransformer
|
6
|
+
attr_reader :claim, :prefix
|
7
|
+
|
8
|
+
def initialize(claim, **kwargs)
|
9
|
+
@claim = claim
|
10
|
+
@prefix = kwargs[:prefix]
|
11
|
+
end
|
12
|
+
|
13
|
+
def display_values
|
14
|
+
[]
|
15
|
+
end
|
16
|
+
|
17
|
+
def search_values
|
18
|
+
[]
|
19
|
+
end
|
20
|
+
|
21
|
+
def facet_values
|
22
|
+
[]
|
23
|
+
end
|
24
|
+
|
25
|
+
def display_value(recorded_value, in_original_script = nil, linked_terms = [])
|
26
|
+
value = { 'recorded_value' => recorded_value }
|
27
|
+
value['original_script'] = in_original_script if in_original_script
|
28
|
+
value['linked_terms'] = linked_terms if linked_terms.any?
|
29
|
+
value.to_json
|
30
|
+
end
|
31
|
+
|
32
|
+
def solr_props
|
33
|
+
solr_props = {}
|
34
|
+
solr_props["#{prefix}_display"] = display_values if display_values.any?
|
35
|
+
solr_props["#{prefix}_search"] = search_values if search_values.any?
|
36
|
+
solr_props["#{prefix}_facet"] = facet_values if facet_values.any?
|
37
|
+
solr_props
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'time'
|
4
|
+
|
5
|
+
module DigitalScriptorium
|
6
|
+
# Transformer for production date (P23) claims.
|
7
|
+
class DateClaimTransformer < QualifiedClaimTransformer
|
8
|
+
include PropertyId
|
9
|
+
|
10
|
+
def solr_props
|
11
|
+
super.merge(meta_props).merge(int_props)
|
12
|
+
end
|
13
|
+
|
14
|
+
def meta_props
|
15
|
+
{
|
16
|
+
'date_meta' => [claim.data_value]
|
17
|
+
}
|
18
|
+
end
|
19
|
+
|
20
|
+
def int_props
|
21
|
+
return {} unless claim.qualifiers_by_property_id? CENTURY
|
22
|
+
|
23
|
+
{
|
24
|
+
'century_int' => [century_int]
|
25
|
+
}
|
26
|
+
end
|
27
|
+
|
28
|
+
def linked_term_for(authority)
|
29
|
+
{
|
30
|
+
'label' => authority.label('en'),
|
31
|
+
'facet_field' => 'century_int',
|
32
|
+
'facet_value' => century_int,
|
33
|
+
'source_url' => external_uri(authority) || wikidata_uri(authority)
|
34
|
+
}.compact
|
35
|
+
end
|
36
|
+
|
37
|
+
def century_int
|
38
|
+
parse_year(time_value_from_qualifier(CENTURY))
|
39
|
+
end
|
40
|
+
|
41
|
+
def time_value_from_qualifier(property_id)
|
42
|
+
claim.qualifiers_by_property_id(property_id)&.first&.time_value
|
43
|
+
end
|
44
|
+
|
45
|
+
# Wikibase date format "resembling ISO 8601": +YYYY-MM-DDT00:00:00Z
|
46
|
+
# https://www.wikidata.org/wiki/Help:Dates#Time_datatype
|
47
|
+
def parse_year(date)
|
48
|
+
Time.iso8601(date[1..]).year
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DigitalScriptorium
|
4
|
+
# Transformer for dated? (P26) claims.
|
5
|
+
class DatedClaimTransformer < BaseClaimTransformer
|
6
|
+
attr_reader :export_hash
|
7
|
+
|
8
|
+
def initialize(claim, export_hash, **kwargs)
|
9
|
+
super(claim, **kwargs)
|
10
|
+
@export_hash = export_hash
|
11
|
+
end
|
12
|
+
|
13
|
+
def facet_values
|
14
|
+
[export_hash[claim.entity_id_value]&.label('en')].compact
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DigitalScriptorium
|
4
|
+
# Transformer for extracting links from relevant Digital Scriptorium claims.
|
5
|
+
class LinkClaimTransformer < BaseClaimTransformer
|
6
|
+
def initialize(claim, _, **kwargs)
|
7
|
+
super(claim, **kwargs)
|
8
|
+
end
|
9
|
+
|
10
|
+
def solr_props
|
11
|
+
super.merge({ "#{prefix}_link" => [claim.data_value] })
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DigitalScriptorium
|
4
|
+
# Transformer for associated name (P14) claims.
|
5
|
+
# NOTE: Name claims produce field prefixes derived from the value of their role (P15) qualifiers
|
6
|
+
# (owner, author, scribe, artist, agent).
|
7
|
+
class NameClaimTransformer < QualifiedClaimTransformerWithFacetFallback
|
8
|
+
include PropertyId
|
9
|
+
|
10
|
+
def initialize(claim, export_hash, **kwargs)
|
11
|
+
super(claim, export_hash, prefix: role_prefix(claim, export_hash), authority_id: kwargs[:authority_id])
|
12
|
+
end
|
13
|
+
|
14
|
+
def role_prefix(claim, export_hash)
|
15
|
+
role_entity_id = claim.qualifiers_by_property_id(ROLE_IN_AUTHORITY_FILE)&.first&.entity_id_value
|
16
|
+
raise 'Missing role qualifier for name claim' unless role_entity_id
|
17
|
+
|
18
|
+
role_item = export_hash[role_entity_id]
|
19
|
+
role_label = role_item.label('en')
|
20
|
+
role_label.split.last.downcase
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DigitalScriptorium
|
4
|
+
# Transformer for note (P32) claims.
|
5
|
+
class NoteClaimTransformer < BaseClaimTransformer
|
6
|
+
def initialize(claim, _, **kwargs)
|
7
|
+
super(claim, **kwargs)
|
8
|
+
end
|
9
|
+
|
10
|
+
def display_values
|
11
|
+
[display_value(claim.data_value)]
|
12
|
+
end
|
13
|
+
|
14
|
+
def search_values
|
15
|
+
[claim.data_value]
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DigitalScriptorium
|
4
|
+
# Transformer for physical description (P29) claims.
|
5
|
+
class PhysicalDescriptionClaimTransformer < BaseClaimTransformer
|
6
|
+
def initialize(claim, _, **kwargs)
|
7
|
+
super(claim, **kwargs)
|
8
|
+
end
|
9
|
+
|
10
|
+
def display_values
|
11
|
+
[display_value(claim.data_value)]
|
12
|
+
end
|
13
|
+
|
14
|
+
def search_values
|
15
|
+
[claim.data_value]
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DigitalScriptorium
|
4
|
+
# Transformer for converting qualified claims of Digital Scriptorium items into Solr fields.
|
5
|
+
class QualifiedClaimTransformer < BaseClaimTransformer
|
6
|
+
include PropertyId
|
7
|
+
|
8
|
+
attr_reader :export_hash, :authority_id
|
9
|
+
|
10
|
+
def initialize(claim, export_hash, **kwargs)
|
11
|
+
super(claim, **kwargs)
|
12
|
+
@export_hash = export_hash
|
13
|
+
@authority_id = kwargs[:authority_id]
|
14
|
+
end
|
15
|
+
|
16
|
+
def display_values
|
17
|
+
[display_value(main_snak_value, in_original_script, linked_terms)]
|
18
|
+
end
|
19
|
+
|
20
|
+
def search_values
|
21
|
+
[main_snak_value, in_original_script, linked_term_labels].flatten.compact.uniq
|
22
|
+
end
|
23
|
+
|
24
|
+
def facet_values
|
25
|
+
linked_term_labels
|
26
|
+
end
|
27
|
+
|
28
|
+
def in_original_script
|
29
|
+
claim.qualifiers_by_property_id(IN_ORIGINAL_SCRIPT)&.first&.data_value&.value
|
30
|
+
end
|
31
|
+
|
32
|
+
def external_uri(authority)
|
33
|
+
authority.claims_by_property_id(EXTERNAL_URI)&.first&.data_value
|
34
|
+
end
|
35
|
+
|
36
|
+
def wikidata_id(authority)
|
37
|
+
authority.claims_by_property_id(WIKIDATA_QID)&.first&.data_value
|
38
|
+
end
|
39
|
+
|
40
|
+
def wikidata_uri(authority)
|
41
|
+
wikidata_id(authority) && "https://www.wikidata.org/wiki/#{wikidata_id(authority)}"
|
42
|
+
end
|
43
|
+
|
44
|
+
def linked_term_for(authority)
|
45
|
+
{
|
46
|
+
'label' => authority.label('en'),
|
47
|
+
'source_url' => external_uri(authority) || wikidata_uri(authority)
|
48
|
+
}.compact
|
49
|
+
end
|
50
|
+
|
51
|
+
def linked_terms
|
52
|
+
@linked_terms ||= begin
|
53
|
+
linked_terms = []
|
54
|
+
|
55
|
+
claim.qualifiers_by_property_id(authority_id)&.each do |qualifier|
|
56
|
+
authority_file_item_id = qualifier.entity_id_value
|
57
|
+
authority = export_hash[authority_file_item_id]
|
58
|
+
linked_terms << linked_term_for(authority) if authority
|
59
|
+
end
|
60
|
+
|
61
|
+
linked_terms.uniq
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
def linked_term_labels
|
66
|
+
@linked_term_labels ||= linked_terms.map { |term| term['label'] }.uniq
|
67
|
+
end
|
68
|
+
|
69
|
+
def main_snak_value
|
70
|
+
if claim.value_type? WikibaseRepresentable::Model::EntityIdValue
|
71
|
+
entity_id = claim.entity_id_value
|
72
|
+
referenced_item = export_hash[entity_id]
|
73
|
+
referenced_item.label('en')
|
74
|
+
else
|
75
|
+
claim.data_value
|
76
|
+
end
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DigitalScriptorium
|
4
|
+
# Transformer for converting qualified claims of Digital Scriptorium items into Solr fields
|
5
|
+
# with a fallback to the value as-recorded for the facet field.
|
6
|
+
class QualifiedClaimTransformerWithFacetFallback < QualifiedClaimTransformer
|
7
|
+
def facet_values
|
8
|
+
super.any? ? super : [claim.data_value]
|
9
|
+
end
|
10
|
+
end
|
11
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DigitalScriptorium
|
4
|
+
# Transformer for shelfmark (P8) claims.
|
5
|
+
class ShelfmarkClaimTransformer < BaseClaimTransformer
|
6
|
+
def initialize(claim, _, **kwargs)
|
7
|
+
super(claim, **kwargs)
|
8
|
+
end
|
9
|
+
|
10
|
+
def display_values
|
11
|
+
[display_value(claim.data_value)]
|
12
|
+
end
|
13
|
+
|
14
|
+
def search_values
|
15
|
+
[claim.data_value]
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module DigitalScriptorium
|
4
|
+
# Transformer for uniform title (P12) claims.
|
5
|
+
class UniformTitleClaimTransformer < BaseClaimTransformer
|
6
|
+
def initialize(claim, _, **kwargs)
|
7
|
+
super(claim, **kwargs)
|
8
|
+
end
|
9
|
+
|
10
|
+
def search_values
|
11
|
+
[claim.data_value]
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'transformers/base_claim_transformer'
|
4
|
+
require_relative 'transformers/link_claim_transformer'
|
5
|
+
require_relative 'transformers/qualified_claim_transformer'
|
6
|
+
require_relative 'transformers/qualified_claim_transformer_with_facet_fallback'
|
7
|
+
|
8
|
+
require_relative 'transformers/acknowledgements_claim_transformer'
|
9
|
+
require_relative 'transformers/date_claim_transformer'
|
10
|
+
require_relative 'transformers/dated_claim_transformer'
|
11
|
+
require_relative 'transformers/iiif_manifest_claim_transformer'
|
12
|
+
require_relative 'transformers/name_claim_transformer'
|
13
|
+
require_relative 'transformers/note_claim_transformer'
|
14
|
+
require_relative 'transformers/physical_description_claim_transformer'
|
15
|
+
require_relative 'transformers/shelfmark_claim_transformer'
|
16
|
+
require_relative 'transformers/uniform_title_claim_transformer'
|
17
|
+
|
18
|
+
module DigitalScriptorium
|
19
|
+
# Factory for creating claim transformers
|
20
|
+
module Transformers
|
21
|
+
include PropertyId
|
22
|
+
|
23
|
+
TRANSFORMERS = {
|
24
|
+
HOLDING_INSTITUTION_AS_RECORDED => QualifiedClaimTransformer,
|
25
|
+
SHELFMARK => ShelfmarkClaimTransformer,
|
26
|
+
LINK_TO_INSTITUTIONAL_RECORD => LinkClaimTransformer,
|
27
|
+
TITLE_AS_RECORDED => QualifiedClaimTransformerWithFacetFallback,
|
28
|
+
UNIFORM_TITLE_AS_RECORDED => UniformTitleClaimTransformer,
|
29
|
+
ASSOCIATED_NAME_AS_RECORDED => NameClaimTransformer,
|
30
|
+
GENRE_AS_RECORDED => QualifiedClaimTransformerWithFacetFallback,
|
31
|
+
SUBJECT_AS_RECORDED => QualifiedClaimTransformerWithFacetFallback,
|
32
|
+
LANGUAGE_AS_RECORDED => QualifiedClaimTransformer,
|
33
|
+
PRODUCTION_DATE_AS_RECORDED => DateClaimTransformer,
|
34
|
+
DATED => DatedClaimTransformer,
|
35
|
+
PRODUCTION_PLACE_AS_RECORDED => QualifiedClaimTransformer,
|
36
|
+
PHYSICAL_DESCRIPTION => PhysicalDescriptionClaimTransformer,
|
37
|
+
MATERIAL_AS_RECORDED => QualifiedClaimTransformer,
|
38
|
+
NOTE => NoteClaimTransformer,
|
39
|
+
ACKNOWLEDGEMENTS => AcknowledgementsClaimTransformer,
|
40
|
+
IIIF_MANIFEST => IiifManifestClaimTransformer
|
41
|
+
}.freeze
|
42
|
+
|
43
|
+
AUTHORITY_IDS = {
|
44
|
+
HOLDING_INSTITUTION_AS_RECORDED => HOLDING_INSTITUTION_IN_AUTHORITY_FILE,
|
45
|
+
TITLE_AS_RECORDED => STANDARD_TITLE,
|
46
|
+
ASSOCIATED_NAME_AS_RECORDED => NAME_IN_AUTHORITY_FILE,
|
47
|
+
GENRE_AS_RECORDED => TERM_IN_AUTHORITY_FILE,
|
48
|
+
SUBJECT_AS_RECORDED => TERM_IN_AUTHORITY_FILE,
|
49
|
+
LANGUAGE_AS_RECORDED => LANGUAGE_IN_AUTHORITY_FILE,
|
50
|
+
PRODUCTION_DATE_AS_RECORDED => PRODUCTION_CENTURY_IN_AUTHORITY_FILE,
|
51
|
+
PRODUCTION_PLACE_AS_RECORDED => PLACE_IN_AUTHORITY_FILE,
|
52
|
+
MATERIAL_AS_RECORDED => MATERIAL_IN_AUTHORITY_FILE
|
53
|
+
}.freeze
|
54
|
+
|
55
|
+
PREFIXES = {
|
56
|
+
HOLDING_INSTITUTION_AS_RECORDED => 'institution',
|
57
|
+
SHELFMARK => 'shelfmark',
|
58
|
+
LINK_TO_INSTITUTIONAL_RECORD => 'institutional_record',
|
59
|
+
TITLE_AS_RECORDED => 'title',
|
60
|
+
UNIFORM_TITLE_AS_RECORDED => 'uniform_title',
|
61
|
+
ASSOCIATED_NAME_AS_RECORDED => 'name',
|
62
|
+
GENRE_AS_RECORDED => 'term',
|
63
|
+
SUBJECT_AS_RECORDED => 'term',
|
64
|
+
LANGUAGE_AS_RECORDED => 'language',
|
65
|
+
PRODUCTION_DATE_AS_RECORDED => 'date',
|
66
|
+
DATED => 'dated',
|
67
|
+
PRODUCTION_PLACE_AS_RECORDED => 'place',
|
68
|
+
PHYSICAL_DESCRIPTION => 'physical_description',
|
69
|
+
MATERIAL_AS_RECORDED => 'material',
|
70
|
+
NOTE => 'note',
|
71
|
+
ACKNOWLEDGEMENTS => 'acknowledgements',
|
72
|
+
IIIF_MANIFEST => 'iiif_manifest'
|
73
|
+
}.freeze
|
74
|
+
|
75
|
+
def self.defined?(property_id)
|
76
|
+
TRANSFORMERS.include?(property_id)
|
77
|
+
end
|
78
|
+
|
79
|
+
def self.transformer(property_id)
|
80
|
+
TRANSFORMERS[property_id]
|
81
|
+
end
|
82
|
+
|
83
|
+
def self.authority_id(property_id)
|
84
|
+
AUTHORITY_IDS[property_id]
|
85
|
+
end
|
86
|
+
|
87
|
+
def self.prefix(property_id)
|
88
|
+
PREFIXES[property_id]
|
89
|
+
end
|
90
|
+
|
91
|
+
def self.create(property_id, claim, export_hash)
|
92
|
+
transformer_class = TRANSFORMERS[property_id]
|
93
|
+
authority_id = AUTHORITY_IDS[property_id]
|
94
|
+
prefix = PREFIXES[property_id]
|
95
|
+
return unless transformer_class && prefix
|
96
|
+
|
97
|
+
transformer_class.new(claim, export_hash, prefix: prefix, authority_id: authority_id)
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
data/lib/digital_scriptorium.rb
CHANGED
@@ -12,6 +12,4 @@ require 'digital_scriptorium/record'
|
|
12
12
|
require 'digital_scriptorium/export'
|
13
13
|
require 'digital_scriptorium/export_representer'
|
14
14
|
|
15
|
-
require 'digital_scriptorium/
|
16
|
-
require 'digital_scriptorium/date_claim_transformer'
|
17
|
-
require 'digital_scriptorium/name_claim_transformer'
|
15
|
+
require 'digital_scriptorium/transformers'
|
data/wikibase_to_solr_new.rb
CHANGED
@@ -2,10 +2,11 @@
|
|
2
2
|
|
3
3
|
require 'digital_scriptorium'
|
4
4
|
require 'json'
|
5
|
+
require 'logging'
|
5
6
|
require 'optparse'
|
7
|
+
require 'set'
|
6
8
|
require 'time'
|
7
9
|
require 'tty-spinner'
|
8
|
-
require 'yaml'
|
9
10
|
require 'zlib'
|
10
11
|
|
11
12
|
dir = File.dirname __FILE__
|
@@ -15,7 +16,9 @@ output_file = File.expand_path 'solr_import.json', dir
|
|
15
16
|
config_file = File.expand_path 'property_config.yml', dir
|
16
17
|
pretty_print = false
|
17
18
|
|
18
|
-
|
19
|
+
logger = Logging.logger($stdout)
|
20
|
+
|
21
|
+
OptionParser.new { |opts|
|
19
22
|
opts.banner = 'Usage: wikibase_to_solr.rb [options]'
|
20
23
|
|
21
24
|
opts.on('-i', '--in FILE', 'The file path to the gzipped Wikibase JSON export file.') do |f|
|
@@ -33,7 +36,7 @@ OptionParser.new do |opts|
|
|
33
36
|
opts.on('-p', '--pretty-print', 'Whether to pretty-print the JSON output.') do
|
34
37
|
pretty_print = true
|
35
38
|
end
|
36
|
-
|
39
|
+
}.parse!
|
37
40
|
|
38
41
|
def merge(solr_item, new_props)
|
39
42
|
solr_item.merge(new_props) do |_, old_val, new_val|
|
@@ -41,21 +44,23 @@ def merge(solr_item, new_props)
|
|
41
44
|
end
|
42
45
|
end
|
43
46
|
|
44
|
-
def
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
DigitalScriptorium::ClaimTransformer.transform(claim, export_hash, property_config))
|
53
|
-
end
|
47
|
+
def base_solr_item(meta)
|
48
|
+
ds_id = meta.manuscript.ds_id
|
49
|
+
{
|
50
|
+
'qid_meta' => [meta.holding.id, meta.manuscript.id, meta.record.id],
|
51
|
+
'id' => [ds_id],
|
52
|
+
'id_display' => [JSON.generate(recorded_value: ds_id)],
|
53
|
+
'id_search' => [ds_id]
|
54
|
+
}
|
54
55
|
end
|
55
56
|
|
56
|
-
|
57
|
+
def record?(entity)
|
58
|
+
entity.is_a?(DigitalScriptorium::DsItem) &&
|
59
|
+
entity.claims_by_property_id?(DigitalScriptorium::PropertyId::INSTANCE_OF) &&
|
60
|
+
entity.record?
|
61
|
+
end
|
57
62
|
|
58
|
-
|
63
|
+
start_time = Time.now.utc
|
59
64
|
|
60
65
|
loading_spinner = TTY::Spinner.new('[:spinner] Loading export data', hide_cursor: true)
|
61
66
|
loading_spinner.auto_spin
|
@@ -64,7 +69,7 @@ export_json = Zlib::GzipReader.open(input_file).read
|
|
64
69
|
export_hash = DigitalScriptorium::ExportRepresenter.new(DigitalScriptorium::Export.new)
|
65
70
|
.from_json(export_json)
|
66
71
|
.to_hash
|
67
|
-
loaded_time = Time.now
|
72
|
+
loaded_time = Time.now.utc
|
68
73
|
loading_spinner.success("(#{format('%0.02f', loaded_time - start_time)}s)")
|
69
74
|
|
70
75
|
item_count = 0
|
@@ -76,19 +81,22 @@ File.open(output_file, 'w') do |file|
|
|
76
81
|
file << "\n" if pretty_print
|
77
82
|
|
78
83
|
export_hash.each_with_index do |(_, entity), idx|
|
79
|
-
next unless
|
80
|
-
entity.claims_by_property_id?(DigitalScriptorium::PropertyId::INSTANCE_OF) &&
|
81
|
-
entity.record?
|
84
|
+
next unless record?(entity)
|
82
85
|
|
83
86
|
meta = DigitalScriptorium::DsMeta.new(entity, export_hash)
|
84
|
-
solr_item =
|
87
|
+
solr_item = base_solr_item(meta)
|
85
88
|
|
86
89
|
[meta.holding, meta.manuscript, meta.record].each do |item|
|
87
90
|
item.claims.each do |property_id, claims|
|
88
91
|
claims.each do |claim|
|
89
|
-
next unless
|
90
|
-
|
91
|
-
|
92
|
+
next unless DigitalScriptorium::Transformers.defined? property_id
|
93
|
+
|
94
|
+
begin
|
95
|
+
transformer = DigitalScriptorium::Transformers.create property_id, claim, export_hash
|
96
|
+
solr_item = merge solr_item, transformer.solr_props
|
97
|
+
rescue StandardError => e
|
98
|
+
logger.error "Error processing #{property_id} claim for item #{item.id}: #{e}"
|
99
|
+
end
|
92
100
|
end
|
93
101
|
end
|
94
102
|
end
|
@@ -103,6 +111,6 @@ File.open(output_file, 'w') do |file|
|
|
103
111
|
file << ']'
|
104
112
|
end
|
105
113
|
|
106
|
-
finish_time = Time.now
|
114
|
+
finish_time = Time.now.utc
|
107
115
|
generating_spinner.success("(#{format('%0.02f', finish_time - loaded_time)}s)")
|
108
116
|
puts "Generated #{item_count} Solr documents in #{format('%0.02f', finish_time - start_time)} seconds"
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: digital_scriptorium
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Michael Holloway
|
8
8
|
bindir: exe
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-01-
|
10
|
+
date: 2025-01-17 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: multi_json
|
@@ -65,6 +65,62 @@ dependencies:
|
|
65
65
|
- - "~>"
|
66
66
|
- !ruby/object:Gem::Version
|
67
67
|
version: '0.1'
|
68
|
+
- !ruby/object:Gem::Dependency
|
69
|
+
name: bundler
|
70
|
+
requirement: !ruby/object:Gem::Requirement
|
71
|
+
requirements:
|
72
|
+
- - "~>"
|
73
|
+
- !ruby/object:Gem::Version
|
74
|
+
version: '2.5'
|
75
|
+
type: :development
|
76
|
+
prerelease: false
|
77
|
+
version_requirements: !ruby/object:Gem::Requirement
|
78
|
+
requirements:
|
79
|
+
- - "~>"
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
version: '2.5'
|
82
|
+
- !ruby/object:Gem::Dependency
|
83
|
+
name: rake
|
84
|
+
requirement: !ruby/object:Gem::Requirement
|
85
|
+
requirements:
|
86
|
+
- - "~>"
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '13.2'
|
89
|
+
type: :development
|
90
|
+
prerelease: false
|
91
|
+
version_requirements: !ruby/object:Gem::Requirement
|
92
|
+
requirements:
|
93
|
+
- - "~>"
|
94
|
+
- !ruby/object:Gem::Version
|
95
|
+
version: '13.2'
|
96
|
+
- !ruby/object:Gem::Dependency
|
97
|
+
name: rspec
|
98
|
+
requirement: !ruby/object:Gem::Requirement
|
99
|
+
requirements:
|
100
|
+
- - "~>"
|
101
|
+
- !ruby/object:Gem::Version
|
102
|
+
version: '3.13'
|
103
|
+
type: :development
|
104
|
+
prerelease: false
|
105
|
+
version_requirements: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - "~>"
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '3.13'
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: upennlib-rubocop
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - "~>"
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '1.2'
|
117
|
+
type: :development
|
118
|
+
prerelease: false
|
119
|
+
version_requirements: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - "~>"
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '1.2'
|
68
124
|
email:
|
69
125
|
- michael@mdholloway.org
|
70
126
|
executables: []
|
@@ -74,13 +130,12 @@ files:
|
|
74
130
|
- ".ignore"
|
75
131
|
- ".rspec"
|
76
132
|
- ".rubocop.yml"
|
133
|
+
- ".rubocop_todo.yml"
|
77
134
|
- LICENSE.txt
|
78
135
|
- README.md
|
79
136
|
- Rakefile
|
80
137
|
- doc/overview.md
|
81
138
|
- lib/digital_scriptorium.rb
|
82
|
-
- lib/digital_scriptorium/claim_transformer.rb
|
83
|
-
- lib/digital_scriptorium/date_claim_transformer.rb
|
84
139
|
- lib/digital_scriptorium/ds_item.rb
|
85
140
|
- lib/digital_scriptorium/ds_meta.rb
|
86
141
|
- lib/digital_scriptorium/export.rb
|
@@ -88,11 +143,23 @@ files:
|
|
88
143
|
- lib/digital_scriptorium/holding.rb
|
89
144
|
- lib/digital_scriptorium/item_id.rb
|
90
145
|
- lib/digital_scriptorium/manuscript.rb
|
91
|
-
- lib/digital_scriptorium/name_claim_transformer.rb
|
92
146
|
- lib/digital_scriptorium/property_id.rb
|
93
147
|
- lib/digital_scriptorium/record.rb
|
148
|
+
- lib/digital_scriptorium/transformers.rb
|
149
|
+
- lib/digital_scriptorium/transformers/acknowledgements_claim_transformer.rb
|
150
|
+
- lib/digital_scriptorium/transformers/base_claim_transformer.rb
|
151
|
+
- lib/digital_scriptorium/transformers/date_claim_transformer.rb
|
152
|
+
- lib/digital_scriptorium/transformers/dated_claim_transformer.rb
|
153
|
+
- lib/digital_scriptorium/transformers/iiif_manifest_claim_transformer.rb
|
154
|
+
- lib/digital_scriptorium/transformers/link_claim_transformer.rb
|
155
|
+
- lib/digital_scriptorium/transformers/name_claim_transformer.rb
|
156
|
+
- lib/digital_scriptorium/transformers/note_claim_transformer.rb
|
157
|
+
- lib/digital_scriptorium/transformers/physical_description_claim_transformer.rb
|
158
|
+
- lib/digital_scriptorium/transformers/qualified_claim_transformer.rb
|
159
|
+
- lib/digital_scriptorium/transformers/qualified_claim_transformer_with_facet_fallback.rb
|
160
|
+
- lib/digital_scriptorium/transformers/shelfmark_claim_transformer.rb
|
161
|
+
- lib/digital_scriptorium/transformers/uniform_title_claim_transformer.rb
|
94
162
|
- lib/digital_scriptorium/version.rb
|
95
|
-
- property_config.yml
|
96
163
|
- sig/digital_scriptorium.rbs
|
97
164
|
- wikibase_to_solr_new.rb
|
98
165
|
homepage: https://github.com/mdholloway/digital_scriptorium
|
@@ -1,82 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'wikibase_representable'
|
4
|
-
|
5
|
-
module DigitalScriptorium
|
6
|
-
# Transformer for converting claims of Digital Scriptorium items into Solr fields.
|
7
|
-
class ClaimTransformer
|
8
|
-
include PropertyId
|
9
|
-
include WikibaseRepresentable::Model
|
10
|
-
|
11
|
-
def self.transform(claim, export_hash, config)
|
12
|
-
solr_props = {}
|
13
|
-
|
14
|
-
prefix = config['prefix']
|
15
|
-
requested_fields = config['fields']
|
16
|
-
authority_property_id = config['authority']
|
17
|
-
|
18
|
-
value = primary_value_from_claim(claim, export_hash)
|
19
|
-
|
20
|
-
solr_props['id'] = [value] if requested_fields.include? 'id'
|
21
|
-
solr_props["#{prefix}_meta"] = [value] if requested_fields.include? 'meta'
|
22
|
-
|
23
|
-
unless authority_property_id && claim.qualifiers_by_property_id?(authority_property_id)
|
24
|
-
solr_props["#{prefix}_display"] = [{ 'PV' => value }.to_json] if requested_fields.include? 'display'
|
25
|
-
solr_props["#{prefix}_search"] = [value] if requested_fields.include? 'search'
|
26
|
-
solr_props["#{prefix}_facet"] = [value] if requested_fields.include? 'facet'
|
27
|
-
|
28
|
-
solr_props['images_facet'] = ['Yes'] if value && claim.property_id == IIIF_MANIFEST
|
29
|
-
solr_props["#{prefix}_link"] = [value] if requested_fields.include? 'link'
|
30
|
-
|
31
|
-
return solr_props
|
32
|
-
end
|
33
|
-
|
34
|
-
display_entries = []
|
35
|
-
search_entries = [value]
|
36
|
-
facets = []
|
37
|
-
|
38
|
-
claim.qualifiers_by_property_id(authority_property_id).each do |qualifier|
|
39
|
-
display_props = { 'PV' => value }
|
40
|
-
|
41
|
-
authority_id = qualifier.entity_id_value
|
42
|
-
authority = export_hash[authority_id]
|
43
|
-
|
44
|
-
if authority
|
45
|
-
label = authority.label('en')
|
46
|
-
|
47
|
-
display_props['QL'] = label
|
48
|
-
search_entries << label
|
49
|
-
facets << label
|
50
|
-
|
51
|
-
external_uri = authority.claim_by_property_id(EXTERNAL_URI)&.data_value
|
52
|
-
wikidata_id = authority.claim_by_property_id(WIKIDATA_QID)&.data_value
|
53
|
-
wikidata_uri = wikidata_id && "https://www.wikidata.org/wiki/#{wikidata_id}"
|
54
|
-
|
55
|
-
# Only one or the other of these seem to exist for a given item in practice.
|
56
|
-
display_props['QU'] = external_uri if external_uri
|
57
|
-
display_props['QU'] = wikidata_uri if wikidata_uri
|
58
|
-
end
|
59
|
-
|
60
|
-
display_entries << display_props.to_json
|
61
|
-
end
|
62
|
-
|
63
|
-
solr_props["#{prefix}_display"] = display_entries.uniq if requested_fields.include? 'display'
|
64
|
-
solr_props["#{prefix}_search"] = search_entries.uniq if requested_fields.include? 'search'
|
65
|
-
solr_props["#{prefix}_facet"] = facets.uniq if requested_fields.include? 'facet'
|
66
|
-
|
67
|
-
solr_props
|
68
|
-
end
|
69
|
-
|
70
|
-
def self.primary_value_from_claim(claim, export_hash)
|
71
|
-
if claim.value_type? EntityIdValue
|
72
|
-
entity_id = claim.entity_id_value
|
73
|
-
referenced_item = export_hash[entity_id]
|
74
|
-
referenced_item.label('en')
|
75
|
-
elsif claim.value_type? TimeValue
|
76
|
-
claim.time_value
|
77
|
-
else
|
78
|
-
claim.data_value
|
79
|
-
end
|
80
|
-
end
|
81
|
-
end
|
82
|
-
end
|
@@ -1,25 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'time'
|
4
|
-
|
5
|
-
module DigitalScriptorium
|
6
|
-
# Special-purpose transformer for date (P23) claims
|
7
|
-
class DateClaimTransformer
|
8
|
-
include PropertyId
|
9
|
-
|
10
|
-
def self.transform(claim, export_hash, config)
|
11
|
-
solr_props = ClaimTransformer.transform(claim, export_hash, config)
|
12
|
-
return solr_props unless claim.qualifiers
|
13
|
-
|
14
|
-
century = claim.qualifier_by_property_id(CENTURY).time_value
|
15
|
-
earliest = claim.qualifier_by_property_id(EARLIEST_DATE).time_value
|
16
|
-
latest = claim.qualifier_by_property_id(LATEST_DATE).time_value
|
17
|
-
|
18
|
-
solr_props['century_int'] = [Time.parse(century).year] unless century.nil?
|
19
|
-
solr_props['earliest_int'] = [Time.parse(earliest).year] unless earliest.nil?
|
20
|
-
solr_props['latest_int'] = [Time.parse(latest).year] unless latest.nil?
|
21
|
-
|
22
|
-
solr_props
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
@@ -1,61 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module DigitalScriptorium
|
4
|
-
# Special-purpose transformer for name (P14) claims
|
5
|
-
class NameClaimTransformer
|
6
|
-
include PropertyId
|
7
|
-
|
8
|
-
def self.transform(claim, export_hash)
|
9
|
-
return {} unless claim.qualifiers_by_property_id? ROLE_IN_AUTHORITY_FILE
|
10
|
-
|
11
|
-
role_entity_id = claim.qualifier_by_property_id(ROLE_IN_AUTHORITY_FILE).entity_id_value
|
12
|
-
role_item = export_hash[role_entity_id]
|
13
|
-
role_label = role_item.label('en')
|
14
|
-
prefix = role_label.downcase.split.last
|
15
|
-
|
16
|
-
recorded_name = claim.data_value
|
17
|
-
display_names = { 'PV' => recorded_name }
|
18
|
-
search_names = [recorded_name]
|
19
|
-
|
20
|
-
name_in_original_script = claim.qualifier_by_property_id(IN_ORIGINAL_SCRIPT)&.data_value&.value
|
21
|
-
display_names['AGR'] = name_in_original_script if name_in_original_script
|
22
|
-
search_names << name_in_original_script if name_in_original_script
|
23
|
-
|
24
|
-
unless claim.qualifiers_by_property_id? NAME_IN_AUTHORITY_FILE
|
25
|
-
return {
|
26
|
-
"#{prefix}_display" => [display_names.to_json],
|
27
|
-
"#{prefix}_search" => search_names,
|
28
|
-
"#{prefix}_facet" => [recorded_name]
|
29
|
-
}
|
30
|
-
end
|
31
|
-
|
32
|
-
display_entries = []
|
33
|
-
facets = []
|
34
|
-
|
35
|
-
claim.qualifiers_by_property_id(NAME_IN_AUTHORITY_FILE).each do |qualifier|
|
36
|
-
display_names_for_qualifier = { 'PV' => recorded_name }
|
37
|
-
display_names_for_qualifier['AGR'] = name_in_original_script if name_in_original_script
|
38
|
-
|
39
|
-
name_entity_id = qualifier.entity_id_value
|
40
|
-
name_item = export_hash[name_entity_id]
|
41
|
-
name_label = name_item.label('en')
|
42
|
-
|
43
|
-
display_names_for_qualifier['QL'] = name_label
|
44
|
-
search_names << name_label
|
45
|
-
facets << name_label
|
46
|
-
|
47
|
-
wikidata_id = name_item.claim_by_property_id(WIKIDATA_QID).data_value
|
48
|
-
wikidata_url = "https://www.wikidata.org/wiki/#{wikidata_id}"
|
49
|
-
display_names_for_qualifier['QU'] = wikidata_url if wikidata_url
|
50
|
-
|
51
|
-
display_entries << display_names_for_qualifier.to_json
|
52
|
-
end
|
53
|
-
|
54
|
-
{
|
55
|
-
"#{prefix}_display" => display_entries.uniq,
|
56
|
-
"#{prefix}_search" => search_names.uniq,
|
57
|
-
"#{prefix}_facet" => facets.uniq
|
58
|
-
}
|
59
|
-
end
|
60
|
-
end
|
61
|
-
end
|
data/property_config.yml
DELETED
@@ -1,106 +0,0 @@
|
|
1
|
-
P1:
|
2
|
-
prefix: id
|
3
|
-
fields:
|
4
|
-
- id
|
5
|
-
- display
|
6
|
-
- search
|
7
|
-
P5:
|
8
|
-
prefix: institution
|
9
|
-
fields:
|
10
|
-
- display
|
11
|
-
- search
|
12
|
-
- facet
|
13
|
-
authority: P4
|
14
|
-
P6:
|
15
|
-
prefix: holding_status
|
16
|
-
fields:
|
17
|
-
- display
|
18
|
-
P8:
|
19
|
-
prefix: shelfmark
|
20
|
-
fields:
|
21
|
-
- display
|
22
|
-
- search
|
23
|
-
P9:
|
24
|
-
prefix: institutional_record
|
25
|
-
fields:
|
26
|
-
- link
|
27
|
-
P10:
|
28
|
-
prefix: title
|
29
|
-
fields:
|
30
|
-
- display
|
31
|
-
- search
|
32
|
-
- facet
|
33
|
-
authority: P11
|
34
|
-
P12:
|
35
|
-
prefix: uniform_title
|
36
|
-
fields:
|
37
|
-
- search
|
38
|
-
# NOTE: P14 can translate to any of a few different Solr fields based on the value of the
|
39
|
-
# associated role (P15) qualifier, and is handled in its own dedicated processing method
|
40
|
-
P14:
|
41
|
-
prefix: associated_name
|
42
|
-
fields: []
|
43
|
-
P18:
|
44
|
-
prefix: term
|
45
|
-
fields:
|
46
|
-
- display
|
47
|
-
- search
|
48
|
-
- facet
|
49
|
-
authority: P20
|
50
|
-
P19:
|
51
|
-
prefix: term
|
52
|
-
fields:
|
53
|
-
- display
|
54
|
-
- search
|
55
|
-
- facet
|
56
|
-
authority: P20
|
57
|
-
P21:
|
58
|
-
prefix: language
|
59
|
-
fields:
|
60
|
-
- display
|
61
|
-
- search
|
62
|
-
- facet
|
63
|
-
authority: P22
|
64
|
-
P23:
|
65
|
-
prefix: date
|
66
|
-
fields:
|
67
|
-
- meta
|
68
|
-
- display
|
69
|
-
- search
|
70
|
-
- facet
|
71
|
-
authority: P24
|
72
|
-
P26:
|
73
|
-
prefix: dated
|
74
|
-
fields:
|
75
|
-
- display
|
76
|
-
- facet
|
77
|
-
P27:
|
78
|
-
prefix: place
|
79
|
-
fields:
|
80
|
-
- display
|
81
|
-
- search
|
82
|
-
- facet
|
83
|
-
authority: P28
|
84
|
-
P29:
|
85
|
-
prefix: physical_description
|
86
|
-
fields:
|
87
|
-
- display
|
88
|
-
- search
|
89
|
-
P30:
|
90
|
-
prefix: material
|
91
|
-
fields:
|
92
|
-
- facet
|
93
|
-
authority: P31
|
94
|
-
P32:
|
95
|
-
prefix: note
|
96
|
-
fields:
|
97
|
-
- display
|
98
|
-
- search
|
99
|
-
P33:
|
100
|
-
prefix: acknowledgements
|
101
|
-
fields:
|
102
|
-
- display
|
103
|
-
P41:
|
104
|
-
prefix: iiif_manifest
|
105
|
-
fields:
|
106
|
-
- link
|