digital_scriptorium 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +3 -29
  3. data/.rubocop_todo.yml +12 -0
  4. data/lib/digital_scriptorium/ds_item.rb +9 -5
  5. data/lib/digital_scriptorium/ds_meta.rb +19 -2
  6. data/lib/digital_scriptorium/item_id.rb +4 -3
  7. data/lib/digital_scriptorium/manuscript.rb +3 -3
  8. data/lib/digital_scriptorium/record.rb +1 -1
  9. data/lib/digital_scriptorium/transformers/acknowledgements_claim_transformer.rb +14 -0
  10. data/lib/digital_scriptorium/transformers/base_claim_transformer.rb +40 -0
  11. data/lib/digital_scriptorium/transformers/date_claim_transformer.rb +51 -0
  12. data/lib/digital_scriptorium/transformers/dated_claim_transformer.rb +17 -0
  13. data/lib/digital_scriptorium/transformers/iiif_manifest_claim_transformer.rb +10 -0
  14. data/lib/digital_scriptorium/transformers/link_claim_transformer.rb +14 -0
  15. data/lib/digital_scriptorium/transformers/name_claim_transformer.rb +23 -0
  16. data/lib/digital_scriptorium/transformers/note_claim_transformer.rb +18 -0
  17. data/lib/digital_scriptorium/transformers/physical_description_claim_transformer.rb +18 -0
  18. data/lib/digital_scriptorium/transformers/qualified_claim_transformer.rb +79 -0
  19. data/lib/digital_scriptorium/transformers/qualified_claim_transformer_with_facet_fallback.rb +11 -0
  20. data/lib/digital_scriptorium/transformers/shelfmark_claim_transformer.rb +18 -0
  21. data/lib/digital_scriptorium/transformers/uniform_title_claim_transformer.rb +14 -0
  22. data/lib/digital_scriptorium/transformers.rb +100 -0
  23. data/lib/digital_scriptorium/version.rb +1 -1
  24. data/lib/digital_scriptorium.rb +1 -3
  25. data/wikibase_to_solr_new.rb +32 -24
  26. metadata +73 -6
  27. data/lib/digital_scriptorium/claim_transformer.rb +0 -82
  28. data/lib/digital_scriptorium/date_claim_transformer.rb +0 -25
  29. data/lib/digital_scriptorium/name_claim_transformer.rb +0 -61
  30. data/property_config.yml +0 -106
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 26bfe7a6e13db8a90e6d3fb40f2c134383d954503e4d81d7eeff75ae7f0e83ab
4
- data.tar.gz: 11f3be8b35b9279d1d5b217d277c8117a2b2b00b5b52190f6a978ee734a9e275
3
+ metadata.gz: 90d97605a47a87aec5fb8dc41385f9ce55e415d8d96bd51bc91f0436b8d5ec07
4
+ data.tar.gz: 873ab9fbb3d1fb275419ec753c36929f1a806fe40a9023aaa3279b2684871218
5
5
  SHA512:
6
- metadata.gz: d00ac9d13e72ce1213158ccd6d0aa063c620f8a58637b706d1869b41e63641c687b16f1cdc5d3508c253406b4dd08d9bf714ac52af15e139cc560f9ecac92f65
7
- data.tar.gz: 02d25955d26867e0b2560df2118f2bc634822a8c2a62a9cf6adb3c49bf48a189515add16fc1e25e27f462459b49a033ee1f0a16413c496fa186f5dd644966a5f
6
+ metadata.gz: 0142d571dd96cd21270a782c0327bf0798af63e00d904eb71263d7a43fb8eb69e4fbe2854ad75dcf4b4bed7b56148e81a4a0be1862bff0e76168572126b94532
7
+ data.tar.gz: e51efd7f188fcc8bc29e846d50b6fceeb6a61ec00ccb1217807ed124303060290a6f6890980b25ab54adae11283896319af3ba0af63b6ca1f4d890a7bf2243e4
data/.rubocop.yml CHANGED
@@ -1,30 +1,4 @@
1
- require:
2
- - rubocop-rake
3
- - rubocop-rspec
1
+ inherit_from: .rubocop_todo.yml
4
2
 
5
- AllCops:
6
- TargetRubyVersion: 3.0
7
- NewCops: enable
8
- Layout/LineLength:
9
- Exclude:
10
- - spec/**/*
11
- Metrics/AbcSize:
12
- Enabled: false
13
- Metrics/BlockLength:
14
- Enabled: false
15
- Metrics/CyclomaticComplexity:
16
- Enabled: false
17
- Metrics/MethodLength:
18
- Enabled: false
19
- Metrics/ModuleLength:
20
- Enabled: false
21
- Metrics/PerceivedComplexity:
22
- Enabled: false
23
- RSpec/ExampleLength:
24
- Enabled: false
25
- RSpec/MultipleExpectations:
26
- Enabled: false
27
- RSpec/MultipleMemoizedHelpers:
28
- Enabled: false
29
- Style/SafeNavigationChainLength:
30
- Enabled: false
3
+ inherit_gem:
4
+ upennlib-rubocop: upennlib_rubocop_defaults.yml
data/.rubocop_todo.yml ADDED
@@ -0,0 +1,12 @@
1
+ # This configuration was generated by
2
+ # `rubocop --auto-gen-config`
3
+ # on 2025-01-17 01:39:58 UTC using RuboCop version 1.70.0.
4
+ # The point is for the user to remove these configuration records
5
+ # one by one as the offenses are removed from the code base.
6
+ # Note that changes in the inspected code, or installation of new
7
+ # versions of RuboCop, may require this file to be generated again.
8
+
9
+ # Offense count: 2
10
+ # Configuration parameters: CountComments, CountAsOne.
11
+ Metrics/ModuleLength:
12
+ Max: 137
@@ -10,19 +10,23 @@ module DigitalScriptorium
10
10
  end
11
11
 
12
12
  def ds_id
13
- claim_by_property_id(PropertyId::DS_ID)&.data_value # P1
13
+ claims_by_property_id(PropertyId::DS_ID)&.first&.data_value # P1
14
14
  end
15
15
 
16
- def holding_id
17
- claim_by_property_id(PropertyId::MANUSCRIPT_HOLDING)&.entity_id_value # P2
16
+ def holding_ids
17
+ claims_by_property_id(PropertyId::MANUSCRIPT_HOLDING)&.map(&:entity_id_value) # P2
18
18
  end
19
19
 
20
20
  def described_manuscript_id
21
- claim_by_property_id(PropertyId::DESCRIBED_MANUSCRIPT)&.entity_id_value # P3
21
+ claims_by_property_id(PropertyId::DESCRIBED_MANUSCRIPT)&.first&.entity_id_value # P3
22
+ end
23
+
24
+ def holding_status
25
+ claims_by_property_id(PropertyId::HOLDING_STATUS)&.first&.entity_id_value # P6
22
26
  end
23
27
 
24
28
  def iiif_manifest
25
- claim_by_property_id(PropertyId::IIIF_MANIFEST)&.entity_id_value # P41
29
+ claims_by_property_id(PropertyId::IIIF_MANIFEST)&.first&.entity_id_value # P41
26
30
  end
27
31
 
28
32
  def core_model_item?
@@ -3,15 +3,32 @@
3
3
  module DigitalScriptorium
4
4
  # Represents a meta record consisting of a manuscript, its holding information, and metadata record.
5
5
  class DsMeta
6
+ include ItemId
7
+ include PropertyId
8
+
6
9
  attr_reader :holding, :manuscript, :record
7
10
 
8
11
  def initialize(record, export_hash)
9
12
  manuscript = export_hash[record.described_manuscript_id]
10
- holding = export_hash[manuscript.holding_id]
13
+ current_holdings = current_holdings(manuscript, export_hash)
14
+
15
+ if current_holdings.size != 1
16
+ raise "Manuscripts must have exactly 1 current holding, found #{current_holdings.size}"
17
+ end
11
18
 
12
- @holding = holding
19
+ @holding = current_holdings.first
13
20
  @manuscript = manuscript
14
21
  @record = record
15
22
  end
23
+
24
+ def current?(holding)
25
+ holding.holding_status == HOLDING_STATUS_CURRENT
26
+ end
27
+
28
+ def current_holdings(manuscript, export_hash)
29
+ manuscript.holding_ids
30
+ .map { |id| export_hash[id] }
31
+ .filter { |holding| current?(holding) }
32
+ end
16
33
  end
17
34
  end
@@ -5,9 +5,10 @@ require 'set'
5
5
  module DigitalScriptorium
6
6
  # Constants for core model item IDs.
7
7
  module ItemId
8
- MANUSCRIPT = 'Q1'
9
- HOLDING = 'Q2'
10
- RECORD = 'Q3'
8
+ MANUSCRIPT = 'Q1'
9
+ HOLDING = 'Q2'
10
+ RECORD = 'Q3'
11
+ HOLDING_STATUS_CURRENT = 'Q4'
11
12
 
12
13
  CORE_MODEL_ITEMS = Set[MANUSCRIPT, HOLDING, RECORD]
13
14
  end
@@ -6,11 +6,11 @@ module DigitalScriptorium
6
6
  include PropertyId
7
7
 
8
8
  def ds_id
9
- claim_by_property_id(DS_ID).data_value # P1
9
+ claims_by_property_id(DS_ID)&.first&.data_value # P1
10
10
  end
11
11
 
12
- def holding_id
13
- claim_by_property_id(MANUSCRIPT_HOLDING).entity_id_value # P2
12
+ def holding_ids
13
+ claims_by_property_id(MANUSCRIPT_HOLDING)&.map(&:entity_id_value) # P2
14
14
  end
15
15
  end
16
16
  end
@@ -6,7 +6,7 @@ module DigitalScriptorium
6
6
  include PropertyId
7
7
 
8
8
  def described_manuscript_id
9
- claim_by_property_id(DESCRIBED_MANUSCRIPT).entity_id_value # P3
9
+ claims_by_property_id(DESCRIBED_MANUSCRIPT)&.first&.entity_id_value # P3
10
10
  end
11
11
 
12
12
  def title_as_recorded_claims
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DigitalScriptorium
4
+ # Transformer for acknowledgements (P33) claims.
5
+ class AcknowledgementsClaimTransformer < BaseClaimTransformer
6
+ def initialize(claim, _, **kwargs)
7
+ super(claim, **kwargs)
8
+ end
9
+
10
+ def display_values
11
+ [display_value(claim.data_value)]
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DigitalScriptorium
4
+ # Base transformer class providing a common interface for all transformers.
5
+ class BaseClaimTransformer
6
+ attr_reader :claim, :prefix
7
+
8
+ def initialize(claim, **kwargs)
9
+ @claim = claim
10
+ @prefix = kwargs[:prefix]
11
+ end
12
+
13
+ def display_values
14
+ []
15
+ end
16
+
17
+ def search_values
18
+ []
19
+ end
20
+
21
+ def facet_values
22
+ []
23
+ end
24
+
25
+ def display_value(recorded_value, in_original_script = nil, linked_terms = [])
26
+ value = { 'recorded_value' => recorded_value }
27
+ value['original_script'] = in_original_script if in_original_script
28
+ value['linked_terms'] = linked_terms if linked_terms.any?
29
+ value.to_json
30
+ end
31
+
32
+ def solr_props
33
+ solr_props = {}
34
+ solr_props["#{prefix}_display"] = display_values if display_values.any?
35
+ solr_props["#{prefix}_search"] = search_values if search_values.any?
36
+ solr_props["#{prefix}_facet"] = facet_values if facet_values.any?
37
+ solr_props
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'time'
4
+
5
+ module DigitalScriptorium
6
+ # Transformer for production date (P23) claims.
7
+ class DateClaimTransformer < QualifiedClaimTransformer
8
+ include PropertyId
9
+
10
+ def solr_props
11
+ super.merge(meta_props).merge(int_props)
12
+ end
13
+
14
+ def meta_props
15
+ {
16
+ 'date_meta' => [claim.data_value]
17
+ }
18
+ end
19
+
20
+ def int_props
21
+ return {} unless claim.qualifiers_by_property_id? CENTURY
22
+
23
+ {
24
+ 'century_int' => [century_int]
25
+ }
26
+ end
27
+
28
+ def linked_term_for(authority)
29
+ {
30
+ 'label' => authority.label('en'),
31
+ 'facet_field' => 'century_int',
32
+ 'facet_value' => century_int,
33
+ 'source_url' => external_uri(authority) || wikidata_uri(authority)
34
+ }.compact
35
+ end
36
+
37
+ def century_int
38
+ parse_year(time_value_from_qualifier(CENTURY))
39
+ end
40
+
41
+ def time_value_from_qualifier(property_id)
42
+ claim.qualifiers_by_property_id(property_id)&.first&.time_value
43
+ end
44
+
45
+ # Wikibase date format "resembling ISO 8601": +YYYY-MM-DDT00:00:00Z
46
+ # https://www.wikidata.org/wiki/Help:Dates#Time_datatype
47
+ def parse_year(date)
48
+ Time.iso8601(date[1..]).year
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DigitalScriptorium
4
+ # Transformer for dated? (P26) claims.
5
+ class DatedClaimTransformer < BaseClaimTransformer
6
+ attr_reader :export_hash
7
+
8
+ def initialize(claim, export_hash, **kwargs)
9
+ super(claim, **kwargs)
10
+ @export_hash = export_hash
11
+ end
12
+
13
+ def facet_values
14
+ [export_hash[claim.entity_id_value]&.label('en')].compact
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DigitalScriptorium
4
+ # Transformer for IIIF Manifest (P41) claims.
5
+ class IiifManifestClaimTransformer < LinkClaimTransformer
6
+ def solr_props
7
+ super.merge({ 'images_facet' => ['Yes'] })
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DigitalScriptorium
4
+ # Transformer for extracting links from relevant Digital Scriptorium claims.
5
+ class LinkClaimTransformer < BaseClaimTransformer
6
+ def initialize(claim, _, **kwargs)
7
+ super(claim, **kwargs)
8
+ end
9
+
10
+ def solr_props
11
+ super.merge({ "#{prefix}_link" => [claim.data_value] })
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DigitalScriptorium
4
+ # Transformer for associated name (P14) claims.
5
+ # NOTE: Name claims produce field prefixes derived from the value of their role (P15) qualifiers
6
+ # (owner, author, scribe, artist, agent).
7
+ class NameClaimTransformer < QualifiedClaimTransformerWithFacetFallback
8
+ include PropertyId
9
+
10
+ def initialize(claim, export_hash, **kwargs)
11
+ super(claim, export_hash, prefix: role_prefix(claim, export_hash), authority_id: kwargs[:authority_id])
12
+ end
13
+
14
+ def role_prefix(claim, export_hash)
15
+ role_entity_id = claim.qualifiers_by_property_id(ROLE_IN_AUTHORITY_FILE)&.first&.entity_id_value
16
+ raise 'Missing role qualifier for name claim' unless role_entity_id
17
+
18
+ role_item = export_hash[role_entity_id]
19
+ role_label = role_item.label('en')
20
+ role_label.split.last.downcase
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DigitalScriptorium
4
+ # Transformer for note (P32) claims.
5
+ class NoteClaimTransformer < BaseClaimTransformer
6
+ def initialize(claim, _, **kwargs)
7
+ super(claim, **kwargs)
8
+ end
9
+
10
+ def display_values
11
+ [display_value(claim.data_value)]
12
+ end
13
+
14
+ def search_values
15
+ [claim.data_value]
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DigitalScriptorium
4
+ # Transformer for physical description (P29) claims.
5
+ class PhysicalDescriptionClaimTransformer < BaseClaimTransformer
6
+ def initialize(claim, _, **kwargs)
7
+ super(claim, **kwargs)
8
+ end
9
+
10
+ def display_values
11
+ [display_value(claim.data_value)]
12
+ end
13
+
14
+ def search_values
15
+ [claim.data_value]
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,79 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DigitalScriptorium
4
+ # Transformer for converting qualified claims of Digital Scriptorium items into Solr fields.
5
+ class QualifiedClaimTransformer < BaseClaimTransformer
6
+ include PropertyId
7
+
8
+ attr_reader :export_hash, :authority_id
9
+
10
+ def initialize(claim, export_hash, **kwargs)
11
+ super(claim, **kwargs)
12
+ @export_hash = export_hash
13
+ @authority_id = kwargs[:authority_id]
14
+ end
15
+
16
+ def display_values
17
+ [display_value(main_snak_value, in_original_script, linked_terms)]
18
+ end
19
+
20
+ def search_values
21
+ [main_snak_value, in_original_script, linked_term_labels].flatten.compact.uniq
22
+ end
23
+
24
+ def facet_values
25
+ linked_term_labels
26
+ end
27
+
28
+ def in_original_script
29
+ claim.qualifiers_by_property_id(IN_ORIGINAL_SCRIPT)&.first&.data_value&.value
30
+ end
31
+
32
+ def external_uri(authority)
33
+ authority.claims_by_property_id(EXTERNAL_URI)&.first&.data_value
34
+ end
35
+
36
+ def wikidata_id(authority)
37
+ authority.claims_by_property_id(WIKIDATA_QID)&.first&.data_value
38
+ end
39
+
40
+ def wikidata_uri(authority)
41
+ wikidata_id(authority) && "https://www.wikidata.org/wiki/#{wikidata_id(authority)}"
42
+ end
43
+
44
+ def linked_term_for(authority)
45
+ {
46
+ 'label' => authority.label('en'),
47
+ 'source_url' => external_uri(authority) || wikidata_uri(authority)
48
+ }.compact
49
+ end
50
+
51
+ def linked_terms
52
+ @linked_terms ||= begin
53
+ linked_terms = []
54
+
55
+ claim.qualifiers_by_property_id(authority_id)&.each do |qualifier|
56
+ authority_file_item_id = qualifier.entity_id_value
57
+ authority = export_hash[authority_file_item_id]
58
+ linked_terms << linked_term_for(authority) if authority
59
+ end
60
+
61
+ linked_terms.uniq
62
+ end
63
+ end
64
+
65
+ def linked_term_labels
66
+ @linked_term_labels ||= linked_terms.map { |term| term['label'] }.uniq
67
+ end
68
+
69
+ def main_snak_value
70
+ if claim.value_type? WikibaseRepresentable::Model::EntityIdValue
71
+ entity_id = claim.entity_id_value
72
+ referenced_item = export_hash[entity_id]
73
+ referenced_item.label('en')
74
+ else
75
+ claim.data_value
76
+ end
77
+ end
78
+ end
79
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DigitalScriptorium
4
+ # Transformer for converting qualified claims of Digital Scriptorium items into Solr fields
5
+ # with a fallback to the value as-recorded for the facet field.
6
+ class QualifiedClaimTransformerWithFacetFallback < QualifiedClaimTransformer
7
+ def facet_values
8
+ super.any? ? super : [claim.data_value]
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DigitalScriptorium
4
+ # Transformer for shelfmark (P8) claims.
5
+ class ShelfmarkClaimTransformer < BaseClaimTransformer
6
+ def initialize(claim, _, **kwargs)
7
+ super(claim, **kwargs)
8
+ end
9
+
10
+ def display_values
11
+ [display_value(claim.data_value)]
12
+ end
13
+
14
+ def search_values
15
+ [claim.data_value]
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
+ module DigitalScriptorium
4
+ # Transformer for uniform title (P12) claims.
5
+ class UniformTitleClaimTransformer < BaseClaimTransformer
6
+ def initialize(claim, _, **kwargs)
7
+ super(claim, **kwargs)
8
+ end
9
+
10
+ def search_values
11
+ [claim.data_value]
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'transformers/base_claim_transformer'
4
+ require_relative 'transformers/link_claim_transformer'
5
+ require_relative 'transformers/qualified_claim_transformer'
6
+ require_relative 'transformers/qualified_claim_transformer_with_facet_fallback'
7
+
8
+ require_relative 'transformers/acknowledgements_claim_transformer'
9
+ require_relative 'transformers/date_claim_transformer'
10
+ require_relative 'transformers/dated_claim_transformer'
11
+ require_relative 'transformers/iiif_manifest_claim_transformer'
12
+ require_relative 'transformers/name_claim_transformer'
13
+ require_relative 'transformers/note_claim_transformer'
14
+ require_relative 'transformers/physical_description_claim_transformer'
15
+ require_relative 'transformers/shelfmark_claim_transformer'
16
+ require_relative 'transformers/uniform_title_claim_transformer'
17
+
18
+ module DigitalScriptorium
19
+ # Factory for creating claim transformers
20
+ module Transformers
21
+ include PropertyId
22
+
23
+ TRANSFORMERS = {
24
+ HOLDING_INSTITUTION_AS_RECORDED => QualifiedClaimTransformer,
25
+ SHELFMARK => ShelfmarkClaimTransformer,
26
+ LINK_TO_INSTITUTIONAL_RECORD => LinkClaimTransformer,
27
+ TITLE_AS_RECORDED => QualifiedClaimTransformerWithFacetFallback,
28
+ UNIFORM_TITLE_AS_RECORDED => UniformTitleClaimTransformer,
29
+ ASSOCIATED_NAME_AS_RECORDED => NameClaimTransformer,
30
+ GENRE_AS_RECORDED => QualifiedClaimTransformerWithFacetFallback,
31
+ SUBJECT_AS_RECORDED => QualifiedClaimTransformerWithFacetFallback,
32
+ LANGUAGE_AS_RECORDED => QualifiedClaimTransformer,
33
+ PRODUCTION_DATE_AS_RECORDED => DateClaimTransformer,
34
+ DATED => DatedClaimTransformer,
35
+ PRODUCTION_PLACE_AS_RECORDED => QualifiedClaimTransformer,
36
+ PHYSICAL_DESCRIPTION => PhysicalDescriptionClaimTransformer,
37
+ MATERIAL_AS_RECORDED => QualifiedClaimTransformer,
38
+ NOTE => NoteClaimTransformer,
39
+ ACKNOWLEDGEMENTS => AcknowledgementsClaimTransformer,
40
+ IIIF_MANIFEST => IiifManifestClaimTransformer
41
+ }.freeze
42
+
43
+ AUTHORITY_IDS = {
44
+ HOLDING_INSTITUTION_AS_RECORDED => HOLDING_INSTITUTION_IN_AUTHORITY_FILE,
45
+ TITLE_AS_RECORDED => STANDARD_TITLE,
46
+ ASSOCIATED_NAME_AS_RECORDED => NAME_IN_AUTHORITY_FILE,
47
+ GENRE_AS_RECORDED => TERM_IN_AUTHORITY_FILE,
48
+ SUBJECT_AS_RECORDED => TERM_IN_AUTHORITY_FILE,
49
+ LANGUAGE_AS_RECORDED => LANGUAGE_IN_AUTHORITY_FILE,
50
+ PRODUCTION_DATE_AS_RECORDED => PRODUCTION_CENTURY_IN_AUTHORITY_FILE,
51
+ PRODUCTION_PLACE_AS_RECORDED => PLACE_IN_AUTHORITY_FILE,
52
+ MATERIAL_AS_RECORDED => MATERIAL_IN_AUTHORITY_FILE
53
+ }.freeze
54
+
55
+ PREFIXES = {
56
+ HOLDING_INSTITUTION_AS_RECORDED => 'institution',
57
+ SHELFMARK => 'shelfmark',
58
+ LINK_TO_INSTITUTIONAL_RECORD => 'institutional_record',
59
+ TITLE_AS_RECORDED => 'title',
60
+ UNIFORM_TITLE_AS_RECORDED => 'uniform_title',
61
+ ASSOCIATED_NAME_AS_RECORDED => 'name',
62
+ GENRE_AS_RECORDED => 'term',
63
+ SUBJECT_AS_RECORDED => 'term',
64
+ LANGUAGE_AS_RECORDED => 'language',
65
+ PRODUCTION_DATE_AS_RECORDED => 'date',
66
+ DATED => 'dated',
67
+ PRODUCTION_PLACE_AS_RECORDED => 'place',
68
+ PHYSICAL_DESCRIPTION => 'physical_description',
69
+ MATERIAL_AS_RECORDED => 'material',
70
+ NOTE => 'note',
71
+ ACKNOWLEDGEMENTS => 'acknowledgements',
72
+ IIIF_MANIFEST => 'iiif_manifest'
73
+ }.freeze
74
+
75
+ def self.defined?(property_id)
76
+ TRANSFORMERS.include?(property_id)
77
+ end
78
+
79
+ def self.transformer(property_id)
80
+ TRANSFORMERS[property_id]
81
+ end
82
+
83
+ def self.authority_id(property_id)
84
+ AUTHORITY_IDS[property_id]
85
+ end
86
+
87
+ def self.prefix(property_id)
88
+ PREFIXES[property_id]
89
+ end
90
+
91
+ def self.create(property_id, claim, export_hash)
92
+ transformer_class = TRANSFORMERS[property_id]
93
+ authority_id = AUTHORITY_IDS[property_id]
94
+ prefix = PREFIXES[property_id]
95
+ return unless transformer_class && prefix
96
+
97
+ transformer_class.new(claim, export_hash, prefix: prefix, authority_id: authority_id)
98
+ end
99
+ end
100
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DigitalScriptorium
4
- VERSION = '0.1.2'
4
+ VERSION = '0.2.0'
5
5
  end
@@ -12,6 +12,4 @@ require 'digital_scriptorium/record'
12
12
  require 'digital_scriptorium/export'
13
13
  require 'digital_scriptorium/export_representer'
14
14
 
15
- require 'digital_scriptorium/claim_transformer'
16
- require 'digital_scriptorium/date_claim_transformer'
17
- require 'digital_scriptorium/name_claim_transformer'
15
+ require 'digital_scriptorium/transformers'
@@ -2,10 +2,11 @@
2
2
 
3
3
  require 'digital_scriptorium'
4
4
  require 'json'
5
+ require 'logging'
5
6
  require 'optparse'
7
+ require 'set'
6
8
  require 'time'
7
9
  require 'tty-spinner'
8
- require 'yaml'
9
10
  require 'zlib'
10
11
 
11
12
  dir = File.dirname __FILE__
@@ -15,7 +16,9 @@ output_file = File.expand_path 'solr_import.json', dir
15
16
  config_file = File.expand_path 'property_config.yml', dir
16
17
  pretty_print = false
17
18
 
18
- OptionParser.new do |opts|
19
+ logger = Logging.logger($stdout)
20
+
21
+ OptionParser.new { |opts|
19
22
  opts.banner = 'Usage: wikibase_to_solr.rb [options]'
20
23
 
21
24
  opts.on('-i', '--in FILE', 'The file path to the gzipped Wikibase JSON export file.') do |f|
@@ -33,7 +36,7 @@ OptionParser.new do |opts|
33
36
  opts.on('-p', '--pretty-print', 'Whether to pretty-print the JSON output.') do
34
37
  pretty_print = true
35
38
  end
36
- end.parse!
39
+ }.parse!
37
40
 
38
41
  def merge(solr_item, new_props)
39
42
  solr_item.merge(new_props) do |_, old_val, new_val|
@@ -41,21 +44,23 @@ def merge(solr_item, new_props)
41
44
  end
42
45
  end
43
46
 
44
- def merge_transformed_fields(solr_item, claim, export_hash, property_config)
45
- if claim.property_id == DigitalScriptorium::PropertyId::ASSOCIATED_NAME_AS_RECORDED
46
- merge(solr_item, DigitalScriptorium::NameClaimTransformer.transform(claim, export_hash))
47
- elsif claim.property_id == DigitalScriptorium::PropertyId::PRODUCTION_DATE_AS_RECORDED
48
- merge(solr_item,
49
- DigitalScriptorium::DateClaimTransformer.transform(claim, export_hash, property_config))
50
- else
51
- merge(solr_item,
52
- DigitalScriptorium::ClaimTransformer.transform(claim, export_hash, property_config))
53
- end
47
+ def base_solr_item(meta)
48
+ ds_id = meta.manuscript.ds_id
49
+ {
50
+ 'qid_meta' => [meta.holding.id, meta.manuscript.id, meta.record.id],
51
+ 'id' => [ds_id],
52
+ 'id_display' => [JSON.generate(recorded_value: ds_id)],
53
+ 'id_search' => [ds_id]
54
+ }
54
55
  end
55
56
 
56
- start_time = Time.now
57
+ def record?(entity)
58
+ entity.is_a?(DigitalScriptorium::DsItem) &&
59
+ entity.claims_by_property_id?(DigitalScriptorium::PropertyId::INSTANCE_OF) &&
60
+ entity.record?
61
+ end
57
62
 
58
- config = YAML.load_file(config_file)
63
+ start_time = Time.now.utc
59
64
 
60
65
  loading_spinner = TTY::Spinner.new('[:spinner] Loading export data', hide_cursor: true)
61
66
  loading_spinner.auto_spin
@@ -64,7 +69,7 @@ export_json = Zlib::GzipReader.open(input_file).read
64
69
  export_hash = DigitalScriptorium::ExportRepresenter.new(DigitalScriptorium::Export.new)
65
70
  .from_json(export_json)
66
71
  .to_hash
67
- loaded_time = Time.now
72
+ loaded_time = Time.now.utc
68
73
  loading_spinner.success("(#{format('%0.02f', loaded_time - start_time)}s)")
69
74
 
70
75
  item_count = 0
@@ -76,19 +81,22 @@ File.open(output_file, 'w') do |file|
76
81
  file << "\n" if pretty_print
77
82
 
78
83
  export_hash.each_with_index do |(_, entity), idx|
79
- next unless entity.is_a?(DigitalScriptorium::DsItem) &&
80
- entity.claims_by_property_id?(DigitalScriptorium::PropertyId::INSTANCE_OF) &&
81
- entity.record?
84
+ next unless record?(entity)
82
85
 
83
86
  meta = DigitalScriptorium::DsMeta.new(entity, export_hash)
84
- solr_item = { 'qid_meta' => [meta.holding.id, meta.manuscript.id, meta.record.id] }
87
+ solr_item = base_solr_item(meta)
85
88
 
86
89
  [meta.holding, meta.manuscript, meta.record].each do |item|
87
90
  item.claims.each do |property_id, claims|
88
91
  claims.each do |claim|
89
- next unless (property_config = config[property_id])
90
-
91
- solr_item = merge_transformed_fields(solr_item, claim, export_hash, property_config)
92
+ next unless DigitalScriptorium::Transformers.defined? property_id
93
+
94
+ begin
95
+ transformer = DigitalScriptorium::Transformers.create property_id, claim, export_hash
96
+ solr_item = merge solr_item, transformer.solr_props
97
+ rescue StandardError => e
98
+ logger.error "Error processing #{property_id} claim for item #{item.id}: #{e}"
99
+ end
92
100
  end
93
101
  end
94
102
  end
@@ -103,6 +111,6 @@ File.open(output_file, 'w') do |file|
103
111
  file << ']'
104
112
  end
105
113
 
106
- finish_time = Time.now
114
+ finish_time = Time.now.utc
107
115
  generating_spinner.success("(#{format('%0.02f', finish_time - loaded_time)}s)")
108
116
  puts "Generated #{item_count} Solr documents in #{format('%0.02f', finish_time - start_time)} seconds"
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: digital_scriptorium
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Michael Holloway
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-01-08 00:00:00.000000000 Z
10
+ date: 2025-01-17 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: multi_json
@@ -65,6 +65,62 @@ dependencies:
65
65
  - - "~>"
66
66
  - !ruby/object:Gem::Version
67
67
  version: '0.1'
68
+ - !ruby/object:Gem::Dependency
69
+ name: bundler
70
+ requirement: !ruby/object:Gem::Requirement
71
+ requirements:
72
+ - - "~>"
73
+ - !ruby/object:Gem::Version
74
+ version: '2.5'
75
+ type: :development
76
+ prerelease: false
77
+ version_requirements: !ruby/object:Gem::Requirement
78
+ requirements:
79
+ - - "~>"
80
+ - !ruby/object:Gem::Version
81
+ version: '2.5'
82
+ - !ruby/object:Gem::Dependency
83
+ name: rake
84
+ requirement: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - "~>"
87
+ - !ruby/object:Gem::Version
88
+ version: '13.2'
89
+ type: :development
90
+ prerelease: false
91
+ version_requirements: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - "~>"
94
+ - !ruby/object:Gem::Version
95
+ version: '13.2'
96
+ - !ruby/object:Gem::Dependency
97
+ name: rspec
98
+ requirement: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - "~>"
101
+ - !ruby/object:Gem::Version
102
+ version: '3.13'
103
+ type: :development
104
+ prerelease: false
105
+ version_requirements: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: '3.13'
110
+ - !ruby/object:Gem::Dependency
111
+ name: upennlib-rubocop
112
+ requirement: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - "~>"
115
+ - !ruby/object:Gem::Version
116
+ version: '1.2'
117
+ type: :development
118
+ prerelease: false
119
+ version_requirements: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '1.2'
68
124
  email:
69
125
  - michael@mdholloway.org
70
126
  executables: []
@@ -74,13 +130,12 @@ files:
74
130
  - ".ignore"
75
131
  - ".rspec"
76
132
  - ".rubocop.yml"
133
+ - ".rubocop_todo.yml"
77
134
  - LICENSE.txt
78
135
  - README.md
79
136
  - Rakefile
80
137
  - doc/overview.md
81
138
  - lib/digital_scriptorium.rb
82
- - lib/digital_scriptorium/claim_transformer.rb
83
- - lib/digital_scriptorium/date_claim_transformer.rb
84
139
  - lib/digital_scriptorium/ds_item.rb
85
140
  - lib/digital_scriptorium/ds_meta.rb
86
141
  - lib/digital_scriptorium/export.rb
@@ -88,11 +143,23 @@ files:
88
143
  - lib/digital_scriptorium/holding.rb
89
144
  - lib/digital_scriptorium/item_id.rb
90
145
  - lib/digital_scriptorium/manuscript.rb
91
- - lib/digital_scriptorium/name_claim_transformer.rb
92
146
  - lib/digital_scriptorium/property_id.rb
93
147
  - lib/digital_scriptorium/record.rb
148
+ - lib/digital_scriptorium/transformers.rb
149
+ - lib/digital_scriptorium/transformers/acknowledgements_claim_transformer.rb
150
+ - lib/digital_scriptorium/transformers/base_claim_transformer.rb
151
+ - lib/digital_scriptorium/transformers/date_claim_transformer.rb
152
+ - lib/digital_scriptorium/transformers/dated_claim_transformer.rb
153
+ - lib/digital_scriptorium/transformers/iiif_manifest_claim_transformer.rb
154
+ - lib/digital_scriptorium/transformers/link_claim_transformer.rb
155
+ - lib/digital_scriptorium/transformers/name_claim_transformer.rb
156
+ - lib/digital_scriptorium/transformers/note_claim_transformer.rb
157
+ - lib/digital_scriptorium/transformers/physical_description_claim_transformer.rb
158
+ - lib/digital_scriptorium/transformers/qualified_claim_transformer.rb
159
+ - lib/digital_scriptorium/transformers/qualified_claim_transformer_with_facet_fallback.rb
160
+ - lib/digital_scriptorium/transformers/shelfmark_claim_transformer.rb
161
+ - lib/digital_scriptorium/transformers/uniform_title_claim_transformer.rb
94
162
  - lib/digital_scriptorium/version.rb
95
- - property_config.yml
96
163
  - sig/digital_scriptorium.rbs
97
164
  - wikibase_to_solr_new.rb
98
165
  homepage: https://github.com/mdholloway/digital_scriptorium
@@ -1,82 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'wikibase_representable'
4
-
5
- module DigitalScriptorium
6
- # Transformer for converting claims of Digital Scriptorium items into Solr fields.
7
- class ClaimTransformer
8
- include PropertyId
9
- include WikibaseRepresentable::Model
10
-
11
- def self.transform(claim, export_hash, config)
12
- solr_props = {}
13
-
14
- prefix = config['prefix']
15
- requested_fields = config['fields']
16
- authority_property_id = config['authority']
17
-
18
- value = primary_value_from_claim(claim, export_hash)
19
-
20
- solr_props['id'] = [value] if requested_fields.include? 'id'
21
- solr_props["#{prefix}_meta"] = [value] if requested_fields.include? 'meta'
22
-
23
- unless authority_property_id && claim.qualifiers_by_property_id?(authority_property_id)
24
- solr_props["#{prefix}_display"] = [{ 'PV' => value }.to_json] if requested_fields.include? 'display'
25
- solr_props["#{prefix}_search"] = [value] if requested_fields.include? 'search'
26
- solr_props["#{prefix}_facet"] = [value] if requested_fields.include? 'facet'
27
-
28
- solr_props['images_facet'] = ['Yes'] if value && claim.property_id == IIIF_MANIFEST
29
- solr_props["#{prefix}_link"] = [value] if requested_fields.include? 'link'
30
-
31
- return solr_props
32
- end
33
-
34
- display_entries = []
35
- search_entries = [value]
36
- facets = []
37
-
38
- claim.qualifiers_by_property_id(authority_property_id).each do |qualifier|
39
- display_props = { 'PV' => value }
40
-
41
- authority_id = qualifier.entity_id_value
42
- authority = export_hash[authority_id]
43
-
44
- if authority
45
- label = authority.label('en')
46
-
47
- display_props['QL'] = label
48
- search_entries << label
49
- facets << label
50
-
51
- external_uri = authority.claim_by_property_id(EXTERNAL_URI)&.data_value
52
- wikidata_id = authority.claim_by_property_id(WIKIDATA_QID)&.data_value
53
- wikidata_uri = wikidata_id && "https://www.wikidata.org/wiki/#{wikidata_id}"
54
-
55
- # Only one or the other of these seem to exist for a given item in practice.
56
- display_props['QU'] = external_uri if external_uri
57
- display_props['QU'] = wikidata_uri if wikidata_uri
58
- end
59
-
60
- display_entries << display_props.to_json
61
- end
62
-
63
- solr_props["#{prefix}_display"] = display_entries.uniq if requested_fields.include? 'display'
64
- solr_props["#{prefix}_search"] = search_entries.uniq if requested_fields.include? 'search'
65
- solr_props["#{prefix}_facet"] = facets.uniq if requested_fields.include? 'facet'
66
-
67
- solr_props
68
- end
69
-
70
- def self.primary_value_from_claim(claim, export_hash)
71
- if claim.value_type? EntityIdValue
72
- entity_id = claim.entity_id_value
73
- referenced_item = export_hash[entity_id]
74
- referenced_item.label('en')
75
- elsif claim.value_type? TimeValue
76
- claim.time_value
77
- else
78
- claim.data_value
79
- end
80
- end
81
- end
82
- end
@@ -1,25 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'time'
4
-
5
- module DigitalScriptorium
6
- # Special-purpose transformer for date (P23) claims
7
- class DateClaimTransformer
8
- include PropertyId
9
-
10
- def self.transform(claim, export_hash, config)
11
- solr_props = ClaimTransformer.transform(claim, export_hash, config)
12
- return solr_props unless claim.qualifiers
13
-
14
- century = claim.qualifier_by_property_id(CENTURY).time_value
15
- earliest = claim.qualifier_by_property_id(EARLIEST_DATE).time_value
16
- latest = claim.qualifier_by_property_id(LATEST_DATE).time_value
17
-
18
- solr_props['century_int'] = [Time.parse(century).year] unless century.nil?
19
- solr_props['earliest_int'] = [Time.parse(earliest).year] unless earliest.nil?
20
- solr_props['latest_int'] = [Time.parse(latest).year] unless latest.nil?
21
-
22
- solr_props
23
- end
24
- end
25
- end
@@ -1,61 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module DigitalScriptorium
4
- # Special-purpose transformer for name (P14) claims
5
- class NameClaimTransformer
6
- include PropertyId
7
-
8
- def self.transform(claim, export_hash)
9
- return {} unless claim.qualifiers_by_property_id? ROLE_IN_AUTHORITY_FILE
10
-
11
- role_entity_id = claim.qualifier_by_property_id(ROLE_IN_AUTHORITY_FILE).entity_id_value
12
- role_item = export_hash[role_entity_id]
13
- role_label = role_item.label('en')
14
- prefix = role_label.downcase.split.last
15
-
16
- recorded_name = claim.data_value
17
- display_names = { 'PV' => recorded_name }
18
- search_names = [recorded_name]
19
-
20
- name_in_original_script = claim.qualifier_by_property_id(IN_ORIGINAL_SCRIPT)&.data_value&.value
21
- display_names['AGR'] = name_in_original_script if name_in_original_script
22
- search_names << name_in_original_script if name_in_original_script
23
-
24
- unless claim.qualifiers_by_property_id? NAME_IN_AUTHORITY_FILE
25
- return {
26
- "#{prefix}_display" => [display_names.to_json],
27
- "#{prefix}_search" => search_names,
28
- "#{prefix}_facet" => [recorded_name]
29
- }
30
- end
31
-
32
- display_entries = []
33
- facets = []
34
-
35
- claim.qualifiers_by_property_id(NAME_IN_AUTHORITY_FILE).each do |qualifier|
36
- display_names_for_qualifier = { 'PV' => recorded_name }
37
- display_names_for_qualifier['AGR'] = name_in_original_script if name_in_original_script
38
-
39
- name_entity_id = qualifier.entity_id_value
40
- name_item = export_hash[name_entity_id]
41
- name_label = name_item.label('en')
42
-
43
- display_names_for_qualifier['QL'] = name_label
44
- search_names << name_label
45
- facets << name_label
46
-
47
- wikidata_id = name_item.claim_by_property_id(WIKIDATA_QID).data_value
48
- wikidata_url = "https://www.wikidata.org/wiki/#{wikidata_id}"
49
- display_names_for_qualifier['QU'] = wikidata_url if wikidata_url
50
-
51
- display_entries << display_names_for_qualifier.to_json
52
- end
53
-
54
- {
55
- "#{prefix}_display" => display_entries.uniq,
56
- "#{prefix}_search" => search_names.uniq,
57
- "#{prefix}_facet" => facets.uniq
58
- }
59
- end
60
- end
61
- end
data/property_config.yml DELETED
@@ -1,106 +0,0 @@
1
- P1:
2
- prefix: id
3
- fields:
4
- - id
5
- - display
6
- - search
7
- P5:
8
- prefix: institution
9
- fields:
10
- - display
11
- - search
12
- - facet
13
- authority: P4
14
- P6:
15
- prefix: holding_status
16
- fields:
17
- - display
18
- P8:
19
- prefix: shelfmark
20
- fields:
21
- - display
22
- - search
23
- P9:
24
- prefix: institutional_record
25
- fields:
26
- - link
27
- P10:
28
- prefix: title
29
- fields:
30
- - display
31
- - search
32
- - facet
33
- authority: P11
34
- P12:
35
- prefix: uniform_title
36
- fields:
37
- - search
38
- # NOTE: P14 can translate to any of a few different Solr fields based on the value of the
39
- # associated role (P15) qualifier, and is handled in its own dedicated processing method
40
- P14:
41
- prefix: associated_name
42
- fields: []
43
- P18:
44
- prefix: term
45
- fields:
46
- - display
47
- - search
48
- - facet
49
- authority: P20
50
- P19:
51
- prefix: term
52
- fields:
53
- - display
54
- - search
55
- - facet
56
- authority: P20
57
- P21:
58
- prefix: language
59
- fields:
60
- - display
61
- - search
62
- - facet
63
- authority: P22
64
- P23:
65
- prefix: date
66
- fields:
67
- - meta
68
- - display
69
- - search
70
- - facet
71
- authority: P24
72
- P26:
73
- prefix: dated
74
- fields:
75
- - display
76
- - facet
77
- P27:
78
- prefix: place
79
- fields:
80
- - display
81
- - search
82
- - facet
83
- authority: P28
84
- P29:
85
- prefix: physical_description
86
- fields:
87
- - display
88
- - search
89
- P30:
90
- prefix: material
91
- fields:
92
- - facet
93
- authority: P31
94
- P32:
95
- prefix: note
96
- fields:
97
- - display
98
- - search
99
- P33:
100
- prefix: acknowledgements
101
- fields:
102
- - display
103
- P41:
104
- prefix: iiif_manifest
105
- fields:
106
- - link