ds-convert 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +294 -0
  3. data/Rakefile +12 -0
  4. data/config/settings.yml +150 -0
  5. data/exe/ds-convert +149 -0
  6. data/exe/ds-recon +275 -0
  7. data/exe/ds-validate-csv +40 -0
  8. data/exe/marc-mrc-to-xml.rb +80 -0
  9. data/lib/ds/cli.rb +102 -0
  10. data/lib/ds/constants.rb +166 -0
  11. data/lib/ds/converter/converter.rb +124 -0
  12. data/lib/ds/converter/writer.rb +50 -0
  13. data/lib/ds/converter.rb +7 -0
  14. data/lib/ds/csv_util.rb +43 -0
  15. data/lib/ds/data/berkeley-arks.txt +4000 -0
  16. data/lib/ds/data/getty-aat-centuries.csv +71 -0
  17. data/lib/ds/data/iiif_manifests.csv +122 -0
  18. data/lib/ds/data/legacy-iiif-manifests.csv +77 -0
  19. data/lib/ds/ds_error.rb +1 -0
  20. data/lib/ds/extractor/base_record_locator.rb +24 -0
  21. data/lib/ds/extractor/base_term.rb +79 -0
  22. data/lib/ds/extractor/csv_record_locator.rb +13 -0
  23. data/lib/ds/extractor/ds_csv_extractor.rb +695 -0
  24. data/lib/ds/extractor/ds_mets_xml_extractor.rb +1114 -0
  25. data/lib/ds/extractor/genre.rb +45 -0
  26. data/lib/ds/extractor/language.rb +31 -0
  27. data/lib/ds/extractor/marc_xml_extractor.rb +1172 -0
  28. data/lib/ds/extractor/material.rb +12 -0
  29. data/lib/ds/extractor/name.rb +50 -0
  30. data/lib/ds/extractor/place.rb +11 -0
  31. data/lib/ds/extractor/subject.rb +58 -0
  32. data/lib/ds/extractor/tei_xml_extractor.rb +687 -0
  33. data/lib/ds/extractor/title.rb +52 -0
  34. data/lib/ds/extractor/xml_record_locator.rb +38 -0
  35. data/lib/ds/extractor.rb +24 -0
  36. data/lib/ds/institutions.rb +55 -0
  37. data/lib/ds/manifest/base_id_validator.rb +76 -0
  38. data/lib/ds/manifest/constants.rb +67 -0
  39. data/lib/ds/manifest/ds_csv_id_validator.rb +15 -0
  40. data/lib/ds/manifest/entry.rb +133 -0
  41. data/lib/ds/manifest/manifest.rb +74 -0
  42. data/lib/ds/manifest/manifest_validator.rb +256 -0
  43. data/lib/ds/manifest/simple_xml_id_validator.rb +42 -0
  44. data/lib/ds/manifest.rb +30 -0
  45. data/lib/ds/mapper/base_mapper.rb +221 -0
  46. data/lib/ds/mapper/ds_csv_mapper.rb +77 -0
  47. data/lib/ds/mapper/ds_mets_mapper.rb +85 -0
  48. data/lib/ds/mapper/marc_mapper.rb +87 -0
  49. data/lib/ds/mapper/tei_xml_mapper.rb +79 -0
  50. data/lib/ds/mapper.rb +13 -0
  51. data/lib/ds/recon/constants.rb +56 -0
  52. data/lib/ds/recon/ds_csv_enumerator.rb +16 -0
  53. data/lib/ds/recon/ds_mets_xml_enumerator.rb +14 -0
  54. data/lib/ds/recon/marc_xml_enumerator.rb +15 -0
  55. data/lib/ds/recon/recon_builder.rb +183 -0
  56. data/lib/ds/recon/recon_data.rb +37 -0
  57. data/lib/ds/recon/recon_manager.rb +92 -0
  58. data/lib/ds/recon/source_enumerator.rb +21 -0
  59. data/lib/ds/recon/tei_xml_enumerator.rb +14 -0
  60. data/lib/ds/recon/type/all_subjects.rb +18 -0
  61. data/lib/ds/recon/type/genres.rb +50 -0
  62. data/lib/ds/recon/type/languages.rb +38 -0
  63. data/lib/ds/recon/type/materials.rb +40 -0
  64. data/lib/ds/recon/type/named_subjects.rb +20 -0
  65. data/lib/ds/recon/type/names.rb +65 -0
  66. data/lib/ds/recon/type/places.rb +40 -0
  67. data/lib/ds/recon/type/recon_type.rb +136 -0
  68. data/lib/ds/recon/type/splits.rb +34 -0
  69. data/lib/ds/recon/type/subjects.rb +65 -0
  70. data/lib/ds/recon/type/titles.rb +38 -0
  71. data/lib/ds/recon/url_lookup.rb +52 -0
  72. data/lib/ds/recon.rb +292 -0
  73. data/lib/ds/source/base_source.rb +32 -0
  74. data/lib/ds/source/ds_csv.rb +18 -0
  75. data/lib/ds/source/ds_mets_xml.rb +20 -0
  76. data/lib/ds/source/marc_xml.rb +22 -0
  77. data/lib/ds/source/source_cache.rb +69 -0
  78. data/lib/ds/source/tei_xml.rb +22 -0
  79. data/lib/ds/source.rb +20 -0
  80. data/lib/ds/util/cache.rb +111 -0
  81. data/lib/ds/util/csv_validator.rb +209 -0
  82. data/lib/ds/util/csv_writer.rb +42 -0
  83. data/lib/ds/util/strings.rb +194 -0
  84. data/lib/ds/util.rb +37 -0
  85. data/lib/ds/version.rb +5 -0
  86. data/lib/ds.rb +237 -0
  87. metadata +246 -0
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
module Recon
  ##
  # Base class for objects that enumerate the records held in a list of
  # source files. Enumerable is mixed in; subclasses are expected to
  # provide the real #each.
  class SourceEnumerator
    include DS::Util
    include Enumerable

    attr_accessor :files

    ##
    # Build an enumerator for the given source files.
    #
    # @param [Array] files an array of source file paths; the argument is
    #   splatted into a new Array before it is stored
    def initialize(files)
      @files = [*files]
    end

    ##
    # Placeholder iterator; this base implementation always raises.
    #
    # @yield record a record of the SourceEnumerator's type (MARC XML, CSV::Row, etc.)
    # @raise [NotImplementedError] always
    def each(&block)
      raise NotImplementedError
    end
  end
end
@@ -0,0 +1,14 @@
1
+ # frozen_string_literal: true
2
+
3
module Recon
  ##
  # SourceEnumerator that yields the TEI records found in its files.
  class TeiXmlEnumerator < SourceEnumerator

    ##
    # Hand +files+ to +process_xml+ (with +remove_namespaces: true+) and,
    # for every document it provides, yield each node matched by the
    # XPath +//TEI+.
    #
    # @yield [tei] one matched node at a time
    def each(&block)
      process_xml(files, remove_namespaces: true) do |doc|
        doc.xpath('//TEI').each { |tei| yield tei }
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
+ require 'nokogiri'
2
+
3
module Recon
  module Type
    ##
    # Recon type used to look up subjects and named subjects together
    # for import CSV output. Only the set name and the extractor method
    # list are defined here; every other constant comes from
    # Recon::Type::Subjects.
    class AllSubjects < Recon::Type::Subjects
      extend DS::Util

      # DS::Extractor method(s) associated with this type
      METHOD_NAME = %i[extract_all_subjects]

      # Name of the recon set
      SET_NAME = :'all-subjects'
    end
  end
end
@@ -0,0 +1,50 @@
1
+ require 'nokogiri'
2
+
3
module Recon
  module Type
    ##
    # Recon type configuration for genre terms in the reconciliation
    # CSV output.
    #
    # Genre terms are handled as a two-dimensional array: one row per
    # term, each row having three columns -- term, vocab, and authority
    # number.
    class Genres
      extend DS::Util
      include ReconType

      # Name of the recon set
      SET_NAME = :genres

      # DS::Extractor method(s) associated with this type
      METHOD_NAME = %i[extract_genres]

      # Column holding the as-recorded value
      AS_RECORDED_COLUMN = :genre_as_recorded

      # Columns that make up the lookup key
      KEY_COLUMNS = %i[genre_as_recorded vocab]

      # Columns retrieved from the data dictionary
      LOOKUP_COLUMNS = %i[authorized_label structured_value ds_qid]

      # Headers of the genres recon CSV
      RECON_CSV_HEADERS = %i[
        genre_as_recorded
        vocab
        source_authority_uri
        authorized_label
        structured_value
        ds_qid
      ]

      # Delimiter replacements: { ORIGINAL => REPLACEMENT }
      DELIMITER_MAP = { '|' => ';' }

      # Columns that must have equal numbers of subfields in each row
      BALANCED_COLUMNS = { genres: %i[structured_value authorized_label] }
    end
  end
end
@@ -0,0 +1,38 @@
1
module Recon
  module Type
    ##
    # Recon type configuration for language terms.
    class Languages
      extend DS::Util
      include Recon::Type::ReconType

      # Name of the recon set
      SET_NAME = :languages

      # DS::Extractor method(s) associated with this type
      METHOD_NAME = %i[extract_languages]

      # Column holding the as-recorded value
      AS_RECORDED_COLUMN = :language_as_recorded

      # Columns that make up the lookup key
      KEY_COLUMNS = %i[language_as_recorded]

      # Columns retrieved from the data dictionary
      LOOKUP_COLUMNS = %i[authorized_label structured_value ds_qid]

      # Headers of the languages recon CSV
      RECON_CSV_HEADERS = %i[
        language_as_recorded
        language_code
        authorized_label
        structured_value
        ds_qid
      ]

      # No delimiter replacements are defined for languages
      DELIMITER_MAP = {}

      # Columns that must have equal numbers of subfields in each row.
      # NOTE(review): the column names are Strings here, while Genres,
      # Names, Places and Subjects use Symbols -- confirm which form the
      # consumer of BALANCED_COLUMNS expects.
      BALANCED_COLUMNS = { languages: %w[structured_value authorized_label] }
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'nokogiri'
2
+
3
module Recon
  module Type
    ##
    # Recon type configuration for material terms.
    class Materials
      extend DS::Util
      include ReconType

      # Name of the recon set
      SET_NAME = :materials

      # DS::Extractor method(s) associated with this type
      METHOD_NAME = %i[extract_materials]

      # Column holding the as-recorded value
      AS_RECORDED_COLUMN = :material_as_recorded

      # Columns that make up the lookup key
      KEY_COLUMNS = %i[material_as_recorded]

      # Columns retrieved from the data dictionary
      LOOKUP_COLUMNS = %i[authorized_label structured_value ds_qid]

      # Headers of the materials recon CSV
      RECON_CSV_HEADERS = %i[
        material_as_recorded
        authorized_label
        structured_value
        ds_qid
      ]

      # Delimiter replacements: { ORIGINAL => REPLACEMENT }
      DELIMITER_MAP = { '|' => ';' }

      # Columns that must have equal numbers of subfields in each row.
      # NOTE(review): the column names are Strings here, while Genres,
      # Names, Places and Subjects use Symbols -- confirm which form the
      # consumer of BALANCED_COLUMNS expects.
      BALANCED_COLUMNS = { materials: %w[structured_value authorized_label] }
    end
  end
end
@@ -0,0 +1,20 @@
1
+ require 'nokogiri'
2
+
3
module Recon
  module Type
    ##
    # Recon type for named subjects in the reconciliation CSV output.
    #
    # Named subjects are handled as a two-dimensional array: one row per
    # term, each row having two columns -- subject and authority number.
    #
    # Only the set name and the extractor method list are defined here;
    # every other constant comes from Recon::Type::Subjects.
    class NamedSubjects < Recon::Type::Subjects
      extend DS::Util

      # DS::Extractor method(s) associated with this type
      METHOD_NAME = %i[extract_named_subjects]

      # Name of the recon set
      SET_NAME = :'named-subjects'
    end
  end
end
@@ -0,0 +1,65 @@
1
+ require 'nokogiri'
2
+
3
module Recon
  module Type
    ##
    # Recon type configuration for names. The role-specific subclasses
    # below share every constant except METHOD_NAME.
    class Names
      extend DS::Util
      include ReconType

      # Name of the recon set
      SET_NAME = :names

      # DS::Extractor methods associated with this type.
      # NOTE(review): extract_associated_agents (used by AssociatedAgents
      # below) is not in this list -- confirm whether that is intentional.
      METHOD_NAME = %i[
        extract_authors
        extract_artists
        extract_scribes
        extract_former_owners
      ]

      # Column holding the as-recorded value
      AS_RECORDED_COLUMN = :name_as_recorded

      # Columns that make up the lookup key
      KEY_COLUMNS = %i[name_as_recorded]

      # Columns retrieved from the data dictionary
      LOOKUP_COLUMNS = %i[authorized_label structured_value source_authority_uri instance_of ds_qid]

      # Headers of the names recon CSV
      RECON_CSV_HEADERS = %i[
        name_as_recorded
        role
        name_agr
        source_authority_uri
        instance_of
        authorized_label
        structured_value
        ds_qid
      ]

      # No delimiter replacements are defined for names
      DELIMITER_MAP = {}

      # Columns that must have equal numbers of subfields in each row
      BALANCED_COLUMNS = { names: %i[structured_value authorized_label instance_of] }
    end

    # Names restricted to the author extractor method.
    class Authors < Names
      METHOD_NAME = %i[extract_authors].freeze
    end

    # Names restricted to the artist extractor method.
    class Artists < Names
      METHOD_NAME = %i[extract_artists].freeze
    end

    # Names restricted to the associated-agent extractor method.
    class AssociatedAgents < Names
      METHOD_NAME = %i[extract_associated_agents].freeze
    end

    # Names restricted to the former-owner extractor method.
    class FormerOwners < Names
      METHOD_NAME = %i[extract_former_owners].freeze
    end

    # Names restricted to the scribe extractor method.
    class Scribes < Names
      METHOD_NAME = %i[extract_scribes].freeze
    end
  end
end
@@ -0,0 +1,40 @@
1
+ require 'nokogiri'
2
+
3
module Recon
  module Type
    ##
    # Recon type configuration for place terms, plus a bulk lookup
    # helper.
    class Places
      extend DS::Util
      include ReconType

      # Name of the recon set
      SET_NAME = :places

      # DS::Extractor method(s) associated with this type
      METHOD_NAME = %i[extract_places]

      # Column holding the as-recorded value
      AS_RECORDED_COLUMN = :place_as_recorded

      # Columns that make up the lookup key
      KEY_COLUMNS = %i[place_as_recorded]

      # Columns retrieved from the data dictionary
      LOOKUP_COLUMNS = %i[authorized_label structured_value ds_qid]

      # Headers of the places recon CSV
      RECON_CSV_HEADERS = %i[
        place_as_recorded
        authorized_label
        structured_value
        ds_qid
      ]

      # Delimiter replacements: { ORIGINAL => REPLACEMENT }
      DELIMITER_MAP = { '|' => ';' }

      # Columns that must have equal numbers of subfields in each row
      BALANCED_COLUMNS = { places: %i[structured_value authorized_label] }

      ##
      # Look up one recon column for each of the given places.
      #
      # Each place is converted with +to_h+, its key-column values are
      # passed to Recon.lookup_single for the :places set, and the
      # result is stringified with every '|' replaced by ';'.
      #
      # @param places [Enumerable] objects responding to +to_h+ and
      #   keyed by the KEY_COLUMNS
      # @param from_column [String] the recon column to return
      # @return [Array<String>] one value per input place
      def self.lookup(places, from_column: 'structured_value')
        places.map do |place|
          keys  = get_key_values(place.to_h)
          found = Recon.lookup_single(SET_NAME, key_values: keys, column: from_column)
          found.to_s.tr('|', ';')
        end
      end
    end
  end
end
@@ -0,0 +1,136 @@
1
+ # frozen_string_literal: true
2
+
3
+
4
module Recon
  module Type
    ##
    # The Recon::Type::ReconType module should be included in all
    # Recon::Type classes. Including it extends the class with
    # ClassMethods, which expose the class's recon type configuration
    # constants as class-level readers. Its methods support the lookup
    # and enrichment of DS::Extractor::BaseTerm object values.
    #
    # The readers cover the recon CSV columns, the columns retrieved
    # from the DS data dictionaries, the lookup key columns, the import
    # CSV as-recorded column (e.g., author_as_recorded), and, for
    # validation purposes, the balanced columns; that is, those columns
    # in the recon CSVs that must have equal numbers of subfields in
    # each row.
    #
    # Classes that include Recon::Type::ReconType should define these
    # constants
    #
    # SET_NAME :: the name of the recon set; e.g., :places
    # RECON_CSV_HEADERS :: the recon CSV headers; e.g., [:place_as_recorded, :authorized_label, :structured_value, :ds_qid]
    # LOOKUP_COLUMNS :: the columns to extract from the data dictionaries; e.g., [:authorized_label, :structured_value, :ds_qid]
    # KEY_COLUMNS :: the key columns in the recon CSV; e.g., [:place_as_recorded]
    # AS_RECORDED_COLUMN :: the column in the recon CSV that holds the as-recorded value; e.g., :author_as_recorded
    # DELIMITER_MAP :: a map of delimiters to replace in the recon CSV values: { ORIGINAL => REPLACEMENT }; e.g., { '|' => ';' }
    # METHOD_NAME :: the names of the DS::Extractor methods; e.g., [:extract_places]
    # BALANCED_COLUMNS :: the columns that must have equal numbers of subfields; e.g., { places: [:structured_value, :authorized_label] }
    #
    module ReconType

      ##
      # Class-level readers added to every class that includes
      # ReconType. Each reader resolves its constant through +self::+,
      # so a subclass that redefines a constant gets its own value.
      module ClassMethods

        # The name of the recon set; e.g., :places
        #
        # Used to find a recon type configuration by name; either
        # the ReconType (like Recon::Type::Places) or the path to the
        # recon data dictionary CSV in the ds-data git repo as defined
        # in config/settings.yml:
        #
        #     ds:
        #       recon:
        #         ...
        #         sets:
        #           - name: :places
        #             repo_path: terms/reconciled/places.csv
        #             key_column: place_as_recorded
        #         ...
        #
        # @return [Symbol] the set name
        def set_name
          self::SET_NAME
        end

        # The recon CSV headers; e.g., [:place_as_recorded, :authorized_label, :structured_value, :ds_qid]
        #
        # @return [Array<Symbol>] the recon CSV headers
        def recon_csv_headers
          self::RECON_CSV_HEADERS
        end

        # The columns that lookups pull from the data dictionaries; e.g., [:authorized_label, :structured_value, :ds_qid]
        #
        # @return [Array<Symbol>] the lookup columns
        def lookup_columns
          self::LOOKUP_COLUMNS
        end

        # The columns used to make the lookup key for the data dictionary; e.g., [:genre_as_recorded, :vocab]
        #
        # @return [Array<Symbol>] the key columns
        def key_columns
          self::KEY_COLUMNS
        end

        # The column in the recon CSV that holds the as-recorded value; e.g., :author_as_recorded
        #
        # @return [Symbol] the as-recorded column
        def as_recorded_column
          self::AS_RECORDED_COLUMN
        end

        # The delimiter replacement map: { ORIGINAL => REPLACEMENT }; e.g., { '|' => ';' }
        #
        # @return [Hash<String,String>] the delimiter map
        def delimiter_map
          self::DELIMITER_MAP
        end

        # The names of the DS::Extractor methods; e.g., [:extract_places]
        #
        # @return [Array<Symbol>] the method names
        def method_name
          self::METHOD_NAME
        end

        # The value of the including class's BALANCED_COLUMNS constant.
        #
        # Balanced columns should have equal numbers of fields and
        # subfields in each row; e.g., if fields are delimited by '|'
        # and subfields by ';', then the following are balanced:
        #
        #     structured_value,authorized_label
        #     a|b;c,d|e;f
        #     1|2|3,x|y|z
        #     r,s
        #
        # @return [Hash] the balanced column names grouped under a key,
        #   as in the BALANCED_COLUMNS example above; may be empty
        #
        # @example
        #   Recon::Type::Places.balanced_columns #=> { places: [:structured_value, :authorized_label] }
        def balanced_columns
          self::BALANCED_COLUMNS
        end

        ##
        # Collect the values of the key columns from the given row.
        #
        # @param row [Hash<Symbol,String>] the row to read from
        # @return [Array<String>] one value per key column, in
        #   KEY_COLUMNS order
        def get_key_values(row)
          key_columns.map { |column| row[column] }
        end

        ##
        # Collect the values of the lookup columns from the given row.
        #
        # @param row [Hash<Symbol,String>] the row to read from
        # @return [Array<String>] one value per lookup column, in
        #   LOOKUP_COLUMNS order
        def lookup_values(row)
          lookup_columns.map { |column| row[column] }
        end
      end

      ##
      # Hook run when ReconType is included; adds ClassMethods to the
      # including class.
      def self.included(base)
        base.extend(ClassMethods)
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
module Recon
  module Type
    ##
    # Recon type configuration for the :splits set, plus a single-value
    # lookup helper.
    class Splits
      extend DS::Util
      include ReconType

      # Name of the recon set
      SET_NAME = :splits

      # No DS::Extractor methods are associated with splits
      METHOD_NAME = []

      # Column holding the as-recorded value
      AS_RECORDED_COLUMN = :as_recorded

      # Columns that make up the lookup key
      KEY_COLUMNS = %i[as_recorded]

      # Columns retrieved from the data dictionary
      LOOKUP_COLUMNS = %i[authorized_label]

      # Headers of the splits recon CSV
      RECON_CSV_HEADERS = %i[as_recorded authorized_label]

      # No delimiter replacements are defined for splits
      DELIMITER_MAP = {}

      # No balanced columns are defined for splits
      BALANCED_COLUMNS = {}

      ##
      # Look up one column of the :splits set for a single as-recorded
      # value by delegating to Recon.lookup_single.
      #
      # @param as_recorded [String] the as-recorded value, used as the
      #   only key value
      # @param from_column [String, Symbol] the recon column to return
      # @return whatever Recon.lookup_single returns
      def self._lookup_single(as_recorded, from_column:)
        Recon.lookup_single SET_NAME, key_values: [as_recorded], column: from_column
      end
    end
  end
end
@@ -0,0 +1,65 @@
1
+ require 'nokogiri'
2
+
3
module Recon
  module Type
    ##
    # Recon type configuration for subjects in the reconciliation CSV
    # output.
    #
    # NOTE: Each source subject extraction method should return a
    # two-dimensional array:
    #
    #     [["Islamic law--Early works to 1800", ""],
    #      ["Malikites--Early works to 1800", ""],
    #      ["Islamic law", ""],
    #      ["Malikites", ""],
    #      ["Arabic language--Grammar--Early works to 1800", ""],
    #      ["Arabic language--Grammar", ""],
    #      ...
    #     ]
    #
    # The two values are `subject_as_recorded` and `source_authority_uri`.
    # The second of these is present when the source record provides an
    # accompanying URI. This is rare. Sources that lack a URI should
    # return the as-recorded value and `""` (the empty string) for the
    # `source_authority_uri`, as shown above.
    #
    class Subjects
      extend DS::Util
      include ReconType

      # Name of the recon set
      SET_NAME = :subjects

      # DS::Extractor method(s) associated with this type
      METHOD_NAME = %i[extract_subjects]

      # Column holding the as-recorded value
      AS_RECORDED_COLUMN = :subject_as_recorded

      # Columns that make up the lookup key
      KEY_COLUMNS = %i[subject_as_recorded subfield_codes vocab]

      # Columns retrieved from the data dictionary
      LOOKUP_COLUMNS = %i[authorized_label structured_value ds_qid]

      # Headers of the subjects recon CSV (frozen)
      RECON_CSV_HEADERS = %i[
        subject_as_recorded
        subfield_codes
        vocab
        source_authority_uri
        authorized_label
        structured_value
        ds_qid
      ].freeze

      # Delimiter replacements: { ORIGINAL => REPLACEMENT }
      DELIMITER_MAP = { '|' => ';' }

      # Columns that must have equal numbers of subfields in each row
      BALANCED_COLUMNS = { subjects: %i[structured_value authorized_label] }
    end
  end
end
@@ -0,0 +1,38 @@
1
+ require 'nokogiri'
2
+
3
module Recon
  module Type
    ##
    # Recon type configuration for titles.
    class Titles
      extend DS::Util
      include ReconType

      # Name of the recon set
      SET_NAME = :titles

      # Column holding the as-recorded value
      AS_RECORDED_COLUMN = :title_as_recorded

      # Columns that make up the lookup key
      KEY_COLUMNS = %i[
        title_as_recorded
        uniform_title_as_recorded
      ]

      # Columns retrieved from the data dictionary
      LOOKUP_COLUMNS = %i[authorized_label ds_qid]

      # Headers of the titles recon CSV
      RECON_CSV_HEADERS = %i[
        title_as_recorded
        title_as_recorded_agr
        uniform_title_as_recorded
        uniform_title_as_recorded_agr
        authorized_label
        ds_qid
      ]

      # DS::Extractor method(s) associated with this type
      METHOD_NAME = %i[extract_titles]

      # Delimiter replacements: { ORIGINAL => REPLACEMENT }
      DELIMITER_MAP = { '|' => ';' }

      # No balanced columns are defined for titles
      BALANCED_COLUMNS = {}
    end
  end
end
@@ -0,0 +1,52 @@
1
module Recon
  ##
  # Finds URLs by holding institution and shelfmark, using a CSV file
  # from the local recon data repository. The CSV is read the first
  # time a lookup is made and its rows are kept in +url_hash+.
  class URLLookup

    attr_reader :lookup_set
    attr_reader :url_hash

    ##
    # The name of the lookup set in `config/recon.yml`. For example, for
    #
    #     ---
    #     recon:
    #       # ...
    #       iiif_manifests: iiif/legacy-iiif-manifests.csv
    #
    # the +lookup_set+ is 'iiif_manifests'.
    #
    # @param [String] lookup_set the name of the recon setting
    def initialize(lookup_set)
      @lookup_set = lookup_set
      @url_hash   = {}
    end

    ##
    # Find the URL recorded for the given institution and shelfmark.
    #
    # @param holding_inst_as_recorded [String] the holding institution
    #   name, resolved to a QID by #url_key
    # @param shelfmark [String] the shelfmark
    # @return [String, nil] the URL, or +nil+ when no row matches
    # @raise [DSError] if no QID is found for the institution
    def find_url(holding_inst_as_recorded, shelfmark)
      urls[url_key(holding_inst_as_recorded, shelfmark)]
    end

    ##
    # The key-to-URL hash, populated from the lookup set's CSV while
    # +url_hash+ is still empty. The CSV path is built from +DS.root+,
    # 'data', +Settings.recon.git_local_name+ and
    # +Settings.recon[lookup_set]+; the columns read are
    # +holding_institution+, +shelfmark+ and +url+.
    #
    # @return [Hash<String,String>] normalized key => URL
    # @raise [DSError] if a row's holding institution has no QID
    def urls
      return url_hash unless url_hash.empty?

      recon_repo = File.join DS.root, 'data', Settings.recon.git_local_name
      csv_file   = File.join recon_repo, Settings.recon[lookup_set]

      # Stream the rows instead of loading the whole table first
      CSV.foreach(csv_file, headers: true) do |row|
        key = url_key row['holding_institution'], row['shelfmark']
        url_hash[key] = row['url']
      end
      url_hash
    end

    ##
    # Build the lookup key for a holding institution and shelfmark: the
    # institution's QID (from DS::Institutions.find_qid) joined with the
    # shelfmark and normalized by #normalize_key.
    #
    # @param holder [String] the holding institution name
    # @param shelfmark [String] the shelfmark
    # @return [String] the normalized key
    # @raise [DSError] if the QID found for +holder+ is blank
    def url_key(holder, shelfmark)
      qid = DS::Institutions.find_qid holder
      raise DSError, "No QID found for #{holder}" if qid.blank?

      normalize_key qid, shelfmark
    end

    ##
    # Join the given strings, downcase the result, and strip all
    # whitespace.
    #
    # @param strings [Array<String>] the parts of the key
    # @return [String] the normalized key
    def normalize_key(*strings)
      strings.join.downcase.gsub(%r{\s+}, '')
    end
  end
end