ds-convert 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +294 -0
- data/Rakefile +12 -0
- data/config/settings.yml +150 -0
- data/exe/ds-convert +149 -0
- data/exe/ds-recon +275 -0
- data/exe/ds-validate-csv +40 -0
- data/exe/marc-mrc-to-xml.rb +80 -0
- data/lib/ds/cli.rb +102 -0
- data/lib/ds/constants.rb +166 -0
- data/lib/ds/converter/converter.rb +124 -0
- data/lib/ds/converter/writer.rb +50 -0
- data/lib/ds/converter.rb +7 -0
- data/lib/ds/csv_util.rb +43 -0
- data/lib/ds/data/berkeley-arks.txt +4000 -0
- data/lib/ds/data/getty-aat-centuries.csv +71 -0
- data/lib/ds/data/iiif_manifests.csv +122 -0
- data/lib/ds/data/legacy-iiif-manifests.csv +77 -0
- data/lib/ds/ds_error.rb +1 -0
- data/lib/ds/extractor/base_record_locator.rb +24 -0
- data/lib/ds/extractor/base_term.rb +79 -0
- data/lib/ds/extractor/csv_record_locator.rb +13 -0
- data/lib/ds/extractor/ds_csv_extractor.rb +695 -0
- data/lib/ds/extractor/ds_mets_xml_extractor.rb +1114 -0
- data/lib/ds/extractor/genre.rb +45 -0
- data/lib/ds/extractor/language.rb +31 -0
- data/lib/ds/extractor/marc_xml_extractor.rb +1172 -0
- data/lib/ds/extractor/material.rb +12 -0
- data/lib/ds/extractor/name.rb +50 -0
- data/lib/ds/extractor/place.rb +11 -0
- data/lib/ds/extractor/subject.rb +58 -0
- data/lib/ds/extractor/tei_xml_extractor.rb +687 -0
- data/lib/ds/extractor/title.rb +52 -0
- data/lib/ds/extractor/xml_record_locator.rb +38 -0
- data/lib/ds/extractor.rb +24 -0
- data/lib/ds/institutions.rb +55 -0
- data/lib/ds/manifest/base_id_validator.rb +76 -0
- data/lib/ds/manifest/constants.rb +67 -0
- data/lib/ds/manifest/ds_csv_id_validator.rb +15 -0
- data/lib/ds/manifest/entry.rb +133 -0
- data/lib/ds/manifest/manifest.rb +74 -0
- data/lib/ds/manifest/manifest_validator.rb +256 -0
- data/lib/ds/manifest/simple_xml_id_validator.rb +42 -0
- data/lib/ds/manifest.rb +30 -0
- data/lib/ds/mapper/base_mapper.rb +221 -0
- data/lib/ds/mapper/ds_csv_mapper.rb +77 -0
- data/lib/ds/mapper/ds_mets_mapper.rb +85 -0
- data/lib/ds/mapper/marc_mapper.rb +87 -0
- data/lib/ds/mapper/tei_xml_mapper.rb +79 -0
- data/lib/ds/mapper.rb +13 -0
- data/lib/ds/recon/constants.rb +56 -0
- data/lib/ds/recon/ds_csv_enumerator.rb +16 -0
- data/lib/ds/recon/ds_mets_xml_enumerator.rb +14 -0
- data/lib/ds/recon/marc_xml_enumerator.rb +15 -0
- data/lib/ds/recon/recon_builder.rb +183 -0
- data/lib/ds/recon/recon_data.rb +37 -0
- data/lib/ds/recon/recon_manager.rb +92 -0
- data/lib/ds/recon/source_enumerator.rb +21 -0
- data/lib/ds/recon/tei_xml_enumerator.rb +14 -0
- data/lib/ds/recon/type/all_subjects.rb +18 -0
- data/lib/ds/recon/type/genres.rb +50 -0
- data/lib/ds/recon/type/languages.rb +38 -0
- data/lib/ds/recon/type/materials.rb +40 -0
- data/lib/ds/recon/type/named_subjects.rb +20 -0
- data/lib/ds/recon/type/names.rb +65 -0
- data/lib/ds/recon/type/places.rb +40 -0
- data/lib/ds/recon/type/recon_type.rb +136 -0
- data/lib/ds/recon/type/splits.rb +34 -0
- data/lib/ds/recon/type/subjects.rb +65 -0
- data/lib/ds/recon/type/titles.rb +38 -0
- data/lib/ds/recon/url_lookup.rb +52 -0
- data/lib/ds/recon.rb +292 -0
- data/lib/ds/source/base_source.rb +32 -0
- data/lib/ds/source/ds_csv.rb +18 -0
- data/lib/ds/source/ds_mets_xml.rb +20 -0
- data/lib/ds/source/marc_xml.rb +22 -0
- data/lib/ds/source/source_cache.rb +69 -0
- data/lib/ds/source/tei_xml.rb +22 -0
- data/lib/ds/source.rb +20 -0
- data/lib/ds/util/cache.rb +111 -0
- data/lib/ds/util/csv_validator.rb +209 -0
- data/lib/ds/util/csv_writer.rb +42 -0
- data/lib/ds/util/strings.rb +194 -0
- data/lib/ds/util.rb +37 -0
- data/lib/ds/version.rb +5 -0
- data/lib/ds.rb +237 -0
- metadata +246 -0
@@ -0,0 +1,695 @@
|
|
1
|
+
require 'csv'
|
2
|
+
|
3
|
+
module DS
|
4
|
+
module Extractor
|
5
|
+
module DsCsvExtractor
|
6
|
+
# Maps each extractor property to the DS CSV column header(s) that hold its
# values. A property backed by several columns maps to an array of headers.
# The hash and its nested header arrays are frozen so the shared constant
# cannot be mutated by accident.
COLUMN_MAPPINGS = {
  ds_id: "DS ID",
  holding_institution_as_recorded: "Holding Institution",
  source_type: "Source Type",
  cataloging_convention: "Cataloging Convention",
  holding_institution_id_number: "Holding Institution Identifier",
  holding_institution_shelfmark: "Shelfmark",
  fragment_num_disambiguator: "Fragment Number or Disambiguator",
  link_to_holding_institution_record: "Link to Institutional Record",
  link_to_iiif_manifest: "IIIF Manifest",
  production_places_as_recorded: "Production Place(s)",
  production_date_as_recorded: "Date Description",
  production_date_start: "Production Date START",
  production_date_end: "Production Date END",
  dated: "Dated",
  uniform_titles_as_recorded: "Uniform Title(s)",
  titles_as_recorded: "Title(s)",
  genres_as_recorded: "Genre/Form",
  all_subjects: [
    "Subject(s)",
    "Named Subject(s)",
  ].freeze,
  subjects_as_recorded: "Subject(s)",
  named_subjects_as_recorded: "Named Subject(s)",
  authors_as_recorded: "Author Name(s)",
  artists_as_recorded: "Artist Name(s)",
  scribes_as_recorded: "Scribe Name(s)",
  former_owners_as_recorded: "Former Owner Name(s)",
  languages_as_recorded: "Language(s)",
  material_as_recorded: "Materials Description",
  extent: "Extent",
  dimensions: "Dimensions",
  notes: [
    "Layout",
    "Script",
    "Decoration",
    "Binding",
    "Physical Description Miscellaneous",
    "Provenance Notes",
    "Note 1",
    "Note 2"
  ].freeze,
  acknowledgments: "Acknowledgements",
  date_source_modified: "Date Updated by Contributor",
}.freeze

# Marker that mark_long prepends to values longer than 400 characters.
LONG_STRING_WARNING = 'TEXT_EXCEEDS_400_CHARACTERS'
|
53
|
+
|
54
|
+
module ClassMethods
|
55
|
+
|
56
|
+
# Returns the DS ID recorded for the row.
#
# @param [CSV::Row] record the row to read the "DS ID" column from
# @return [String, nil] the first DS ID value, or +nil+ when the column is empty
def extract_dsid(record)
  ids = extract_values_for(property: :ds_id, record: record)
  ids.first
end
|
63
|
+
|
64
|
+
# Returns the source type recorded for the row.
#
# @param [CSV::Row] record the row to read the "Source Type" column from
# @return [String, nil] the first source type value, or +nil+ when the column is empty
def extract_source_type(record)
  source_types = extract_values_for(property: :source_type, record: record)
  source_types.first
end
|
71
|
+
|
72
|
+
# Returns the cataloging convention recorded for the row.
#
# @param [CSV::Row] record the row to read the "Cataloging Convention" column from
# @return [String, nil] the first cataloging convention, or +nil+ when the column is empty
def extract_cataloging_convention(record)
  conventions = extract_values_for(property: :cataloging_convention, record: record)
  conventions.first
end
|
79
|
+
|
80
|
+
# Returns the holding institution name as recorded in the row.
#
# @param [CSV::Row] record the row to read the "Holding Institution" column from
# @return [String, nil] the first recorded institution name, or +nil+ when the column is empty
def extract_holding_institution_as_recorded(record)
  institutions = extract_values_for(property: :holding_institution_as_recorded, record: record)
  institutions.first
end
|
87
|
+
|
88
|
+
# Returns the holding institution's identifier (e.g., a BibID) for the row.
#
# @param [CSV::Row] record the row to read the "Holding Institution Identifier" column from
# @return [String, nil] the first identifier value, or +nil+ when the column is empty
def extract_holding_institution_id_number(record)
  identifiers = extract_values_for(property: :holding_institution_id_number, record: record)
  identifiers.first
end
|
95
|
+
|
96
|
+
# Returns the shelfmark recorded for the row.
#
# @param [CSV::Row] record the row to read the "Shelfmark" column from
# @return [String, nil] the first shelfmark value, or +nil+ when the column is empty
def extract_holding_institution_shelfmark(record)
  shelfmarks = extract_values_for(property: :holding_institution_shelfmark, record: record)
  shelfmarks.first
end
|
103
|
+
|
104
|
+
# Returns the fragment number or disambiguator recorded for the row.
#
# @param [CSV::Row] record the row to read the "Fragment Number or Disambiguator" column from
# @return [String, nil] the first value, or +nil+ when the column is empty
def extract_fragment_num_disambiguator(record)
  disambiguators = extract_values_for(property: :fragment_num_disambiguator, record: record)
  disambiguators.first
end
|
111
|
+
|
112
|
+
# Returns the link to the holding institution's own record for the row.
#
# @param [CSV::Row] record the row to read the "Link to Institutional Record" column from
# @return [String, nil] the first link, or +nil+ when the column is empty
def extract_link_to_holding_institution_record(record)
  links = extract_values_for(property: :link_to_holding_institution_record, record: record)
  links.first
end
|
119
|
+
|
120
|
+
# Returns the IIIF manifest link recorded for the row.
#
# @param [CSV::Row] record the row to read the "IIIF Manifest" column from
# @return [String, nil] the first manifest link, or +nil+ when the column is empty
def extract_link_to_iiif_manifest(record)
  manifests = extract_values_for(property: :link_to_iiif_manifest, record: record)
  manifests.first
end
|
127
|
+
|
128
|
+
# Returns the production date description(s) for the row. When the
# "Date Description" column yields nothing, falls back to the start/end
# date range joined with a hyphen.
#
# @param [CSV::Row] record the row to read the production date from
# @return [Array<String>] the recorded dates, or the fallback date range
def extract_production_date_as_recorded(record)
  recorded = extract_values_for(property: :production_date_as_recorded, record: record)
  if recorded.present?
    recorded
  else
    extract_date_range(record, range_sep: '-')
  end
end
|
138
|
+
|
139
|
+
|
140
|
+
# Builds a single date-range string from the row's production start and
# end dates. Blank bounds are skipped, so a row with only one bound yields
# that bound alone.
#
# @param [CSV::Row] record the row to read the start and end dates from
# @param [String] range_sep the separator placed between start and end
# @return [Array<String>] a one-element array holding the range, or an
#   empty array when neither date is present
def extract_date_range(record, range_sep:)
  bounds = [
    extract_production_date_start(record),
    extract_production_date_end(record)
  ].select(&:present?)

  bounds.blank? ? [] : [bounds.join(range_sep)]
end
|
152
|
+
|
153
|
+
# Returns the production start date recorded for the row.
#
# @param [CSV::Row] record the row to read the "Production Date START" column from
# @return [String, nil] the first start date, or +nil+ when the column is empty
def extract_production_date_start(record)
  start_dates = extract_values_for(property: :production_date_start, record: record)
  start_dates.first
end
|
160
|
+
|
161
|
+
# Returns the production end date recorded for the row.
#
# @param [CSV::Row] record the row to read the "Production Date END" column from
# @return [String, nil] the first end date, or +nil+ when the column is empty
def extract_production_date_end(record)
  end_dates = extract_values_for(property: :production_date_end, record: record)
  end_dates.first
end
|
168
|
+
|
169
|
+
# Reports whether the row is flagged as dated.
#
# The "Dated" column values are concatenated, stripped and compared
# case-insensitively with 'true'. Always returns a real Boolean; an empty
# or non-'true' column yields +false+ rather than +nil+.
#
# @param [CSV::Row] record the row to read the "Dated" column from
# @return [Boolean] +true+ if the dated value is 'true', +false+ otherwise
def extract_dated(record)
  dated = extract_values_for(property: :dated, record: record)
  dated.join.strip.downcase == 'true'
end
|
177
|
+
|
178
|
+
# Builds a one-line physical description from the row's extent, material
# and dimensions values.
#
# Blank values (e.g., from empty pipe-delimited segments) are left out so
# the joined string never contains empty "; ; " entries.
#
# @param [CSV::Row] record the row to read the physical description from
# @return [Array<String>] a one-element array of the form
#   "Extent: <values joined by '; '>", or an empty array when no values
#   are present
def extract_physical_description(record)
  extent     = extract_values_for(property: :extent, record: record)
  material   = extract_values_for(property: :material_as_recorded, record: record)
  dimensions = extract_dimensions(record)
  parts = [extent, material, dimensions].flatten.select(&:present?)

  # return an empty array if no values are present
  return [] if parts.empty?

  ["Extent: #{parts.join('; ')}"]
end
|
194
|
+
|
195
|
+
# Returns every value found in the row's "Dimensions" column.
#
# @param [CSV::Row] record the row to read the dimensions from
# @return [Array<String>] the dimensions values; empty when the column is empty
def extract_dimensions(record)
  extract_values_for(property: :dimensions, record: record)
end
|
202
|
+
|
203
|
+
# Returns the as-recorded form of each author name in the row.
#
# @param [CSV::Row] record the row to read authors from
# @return [Array<String>] the author names as recorded
def extract_authors_as_recorded(record)
  extract_authors(record).map { |author| author.as_recorded }
end
|
210
|
+
|
211
|
+
# Returns the vernacular form of each author name in the row.
#
# @param [CSV::Row] record the row to read authors from
# @return [Array<String, nil>] the vernacular forms, +nil+ where a name has none
def extract_authors_as_recorded_agr(record)
  extract_authors(record).map { |author| author.vernacular }
end
|
218
|
+
|
219
|
+
# Returns the row's authors as name objects with the role 'author'.
#
# @param [CSV::Row] record the row to read the "Author Name(s)" column from
# @return [Array<DS::Extractor::Name>] the extracted authors
def extract_authors(record)
  property = :authors_as_recorded
  extract_names(record, property, 'author')
end
|
226
|
+
|
227
|
+
# Returns the as-recorded form of each artist name in the row.
#
# @param [CSV::Row] record the row to read artists from
# @return [Array<String>] the artist names as recorded
def extract_artists_as_recorded(record)
  extract_artists(record).map { |artist| artist.as_recorded }
end
|
234
|
+
|
235
|
+
# Returns the vernacular form of each artist name in the row.
#
# @param [CSV::Row] record the row to read artists from
# @return [Array<String, nil>] the vernacular forms, +nil+ where a name has none
def extract_artists_as_recorded_agr(record)
  extract_artists(record).map { |artist| artist.vernacular }
end
|
242
|
+
|
243
|
+
# Returns the row's artists as name objects with the role 'artist'.
#
# @param [CSV::Row] record the row to read the "Artist Name(s)" column from
# @return [Array<DS::Extractor::Name>] the extracted artists
def extract_artists(record)
  property = :artists_as_recorded
  extract_names(record, property, 'artist')
end
|
250
|
+
|
251
|
+
# Returns the as-recorded form of each scribe name in the row.
#
# @param [CSV::Row] record the row to read scribes from
# @return [Array<String>] the scribe names as recorded
def extract_scribes_as_recorded(record)
  extract_scribes(record).map { |scribe| scribe.as_recorded }
end
|
258
|
+
|
259
|
+
# Returns the vernacular form of each scribe name in the row.
#
# @param [CSV::Row] record the row to read scribes from
# @return [Array<String, nil>] the vernacular forms, +nil+ where a name has none
def extract_scribes_as_recorded_agr(record)
  extract_scribes(record).map { |scribe| scribe.vernacular }
end
|
266
|
+
|
267
|
+
# Returns the row's scribes as name objects with the role 'scribe'.
#
# @param [CSV::Row] record the row to read the "Scribe Name(s)" column from
# @return [Array<DS::Extractor::Name>] the extracted scribes
def extract_scribes(record)
  property = :scribes_as_recorded
  extract_names(record, property, 'scribe')
end
|
274
|
+
|
275
|
+
# Returns the as-recorded form of each former owner name in the row.
#
# @param [CSV::Row] record the row to read former owners from
# @return [Array<String>] the former owner names as recorded
def extract_former_owners_as_recorded(record)
  extract_former_owners(record).map { |owner| owner.as_recorded }
end
|
282
|
+
|
283
|
+
# Returns the vernacular form of each former owner name in the row.
#
# @param [CSV::Row] record the row to read former owners from
# @return [Array<String, nil>] the vernacular forms, +nil+ where a name has none
def extract_former_owners_as_recorded_agr(record)
  extract_former_owners(record).map { |owner| owner.vernacular }
end
|
290
|
+
|
291
|
+
# Returns the row's former owners as name objects with the role
# 'former owner'.
#
# @param [CSV::Row] record the row to read the "Former Owner Name(s)" column from
# @return [Array<DS::Extractor::Name>] the extracted former owners
def extract_former_owners(record)
  property = :former_owners_as_recorded
  extract_names(record, property, 'former owner')
end
|
298
|
+
|
299
|
+
# Returns the associated agents for the row, which is always an empty list
# for this extractor.
#
# @note Method to fulfill DS::Extractor contract; returns an empty array
#
# @param [CSV::Row] record the row (unused)
# @return [Array<String>] an empty array
def extract_associated_agents(record)
  []
end
|
308
|
+
|
309
|
+
# Returns the as-recorded form of each language in the row.
#
# @param [CSV::Row] record the row to read languages from
# @return [Array<String>] the languages as recorded
def extract_languages_as_recorded(record)
  extract_languages(record).map { |language| language.as_recorded }
end
|
316
|
+
|
317
|
+
# Wraps each value of the row's "Language(s)" column in a language object.
#
# @param [CSV::Row] record the row to read languages from
# @return [Array<DS::Extractor::Language>] one language per recorded value
def extract_languages(record)
  recorded = extract_values_for(property: :languages_as_recorded, record: record)
  recorded.map do |language|
    DS::Extractor::Language.new(as_recorded: language)
  end
end
|
326
|
+
|
327
|
+
# Returns the row's materials, as recorded, joined into one pipe-delimited
# string.
#
# @param [CSV::Row] record the row to read materials from
# @return [String] the as-recorded materials joined with '|'; an empty
#   string when there are none
def extract_material_as_recorded(record)
  recorded = extract_materials(record).map { |material| material.as_recorded }
  recorded.join('|')
end
|
334
|
+
|
335
|
+
# Wraps each value of the row's "Materials Description" column in a
# material object.
#
# @param [CSV::Row] record the row to read materials from
# @return [Array<DS::Extractor::Material>] one material per recorded value
def extract_materials(record)
  recorded = extract_values_for(property: :material_as_recorded, record: record)
  recorded.map do |material|
    DS::Extractor::Material.new(as_recorded: material)
  end
end
|
344
|
+
|
345
|
+
# Returns the as-recorded form of each title in the row.
#
# @param [CSV::Row] record the row to read titles from
# @return [Array<String, nil>] the titles as recorded
def extract_titles_as_recorded(record)
  extract_titles(record).map { |title| title.as_recorded }
end
|
352
|
+
|
353
|
+
# Returns the vernacular form of each title in the row.
#
# @param [CSV::Row] record the row to read titles from
# @return [Array<String, nil>] the vernacular forms, +nil+ where a title has none
def extract_titles_as_recorded_agr(record)
  extract_titles(record).map { |title| title.vernacular }
end
|
360
|
+
|
361
|
+
# Returns the uniform title of each entry in the row's uniform titles.
#
# @param [CSV::Row] record the row to read uniform titles from
# @return [Array<String, nil>] the uniform titles as recorded
def extract_uniform_titles_as_recorded(record)
  extract_uniform_titles(record).map { |title| title.uniform_title }
end
|
368
|
+
|
369
|
+
# Returns the vernacular form of each uniform title in the row.
#
# @param [CSV::Row] record the row to read uniform titles from
# @return [Array<String, nil>] the vernacular uniform titles, +nil+ where
#   an entry has none
def extract_uniform_titles_as_recorded_agr(record)
  extract_uniform_titles(record).map { |title| title.uniform_title_vernacular }
end
|
376
|
+
|
377
|
+
##
# Return titles as an array of DS::Extractor::Title instances, pairing each
# title with the uniform title at the same position.
#
# As recorded and vernacular values share a single column:
#
#     Uniform Title(s)
#     Al-Hajj;;الجزء التاسع
#
# Titles are divided by pipe characters; the as recorded and vernacular
# forms of a title are separated by double semicolons: +;;+.
#
# @param [CSV::Row] record a CSV row with headers
# @return [Array<DS::Extractor::Title>] the titles as a list
# @raise [ArgumentError] if uniform titles are present and their number
#   differs from the number of titles
def extract_titles(record)
  titles   = extract_values_for(property: :titles_as_recorded, record: record)
  uniforms = extract_values_for(property: :uniform_titles_as_recorded, record: record)
  # With no titles at all, use one empty placeholder so a lone uniform title
  # still produces an entry.
  titles.push('') if titles.blank?

  unless balanced_titles?(titles, uniforms)
    raise ArgumentError, "Unbalanced number of titles and uniform titles (titles: #{titles.inspect}, uniform titles: #{uniforms.inspect})"
  end

  titles.each_with_index.map do |title, position|
    recorded, vernacular = title.split(';;', 2)
    # A missing uniform title is nil; to_s turns it into '' which splits to nothing
    uniform, uniform_vernacular = uniforms[position].to_s.split(';;', 2)
    DS::Extractor::Title.new(
      as_recorded: recorded,
      vernacular: vernacular,
      uniform_title: uniform,
      uniform_title_vernacular: uniform_vernacular
    )
  end
end
|
410
|
+
|
411
|
+
# Return true if there are no uniform titles, or if the as recorded and
# uniform title lists have the same number of entries.
#
# @param [Array<String>] as_recorded_titles
# @param [Array<String>] uniform_titles
# @return [Boolean]
def balanced_titles?(as_recorded_titles, uniform_titles)
  uniform_titles.blank? || as_recorded_titles.size == uniform_titles.size
end
|
421
|
+
|
422
|
+
##
# Extracts uniform titles from the given record, independently of the
# Title(s) column.
#
# Note: BaseTerm implementations require +as_recorded+; for DS CSV we
# don't assume that the Title(s) and Uniform Titles(s) are paralleled, so
# they're handled separately and +as_recorded+ is passed as +nil+ here.
#
# @todo: Find out whether we should enforce that Titles and
#     Uniform Titles be evenly paired.
#
# @param [CSV::Row] record the record to extract uniform titles from
# @return [Array<DS::Extractor::Title>] the extracted uniform titles
def extract_uniform_titles(record)
  entries = extract_values_for(property: :uniform_titles_as_recorded, record: record)
  entries.map do |entry|
    # the uniform title and its vernacular form are separated by ';;'
    uniform, uniform_vernacular = entry.to_s.split(';;', 2)
    DS::Extractor::Title.new(
      as_recorded: nil,
      uniform_title: uniform,
      uniform_title_vernacular: uniform_vernacular
    )
  end
end
|
442
|
+
|
443
|
+
##
# Return names as an array of DS::Extractor::Name instances. Name as
# recorded and vernacular values are in single columns:
#
#     Author Name(s)
#     An author;;An author in original script|Another author
#
# Names are divided by pipe characters; the as recorded and vernacular
# forms of a name are separated by double semicolons: +;;+.
#
# @param [CSV::Row] record a CSV row with headers
# @param [Symbol] property a valid property name; e.g., +:artists_as_recorded+
# @param [String] role the name role; e.g., +artist+
# @return [Array<DS::Extractor::Name>] the names as a list
def extract_names(record, property, role)
  entries = extract_values_for(property: property, record: record)
  entries.map do |entry|
    recorded, vernacular = entry.to_s.split(';;', 2)
    DS::Extractor::Name.new(as_recorded: recorded, vernacular: vernacular, role: role)
  end
end
|
464
|
+
|
465
|
+
# Returns the as-recorded form of each production place in the row.
#
# @param [CSV::Row] record the row to read production places from
# @return [Array<String>] the production places as recorded
def extract_production_places_as_recorded(record)
  places = extract_places(record, :production_places_as_recorded)
  places.map { |place| place.as_recorded }
end
|
472
|
+
|
473
|
+
# Wraps each value of the given place property in a place object.
#
# @param [CSV::Row] record the row to read places from
# @param [Symbol] property the place property to read; defaults to
#   +:production_places_as_recorded+
# @return [Array<DS::Extractor::Place>] one place per recorded value
def extract_places(record, property = :production_places_as_recorded)
  recorded = extract_values_for(property: property, record: record)
  recorded.map do |place|
    DS::Extractor::Place.new(as_recorded: place)
  end
end
|
483
|
+
|
484
|
+
# Returns the as-recorded form of each genre in the row.
#
# @param [CSV::Row] record the row to read genres from
# @return [Array<String>] the genres as recorded
def extract_genres_as_recorded(record)
  extract_genres(record).map { |genre| genre.as_recorded }
end
|
491
|
+
|
492
|
+
# Returns the row's "Genre/Form" values as genre terms tagged with the
# 'ds-genre' vocabulary.
#
# @param [CSV::Row] record the row to read genres from
# @return [Array<DS::Extractor::Genre>] the extracted genres
def extract_genres(record)
  extract_terms(record, :genres_as_recorded, DS::Extractor::Genre, vocab: 'ds-genre')
end
|
499
|
+
|
500
|
+
# Returns the as-recorded form of each subject in the row.
#
# @param [CSV::Row] record the row to read subjects from
# @return [Array<String>] the subjects as recorded
def extract_subjects_as_recorded(record)
  extract_subjects(record).map { |subject| subject.as_recorded }
end
|
507
|
+
|
508
|
+
# Returns the as-recorded form of every subject and named subject in the row.
#
# @param [CSV::Row] record the row to read all subjects from
# @return [Array<String>] subjects followed by named subjects, as recorded
def extract_all_subjects_as_recorded(record)
  extract_all_subjects(record).map { |subject| subject.as_recorded }
end
|
515
|
+
|
516
|
+
# Returns the row's subjects followed by its named subjects.
#
# @param [CSV::Row] record the row to read all subjects from
# @return [Array<DS::Extractor::Subject>] subjects, then named subjects
def extract_all_subjects(record)
  topical = extract_subjects(record)
  named   = extract_named_subjects(record)
  topical + named
end
|
523
|
+
|
524
|
+
# Returns the row's "Subject(s)" values as subject terms tagged with the
# 'ds-subject' vocabulary.
#
# @param [CSV::Row] record the row to read subjects from
# @return [Array<DS::Extractor::Subject>] the extracted subjects
def extract_subjects(record)
  extract_terms(record, :subjects_as_recorded, DS::Extractor::Subject, vocab: 'ds-subject')
end
|
531
|
+
|
532
|
+
# Returns the as-recorded form of each named subject in the row.
#
# @param [CSV::Row] record the row to read named subjects from
# @return [Array<String>] the named subjects as recorded
def extract_named_subjects_as_recorded(record)
  extract_named_subjects(record).map { |subject| subject.as_recorded }
end
|
539
|
+
|
540
|
+
# Returns the row's "Named Subject(s)" values as subject terms tagged with
# the 'ds-subject' vocabulary.
#
# @param [CSV::Row] record the row to read named subjects from
# @return [Array<DS::Extractor::Subject>] the extracted named subjects
def extract_named_subjects(record)
  extract_terms(record, :named_subjects_as_recorded, DS::Extractor::Subject, vocab: 'ds-subject')
end
|
547
|
+
|
548
|
+
# Wraps each value of the given property in an instance of +term_type+.
#
# @param [CSV::Row] record the row to read terms from
# @param [Symbol] property the property whose values become terms
# @param [Class] term_type the class to instantiate; it is called with
#   +as_recorded:+ and +vocab:+ keywords
# @param [String, nil] vocab the vocabulary name passed to every term
# @return [Array<term_type>] one term per recorded value
def extract_terms(record, property, term_type, vocab: nil)
  recorded = extract_values_for(property: property, record: record)
  recorded.map do |value|
    term_type.new(as_recorded: value, vocab: vocab)
  end
end
|
559
|
+
|
560
|
+
# Returns every value found in the row's "Acknowledgements" column.
#
# @param [CSV::Row] record the row to read acknowledgments from
# @return [Array<String>] the acknowledgments; empty when the column is empty
def extract_acknowledgments(record)
  extract_values_for(property: :acknowledgments, record: record)
end
|
567
|
+
|
568
|
+
# Returns the array form (+to_a+) of each production place in the row.
#
# @param [CSV::Row] record the row to read production places from
# @return [Array<Array>] one +to_a+ result per production place
def extract_recon_places(record)
  places = extract_places(record, :production_places_as_recorded)
  places.map { |place| place.to_a }
end
|
575
|
+
|
576
|
+
# Returns the array form (+to_a+) of each title in the row.
#
# @param [CSV::Row] record the row to read titles from
# @return [Array<Array>] one +to_a+ result per title
def extract_recon_titles(record)
  extract_titles(record).map { |title| title.to_a }
end
|
583
|
+
|
584
|
+
# Returns the array form (+to_a+) of every subject and named subject in
# the row.
#
# @param [CSV::Row] record the row to read subjects from
# @return [Array<Array>] one +to_a+ result per subject
def extract_recon_subjects(record)
  extract_all_subjects(record).map { |subject| subject.to_a }
end
|
591
|
+
|
592
|
+
# Returns the array form (+to_a+) of each genre in the row.
#
# @param [CSV::Row] record the row to read genres from
# @return [Array<Array>] one +to_a+ result per genre
def extract_recon_genres(record)
  extract_genres(record).map { |genre| genre.to_a }
end
|
599
|
+
|
600
|
+
# Returns the array form (+to_a+) of every name in the row, in the order
# authors, artists, scribes, former owners.
#
# @param [CSV::Row] record the row to read names from
# @return [Array<Array>] one +to_a+ result per name
def extract_recon_names(record)
  roles_by_property = {
    authors_as_recorded: 'author',
    artists_as_recorded: 'artist',
    scribes_as_recorded: 'scribe',
    former_owners_as_recorded: 'former owner'
  }

  roles_by_property.flat_map do |property, role|
    extract_names(record, property, role).map(&:to_a)
  end
end
|
609
|
+
|
610
|
+
# Collects the values of every column mapped to +property+ and flags
# over-long values via mark_long.
#
# @param [Symbol] property the property to extract values for; must be a
#   key of COLUMN_MAPPINGS
# @param [CSV::Row] record the record containing the values
# @return [Array] the extracted values; empty when all mapped columns are empty
# @raise [RuntimeError] if +property+ is not a known property
def extract_values_for(property:, record:)
  raise "Unknown property: #{property}" unless known_property?(property)

  # a property maps to either one header or a list of headers
  headers = Array(COLUMN_MAPPINGS[property.to_sym])
  per_header = headers.filter_map do |header|
    extract_values_for_header(header: header, record: record)
  end
  per_header.flatten.map { |value| mark_long(value) }
end
|
622
|
+
|
623
|
+
# Reads one column of the record, splits it on '|' and strips whitespace
# from each piece.
#
# @param [CSV::Row] record the record containing the values
# @param [String] header the header to extract values for
# @return [Array<String>, nil] the extracted values, or +nil+ when the
#   column is blank
def extract_values_for_header(header:, record:)
  cell = record[header]
  return unless cell.present?

  # a limit of -1 keeps empty values, including trailing ones
  pieces = cell.to_s.split('|', -1)
  pieces.map(&:strip)
end
|
634
|
+
|
635
|
+
# Determines if a method name of the form +extract_<property>+ names a
# known property.
#
# @param [String] method_name the method name to check
# @return [Boolean, nil] true if the method name corresponds to a known
#   property, false if it does not, +nil+ if no property name could be
#   derived from the method name
def maps_to_property?(method_name)
  property = get_property_name(method_name)
  property && known_property?(property)
end
|
644
|
+
|
645
|
+
# Determines if a property is a key of COLUMN_MAPPINGS.
#
# @param [Symbol, String] property the property to check; converted with +to_sym+
# @return [Boolean] true if the property is known, false otherwise
def known_property?(property)
  COLUMN_MAPPINGS.key?(property.to_sym)
end
|
652
|
+
|
653
|
+
# Determines the property name embedded in an extractor method name by
# removing the leading +extract_+.
#
# The pattern is anchored with +\A+ so only names that actually begin with
# +extract_+ qualify; a multi-line string with a later +extract_+ line
# does not.
#
# @param [String, Symbol] method_name the method name to extract the
#   property name from
# @return [String, nil] the extracted property name or nil if the name
#   does not start with +extract_+ followed by a word character
def get_property_name(method_name)
  name = method_name.to_s
  return unless name.match?(/\Aextract_\w+/)

  name.delete_prefix('extract_')
end
|
661
|
+
|
662
|
+
# Builds the list of notes from the note columns of the row.
#
# Values from "Note ..." and "Physical description ..." columns are used
# as is; provenance values are prefixed with "Provenance: "; values from
# every other note column are prefixed with the column header. Over-long
# notes are flagged via mark_long.
#
# @param [CSV::Row] record the row to read notes from
# @return [Array<String>] the extracted notes
def extract_notes(record)
  labelled = COLUMN_MAPPINGS[:notes].flat_map do |header|
    values = extract_values_for_header(header: header, record: record)
    # nothing recorded in this column
    next [] unless values

    if header.match?(/^(Note|Physical description)/i)
      values
    elsif header.match?(/^Provenance/)
      values.map { |value| "Provenance: #{value}" }
    else
      values.map { |value| "#{header}: #{value}" }
    end
  end

  labelled.map { |note| mark_long(note) }
end
|
682
|
+
|
683
|
+
# Flags a value longer than 400 characters by prefixing it with
# LONG_STRING_WARNING; blank and shorter values are returned unchanged.
#
# @param [String, nil] s the value to check
# @return [String, nil] +s+ itself, or "<LONG_STRING_WARNING>: <s>" when
#   +s+ exceeds 400 characters
def mark_long(s)
  too_long = !s.blank? && s.length > 400
  too_long ? "#{LONG_STRING_WARNING}: #{s}" : s
end
|
689
|
+
|
690
|
+
end
|
691
|
+
|
692
|
+
self.extend ClassMethods
|
693
|
+
end
|
694
|
+
end
|
695
|
+
end
|