cdmbl 0.12.2 → 0.12.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/cdmbl/field_mapping.rb +144 -0
- data/lib/cdmbl/field_transformer.rb +6 -7
- data/lib/cdmbl/record_transformer.rb +5 -9
- data/lib/cdmbl/transformer.rb +4 -122
- data/lib/cdmbl/version.rb +1 -1
- data/lib/cdmbl.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1899e8b4635f4d9d16ede1fb27c013fddb85d873
|
4
|
+
data.tar.gz: 11e6c86acb88e1da474eee20c9dd99cd2f748460
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 22280016fca6ce7a00462ee9b8614647a1ca916d43c4cf7a0dea8a9e534d7e599b52b4c92cc1ebdcd230b84cd7f65a9f6ebf119f24ac66a892c00969df273962
|
7
|
+
data.tar.gz: f28701b6382f7aa2d17d51262a571751d3f8bf4e04a43c3551bfbe35c5573415777939d5286a8e27d52efd216a1d299242d117daa95619b9488581fedd971207
|
data/lib/cdmbl/field_mapping.rb
ADDED
@@ -0,0 +1,144 @@
|
|
1
|
+
module CDMBL
|
2
|
+
class FieldMapping
|
3
|
+
attr_reader :config
|
4
|
+
def initialize(config: false)
|
5
|
+
@config = config ? symbolize(config) : default_config
|
6
|
+
end
|
7
|
+
|
8
|
+
def origin_path
|
9
|
+
config.fetch(:origin_path)
|
10
|
+
end
|
11
|
+
|
12
|
+
def dest_path
|
13
|
+
config.fetch(:dest_path)
|
14
|
+
end
|
15
|
+
|
16
|
+
def formatters
|
17
|
+
config.fetch(:formatters, [DefaultFormatter]).map do |formatter|
|
18
|
+
formatter.is_a?(String) ? Object.const_get(formatter) : formatter
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
private
|
23
|
+
|
24
|
+
def symbolize(config)
|
25
|
+
config.inject({}) { |memo, (k, v)| memo[k.to_sym] = v; memo }
|
26
|
+
end
|
27
|
+
|
28
|
+
def default_config
|
29
|
+
[
|
30
|
+
{dest_path: 'location_llsi', origin_path: '/', formatters: [LocationFormatter]},
|
31
|
+
{dest_path: 'id', origin_path: 'id', formatters: [StripFormatter, IDFormatter]},
|
32
|
+
{dest_path: 'setspec_ssi', origin_path: '/', formatters: [AddSetSpecFormatter, SetSpecFormatter]},
|
33
|
+
{dest_path: 'collection_name_ssi', origin_path: '/', formatters: [AddSetSpecFormatter, CollectionNameFormatter]},
|
34
|
+
{dest_path: 'collection_name_tei', origin_path: '/', formatters: [AddSetSpecFormatter, CollectionNameFormatter]},
|
35
|
+
{dest_path: 'collection_description_tesi', origin_path: '/', formatters: [AddSetSpecFormatter, CollectionDescriptionFormatter, FilterBadCollections]},
|
36
|
+
{dest_path: 'parent_collection_name_ssi', origin_path: 'par', formatters: [StripFormatter]},
|
37
|
+
{dest_path: 'parent_collection_name_tei', origin_path: 'par', formatters: [StripFormatter]},
|
38
|
+
{dest_path: 'parent_collection_description_tei', origin_path: 'par', formatters: [StripFormatter]},
|
39
|
+
{dest_path: 'title_tesi', origin_path: 'title', formatters: [StripFormatter]},
|
40
|
+
{dest_path: 'title_ssi', origin_path: 'title', formatters: [StripFormatter]},
|
41
|
+
{dest_path: 'title_sort', origin_path: 'title', formatters: [StripFormatter]},
|
42
|
+
{dest_path: 'title_unstem_search', origin_path: 'title', formatters: [StripFormatter]},
|
43
|
+
{dest_path: 'contributor_teim', origin_path: 'contri', formatters: [StripFormatter]},
|
44
|
+
{dest_path: 'contributor_unstem_search', origin_path: 'contri', formatters: [StripFormatter]},
|
45
|
+
{dest_path: 'contributor_ssim', origin_path: 'contri', formatters: [SplitFormatter, StripFormatter]},
|
46
|
+
{dest_path: 'creator_tesi', origin_path: 'photog', formatters: [JoinFormatter, StripFormatter]},
|
47
|
+
{dest_path: 'creator_unstem_search', origin_path: 'photog', formatters: [StripFormatter]},
|
48
|
+
{dest_path: 'creator_ssim', origin_path: 'photog', formatters: [SplitFormatter, StripFormatter]},
|
49
|
+
{dest_path: 'creator_sort', origin_path: 'photog', formatters: [StripFormatter]},
|
50
|
+
{dest_path: 'description_tei', origin_path: 'descri', formatters: [StripFormatter]},
|
51
|
+
{dest_path: 'description_ts', origin_path: 'descri', formatters: [StripFormatter]},
|
52
|
+
{dest_path: 'dat_ssi', origin_path: 'dat', formatters: [StripFormatter]},
|
53
|
+
{dest_path: 'dat_tesi', origin_path: 'dat', formatters: [StripFormatter]},
|
54
|
+
{dest_path: 'dat_sort', origin_path: 'dat', formatters: [StripFormatter]},
|
55
|
+
{dest_path: 'publishing_agency_tei', origin_path: 'publia', formatters: [StripFormatter]},
|
56
|
+
{dest_path: 'publishing_agency_unstem_search', origin_path: 'publia', formatters: [StripFormatter]},
|
57
|
+
{dest_path: 'publishing_agency_ssi', origin_path: 'publia', formatters: [StripFormatter]},
|
58
|
+
{dest_path: 'dimensions_ssi', origin_path: 'dimens', formatters: [StripFormatter]},
|
59
|
+
{dest_path: 'topic_teim', origin_path: 'genera', formatters: [StripFormatter, SplitFormatter, StripFormatter]},
|
60
|
+
{dest_path: 'topic_ssim', origin_path: 'genera', formatters: [Titlieze, StripFormatter, SplitFormatter, StripFormatter]},
|
61
|
+
{dest_path: 'topic_unstem_search', origin_path: 'genera', formatters: [StripSemicolonFormatter, StripFormatter]},
|
62
|
+
{dest_path: 'type_ssi', origin_path: 'type', formatters: [Titlieze, StripSemicolonFormatter, StripFormatter]},
|
63
|
+
{dest_path: 'type_tesi', origin_path: 'type', formatters: [Titlieze, StripSemicolonFormatter, StripFormatter]},
|
64
|
+
{dest_path: 'physical_format_ssi', origin_path: 'physic', formatters: [StripSemicolonFormatter]},
|
65
|
+
{dest_path: 'physical_format_tesi', origin_path: 'physic', formatters: [StripSemicolonFormatter]},
|
66
|
+
{dest_path: 'formal_subject_unstem_search', origin_path: 'specif', formatters: [StripFormatter]},
|
67
|
+
{dest_path: 'formal_subject_ssim', origin_path: 'specif', formatters: [Titlieze, StripFormatter, SplitFormatter, StripFormatter]},
|
68
|
+
{dest_path: 'formal_subject_teim', origin_path: 'specif', formatters: [Titlieze, StripFormatter, SplitFormatter, StripFormatter]},
|
69
|
+
{dest_path: 'subject_unstem_search', origin_path: 'subjec', formatters: [StripFormatter]},
|
70
|
+
{dest_path: 'subject_teim', origin_path: 'subjec', formatters: [StripFormatter, SplitFormatter, StripFormatter]},
|
71
|
+
{dest_path: 'subject_ssim', origin_path: 'subjec', formatters: [StripFormatter, SplitFormatter, StripFormatter]},
|
72
|
+
{dest_path: 'keyword_unstem_search', origin_path: '/', formatters: [KeywordFormatter, Titlieze, UniqueFormatter, StripFormatter]},
|
73
|
+
{dest_path: 'keyword_tesi', origin_path: '/', formatters: [KeywordFormatter, Titlieze, UniqueFormatter, JoinFormatter, StripFormatter]},
|
74
|
+
{dest_path: 'keyword_ssim', origin_path: '/', formatters: [KeywordFormatter, Titlieze, UniqueFormatter, StripFormatter]},
|
75
|
+
{dest_path: 'city_ssim', origin_path: 'city', formatters: [StripFormatter, SplitFormatter, StripFormatter]},
|
76
|
+
{dest_path: 'city_unstem_search', origin_path: 'city', formatters: [StripFormatter]},
|
77
|
+
{dest_path: 'district_ssi', origin_path: 'distri', formatters: [StripFormatter]},
|
78
|
+
{dest_path: 'district_unstem_search', origin_path: 'distri', formatters: [StripFormatter]},
|
79
|
+
{dest_path: 'county_ssim', origin_path: 'county', formatters: [Titlieze, StripFormatter, SplitFormatter, StripFormatter]},
|
80
|
+
{dest_path: 'county_unstem_search', origin_path: 'county', formatters: [StripFormatter]},
|
81
|
+
{dest_path: 'state_ssi', origin_path: 'state', formatters: [StripFormatter]},
|
82
|
+
{dest_path: 'state_unstem_search', origin_path: 'state', formatters: [StripFormatter]},
|
83
|
+
{dest_path: 'country_ssi', origin_path: 'countr', formatters: [StripFormatter]},
|
84
|
+
{dest_path: 'country_unstem_search', origin_path: 'countr', formatters: [StripFormatter]},
|
85
|
+
{dest_path: 'language_ssi', origin_path: 'langua', formatters: [StripFormatter]},
|
86
|
+
{dest_path: 'language_unstem_search', origin_path: 'langua', formatters: [StripFormatter]},
|
87
|
+
{dest_path: 'contributing_unstem_search', origin_path: 'contra', formatters: [StripFormatter]},
|
88
|
+
{dest_path: 'contributing_organization_tesi', origin_path: 'contra', formatters: [StripFormatter]},
|
89
|
+
{dest_path: 'contributing_organization_ssi', origin_path: 'contra', formatters: [Titlieze, StripFormatter]},
|
90
|
+
{dest_path: 'contact_information_ssi', origin_path: 'contac', formatters: [StripFormatter]},
|
91
|
+
{dest_path: 'rights_ssi', origin_path: 'righta', formatters: [StripFormatter]},
|
92
|
+
{dest_path: 'local_identifier_ssi', origin_path: 'identi', formatters: [StripFormatter]},
|
93
|
+
{dest_path: 'identifier_ssi', origin_path: 'resour', formatters: [StripFormatter]},
|
94
|
+
{dest_path: 'project_ssi', origin_path: 'projec', formatters: [StripFormatter]},
|
95
|
+
{dest_path: 'fiscal_sponsor_ssi', origin_path: 'fiscal', formatters: [StripFormatter]},
|
96
|
+
{dest_path: 'publisher_ssi', origin_path: 'publis', formatters: [StripFormatter]},
|
97
|
+
{dest_path: 'date_ssi', origin_path: 'date', formatters: [StripFormatter]},
|
98
|
+
{dest_path: 'format_tesi', origin_path: 'format', formatters: [StripFormatter]},
|
99
|
+
{dest_path: 'digspa_ssi', origin_path: 'digspa'},
|
100
|
+
{dest_path: 'digspb_ssi', origin_path: 'digspb'},
|
101
|
+
{dest_path: 'digspc_ssi', origin_path: 'digspc'},
|
102
|
+
{dest_path: 'digspd_ssi', origin_path: 'digspd'},
|
103
|
+
{dest_path: 'digspe_ssi', origin_path: 'digspe'},
|
104
|
+
{dest_path: 'digspf_ssi', origin_path: 'digspf'},
|
105
|
+
{dest_path: 'digspg_ssi', origin_path: 'digspg'},
|
106
|
+
{dest_path: 'digsph_ssi', origin_path: 'digsph'},
|
107
|
+
{dest_path: 'digspi_ssi', origin_path: 'digspi'},
|
108
|
+
{dest_path: 'digspj_ssi', origin_path: 'digspj'},
|
109
|
+
{dest_path: 'digspk_ssi', origin_path: 'digspk'},
|
110
|
+
{dest_path: 'transcription_tesi', origin_path: 'transc', formatters: [StripFormatter]},
|
111
|
+
{dest_path: 'translation_tesi', origin_path: 'transl', formatters: [StripFormatter]},
|
112
|
+
{dest_path: 'fullrs_tesi', origin_path: 'fullrs', formatters: [StripFormatter]},
|
113
|
+
{dest_path: 'find_ssi', origin_path: 'find', formatters: [StripFormatter]},
|
114
|
+
{dest_path: 'dmaccess_ssi', origin_path: 'dmaccess', formatters: [StripFormatter]},
|
115
|
+
{dest_path: 'dmimage_ssi', origin_path: 'dmimage', formatters: [StripFormatter]},
|
116
|
+
{dest_path: 'dmcreated_ssi', origin_path: 'dmcreated', formatters: [StripFormatter]},
|
117
|
+
{dest_path: 'dmmodified_ssi', origin_path: 'dmmodified', formatters: [StripFormatter]},
|
118
|
+
{dest_path: 'dmoclcno_ssi', origin_path: 'dmoclcno', formatters: [StripFormatter]},
|
119
|
+
{dest_path: 'restriction_code_ssi', origin_path: 'restrictionCode', formatters: [StripFormatter]},
|
120
|
+
{dest_path: 'cdmfilesize_ssi', origin_path: 'cdmfilesize', formatters: [StripFormatter]},
|
121
|
+
{dest_path: 'cdmfilesizeformatted_ssi', origin_path: 'cdmfilesizeformatted', formatters: [StripFormatter]},
|
122
|
+
{dest_path: 'cdmprintpdf_is', origin_path: 'cdmprintpdf', formatters: [ToIFormatter]},
|
123
|
+
{dest_path: 'cdmhasocr_is', origin_path: 'cdmhasocr', formatters: [ToIFormatter]},
|
124
|
+
{dest_path: 'cdmisnewspaper_is', origin_path: 'cdmisnewspaper', formatters: [ToIFormatter]},
|
125
|
+
{dest_path: 'image_uri_ssi', origin_path: 'image_uri', formatters: [StripFormatter]},
|
126
|
+
{dest_path: 'record_type_ssi', origin_path: 'record_type', formatters: [StripFormatter]},
|
127
|
+
{dest_path: 'geographic_feature_ssim', origin_path: 'geogra', formatters: [Titlieze, StripFormatter, SplitFormatter, StripFormatter]},
|
128
|
+
{dest_path: 'geographic_feature_teim', origin_path: 'geogra', formatters: [StripFormatter]},
|
129
|
+
{dest_path: 'geographic_feature_unstem_search', origin_path: 'geogra', formatters: [StripFormatter]},
|
130
|
+
{dest_path: 'compound_objects_ts', origin_path: 'page', formatters: [ToJsonFormatter]},
|
131
|
+
{dest_path: 'geonam_ssi', origin_path: 'geonam', formatters: [StripFormatter]},
|
132
|
+
{dest_path: 'kaltura_audio_ssi', origin_path: 'audio', formatters: [StripFormatter]},
|
133
|
+
{dest_path: 'kaltura_audio_playlist_ssi', origin_path: 'audioa', formatters: [StripFormatter]},
|
134
|
+
{dest_path: 'kaltura_video_ssi', origin_path: 'video', formatters: [StripFormatter]},
|
135
|
+
{dest_path: 'kaltura_video_playlist_ssi', origin_path: 'videoa', formatters: [StripFormatter]},
|
136
|
+
{dest_path: 'coordinates_llsi', origin_path: 'geonam', formatters: [GeoNameID, GeoNameIDToJson, GeoNameToLocation]},
|
137
|
+
{dest_path: 'placename_ssim', origin_path: 'geonam', formatters: [GeoNameID, GeoNameIDToJson, GeoNameToPlaceName]},
|
138
|
+
{dest_path: 'placename_unstem_search', origin_path: 'geonam', formatters: [GeoNameID, GeoNameIDToJson, GeoNameToPlaceName]},
|
139
|
+
{dest_path: 'table_ssim', origin_path: 'table', formatters: [StripFormatter, SplitFormatter, StripFormatter]},
|
140
|
+
{dest_path: 'umedia_ssi', origin_path: 'umedia', formatters: [StripFormatter]}
|
141
|
+
]
|
142
|
+
end
|
143
|
+
end
|
144
|
+
end
|
data/lib/cdmbl/field_transformer.rb
CHANGED
@@ -2,20 +2,19 @@ require 'hash_at_path'
|
|
2
2
|
|
3
3
|
module CDMBL
|
4
4
|
class FieldTransformer
|
5
|
-
|
6
|
-
|
7
|
-
|
5
|
+
extend Forwardable
|
6
|
+
def_delegators :@field_mapping, :origin_path, :dest_path, :formatters
|
7
|
+
attr_reader :field_value, :field_mapping, :formatter_klass
|
8
|
+
def initialize(field_mapping: FieldMapping.new,
|
8
9
|
record: {},
|
9
|
-
formatters: [],
|
10
10
|
formatter_klass: FieldFormatter)
|
11
|
+
@field_mapping = field_mapping
|
11
12
|
@field_value = compact(record.at_path(origin_path))
|
12
|
-
@dest_path = dest_path
|
13
|
-
@formatters = (!formatters.nil?) ? formatters : [DefaultFormatter]
|
14
13
|
@formatter_klass = formatter_klass
|
15
14
|
end
|
16
15
|
|
17
16
|
def reduce
|
18
|
-
(blank?(value)) ? {} : { "#{dest_path}" => value }
|
17
|
+
(blank?(value)) ? {} : { "#{dest_path}" => value }
|
19
18
|
end
|
20
19
|
|
21
20
|
def value
|
data/lib/cdmbl/record_transformer.rb
CHANGED
@@ -10,20 +10,16 @@ module CDMBL
|
|
10
10
|
end
|
11
11
|
|
12
12
|
def transform!
|
13
|
-
field_mappings.inject({}) do |dest_record,
|
14
|
-
dest_record.merge(transform_field(record,
|
13
|
+
field_mappings.inject({}) do |dest_record, field_mapping|
|
14
|
+
dest_record.merge(transform_field(record, field_mapping))
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
18
18
|
private
|
19
19
|
|
20
|
-
def transform_field(record, mapping)
|
21
|
-
field_transformer.new(
|
22
|
-
dest_path: mapping[:dest_path],
|
23
|
-
formatters: mapping[:formatters],
|
20
|
+
def transform_field(record, field_mapping)
|
21
|
+
field_transformer.new(field_mapping: field_mapping,
|
24
22
|
record: record).reduce
|
25
23
|
end
|
26
|
-
|
27
|
-
|
28
24
|
end
|
29
|
-
end
|
25
|
+
end
|
data/lib/cdmbl/transformer.rb
CHANGED
@@ -2,7 +2,6 @@ require 'json'
|
|
2
2
|
require 'titleize'
|
3
3
|
|
4
4
|
module CDMBL
|
5
|
-
|
6
5
|
class Transformer
|
7
6
|
attr_reader :cdm_records,
|
8
7
|
:oai_sets,
|
@@ -18,7 +17,7 @@ module CDMBL
|
|
18
17
|
record.merge('record_type' => 'primary')
|
19
18
|
end
|
20
19
|
@oai_sets = oai_sets
|
21
|
-
@field_mappings =
|
20
|
+
@field_mappings = field_mappings
|
22
21
|
@extract_compounds = extract_compounds
|
23
22
|
@record_transformer = record_transformer
|
24
23
|
end
|
@@ -27,7 +26,6 @@ module CDMBL
|
|
27
26
|
raw_records.map { |record| to_solr(record) }.compact
|
28
27
|
end
|
29
28
|
|
30
|
-
|
31
29
|
private
|
32
30
|
|
33
31
|
def raw_records
|
@@ -48,8 +46,8 @@ module CDMBL
|
|
48
46
|
end.flatten
|
49
47
|
end
|
50
48
|
|
51
|
-
def
|
52
|
-
|
49
|
+
def mappings
|
50
|
+
field_mappings.map { |config| FieldMapping.new(config: config) }
|
53
51
|
end
|
54
52
|
|
55
53
|
def to_solr(record)
|
@@ -59,124 +57,8 @@ module CDMBL
|
|
59
57
|
return nil
|
60
58
|
else
|
61
59
|
record_transformer.new(record: record.merge('oai_sets' => oai_sets),
|
62
|
-
field_mappings:
|
60
|
+
field_mappings: mappings).transform!
|
63
61
|
end
|
64
62
|
end
|
65
|
-
|
66
|
-
def self.default_mappings
|
67
|
-
[
|
68
|
-
{dest_path: 'location_llsi', origin_path: '/', formatters: [LocationFormatter]},
|
69
|
-
{dest_path: 'id', origin_path: 'id', formatters: [StripFormatter, IDFormatter]},
|
70
|
-
{dest_path: 'setspec_ssi', origin_path: '/', formatters: [AddSetSpecFormatter, SetSpecFormatter]},
|
71
|
-
{dest_path: 'collection_name_ssi', origin_path: '/', formatters: [AddSetSpecFormatter, CollectionNameFormatter]},
|
72
|
-
{dest_path: 'collection_name_tei', origin_path: '/', formatters: [AddSetSpecFormatter, CollectionNameFormatter]},
|
73
|
-
{dest_path: 'collection_description_tesi', origin_path: '/', formatters: [AddSetSpecFormatter, CollectionDescriptionFormatter, FilterBadCollections]},
|
74
|
-
{dest_path: 'parent_collection_name_ssi', origin_path: 'par', formatters: [StripFormatter]},
|
75
|
-
{dest_path: 'parent_collection_name_tei', origin_path: 'par', formatters: [StripFormatter]},
|
76
|
-
{dest_path: 'parent_collection_description_tei', origin_path: 'par', formatters: [StripFormatter]},
|
77
|
-
{dest_path: 'title_tesi', origin_path: 'title', formatters: [StripFormatter]},
|
78
|
-
{dest_path: 'title_ssi', origin_path: 'title', formatters: [StripFormatter]},
|
79
|
-
{dest_path: 'title_sort', origin_path: 'title', formatters: [StripFormatter]},
|
80
|
-
{dest_path: 'title_unstem_search', origin_path: 'title', formatters: [StripFormatter]},
|
81
|
-
{dest_path: 'contributor_teim', origin_path: 'contri', formatters: [StripFormatter]},
|
82
|
-
{dest_path: 'contributor_unstem_search', origin_path: 'contri', formatters: [StripFormatter]},
|
83
|
-
{dest_path: 'contributor_ssim', origin_path: 'contri', formatters: [SplitFormatter, StripFormatter]},
|
84
|
-
{dest_path: 'creator_tesi', origin_path: 'photog', formatters: [JoinFormatter, StripFormatter]},
|
85
|
-
{dest_path: 'creator_unstem_search', origin_path: 'photog', formatters: [StripFormatter]},
|
86
|
-
{dest_path: 'creator_ssim', origin_path: 'photog', formatters: [SplitFormatter, StripFormatter]},
|
87
|
-
{dest_path: 'creator_sort', origin_path: 'photog', formatters: [StripFormatter]},
|
88
|
-
{dest_path: 'description_tei', origin_path: 'descri', formatters: [StripFormatter]},
|
89
|
-
{dest_path: 'description_ts', origin_path: 'descri', formatters: [StripFormatter]},
|
90
|
-
{dest_path: 'dat_ssi', origin_path: 'dat', formatters: [StripFormatter]},
|
91
|
-
{dest_path: 'dat_tesi', origin_path: 'dat', formatters: [StripFormatter]},
|
92
|
-
{dest_path: 'dat_sort', origin_path: 'dat', formatters: [StripFormatter]},
|
93
|
-
{dest_path: 'publishing_agency_tei', origin_path: 'publia', formatters: [StripFormatter]},
|
94
|
-
{dest_path: 'publishing_agency_unstem_search', origin_path: 'publia', formatters: [StripFormatter]},
|
95
|
-
{dest_path: 'publishing_agency_ssi', origin_path: 'publia', formatters: [StripFormatter]},
|
96
|
-
{dest_path: 'dimensions_ssi', origin_path: 'dimens', formatters: [StripFormatter]},
|
97
|
-
{dest_path: 'topic_teim', origin_path: 'genera', formatters: [StripFormatter, SplitFormatter, StripFormatter]},
|
98
|
-
{dest_path: 'topic_ssim', origin_path: 'genera', formatters: [Titlieze, StripFormatter, SplitFormatter, StripFormatter]},
|
99
|
-
{dest_path: 'topic_unstem_search', origin_path: 'genera', formatters: [StripSemicolonFormatter, StripFormatter]},
|
100
|
-
{dest_path: 'type_ssi', origin_path: 'type', formatters: [Titlieze, StripSemicolonFormatter, StripFormatter]},
|
101
|
-
{dest_path: 'type_tesi', origin_path: 'type', formatters: [Titlieze, StripSemicolonFormatter, StripFormatter]},
|
102
|
-
{dest_path: 'physical_format_ssi', origin_path: 'physic', formatters: [StripSemicolonFormatter]},
|
103
|
-
{dest_path: 'physical_format_tesi', origin_path: 'physic', formatters: [StripSemicolonFormatter]},
|
104
|
-
{dest_path: 'formal_subject_unstem_search', origin_path: 'specif', formatters: [StripFormatter]},
|
105
|
-
{dest_path: 'formal_subject_ssim', origin_path: 'specif', formatters: [Titlieze, StripFormatter, SplitFormatter, StripFormatter]},
|
106
|
-
{dest_path: 'formal_subject_teim', origin_path: 'specif', formatters: [Titlieze, StripFormatter, SplitFormatter, StripFormatter]},
|
107
|
-
{dest_path: 'subject_unstem_search', origin_path: 'subjec', formatters: [StripFormatter]},
|
108
|
-
{dest_path: 'subject_teim', origin_path: 'subjec', formatters: [StripFormatter, SplitFormatter, StripFormatter]},
|
109
|
-
{dest_path: 'subject_ssim', origin_path: 'subjec', formatters: [StripFormatter, SplitFormatter, StripFormatter]},
|
110
|
-
{dest_path: 'keyword_unstem_search', origin_path: '/', formatters: [KeywordFormatter, Titlieze, UniqueFormatter, StripFormatter]},
|
111
|
-
{dest_path: 'keyword_tesi', origin_path: '/', formatters: [KeywordFormatter, Titlieze, UniqueFormatter, JoinFormatter, StripFormatter]},
|
112
|
-
{dest_path: 'keyword_ssim', origin_path: '/', formatters: [KeywordFormatter, Titlieze, UniqueFormatter, StripFormatter]},
|
113
|
-
{dest_path: 'city_ssim', origin_path: 'city', formatters: [StripFormatter, SplitFormatter, StripFormatter]},
|
114
|
-
{dest_path: 'city_unstem_search', origin_path: 'city', formatters: [StripFormatter]},
|
115
|
-
{dest_path: 'district_ssi', origin_path: 'distri', formatters: [StripFormatter]},
|
116
|
-
{dest_path: 'district_unstem_search', origin_path: 'distri', formatters: [StripFormatter]},
|
117
|
-
{dest_path: 'county_ssim', origin_path: 'county', formatters: [Titlieze, StripFormatter, SplitFormatter, StripFormatter]},
|
118
|
-
{dest_path: 'county_unstem_search', origin_path: 'county', formatters: [StripFormatter]},
|
119
|
-
{dest_path: 'state_ssi', origin_path: 'state', formatters: [StripFormatter]},
|
120
|
-
{dest_path: 'state_unstem_search', origin_path: 'state', formatters: [StripFormatter]},
|
121
|
-
{dest_path: 'country_ssi', origin_path: 'countr', formatters: [StripFormatter]},
|
122
|
-
{dest_path: 'country_unstem_search', origin_path: 'countr', formatters: [StripFormatter]},
|
123
|
-
{dest_path: 'language_ssi', origin_path: 'langua', formatters: [StripFormatter]},
|
124
|
-
{dest_path: 'language_unstem_search', origin_path: 'langua', formatters: [StripFormatter]},
|
125
|
-
{dest_path: 'contributing_unstem_search', origin_path: 'contra', formatters: [StripFormatter]},
|
126
|
-
{dest_path: 'contributing_organization_tesi', origin_path: 'contra', formatters: [StripFormatter]},
|
127
|
-
{dest_path: 'contributing_organization_ssi', origin_path: 'contra', formatters: [Titlieze, StripFormatter]},
|
128
|
-
{dest_path: 'contact_information_ssi', origin_path: 'contac', formatters: [StripFormatter]},
|
129
|
-
{dest_path: 'rights_ssi', origin_path: 'righta', formatters: [StripFormatter]},
|
130
|
-
{dest_path: 'local_identifier_ssi', origin_path: 'identi', formatters: [StripFormatter]},
|
131
|
-
{dest_path: 'identifier_ssi', origin_path: 'resour', formatters: [StripFormatter]},
|
132
|
-
{dest_path: 'project_ssi', origin_path: 'projec', formatters: [StripFormatter]},
|
133
|
-
{dest_path: 'fiscal_sponsor_ssi', origin_path: 'fiscal', formatters: [StripFormatter]},
|
134
|
-
{dest_path: 'publisher_ssi', origin_path: 'publis', formatters: [StripFormatter]},
|
135
|
-
{dest_path: 'date_ssi', origin_path: 'date', formatters: [StripFormatter]},
|
136
|
-
{dest_path: 'format_tesi', origin_path: 'format', formatters: [StripFormatter]},
|
137
|
-
{dest_path: 'digspa_ssi', origin_path: 'digspa'},
|
138
|
-
{dest_path: 'digspb_ssi', origin_path: 'digspb'},
|
139
|
-
{dest_path: 'digspc_ssi', origin_path: 'digspc'},
|
140
|
-
{dest_path: 'digspd_ssi', origin_path: 'digspd'},
|
141
|
-
{dest_path: 'digspe_ssi', origin_path: 'digspe'},
|
142
|
-
{dest_path: 'digspf_ssi', origin_path: 'digspf'},
|
143
|
-
{dest_path: 'digspg_ssi', origin_path: 'digspg'},
|
144
|
-
{dest_path: 'digsph_ssi', origin_path: 'digsph'},
|
145
|
-
{dest_path: 'digspi_ssi', origin_path: 'digspi'},
|
146
|
-
{dest_path: 'digspj_ssi', origin_path: 'digspj'},
|
147
|
-
{dest_path: 'digspk_ssi', origin_path: 'digspk'},
|
148
|
-
{dest_path: 'transcription_tesi', origin_path: 'transc', formatters: [StripFormatter]},
|
149
|
-
{dest_path: 'translation_tesi', origin_path: 'transl', formatters: [StripFormatter]},
|
150
|
-
{dest_path: 'fullrs_tesi', origin_path: 'fullrs', formatters: [StripFormatter]},
|
151
|
-
{dest_path: 'find_ssi', origin_path: 'find', formatters: [StripFormatter]},
|
152
|
-
{dest_path: 'dmaccess_ssi', origin_path: 'dmaccess', formatters: [StripFormatter]},
|
153
|
-
{dest_path: 'dmimage_ssi', origin_path: 'dmimage', formatters: [StripFormatter]},
|
154
|
-
{dest_path: 'dmcreated_ssi', origin_path: 'dmcreated', formatters: [StripFormatter]},
|
155
|
-
{dest_path: 'dmmodified_ssi', origin_path: 'dmmodified', formatters: [StripFormatter]},
|
156
|
-
{dest_path: 'dmoclcno_ssi', origin_path: 'dmoclcno', formatters: [StripFormatter]},
|
157
|
-
{dest_path: 'restriction_code_ssi', origin_path: 'restrictionCode', formatters: [StripFormatter]},
|
158
|
-
{dest_path: 'cdmfilesize_ssi', origin_path: 'cdmfilesize', formatters: [StripFormatter]},
|
159
|
-
{dest_path: 'cdmfilesizeformatted_ssi', origin_path: 'cdmfilesizeformatted', formatters: [StripFormatter]},
|
160
|
-
{dest_path: 'cdmprintpdf_is', origin_path: 'cdmprintpdf', formatters: [ToIFormatter]},
|
161
|
-
{dest_path: 'cdmhasocr_is', origin_path: 'cdmhasocr', formatters: [ToIFormatter]},
|
162
|
-
{dest_path: 'cdmisnewspaper_is', origin_path: 'cdmisnewspaper', formatters: [ToIFormatter]},
|
163
|
-
{dest_path: 'image_uri_ssi', origin_path: 'image_uri', formatters: [StripFormatter]},
|
164
|
-
{dest_path: 'record_type_ssi', origin_path: 'record_type', formatters: [StripFormatter]},
|
165
|
-
{dest_path: 'geographic_feature_ssim', origin_path: 'geogra', formatters: [Titlieze, StripFormatter, SplitFormatter, StripFormatter]},
|
166
|
-
{dest_path: 'geographic_feature_teim', origin_path: 'geogra', formatters: [StripFormatter]},
|
167
|
-
{dest_path: 'geographic_feature_unstem_search', origin_path: 'geogra', formatters: [StripFormatter]},
|
168
|
-
{dest_path: 'compound_objects_ts', origin_path: 'page', formatters: [ToJsonFormatter]},
|
169
|
-
{dest_path: 'geonam_ssi', origin_path: 'geonam', formatters: [StripFormatter]},
|
170
|
-
{dest_path: 'kaltura_audio_ssi', origin_path: 'audio', formatters: [StripFormatter]},
|
171
|
-
{dest_path: 'kaltura_audio_playlist_ssi', origin_path: 'audioa', formatters: [StripFormatter]},
|
172
|
-
{dest_path: 'kaltura_video_ssi', origin_path: 'video', formatters: [StripFormatter]},
|
173
|
-
{dest_path: 'kaltura_video_playlist_ssi', origin_path: 'videoa', formatters: [StripFormatter]},
|
174
|
-
{dest_path: 'coordinates_llsi', origin_path: 'geonam', formatters: [GeoNameID, GeoNameIDToJson, GeoNameToLocation]},
|
175
|
-
{dest_path: 'placename_ssim', origin_path: 'geonam', formatters: [GeoNameID, GeoNameIDToJson, GeoNameToPlaceName]},
|
176
|
-
{dest_path: 'placename_unstem_search', origin_path: 'geonam', formatters: [GeoNameID, GeoNameIDToJson, GeoNameToPlaceName]},
|
177
|
-
{dest_path: 'table_ssim', origin_path: 'table', formatters: [StripFormatter, SplitFormatter, StripFormatter]},
|
178
|
-
{dest_path: 'umedia_ssi', origin_path: 'umedia', formatters: [StripFormatter]}
|
179
|
-
]
|
180
|
-
end
|
181
63
|
end
|
182
64
|
end
|
data/lib/cdmbl/version.rb
CHANGED
data/lib/cdmbl.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cdmbl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.12.2
|
4
|
+
version: 0.12.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- chadfennell
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-05-
|
11
|
+
date: 2018-05-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: hash_at_path
|
@@ -224,6 +224,7 @@ files:
|
|
224
224
|
- lib/cdmbl/extract_worker.rb
|
225
225
|
- lib/cdmbl/extractor.rb
|
226
226
|
- lib/cdmbl/field_formatter.rb
|
227
|
+
- lib/cdmbl/field_mapping.rb
|
227
228
|
- lib/cdmbl/field_transformer.rb
|
228
229
|
- lib/cdmbl/filtered_set_specs.rb
|
229
230
|
- lib/cdmbl/formatters.rb
|