vaultify 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +203 -0
- data/README.md +29 -0
- data/Rakefile +33 -0
- data/app/assets/config/vaultify_manifest.js +2 -0
- data/app/assets/javascripts/vaultify/aat.js +38 -0
- data/app/assets/javascripts/vaultify/application.js +15 -0
- data/app/assets/javascripts/vaultify/autocomplete.js +79 -0
- data/app/assets/javascripts/vaultify/dcmi.js +16 -0
- data/app/assets/javascripts/vaultify/edtf.js +16 -0
- data/app/assets/javascripts/vaultify/fast.js +26 -0
- data/app/assets/javascripts/vaultify/horsey.js +2917 -0
- data/app/assets/javascripts/vaultify/iso.js +200 -0
- data/app/assets/javascripts/vaultify/vaultify.js.erb +152 -0
- data/app/assets/stylesheets/vaultify/application.css +16 -0
- data/app/assets/stylesheets/vaultify/horsey.css +61 -0
- data/app/assets/stylesheets/vaultify/vaultify.css +81 -0
- data/app/controllers/vaultify/application_controller.rb +5 -0
- data/app/controllers/vaultify/vaultify_controller.rb +370 -0
- data/app/helpers/vaultify/application_helper.rb +4 -0
- data/app/helpers/vaultify/edtf_helper.rb +126 -0
- data/app/jobs/vaultify/application_job.rb +4 -0
- data/app/mailers/vaultify/application_mailer.rb +6 -0
- data/app/models/vaultify/application_record.rb +5 -0
- data/app/views/layouts/vaultify/application.html.erb +16 -0
- data/app/views/vaultify/vaultify/_modal.html.erb +37 -0
- data/app/views/vaultify/vaultify/translate.html.erb +71 -0
- data/app/views/vaultify/vaultify/upload.html.erb +18 -0
- data/config/initializers/session_store.rb +1 -0
- data/config/routes.rb +8 -0
- data/db/migrate/20181127173405_add_sessions_table.rb +12 -0
- data/lib/generators/vaultify/install/install_generator.rb +13 -0
- data/lib/generators/vaultify/install/templates/config/vaultify.yml +12 -0
- data/lib/tasks/vaultify_tasks.rake +4 -0
- data/lib/vaultify.rb +5 -0
- data/lib/vaultify/engine.rb +20 -0
- data/lib/vaultify/version.rb +3 -0
- metadata +121 -0
|
@@ -0,0 +1,370 @@
|
|
|
1
|
+
module Vaultify
|
|
2
|
+
class VaultifyController < ApplicationController
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
require 'csv'
|
|
6
|
+
require 'net/http'
|
|
7
|
+
require 'base64'
|
|
8
|
+
|
|
9
|
+
include EdtfHelper
|
|
10
|
+
|
|
11
|
+
# Builds the controller and caches the engine's configured field list.
#
# @fields is read from Vaultify::Engine.config['fields']. The `translate`
# and `export` actions iterate it and use each entry's first element as a
# CSV column name (`translate` also uses the last element as the prefix of
# the lookup method to call).
def initialize
  super()
  @fields = Vaultify::Engine.config['fields']
end
|
|
15
|
+
|
|
16
|
+
# Upload action. The body is intentionally empty, so Rails falls through to
# its default rendering for this action (the package ships an
# upload.html.erb view).
def upload; end
|
|
19
|
+
|
|
20
|
+
# Parses the uploaded CSV and pre-computes a suggested value for every
# configured field of every row.
#
# Inputs:
#   params['csv']                      - the uploaded file (must respond to #path)
#   Vaultify::Engine.config['mvs']     - multi-value separator used to split cells
#   @fields                            - pairs of [column name, lookup prefix]
#
# Outputs:
#   @csv          - array of row hashes; each looked-up column gains a
#                   "<column>-adjusted" key holding the joined suggestions
#   @total        - number of rows
#   session[:csv] - untouched copy of the parsed rows, consumed by `export`
#
# Responds with 400 when no file was uploaded.
def translate
  @mvs = Vaultify::Engine.config['mvs']

  upload = params['csv']
  # Without this guard a request lacking the file died with NoMethodError
  # on nil.path.
  return head(:bad_request) unless upload.respond_to?(:path)

  # Read and parse the file once; the session gets a shallow per-row copy
  # taken before any "-adjusted" keys are added to @csv.
  @csv = ::CSV.parse(File.read(upload.path), headers: true, encoding: 'utf-8').map(&:to_hash)
  session[:csv] = @csv.map(&:dup)
  @total = @csv.length

  @csv.each do |row|
    @fields.each do |field_row|
      field = field_row.first
      api = field_row.last
      next if row[field].to_s.empty? || api.to_s.empty?

      suggestions = []
      row[field].split(@mvs).each do |value|
        begin
          # Dispatches to fastFirst / aatFirst / isoFirst / dcmiFirst /
          # edtfFirst depending on the configured prefix.
          suggestions << send("#{api}First", value)
        rescue StandardError
          # Best effort: a lookup that raises (network failure, unknown
          # prefix, ...) contributes no suggestion for this value.
          next
        end
      end
      row["#{field}-adjusted"] = suggestions.join(@mvs)
    end
  end
end
|
|
40
|
+
|
|
41
|
+
# Merges the values chosen in the review form into the CSV stored by
# `translate` and returns the result as a CSV download.
#
# Inputs:
#   session[:csv]   - array of row hashes saved by `translate`
#   params[<field>] - for each configured field, a mapping of
#                     row index => { any key => chosen value }
#
# Responds with 400 when the session holds no CSV (for example after the
# session expired), instead of crashing on nil.
def export
  stored = session[:csv]
  return head(:bad_request) if stored.nil? || stored.empty?

  @csv = stored.deep_dup
  # Join with the same configured separator `translate` splits on; '|' is
  # kept as the fallback because it was the previously hard-coded value.
  separator = Vaultify::Engine.config['mvs'] || '|'

  @fields.each do |field_row|
    field = field_row.first
    next unless params.key? field.to_sym

    # NOTE(review): permit! marks every request parameter as permitted.
    # Only the per-field subtree is read here; consider narrowing this.
    params.permit![field.to_sym].each do |csv_row, values|
      row = @csv[csv_row.to_i]
      # Ignore indices that do not address a stored row rather than
      # raising NoMethodError on nil.
      next if row.nil?

      chosen = []
      values.each { |_, value| chosen << value }
      row[field] = chosen.join(separator)
    end
  end

  # Header line comes from the first row's keys, then one line per row.
  body = ::CSV.generate do |out|
    out << @csv.first.keys
    @csv.each { |row| out << row.values }
  end

  respond_to do |format|
    format.csv { render plain: body, content_type: 'text/csv' }
  end
end
|
|
73
|
+
|
|
74
|
+
# Proxies params['query'] to the FAST suggest service (via fastApi) and
# returns the raw response body unchanged for both JSON and text requests.
def fast
  payload = fastApi(params['query'])

  respond_to do |format|
    format.json { render plain: payload }
    format.text { render plain: payload }
  end
end
|
|
82
|
+
|
|
83
|
+
# Proxies params['cleanQuery'] to the Getty AAT service (via aatApi).
# XML and text requests receive the result rendered as plain text; JSON
# requests receive it serialised with to_json.
def aat
  subjects = aatApi(params['cleanQuery'])

  respond_to do |format|
    format.xml  { render plain: subjects }
    format.text { render plain: subjects }
    format.json { render plain: subjects.to_json }
  end
end
|
|
91
|
+
|
|
92
|
+
# Runs params[:query] through convert_date and returns the result as plain
# text. Only the text format is answered.
def edtf
  converted = convert_date(params[:query])

  respond_to do |format|
    format.text { render plain: converted }
  end
end
|
|
98
|
+
|
|
99
|
+
private
|
|
100
|
+
|
|
101
|
+
# Returns the top FAST suggestion for +query+ as "<auth>url:<idroot>", or
# nil when the service returns no usable document.
#
# Returning nil for "no match" mirrors isoFirst / dcmiFirst / edtfFirst;
# previously an empty result list raised NoMethodError on nil.
#
# query       - term to look up
# queryIndex  - FAST index name, forwarded to fastApi
# queryReturn - pre-encoded list of extra return fields, forwarded to fastApi
#
# Raises JSON::ParserError if the service answers with something other
# than JSON.
def fastFirst(query, queryIndex = 'suggestall', queryReturn = '%2Cidroot%2Cauth')
  res = JSON.parse(fastApi(query, queryIndex, queryReturn))

  response = res.is_a?(Hash) ? res['response'] : nil
  docs = response.is_a?(Hash) ? response['docs'] : nil
  top = docs.is_a?(Array) ? docs.first : nil
  return nil unless top.is_a?(Hash) && top['auth'] && top['idroot']

  top['auth'] + 'url:' + top['idroot']
end
|
|
105
|
+
|
|
106
|
+
# Returns the first AAT subject matching +query+ as
# "<preferred_term>url:<subject_id>", or nil when nothing matched.
#
# A literal " AAT" is removed from the query before it is sent.
#
# aatApi returns whatever Hash.from_xml produced for the <subject>
# element(s): an Array when several subjects matched, but a single Hash
# when exactly one did. Both shapes are accepted here; indexing the Hash
# with [0] used to yield nil and crash.
def aatFirst(query)
  res = aatApi(query.gsub(' AAT', ''))

  top = res.is_a?(Array) ? res.first : res
  return nil unless top.is_a?(Hash) && top['preferred_term'] && top['subject_id']

  top['preferred_term'] + 'url:' + top['subject_id']
end
|
|
111
|
+
|
|
112
|
+
# Looks +query+ up in the ISO 639 table returned by isoVar.
#
# A row matches when +query+ equals its alpha3-b code, its alpha2 code or
# its English name exactly (case-sensitive). Returns
# "<English name>url:<alpha2>" for the first matching row, or nil when no
# row matches.
#
# The previous implementation concatenated the Array literal [:alpha2]
# instead of row[:alpha2], so every successful match raised TypeError.
def isoFirst(query)
  match = isoVar.find do |row|
    query == row[:"alpha3-b"] || query == row[:alpha2] || query == row[:English]
  end
  return nil unless match

  match[:English] + 'url:' + match[:alpha2]
end
|
|
135
|
+
|
|
136
|
+
# Looks +query+ up in the DCMI type list returned by dcmiVar.
# Returns "<text>url:<url>" for the first entry whose :text equals +query+
# exactly, or nil when there is none.
def dcmiFirst(query)
  hit = dcmiVar.find { |entry| query == entry[:text] }
  return nil unless hit

  hit[:text] + 'url:' + hit[:url]
end
|
|
142
|
+
|
|
143
|
+
# Converts +query+ with convert_date and returns "<date>url:<date>", or nil
# when convert_date returns a falsy value.
def edtfFirst(query)
  converted = convert_date(query)
  return nil unless converted

  converted + 'url:' + converted
end
|
|
148
|
+
|
|
149
|
+
# Queries the OCLC FAST suggest service and returns the raw response body.
#
# query       - search term; every character other than ASCII letters,
#               digits and whitespace is removed before sending. nil is
#               treated as an empty term.
# queryIndex  - FAST index to search
# queryReturn - already percent-encoded list of extra fields to return; it
#               is appended to queryIndex to form the queryReturn parameter
#
# The term is percent-encoded before it is placed in the URL. Previously
# any term containing whitespace (i.e. most multi-word headings) made
# URI.parse raise URI::InvalidURIError.
def fastApi(query, queryIndex = 'suggestall', queryReturn = '%2Cidroot%2Cauth')
  term = query.to_s.gsub(/[^a-zA-Z\d\s]/, '')
  # encode_www_form_component turns spaces into '+'; use %20 so the term is
  # unambiguous for the remote service.
  encoded = URI.encode_www_form_component(term).gsub('+', '%20')

  url = 'http://fast.oclc.org/searchfast/fastsuggest?' \
        "&query=#{encoded}&queryIndex=#{queryIndex}" \
        "&queryReturn=#{queryIndex + queryReturn}&suggest=autoSubject&rows=20"
  Net::HTTP.get_response(URI.parse(url)).body
end
|
|
154
|
+
|
|
155
|
+
# Queries the Getty AAT web service (AATGetTermMatch) for +query+ and
# returns the parsed <subject> data.
#
# The XML response is stripped of CRLFs, lower-cased and converted with
# Hash.from_xml. The return value is whatever that conversion produced
# under vocabulary -> subject (NOTE(review): typically an Array for several
# matches and a Hash for exactly one — confirm against callers), or nil
# when the response has no such element.
#
# The term is percent-encoded before being placed in the URL; previously a
# term containing a space made URI() raise. nil is treated as an empty term.
def aatApi(query)
  term = URI.encode_www_form_component(query.to_s).gsub('+', '%20')
  url = URI("http://vocabsservices.getty.edu/AATService.asmx/AATGetTermMatch?term=#{term}&logop=&notes=")

  req = Net::HTTP::Get.new(url)
  res = Net::HTTP.start(url.hostname, url.port) { |http| http.request(req) }

  parsed = Hash.from_xml(res.body.gsub("\r\n", '').downcase)
  vocabulary = parsed.is_a?(Hash) ? parsed['vocabulary'] : nil
  vocabulary.is_a?(Hash) ? vocabulary['subject'] : nil
end
|
|
163
|
+
|
|
164
|
+
# Returns the DCMI type list used by dcmiFirst: one hash per type with
# :text and :url keys, both holding the type name.
def dcmiVar
  %w[
    Collection Dataset Event Image InteractiveResource MovingImage
    PhysicalObject Service Software Sound StillImage Text
  ].map { |type_name| { "text": type_name, "url": type_name } }
end
|
|
180
|
+
|
|
181
|
+
# Returns the ISO 639 language table used by isoFirst: an array of hashes
# with the symbol keys :"alpha3-b", :alpha2 and :English.
#
# The rows are kept as a compact text table, one language per line, in the
# order "alpha3-b alpha2 English name"; the English name is everything
# after the second field and may itself contain spaces.
def isoVar
  table = <<-'ROWS'
    aar aa Afar
    abk ab Abkhazian
    afr af Afrikaans
    aka ak Akan
    alb sq Albanian
    amh am Amharic
    ara ar Arabic
    arg an Aragonese
    arm hy Armenian
    asm as Assamese
    ava av Avaric
    ave ae Avestan
    aym ay Aymara
    aze az Azerbaijani
    bak ba Bashkir
    bam bm Bambara
    baq eu Basque
    bel be Belarusian
    ben bn Bengali
    bih bh Bihari languages
    bis bi Bislama
    bos bs Bosnian
    bre br Breton
    bul bg Bulgarian
    bur my Burmese
    cat ca Catalan; Valencian
    cha ch Chamorro
    che ce Chechen
    chi zh Chinese
    chu cu Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic
    chv cv Chuvash
    cor kw Cornish
    cos co Corsican
    cre cr Cree
    cze cs Czech
    dan da Danish
    div dv Divehi; Dhivehi; Maldivian
    dut nl Dutch; Flemish
    dzo dz Dzongkha
    eng en English
    epo eo Esperanto
    est et Estonian
    ewe ee Ewe
    fao fo Faroese
    fij fj Fijian
    fin fi Finnish
    fre fr French
    fry fy Western Frisian
    ful ff Fulah
    geo ka Georgian
    ger de German
    gla gd Gaelic; Scottish Gaelic
    gle ga Irish
    glg gl Galician
    glv gv Manx
    gre el Greek, Modern (1453-)
    grn gn Guarani
    guj gu Gujarati
    hat ht Haitian; Haitian Creole
    hau ha Hausa
    heb he Hebrew
    her hz Herero
    hin hi Hindi
    hmo ho Hiri Motu
    hrv hr Croatian
    hun hu Hungarian
    ibo ig Igbo
    ice is Icelandic
    ido io Ido
    iii ii Sichuan Yi; Nuosu
    iku iu Inuktitut
    ile ie Interlingue; Occidental
    ina ia Interlingua (International Auxiliary Language Association)
    ind id Indonesian
    ipk ik Inupiaq
    ita it Italian
    jav jv Javanese
    jpn ja Japanese
    kal kl Kalaallisut; Greenlandic
    kan kn Kannada
    kas ks Kashmiri
    kau kr Kanuri
    kaz kk Kazakh
    khm km Central Khmer
    kik ki Kikuyu; Gikuyu
    kin rw Kinyarwanda
    kir ky Kirghiz; Kyrgyz
    kom kv Komi
    kon kg Kongo
    kor ko Korean
    kua kj Kuanyama; Kwanyama
    kur ku Kurdish
    lao lo Lao
    lat la Latin
    lav lv Latvian
    lim li Limburgan; Limburger; Limburgish
    lin ln Lingala
    lit lt Lithuanian
    ltz lb Luxembourgish; Letzeburgesch
    lub lu Luba-Katanga
    lug lg Ganda
    mac mk Macedonian
    mah mh Marshallese
    mal ml Malayalam
    mao mi Maori
    mar mr Marathi
    may ms Malay
    mlg mg Malagasy
    mlt mt Maltese
    mon mn Mongolian
    nau na Nauru
    nav nv Navajo; Navaho
    nbl nr Ndebele, South; South Ndebele
    nde nd Ndebele, North; North Ndebele
    ndo ng Ndonga
    nep ne Nepali
    nno nn Norwegian Nynorsk; Nynorsk, Norwegian
    nob nb Bokmål, Norwegian; Norwegian Bokmål
    nor no Norwegian
    nya ny Chichewa; Chewa; Nyanja
    oci oc Occitan (post 1500); Provençal
    oji oj Ojibwa
    ori or Oriya
    orm om Oromo
    oss os Ossetian; Ossetic
    pan pa Panjabi; Punjabi
    per fa Persian
    pli pi Pali
    pol pl Polish
    por pt Portuguese
    pus ps Pushto; Pashto
    que qu Quechua
    roh rm Romansh
    rum ro Romanian; Moldavian; Moldovan
    run rn Rundi
    rus ru Russian
    sag sg Sango
    san sa Sanskrit
    sin si Sinhala; Sinhalese
    slo sk Slovak
    slv sl Slovenian
    sme se Northern Sami
    smo sm Samoan
    sna sn Shona
    snd sd Sindhi
    som so Somali
    sot st Sotho, Southern
    spa es Spanish; Castilian
    srd sc Sardinian
    srp sr Serbian
    ssw ss Swati
    sun su Sundanese
    swa sw Swahili
    swe sv Swedish
    tah ty Tahitian
    tam ta Tamil
    tat tt Tatar
    tel te Telugu
    tgk tg Tajik
    tgl tl Tagalog
    tha th Thai
    tib bo Tibetan
    tir ti Tigrinya
    ton to Tonga (Tonga Islands)
    tsn tn Tswana
    tso ts Tsonga
    tuk tk Turkmen
    tur tr Turkish
    twi tw Twi
    uig ug Uighur; Uyghur
    ukr uk Ukrainian
    urd ur Urdu
    uzb uz Uzbek
    ven ve Venda
    vie vi Vietnamese
    vol vo Volapük
    wel cy Welsh
    wln wa Walloon
    wol wo Wolof
    xho xh Xhosa
    yid yi Yiddish
    yor yo Yoruba
    zha za Zhuang; Chuang
    zul zu Zulu
  ROWS

  table.each_line.map do |line|
    # Split on the first two runs of whitespace only, so multi-word names
    # stay intact; strip removes the heredoc indentation and the newline.
    alpha3, alpha2, english = line.strip.split(' ', 3)
    { "alpha3-b": alpha3, "alpha2": alpha2, "English": english }
  end
end
|
|
369
|
+
end
|
|
370
|
+
end
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
module Vaultify

  # Best-effort conversion of free-text dates ("5th of May, 2018",
  # "circa 1900", "1900 to 1950", "19uu") into EDTF strings.
  #
  # convert_date is the entry point. The remaining methods are the steps of
  # its pipeline and are applied in this order:
  #
  #   scrub -> clean_words -> machine_clean -> clean_extra_words ->
  #   add_zeroes -> add_separators -> rearrange -> dirty
  #
  # Inside the pipeline "X" stands for an unknown digit and "*" for an open
  # interval end; `dirty` converts both back to the spelling ("u", "open",
  # "unknown") that is handed to Date.edtf.
  module EdtfHelper

    # Lower-cases +str+, replaces the punctuation | , . [ ] ' " with spaces
    # and removes ordinal suffixes that follow a number ("5th" -> "5").
    def scrub(str)
      str.strip.downcase.gsub(/[\|,.\[\]'"]/, ' ').gsub(/(\d+)\s*(st|nd|rd|th)\b/, '\\1')
    end

    # Replaces month names, season names and descriptors in +str+ with the
    # values given by `months`, `seasons` and `descriptors`, in that order.
    # Patterns are not anchored to word boundaries.
    def clean_words(str)
      [months, seasons, descriptors].inject(str.strip.downcase) do |text, table|
        table.inject(text) do |acc, (pattern, replacement)|
          acc.gsub(Regexp.new(pattern), replacement)
        end
      end
    end

    # Converts the human spelling to the internal one: "open" -> "*",
    # "unknown" is removed, a "u" that has a non-digit, non-"u" character on
    # both sides is removed together with those neighbours, and every
    # remaining "u" becomes "X".
    def machine_clean(str)
      clean_input = str.gsub(/open/, '*').gsub(/unknown/, '').gsub(/[^\du]u[^\du]/, '')
      clean_input.gsub(/u/, 'X')
    end

    # Removes every remaining lower-case ASCII letter and collapses runs of
    # spaces into one.
    def clean_extra_words(str)
      str.strip.gsub(/[a-z]/, '').gsub(/ +/, ' ')
    end

    # Pads single digits to two ("5" -> "05"). A group of three digit/X
    # characters followed by another character has that character replaced
    # by "X" ("199?" -> "199X").
    def add_zeroes(str)
      str.strip.gsub(/\b(\d)\b/, '0\\1').gsub(/\b([\dX][\dX][\dX])[^\dX]/, '\\1X')
    end

    # Regexp source for a four-character year with an optional ? or ~ suffix.
    def d4
      '[\\dX][\\dX][\\dX][\\dX][?~]?'
    end

    # Regexp source for a two-character field with an optional ? or ~ suffix.
    def d2
      '[\\dX][\\dX][?~]?'
    end

    # Joins date parts with "-" (from whitespace or "/") and then turns the
    # "-" between two complete dates back into the interval separator "/".
    def add_separators(str)
      str.gsub(/([\dX?~])\s+([\dX?~])/, '\\1-\\2')
         .gsub(' ', '').gsub(/([\dX?~])\/([\dX?~])/, '\\1-\\2')
         .gsub(Regexp.new('(' + d2 + '-' + d4 + ')-(' + d2 + '-' + d4 + ')'), '\\1/\\2')
         .gsub(Regexp.new('(' + d2 + '-' + d4 + ')-(' + d2 + '-' + d2 + '-' + d4 + ')'), '\\1/\\2')
         .gsub(Regexp.new('(' + d4 + ')-(' + d4 + ')'), '\\1/\\2')
         .gsub(Regexp.new('(' + d4 + '-' + d2 + ')-(' + d4 + ')'), '\\1/\\2')
         .gsub(Regexp.new('(' + d4 + '-' + d2 + '-' + d2 + ')-(' + d4 + ')'), '\\1/\\2')
    end

    # Reorders a single date so the four-character year comes first.
    # Three-part dates with the year last have their parts reversed
    # (a-b-YYYY -> YYYY-b-a); dates that are already year-first, or that
    # match none of the known shapes, are returned unchanged.
    #
    # The patterns used to be built with 'g' as the Regexp.new options
    # argument. That is not a Ruby regexp flag: Ruby 3.2+ raises
    # ArgumentError for it and older versions silently switched the match
    # to case-insensitive. It is omitted here.
    def arrange(str)
      if Regexp.new(d4 + '-' + d2 + '-' + d2).match(str)
        str
      elsif Regexp.new(d2 + '-' + d2 + '-' + d4).match(str)
        str.gsub(Regexp.new('(' + d2 + ')-(' + d2 + ')-(' + d4 + ')'), '\\3-\\2-\\1')
      elsif Regexp.new(d2 + '-' + d4 + '-' + d2).match(str)
        str.gsub(Regexp.new('(' + d2 + ')-(' + d4 + ')-(' + d2 + ')'), '\\2-\\1-\\3')
      elsif Regexp.new(d4 + '-' + d2).match(str)
        str
      elsif Regexp.new(d2 + '-' + d4).match(str)
        str.gsub(Regexp.new('(' + d2 + ')-(' + d4 + ')'), '\\2-\\1')
      else
        str
      end
    end

    # Applies `arrange` to a single date or to both ends of an interval.
    # Only the first two "/"-separated parts are used.
    def rearrange(str)
      return arrange(str) unless str.include? '/'

      # Limit -1 keeps a trailing empty field, so "2018-01/" yields
      # ["2018-01", ""] rather than a one-element array whose missing second
      # part used to raise on `+ nil`.
      first, second = str.split('/', -1)
      arrange(first) + '/' + arrange(second)
    end

    # Converts the internal spelling back: a leading or trailing "*" becomes
    # "open", a missing interval end becomes "unknown", and "X" becomes "u".
    def dirty(str)
      clean_input = str.gsub(/^\*|\*$/, 'open').gsub(/^\//, 'unknown/').gsub(/\/$/, '/unknown')
      clean_input.gsub(/X/, 'u')
    end

    # Returns +input+ unchanged when Date.edtf already accepts it; otherwise
    # runs the clean-up pipeline and returns the cleaned string if Date.edtf
    # accepts that, or nil if it still cannot be parsed. nil input yields nil.
    #
    # NOTE(review): Date.edtf is not defined in this file; presumably it
    # comes from the edtf gem loaded elsewhere — confirm.
    def convert_date(input)
      return nil if input.nil?
      return input if Date.edtf(input)

      clean_date = clean_extra_words(machine_clean(clean_words(scrub(input))))
      clean_date = dirty(rearrange(add_separators(add_zeroes(clean_date))))
      Date.edtf(clean_date) ? clean_date : nil
    end

    # Regexp source => two-digit month number.
    def months
      {
        'january|jan' => '01',
        'february|feb' => '02',
        'march|mar' => '03',
        'april|apr' => '04',
        'may' => '05',
        'june|jun' => '06',
        'july|jul' => '07',
        'august|aug' => '08',
        'september|sep' => '09',
        'october|oct' => '10',
        'november|nov' => '11',
        'december|dec' => '12'
      }
    end

    # Regexp source => season code (21-24).
    def seasons
      {
        'spring|spr' => '21',
        'summer|summ|sum' => '22',
        'autumn|aut|fall|fal' => '23',
        'winter|wint|win' => '24'
      }
    end

    # Regexp source => replacement for range words ("to" between two digits
    # becomes "/") and approximation words (replaced by "~").
    def descriptors
      {
        '(\\d.*)to(.*\\d)' => '\\1/\\2',
        '\\s*(around|about|abt|estimated|est|circa|approximately|approx)\\s*' => '~'
      }
    end

  end
end
|