pennmarc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. checksums.yaml +7 -0
  2. data/.gitignore +6 -0
  3. data/.rspec +2 -0
  4. data/.ruby-version +1 -0
  5. data/Gemfile +23 -0
  6. data/Gemfile.lock +119 -0
  7. data/README.md +82 -0
  8. data/legacy/indexer.rb +568 -0
  9. data/legacy/marc.rb +2964 -0
  10. data/legacy/test_file_output.json +49 -0
  11. data/lib/pennmarc/encoding_level.rb +43 -0
  12. data/lib/pennmarc/enriched_marc.rb +36 -0
  13. data/lib/pennmarc/heading_control.rb +11 -0
  14. data/lib/pennmarc/helpers/citation.rb +31 -0
  15. data/lib/pennmarc/helpers/creator.rb +237 -0
  16. data/lib/pennmarc/helpers/database.rb +89 -0
  17. data/lib/pennmarc/helpers/date.rb +85 -0
  18. data/lib/pennmarc/helpers/edition.rb +90 -0
  19. data/lib/pennmarc/helpers/format.rb +312 -0
  20. data/lib/pennmarc/helpers/genre.rb +71 -0
  21. data/lib/pennmarc/helpers/helper.rb +11 -0
  22. data/lib/pennmarc/helpers/identifier.rb +134 -0
  23. data/lib/pennmarc/helpers/language.rb +37 -0
  24. data/lib/pennmarc/helpers/link.rb +12 -0
  25. data/lib/pennmarc/helpers/location.rb +97 -0
  26. data/lib/pennmarc/helpers/note.rb +132 -0
  27. data/lib/pennmarc/helpers/production.rb +131 -0
  28. data/lib/pennmarc/helpers/relation.rb +135 -0
  29. data/lib/pennmarc/helpers/series.rb +118 -0
  30. data/lib/pennmarc/helpers/subject.rb +304 -0
  31. data/lib/pennmarc/helpers/title.rb +197 -0
  32. data/lib/pennmarc/mappings/language.yml +516 -0
  33. data/lib/pennmarc/mappings/locations.yml +1801 -0
  34. data/lib/pennmarc/mappings/relator.yml +263 -0
  35. data/lib/pennmarc/parser.rb +177 -0
  36. data/lib/pennmarc/util.rb +240 -0
  37. data/lib/pennmarc.rb +6 -0
  38. data/pennmarc.gemspec +22 -0
  39. data/spec/fixtures/marcxml/test.xml +167 -0
  40. data/spec/lib/pennmarc/helpers/citation_spec.rb +27 -0
  41. data/spec/lib/pennmarc/helpers/creator_spec.rb +183 -0
  42. data/spec/lib/pennmarc/helpers/database_spec.rb +60 -0
  43. data/spec/lib/pennmarc/helpers/date_spec.rb +105 -0
  44. data/spec/lib/pennmarc/helpers/edition_spec.rb +38 -0
  45. data/spec/lib/pennmarc/helpers/format_spec.rb +200 -0
  46. data/spec/lib/pennmarc/helpers/genre_spec.rb +89 -0
  47. data/spec/lib/pennmarc/helpers/identifer_spec.rb +105 -0
  48. data/spec/lib/pennmarc/helpers/language_spec.rb +30 -0
  49. data/spec/lib/pennmarc/helpers/location_spec.rb +70 -0
  50. data/spec/lib/pennmarc/helpers/note_spec.rb +233 -0
  51. data/spec/lib/pennmarc/helpers/production_spec.rb +193 -0
  52. data/spec/lib/pennmarc/helpers/relation_spec.rb +120 -0
  53. data/spec/lib/pennmarc/helpers/subject_spec.rb +262 -0
  54. data/spec/lib/pennmarc/helpers/title_spec.rb +169 -0
  55. data/spec/lib/pennmarc/marc_util_spec.rb +206 -0
  56. data/spec/lib/pennmarc/parser_spec.rb +13 -0
  57. data/spec/spec_helper.rb +104 -0
  58. data/spec/support/marc_spec_helpers.rb +84 -0
  59. metadata +171 -0
@@ -0,0 +1,516 @@
1
+ aar: Afar
2
+ abk: Abkhaz
3
+ ace: Achinese
4
+ ach: Acoli
5
+ ada: Adangme
6
+ ady: Adygei
7
+ afa: Afroasiatic (Other)
8
+ afh: Afrihili (Artificial language)
9
+ afr: Afrikaans
10
+ ain: Ainu
11
+ ajm: Aljamía
12
+ aka: Akan
13
+ akk: Akkadian
14
+ alb: Albanian
15
+ ale: Aleut
16
+ alg: Algonquian (Other)
17
+ alt: Altai
18
+ amh: Amharic
19
+ ang: English, Old (ca. 450-1100)
20
+ anp: Angika
21
+ apa: Apache languages
22
+ ara: Arabic
23
+ arc: Aramaic
24
+ arg: Aragonese
25
+ arm: Armenian
26
+ arn: Mapuche
27
+ arp: Arapaho
28
+ art: Artificial (Other)
29
+ arw: Arawak
30
+ asm: Assamese
31
+ ast: Bable
32
+ ath: Athapascan (Other)
33
+ aus: Australian languages
34
+ ava: Avaric
35
+ ave: Avestan
36
+ awa: Awadhi
37
+ aym: Aymara
38
+ aze: Azerbaijani
39
+ bad: Banda languages
40
+ bai: Bamileke languages
41
+ bak: Bashkir
42
+ bal: Baluchi
43
+ bam: Bambara
44
+ ban: Balinese
45
+ baq: Basque
46
+ bas: Basa
47
+ bat: Baltic (Other)
48
+ bej: Beja
49
+ bel: Belarusian
50
+ bem: Bemba
51
+ ben: Bengali
52
+ ber: Berber (Other)
53
+ bho: Bhojpuri
54
+ bih: Bihari (Other)
55
+ bik: Bikol
56
+ bin: Edo
57
+ bis: Bislama
58
+ bla: Siksika
59
+ bnt: Bantu (Other)
60
+ bos: Bosnian
61
+ bra: Braj
62
+ bre: Breton
63
+ btk: Batak
64
+ bua: Buriat
65
+ bug: Bugis
66
+ bul: Bulgarian
67
+ bur: Burmese
68
+ byn: Bilin
69
+ cad: Caddo
70
+ cai: Central American Indian (Other)
71
+ cam: Khmer
72
+ car: Carib
73
+ cat: Catalan
74
+ cau: Caucasian (Other)
75
+ ceb: Cebuano
76
+ cel: Celtic (Other)
77
+ cha: Chamorro
78
+ chb: Chibcha
79
+ che: Chechen
80
+ chg: Chagatai
81
+ chi: Chinese
82
+ chk: Chuukese
83
+ chm: Mari
84
+ chn: Chinook jargon
85
+ cho: Choctaw
86
+ chp: Chipewyan
87
+ chr: Cherokee
88
+ chu: Church Slavic
89
+ chv: Chuvash
90
+ chy: Cheyenne
91
+ cmc: Chamic languages
92
+ cop: Coptic
93
+ cor: Cornish
94
+ cos: Corsican
95
+ cpe: Creoles and Pidgins, English-based (Other)
96
+ cpf: Creoles and Pidgins, French-based (Other)
97
+ cpp: Creoles and Pidgins, Portuguese-based (Other)
98
+ cre: Cree
99
+ crh: Crimean Tatar
100
+ crp: Creoles and Pidgins (Other)
101
+ csb: Kashubian
102
+ cus: Cushitic (Other)
103
+ cze: Czech
104
+ dak: Dakota
105
+ dan: Danish
106
+ dar: Dargwa
107
+ day: Dayak
108
+ del: Delaware
109
+ den: Slavey
110
+ dgr: Dogrib
111
+ din: Dinka
112
+ div: Divehi
113
+ doi: Dogri
114
+ dra: Dravidian (Other)
115
+ dsb: Lower Sorbian
116
+ dua: Duala
117
+ dum: Dutch, Middle (ca. 1050-1350)
118
+ dut: Dutch
119
+ dyu: Dyula
120
+ dzo: Dzongkha
121
+ efi: Efik
122
+ egy: Egyptian
123
+ eka: Ekajuk
124
+ elx: Elamite
125
+ eng: English
126
+ enm: English, Middle (1100-1500)
127
+ epo: Esperanto
128
+ esk: Eskimo languages
129
+ esp: Esperanto
130
+ est: Estonian
131
+ eth: Ethiopic
132
+ ewe: Ewe
133
+ ewo: Ewondo
134
+ fan: Fang
135
+ fao: Faroese
136
+ far: Faroese
137
+ fat: Fanti
138
+ fij: Fijian
139
+ fil: Filipino
140
+ fin: Finnish
141
+ fiu: Finno-Ugrian (Other)
142
+ fon: Fon
143
+ fre: French
144
+ fri: Frisian
145
+ frm: French, Middle (ca. 1300-1600)
146
+ fro: French, Old (ca. 842-1300)
147
+ frr: North Frisian
148
+ frs: East Frisian
149
+ fry: Frisian
150
+ ful: Fula
151
+ fur: Friulian
152
+ gaa: Gã
153
+ gae: Scottish Gaelic
154
+ gag: Galician
155
+ gal: Oromo
156
+ gay: Gayo
157
+ gba: Gbaya
158
+ gem: Germanic (Other)
159
+ geo: Georgian
160
+ ger: German
161
+ gez: Ethiopic
162
+ gil: Gilbertese
163
+ gla: Scottish Gaelic
164
+ gle: Irish
165
+ glg: Galician
166
+ glv: Manx
167
+ gmh: German, Middle High (ca. 1050-1500)
168
+ goh: German, Old High (ca. 750-1050)
169
+ gon: Gondi
170
+ gor: Gorontalo
171
+ got: Gothic
172
+ grb: Grebo
173
+ grc: Greek, Ancient (to 1453)
174
+ gre: Greek, Modern (1453-)
175
+ grn: Guarani
176
+ gsw: Swiss German
177
+ gua: Guarani
178
+ guj: Gujarati
179
+ gwi: Gwich'in
180
+ hai: Haida
181
+ hat: Haitian French Creole
182
+ hau: Hausa
183
+ haw: Hawaiian
184
+ heb: Hebrew
185
+ her: Herero
186
+ hil: Hiligaynon
187
+ him: Western Pahari languages
188
+ hin: Hindi
189
+ hit: Hittite
190
+ hmn: Hmong
191
+ hmo: Hiri Motu
192
+ hrv: Croatian
193
+ hsb: Upper Sorbian
194
+ hun: Hungarian
195
+ hup: Hupa
196
+ iba: Iban
197
+ ibo: Igbo
198
+ ice: Icelandic
199
+ ido: Ido
200
+ iii: Sichuan Yi
201
+ ijo: Ijo
202
+ iku: Inuktitut
203
+ ile: Interlingue
204
+ ilo: Iloko
205
+ ina: Interlingua (International Auxiliary Language Association)
206
+ inc: Indic (Other)
207
+ ind: Indonesian
208
+ ine: Indo-European (Other)
209
+ inh: Ingush
210
+ int: Interlingua (International Auxiliary Language Association)
211
+ ipk: Inupiaq
212
+ ira: Iranian (Other)
213
+ iri: Irish
214
+ iro: Iroquoian (Other)
215
+ ita: Italian
216
+ jav: Javanese
217
+ jbo: Lojban (Artificial language)
218
+ jpn: Japanese
219
+ jpr: Judeo-Persian
220
+ jrb: Judeo-Arabic
221
+ kaa: Kara-Kalpak
222
+ kab: Kabyle
223
+ kac: Kachin
224
+ kal: Kalâtdlisut
225
+ kam: Kamba
226
+ kan: Kannada
227
+ kar: Karen languages
228
+ kas: Kashmiri
229
+ kau: Kanuri
230
+ kaw: Kawi
231
+ kaz: Kazakh
232
+ kbd: Kabardian
233
+ kha: Khasi
234
+ khi: Khoisan (Other)
235
+ khm: Khmer
236
+ kho: Khotanese
237
+ kik: Kikuyu
238
+ kin: Kinyarwanda
239
+ kir: Kyrgyz
240
+ kmb: Kimbundu
241
+ kok: Konkani
242
+ kom: Komi
243
+ kon: Kongo
244
+ kor: Korean
245
+ kos: Kosraean
246
+ kpe: Kpelle
247
+ krc: Karachay-Balkar
248
+ krl: Karelian
249
+ kro: Kru (Other)
250
+ kru: Kurukh
251
+ kua: Kuanyama
252
+ kum: Kumyk
253
+ kur: Kurdish
254
+ kus: Kusaie
255
+ kut: Kootenai
256
+ lad: Ladino
257
+ lah: Lahndā
258
+ lam: Lamba (Zambia and Congo)
259
+ lan: Occitan (post-1500)
260
+ lao: Lao
261
+ lap: Sami
262
+ lat: Latin
263
+ lav: Latvian
264
+ lez: Lezgian
265
+ lim: Limburgish
266
+ lin: Lingala
267
+ lit: Lithuanian
268
+ lol: Mongo-Nkundu
269
+ loz: Lozi
270
+ ltz: Luxembourgish
271
+ lua: Luba-Lulua
272
+ lub: Luba-Katanga
273
+ lug: Ganda
274
+ lui: Luiseño
275
+ lun: Lunda
276
+ luo: Luo (Kenya and Tanzania)
277
+ lus: Lushai
278
+ mac: Macedonian
279
+ mad: Madurese
280
+ mag: Magahi
281
+ mah: Marshallese
282
+ mai: Maithili
283
+ mak: Makasar
284
+ mal: Malayalam
285
+ man: Mandingo
286
+ mao: Maori
287
+ map: Austronesian (Other)
288
+ mar: Marathi
289
+ mas: Maasai
290
+ max: Manx
291
+ may: Malay
292
+ mdf: Moksha
293
+ mdr: Mandar
294
+ men: Mende
295
+ mga: Irish, Middle (ca. 1100-1550)
296
+ mic: Micmac
297
+ min: Minangkabau
298
+ mis: Miscellaneous languages
299
+ mkh: Mon-Khmer (Other)
300
+ mla: Malagasy
301
+ mlg: Malagasy
302
+ mlt: Maltese
303
+ mnc: Manchu
304
+ mni: Manipuri
305
+ mno: Manobo languages
306
+ moh: Mohawk
307
+ mol: Moldavian
308
+ mon: Mongolian
309
+ mos: Mooré
310
+ mul: Multiple languages
311
+ mun: Munda (Other)
312
+ mus: Creek
313
+ mwl: Mirandese
314
+ mwr: Marwari
315
+ myn: Mayan languages
316
+ myv: Erzya
317
+ nah: Nahuatl
318
+ nai: North American Indian (Other)
319
+ nap: Neapolitan Italian
320
+ nau: Nauru
321
+ nav: Navajo
322
+ nbl: Ndebele (South Africa)
323
+ nde: Ndebele (Zimbabwe)
324
+ ndo: Ndonga
325
+ nds: Low German
326
+ nep: Nepali
327
+ new: Newari
328
+ nia: Nias
329
+ nic: Niger-Kordofanian (Other)
330
+ niu: Niuean
331
+ nno: Norwegian (Nynorsk)
332
+ nob: Norwegian (Bokmål)
333
+ nog: Nogai
334
+ non: Old Norse
335
+ nor: Norwegian
336
+ nqo: N'Ko
337
+ nso: Northern Sotho
338
+ nub: Nubian languages
339
+ nwc: Newari, Old
340
+ nya: Nyanja
341
+ nym: Nyamwezi
342
+ nyn: Nyankole
343
+ nyo: Nyoro
344
+ nzi: Nzima
345
+ oci: Occitan (post-1500)
346
+ oji: Ojibwa
347
+ ori: Oriya
348
+ orm: Oromo
349
+ osa: Osage
350
+ oss: Ossetic
351
+ ota: Turkish, Ottoman
352
+ oto: Otomian languages
353
+ paa: Papuan (Other)
354
+ pag: Pangasinan
355
+ pal: Pahlavi
356
+ pam: Pampanga
357
+ pan: Panjabi
358
+ pap: Papiamento
359
+ pau: Palauan
360
+ peo: Old Persian (ca. 600-400 B.C.)
361
+ per: Persian
362
+ phi: Philippine (Other)
363
+ phn: Phoenician
364
+ pli: Pali
365
+ pol: Polish
366
+ pon: Pohnpeian
367
+ por: Portuguese
368
+ pra: Prakrit languages
369
+ pro: Provençal (to 1500)
370
+ pus: Pushto
371
+ que: Quechua
372
+ raj: Rajasthani
373
+ rap: Rapanui
374
+ rar: Rarotongan
375
+ roa: Romance (Other)
376
+ roh: Raeto-Romance
377
+ rom: Romani
378
+ rum: Romanian
379
+ run: Rundi
380
+ rup: Aromanian
381
+ rus: Russian
382
+ sad: Sandawe
383
+ sag: Sango (Ubangi Creole)
384
+ sah: Yakut
385
+ sai: South American Indian (Other)
386
+ sal: Salishan languages
387
+ sam: Samaritan Aramaic
388
+ san: Sanskrit
389
+ sao: Samoan
390
+ sas: Sasak
391
+ sat: Santali
392
+ scc: Serbian
393
+ scn: Sicilian Italian
394
+ sco: Scots
395
+ scr: Croatian
396
+ sel: Selkup
397
+ sem: Semitic (Other)
398
+ sga: Irish, Old (to 1100)
399
+ sgn: Sign languages
400
+ shn: Shan
401
+ sho: Shona
402
+ sid: Sidamo
403
+ sin: Sinhalese
404
+ sio: Siouan (Other)
405
+ sit: Sino-Tibetan (Other)
406
+ sla: Slavic (Other)
407
+ slo: Slovak
408
+ slv: Slovenian
409
+ sma: Southern Sami
410
+ sme: Northern Sami
411
+ smi: Sami
412
+ smj: Lule Sami
413
+ smn: Inari Sami
414
+ smo: Samoan
415
+ sms: Skolt Sami
416
+ sna: Shona
417
+ snd: Sindhi
418
+ snh: Sinhalese
419
+ snk: Soninke
420
+ sog: Sogdian
421
+ som: Somali
422
+ son: Songhai
423
+ sot: Sotho
424
+ spa: Spanish
425
+ srd: Sardinian
426
+ srn: Sranan
427
+ srp: Serbian
428
+ srr: Serer
429
+ ssa: Nilo-Saharan (Other)
430
+ sso: Sotho
431
+ ssw: Swazi
432
+ suk: Sukuma
433
+ sun: Sundanese
434
+ sus: Susu
435
+ sux: Sumerian
436
+ swa: Swahili
437
+ swe: Swedish
438
+ swz: Swazi
439
+ syc: Syriac
440
+ syr: Syriac, Modern
441
+ tag: Tagalog
442
+ tah: Tahitian
443
+ tai: Tai (Other)
444
+ taj: Tajik
445
+ tam: Tamil
446
+ tar: Tatar
447
+ tat: Tatar
448
+ tel: Telugu
449
+ tem: Temne
450
+ ter: Terena
451
+ tet: Tetum
452
+ tgk: Tajik
453
+ tgl: Tagalog
454
+ tha: Thai
455
+ tib: Tibetan
456
+ tig: Tigré
457
+ tir: Tigrinya
458
+ tiv: Tiv
459
+ tkl: Tokelauan
460
+ tlh: Klingon (Artificial language)
461
+ tli: Tlingit
462
+ tmh: Tamashek
463
+ tog: Tonga (Nyasa)
464
+ ton: Tongan
465
+ tpi: Tok Pisin
466
+ tru: Truk
467
+ tsi: Tsimshian
468
+ tsn: Tswana
469
+ tso: Tsonga
470
+ tsw: Tswana
471
+ tuk: Turkmen
472
+ tum: Tumbuka
473
+ tup: Tupi languages
474
+ tur: Turkish
475
+ tut: Altaic (Other)
476
+ tvl: Tuvaluan
477
+ twi: Twi
478
+ tyv: Tuvinian
479
+ udm: Udmurt
480
+ uga: Ugaritic
481
+ uig: Uighur
482
+ ukr: Ukrainian
483
+ umb: Umbundu
484
+ und: Undetermined
485
+ urd: Urdu
486
+ uzb: Uzbek
487
+ vai: Vai
488
+ ven: Venda
489
+ vie: Vietnamese
490
+ vol: Volapük
491
+ vot: Votic
492
+ wak: Wakashan languages
493
+ wal: Wolayta
494
+ war: Waray
495
+ was: Washoe
496
+ wel: Welsh
497
+ wen: Sorbian (Other)
498
+ wln: Walloon
499
+ wol: Wolof
500
+ xal: Oirat
501
+ xho: Xhosa
502
+ yao: Yao (Africa)
503
+ yap: Yapese
504
+ yid: Yiddish
505
+ yor: Yoruba
506
+ ypk: Yupik languages
507
+ zap: Zapotec
508
+ zbl: Blissymbolics
509
+ zen: Zenaga
510
+ zha: Zhuang
511
+ znd: Zande languages
512
+ zul: Zulu
513
+ zun: Zuni
514
+ zxx: No linguistic content
515
+ zza: Zaza
516
+ ___: No linguistic content provided